summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--.gitignore3
-rw-r--r--SConstruct228
-rw-r--r--configs/linux-cell6
-rw-r--r--progs/demos/gears.c37
-rw-r--r--src/mesa/Makefile9
-rw-r--r--src/mesa/SConscript436
-rw-r--r--src/mesa/drivers/common/driverfuncs.c2
-rw-r--r--src/mesa/drivers/dri/SConscript48
-rw-r--r--src/mesa/drivers/dri/intel_winsys/SConscript41
-rw-r--r--src/mesa/drivers/dri/intel_winsys/intel_winsys_pipe.c5
-rw-r--r--src/mesa/main/dd.h6
-rw-r--r--src/mesa/main/fbobject.c2
-rw-r--r--src/mesa/main/mipmap.c1
-rw-r--r--src/mesa/main/mipmap.h1
-rw-r--r--src/mesa/main/texstore.c32
-rw-r--r--src/mesa/pipe/README.portability43
-rw-r--r--src/mesa/pipe/SConscript9
-rw-r--r--src/mesa/pipe/cell/common.h101
-rw-r--r--src/mesa/pipe/cell/ppu/Makefile1
-rw-r--r--src/mesa/pipe/cell/ppu/cell_batch.c144
-rw-r--r--src/mesa/pipe/cell/ppu/cell_batch.h9
-rw-r--r--src/mesa/pipe/cell/ppu/cell_clear.c34
-rw-r--r--src/mesa/pipe/cell/ppu/cell_context.c21
-rw-r--r--src/mesa/pipe/cell/ppu/cell_context.h20
-rw-r--r--src/mesa/pipe/cell/ppu/cell_flush.c5
-rw-r--r--src/mesa/pipe/cell/ppu/cell_spu.c4
-rw-r--r--src/mesa/pipe/cell/ppu/cell_state_blend.c16
-rw-r--r--src/mesa/pipe/cell/ppu/cell_state_emit.c53
-rw-r--r--src/mesa/pipe/cell/ppu/cell_state_fs.c4
-rw-r--r--src/mesa/pipe/cell/ppu/cell_state_sampler.c14
-rw-r--r--src/mesa/pipe/cell/ppu/cell_texture.c119
-rw-r--r--src/mesa/pipe/cell/ppu/cell_texture.h11
-rw-r--r--src/mesa/pipe/cell/ppu/cell_vbuf.c136
-rw-r--r--src/mesa/pipe/cell/ppu/cell_vertex_shader.c120
-rw-r--r--src/mesa/pipe/cell/spu/Makefile9
-rw-r--r--src/mesa/pipe/cell/spu/spu_blend.c62
-rw-r--r--src/mesa/pipe/cell/spu/spu_blend.h37
-rw-r--r--src/mesa/pipe/cell/spu/spu_colorpack.h110
-rw-r--r--src/mesa/pipe/cell/spu/spu_exec.c1948
-rw-r--r--src/mesa/pipe/cell/spu/spu_exec.h172
-rw-r--r--src/mesa/pipe/cell/spu/spu_main.c473
-rw-r--r--src/mesa/pipe/cell/spu/spu_main.h70
-rw-r--r--src/mesa/pipe/cell/spu/spu_render.c301
-rw-r--r--src/mesa/pipe/cell/spu/spu_render.h38
-rw-r--r--src/mesa/pipe/cell/spu/spu_texture.c217
-rw-r--r--src/mesa/pipe/cell/spu/spu_texture.h47
-rw-r--r--src/mesa/pipe/cell/spu/spu_tile.c14
-rw-r--r--src/mesa/pipe/cell/spu/spu_tile.h28
-rw-r--r--src/mesa/pipe/cell/spu/spu_tri.c802
-rw-r--r--src/mesa/pipe/cell/spu/spu_tri.h2
-rw-r--r--src/mesa/pipe/cell/spu/spu_util.c165
-rw-r--r--src/mesa/pipe/cell/spu/spu_vertex_fetch.c393
-rw-r--r--src/mesa/pipe/cell/spu/spu_vertex_shader.c231
-rw-r--r--src/mesa/pipe/cell/spu/spu_vertex_shader.h61
-rw-r--r--src/mesa/pipe/cell/spu/spu_ztest.h135
-rw-r--r--src/mesa/pipe/draw/Makefile2
-rw-r--r--src/mesa/pipe/draw/draw_clip.c136
-rw-r--r--src/mesa/pipe/draw/draw_context.c9
-rw-r--r--src/mesa/pipe/draw/draw_prim.c185
-rw-r--r--src/mesa/pipe/draw/draw_private.h16
-rw-r--r--src/mesa/pipe/draw/draw_validate.c10
-rw-r--r--src/mesa/pipe/draw/draw_vbuf.c233
-rw-r--r--src/mesa/pipe/draw/draw_vertex_fetch.c378
-rw-r--r--src/mesa/pipe/draw/draw_vertex_shader.c44
-rw-r--r--src/mesa/pipe/draw/draw_vertex_shader_llvm.c4
-rw-r--r--src/mesa/pipe/draw/draw_vf.c428
-rw-r--r--src/mesa/pipe/draw/draw_vf.h223
-rw-r--r--src/mesa/pipe/draw/draw_vf_generic.c585
-rw-r--r--src/mesa/pipe/draw/draw_vf_sse.c614
-rw-r--r--src/mesa/pipe/failover/fo_context.c2
-rw-r--r--src/mesa/pipe/failover/fo_state.c79
-rw-r--r--src/mesa/pipe/i915simple/SConscript29
-rw-r--r--src/mesa/pipe/i915simple/i915_fpc_translate.c14
-rw-r--r--src/mesa/pipe/i915simple/i915_prim_vbuf.c3
-rw-r--r--src/mesa/pipe/i915simple/i915_state_derived.c2
-rw-r--r--src/mesa/pipe/i915simple/i915_state_emit.c2
-rw-r--r--src/mesa/pipe/i915simple/i915_state_immediate.c2
-rw-r--r--src/mesa/pipe/i915simple/i915_state_sampler.c2
-rw-r--r--src/mesa/pipe/i915simple/i915_texture.c17
-rw-r--r--src/mesa/pipe/i915simple/i915_texture.h5
-rw-r--r--src/mesa/pipe/i965simple/SConscript55
-rw-r--r--src/mesa/pipe/i965simple/brw_cc.c2
-rw-r--r--src/mesa/pipe/i965simple/brw_curbe.c6
-rw-r--r--src/mesa/pipe/i965simple/brw_eu_debug.c10
-rw-r--r--src/mesa/pipe/i965simple/brw_eu_emit.c4
-rw-r--r--src/mesa/pipe/i965simple/brw_sf.c8
-rw-r--r--src/mesa/pipe/i965simple/brw_sf_emit.c14
-rw-r--r--src/mesa/pipe/i965simple/brw_state.c2
-rw-r--r--src/mesa/pipe/i965simple/brw_state_cache.c6
-rw-r--r--src/mesa/pipe/i965simple/brw_state_pool.c4
-rw-r--r--src/mesa/pipe/i965simple/brw_tex_layout.c15
-rw-r--r--src/mesa/pipe/i965simple/brw_tex_layout.h4
-rw-r--r--src/mesa/pipe/i965simple/brw_urb.c6
-rw-r--r--src/mesa/pipe/i965simple/brw_vs_emit.c2
-rw-r--r--src/mesa/pipe/i965simple/brw_wm.c2
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_glsl.c2
-rw-r--r--src/mesa/pipe/i965simple/brw_wm_sampler_state.c2
-rw-r--r--src/mesa/pipe/p_compiler.h3
-rw-r--r--src/mesa/pipe/p_context.h4
-rw-r--r--src/mesa/pipe/p_debug.h86
-rw-r--r--src/mesa/pipe/p_defines.h2
-rw-r--r--src/mesa/pipe/p_format.h3
-rw-r--r--src/mesa/pipe/p_shader_tokens.h2
-rw-r--r--src/mesa/pipe/p_util.h19
-rw-r--r--src/mesa/pipe/pipebuffer/pb_buffer.h4
-rw-r--r--src/mesa/pipe/pipebuffer/pb_buffer_fenced.c8
-rw-r--r--src/mesa/pipe/pipebuffer/pb_buffer_fenced.h2
-rw-r--r--src/mesa/pipe/pipebuffer/pb_buffer_malloc.c7
-rw-r--r--src/mesa/pipe/pipebuffer/pb_bufmgr_fenced.c4
-rw-r--r--src/mesa/pipe/pipebuffer/pb_bufmgr_mm.c27
-rw-r--r--src/mesa/pipe/pipebuffer/pb_bufmgr_pool.c12
-rw-r--r--src/mesa/pipe/softpipe/SConscript42
-rw-r--r--src/mesa/pipe/softpipe/sp_clear.c2
-rw-r--r--src/mesa/pipe/softpipe/sp_prim_setup.c24
-rw-r--r--src/mesa/pipe/softpipe/sp_quad_fs.c19
-rw-r--r--src/mesa/pipe/softpipe/sp_state_blend.c10
-rw-r--r--src/mesa/pipe/softpipe/sp_state_rasterizer.c7
-rw-r--r--src/mesa/pipe/softpipe/sp_state_sampler.c6
-rw-r--r--src/mesa/pipe/softpipe/sp_texture.c33
-rw-r--r--src/mesa/pipe/softpipe/sp_texture.h5
-rw-r--r--src/mesa/pipe/softpipe/sp_tile_cache.c14
-rw-r--r--src/mesa/pipe/tgsi/exec/tgsi_exec.c10
-rwxr-xr-xsrc/mesa/pipe/tgsi/exec/tgsi_sse2.c53
-rw-r--r--src/mesa/pipe/tgsi/util/tgsi_build.c1
-rw-r--r--src/mesa/pipe/tgsi/util/tgsi_dump.c3
-rw-r--r--src/mesa/pipe/tgsi/util/tgsi_parse.c1
-rw-r--r--src/mesa/pipe/tgsi/util/tgsi_util.c1
-rw-r--r--src/mesa/pipe/util/p_debug.c76
-rw-r--r--src/mesa/sources5
-rw-r--r--src/mesa/state_tracker/st_atom_shader.c10
-rw-r--r--src/mesa/state_tracker/st_atom_texture.c20
-rw-r--r--src/mesa/state_tracker/st_cb_clear.c6
-rw-r--r--src/mesa/state_tracker/st_cb_drawpixels.c25
-rw-r--r--src/mesa/state_tracker/st_cb_drawpixels.h4
-rw-r--r--src/mesa/state_tracker/st_cb_texture.c96
-rw-r--r--src/mesa/state_tracker/st_context.c2
-rw-r--r--src/mesa/state_tracker/st_context.h2
-rw-r--r--src/mesa/state_tracker/st_draw.c23
-rw-r--r--src/mesa/state_tracker/st_draw.h3
-rw-r--r--src/mesa/state_tracker/st_extensions.c3
-rw-r--r--src/mesa/state_tracker/st_gen_mipmap.c363
-rw-r--r--src/mesa/state_tracker/st_gen_mipmap.h46
-rw-r--r--src/mesa/state_tracker/st_program.c22
-rw-r--r--src/mesa/state_tracker/st_texture.c61
-rw-r--r--src/mesa/state_tracker/st_texture.h8
-rw-r--r--src/mesa/x86/rtasm/x86sse.c1
146 files changed, 9812 insertions, 1582 deletions
diff --git a/.gitignore b/.gitignore
index 033e6e10bd..b5e59dfc3e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,6 @@ depend
depend.bak
lib
lib64
+.sconsign*
+config.py
+build
diff --git a/SConstruct b/SConstruct
new file mode 100644
index 0000000000..22a4072c93
--- /dev/null
+++ b/SConstruct
@@ -0,0 +1,228 @@
+#######################################################################
+# Top-level SConstruct
+
+import os
+import os.path
+import sys
+
+
+#######################################################################
+# Configuration options
+#
+# For example, invoke scons as
+#
+# scons debug=1 dri=0 machine=x86
+#
+# to set configuration variables. Or you can write those options to a file
+# named config.py:
+#
+# # config.py
+# debug=1
+# dri=0
+# machine='x86'
+#
+# Invoke
+#
+# scons -h
+#
+# to get the full list of options. See scons manpage for more info.
+#
+
+# TODO: auto-detect defaults
+opts = Options('config.py')
+opts.Add(BoolOption('debug', 'build debug version', False))
+opts.Add(BoolOption('dri', 'build dri drivers', False))
+opts.Add(EnumOption('machine', 'use machine-specific assembly code', 'x86',
+ allowed_values=('generic', 'x86', 'x86-64')))
+
+env = Environment(
+ options = opts,
+ ENV = os.environ)
+Help(opts.GenerateHelpText(env))
+
+# for debugging
+#print env.Dump()
+
+if 0:
+ # platform will be typically 'posix' or 'win32'
+ platform = env['PLATFORM']
+else:
+ # platform will be one of 'linux', 'freebsd', 'win32', 'darwin', etc.
+ platform = sys.platform
+ if platform == 'linux2':
+ platform = 'linux'
+
+# replicate options values in local variables
+debug = env['debug']
+dri = env['dri']
+machine = env['machine']
+
+# derived options
+x86 = machine == 'x86'
+gcc = platform in ('posix', 'linux', 'freebsd', 'darwin')
+msvc = platform == 'win32'
+
+Export([
+ 'debug',
+ 'x86',
+ 'dri',
+ 'platform',
+ 'gcc',
+ 'msvc',
+])
+
+
+#######################################################################
+# Environment setup
+#
+# TODO: put the compiler-specific settings in separate files
+# TODO: auto-detect as much as possible
+
+
+# Optimization flags
+if gcc:
+ if debug:
+ env.Append(CFLAGS = '-O0 -g3')
+ env.Append(CXXFLAGS = '-O0 -g3')
+ else:
+ env.Append(CFLAGS = '-O3 -g3')
+ env.Append(CXXFLAGS = '-O3 -g3')
+
+ env.Append(CFLAGS = '-Wall -Wmissing-prototypes -std=c99 -ffast-math -pedantic')
+ env.Append(CXXFLAGS = '-Wall -pedantic')
+
+ # Be nice to Eclipse
+ env.Append(CFLAGS = '-fmessage-length=0')
+ env.Append(CXXFLAGS = '-fmessage-length=0')
+
+
+# Defines
+if debug:
+ env.Append(CPPDEFINES = ['DEBUG'])
+else:
+ env.Append(CPPDEFINES = ['NDEBUG'])
+
+
+# Includes
+env.Append(CPPPATH = [
+ '#/include',
+ '#/src/mesa',
+ '#/src/mesa/main',
+ '#/src/mesa/pipe',
+])
+
+
+# x86 assembly
+if x86:
+ env.Append(CPPDEFINES = [
+ 'USE_X86_ASM',
+ 'USE_MMX_ASM',
+ 'USE_3DNOW_ASM',
+ 'USE_SSE_ASM',
+ ])
+ if gcc:
+ env.Append(CFLAGS = '-m32')
+ env.Append(CXXFLAGS = '-m32')
+
+
+# Posix
+if platform in ('posix', 'linux', 'freebsd', 'darwin'):
+ env.Append(CPPDEFINES = [
+ '_POSIX_SOURCE',
+ ('_POSIX_C_SOURCE', '199309L'),
+ '_SVID_SOURCE',
+ '_BSD_SOURCE',
+ '_GNU_SOURCE',
+
+ 'PTHREADS',
+ 'HAVE_POSIX_MEMALIGN',
+ ])
+ env.Append(CPPPATH = ['/usr/X11R6/include'])
+ env.Append(LIBPATH = ['/usr/X11R6/lib'])
+ env.Append(LIBS = [
+ 'm',
+ 'pthread',
+ 'expat',
+ 'dl',
+ ])
+
+
+# DRI
+if dri:
+ env.ParseConfig('pkg-config --cflags --libs libdrm')
+ env.Append(CPPDEFINES = [
+ ('USE_EXTERNAL_DXTN_LIB', '1'),
+ 'IN_DRI_DRIVER',
+ 'GLX_DIRECT_RENDERING',
+ 'GLX_INDIRECT_RENDERING',
+ ])
+
+# libGL
+if 1:
+ env.Append(LIBS = [
+ 'X11',
+ 'Xext',
+ 'Xxf86vm',
+ 'Xdamage',
+ 'Xfixes',
+ ])
+
+Export('env')
+
+
+#######################################################################
+# Convenience Library Builder
+# based on the stock StaticLibrary and SharedLibrary builders
+
+def createConvenienceLibBuilder(env):
+ """This is a utility function that creates the ConvenienceLibrary
+ Builder in an Environment if it is not there already.
+
+ If it is already there, we return the existing one.
+ """
+
+ try:
+ convenience_lib = env['BUILDERS']['ConvenienceLibrary']
+ except KeyError:
+ action_list = [ Action("$ARCOM", "$ARCOMSTR") ]
+ if env.Detect('ranlib'):
+ ranlib_action = Action("$RANLIBCOM", "$RANLIBCOMSTR")
+ action_list.append(ranlib_action)
+
+ convenience_lib = Builder(action = action_list,
+ emitter = '$LIBEMITTER',
+ prefix = '$LIBPREFIX',
+ suffix = '$LIBSUFFIX',
+ src_suffix = '$SHOBJSUFFIX',
+ src_builder = 'SharedObject')
+ env['BUILDERS']['ConvenienceLibrary'] = convenience_lib
+ env['BUILDERS']['Library'] = convenience_lib
+
+ return convenience_lib
+
+createConvenienceLibBuilder(env)
+
+
+#######################################################################
+# Invoke SConscripts
+
+# Put build output in a separate dir, which depends on the current configuration
+# See also http://www.scons.org/wiki/AdvancedBuildExample
+build_topdir = 'build'
+build_subdir = platform
+if dri:
+ build_subdir += "-dri"
+if x86:
+ build_subdir += "-x86"
+if debug:
+ build_subdir += "-debug"
+build_dir = os.path.join(build_topdir, build_subdir)
+
+# TODO: Build several variants at the same time?
+# http://www.scons.org/wiki/SimultaneousVariantBuilds
+
+SConscript(
+ 'src/mesa/SConscript',
+ build_dir = build_dir,
+ duplicate = 0 # http://www.scons.org/doc/0.97/HTML/scons-user/x2261.html
+)
diff --git a/configs/linux-cell b/configs/linux-cell
index 4f0086cc1f..3d874491e4 100644
--- a/configs/linux-cell
+++ b/configs/linux-cell
@@ -10,11 +10,13 @@ CC = ppu32-gcc
CXX = ppu32-g++
HOST_CC = gcc
+OPT_FLAGS = -g
+
# Cell SDK location
SDK = /opt/ibm/cell-sdk/prototype/sysroot/usr
-CFLAGS = -g -Wall -Winline -fPIC -m32 -mabi=altivec -maltivec -I. -I$(SDK)/include -DGALLIUM_CELL
+CFLAGS = $(OPT_FLAGS) -Wall -Winline -fPIC -m32 -mabi=altivec -maltivec -I. -I$(SDK)/include -DGALLIUM_CELL
CXXFLAGS = $(CFLAGS)
@@ -34,7 +36,7 @@ GL_LIB_DEPS = $(EXTRA_LIB_PATH) -lX11 -lXext -lm -lpthread \
SPU_CC = spu-gcc
-SPU_CFLAGS = -g -W -Wall -Winline -Wmissing-prototypes -Wno-main -I. -I $(SDK)/spu/include -include spu_intrinsics.h -I $(TOP)/src/mesa/
+SPU_CFLAGS = $(OPT_FLAGS) -W -Wall -Winline -Wmissing-prototypes -Wno-main -I. -I $(SDK)/spu/include -include spu_intrinsics.h -I $(TOP)/src/mesa/
SPU_LFLAGS = -L$(SDK)/spu/lib -Wl,-N -lmisc
diff --git a/progs/demos/gears.c b/progs/demos/gears.c
index ab9bc00742..2a9fefefb5 100644
--- a/progs/demos/gears.c
+++ b/progs/demos/gears.c
@@ -27,6 +27,9 @@ static GLint T0 = 0;
static GLint Frames = 0;
static GLint autoexit = 0;
static GLint win = 0;
+static GLboolean Visible = GL_TRUE;
+static GLboolean Animate = GL_TRUE;
+static GLfloat viewDist = 40.0;
/**
@@ -179,6 +182,9 @@ draw(void)
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glPushMatrix();
+
+ glTranslatef(0.0, 0.0, -viewDist);
+
glRotatef(view_rotx, 1.0, 0.0, 0.0);
glRotatef(view_roty, 0.0, 1.0, 0.0);
glRotatef(view_rotz, 0.0, 0.0, 1.0);
@@ -240,6 +246,15 @@ idle(void)
glutPostRedisplay();
}
+static void
+update_idle_func(void)
+{
+ if (Visible && Animate)
+ glutIdleFunc(idle);
+ else
+ glutIdleFunc(NULL);
+}
+
/* change view angle, exit upon ESC */
/* ARGSUSED1 */
static void
@@ -252,6 +267,16 @@ key(unsigned char k, int x, int y)
case 'Z':
view_rotz -= 5.0;
break;
+ case 'd':
+ viewDist += 1.0;
+ break;
+ case 'D':
+ viewDist -= 1.0;
+ break;
+ case 'a':
+ Animate = !Animate;
+ update_idle_func();
+ break;
case 27: /* Escape */
cleanup();
exit(0);
@@ -295,10 +320,8 @@ reshape(int width, int height)
glViewport(0, 0, (GLint) width, (GLint) height);
glMatrixMode(GL_PROJECTION);
glLoadIdentity();
- glFrustum(-1.0, 1.0, -h, h, 5.0, 60.0);
+ glFrustum(-1.0, 1.0, -h, h, 5.0, 200.0);
glMatrixMode(GL_MODELVIEW);
- glLoadIdentity();
- glTranslatef(0.0, 0.0, -40.0);
}
static void
@@ -351,13 +374,12 @@ init(int argc, char *argv[])
}
}
+
static void
visible(int vis)
{
- if (vis == GLUT_VISIBLE)
- glutIdleFunc(idle);
- else
- glutIdleFunc(NULL);
+ Visible = vis;
+ update_idle_func();
}
int main(int argc, char *argv[])
@@ -375,6 +397,7 @@ int main(int argc, char *argv[])
glutKeyboardFunc(key);
glutSpecialFunc(special);
glutVisibilityFunc(visible);
+ update_idle_func();
glutMainLoop();
return 0; /* ANSI C requires main to return int. */
diff --git a/src/mesa/Makefile b/src/mesa/Makefile
index b16d74bf49..720f1b2e02 100644
--- a/src/mesa/Makefile
+++ b/src/mesa/Makefile
@@ -125,24 +125,25 @@ osmesa-only: depend subdirs $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME)
# Make the GL library
$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(STAND_ALONE_OBJECTS) $(PIPE_LIB) $(CELL_LIB) $(CELL_LIB_SPU) $(LLVM_LIB)
@ $(TOP)/bin/mklib -o $(GL_LIB) \
- -linker $(CC) \
+ -linker "$(CC)" \
-major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \
-install $(TOP)/$(LIB_DIR) \
$(MKLIB_OPTIONS) $(STAND_ALONE_OBJECTS) \
$(PIPE_LIB) $(CELL_LIB) $(CELL_LIB_SPU) $(LLVM_LIB) $(GL_LIB_DEPS)
# Make the OSMesa library
-$(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME): $(OSMESA_DRIVER_OBJECTS) $(OSMESA16_OBJECTS)
+$(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME): $(OSMESA_DRIVER_OBJECTS) \
+ $(OSMESA16_OBJECTS) $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME)
@ if [ "${DRIVER_DIRS}" = "osmesa" ] ; then \
$(TOP)/bin/mklib -o $(OSMESA_LIB) \
- -linker $(CC) \
+ -linker "$(CC)" \
-major $(MESA_MAJOR) \
-minor $(MESA_MINOR) -patch $(MESA_TINY) \
-install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \
$(OSMESA_LIB_DEPS) $(OSMESA16_OBJECTS) ; \
else \
$(TOP)/bin/mklib -o $(OSMESA_LIB) \
- -linker $(CC) \
+ -linker "$(CC)" \
-major $(MESA_MAJOR) \
-minor $(MESA_MINOR) -patch $(GL_TINY) \
-install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \
diff --git a/src/mesa/SConscript b/src/mesa/SConscript
new file mode 100644
index 0000000000..faf8c84872
--- /dev/null
+++ b/src/mesa/SConscript
@@ -0,0 +1,436 @@
+#######################################################################
+# SConscript for mesa
+#
+# TODO: Split this into per-module SConscripts
+
+
+Import('*')
+
+
+#######################################################################
+# Core sources
+
+MAIN_SOURCES = [
+ 'main/api_arrayelt.c',
+ 'main/api_loopback.c',
+ 'main/api_noop.c',
+ 'main/api_validate.c',
+ 'main/accum.c',
+ 'main/attrib.c',
+ 'main/arrayobj.c',
+ 'main/blend.c',
+ 'main/bufferobj.c',
+ 'main/buffers.c',
+ 'main/clip.c',
+ 'main/colortab.c',
+ 'main/context.c',
+ 'main/convolve.c',
+ 'main/debug.c',
+ 'main/depth.c',
+ 'main/depthstencil.c',
+ 'main/dlist.c',
+ 'main/drawpix.c',
+ 'main/enable.c',
+ 'main/enums.c',
+ 'main/eval.c',
+ 'main/execmem.c',
+ 'main/extensions.c',
+ 'main/fbobject.c',
+ 'main/feedback.c',
+ 'main/ffvertex_prog.c',
+ 'main/fog.c',
+ 'main/framebuffer.c',
+ 'main/get.c',
+ 'main/getstring.c',
+ 'main/hash.c',
+ 'main/hint.c',
+ 'main/histogram.c',
+ 'main/image.c',
+ 'main/imports.c',
+ 'main/light.c',
+ 'main/lines.c',
+ 'main/matrix.c',
+ 'main/mipmap.c',
+ 'main/mm.c',
+ 'main/pixel.c',
+ 'main/points.c',
+ 'main/polygon.c',
+ 'main/queryobj.c',
+ 'main/rastpos.c',
+ 'main/rbadaptors.c',
+ 'main/renderbuffer.c',
+ 'main/shaders.c',
+ 'main/state.c',
+ 'main/stencil.c',
+ 'main/texcompress.c',
+ 'main/texcompress_s3tc.c',
+ 'main/texcompress_fxt1.c',
+ 'main/texenvprogram.c',
+ 'main/texformat.c',
+ 'main/teximage.c',
+ 'main/texobj.c',
+ 'main/texrender.c',
+ 'main/texstate.c',
+ 'main/texstore.c',
+ 'main/varray.c',
+ 'main/vtxfmt.c',
+]
+
+GLAPI_SOURCES = [
+ 'main/dispatch.c',
+ 'glapi/glapi.c',
+ 'glapi/glthread.c',
+]
+
+MATH_SOURCES = [
+ 'math/m_debug_clip.c',
+ 'math/m_debug_norm.c',
+ 'math/m_debug_xform.c',
+ 'math/m_eval.c',
+ 'math/m_matrix.c',
+ 'math/m_translate.c',
+ 'math/m_vector.c',
+ 'math/m_xform.c',
+]
+
+VBO_SOURCES = [
+ 'vbo/vbo_context.c',
+ 'vbo/vbo_exec.c',
+ 'vbo/vbo_exec_api.c',
+ 'vbo/vbo_exec_array.c',
+ 'vbo/vbo_exec_draw.c',
+ 'vbo/vbo_exec_eval.c',
+ 'vbo/vbo_rebase.c',
+ 'vbo/vbo_split.c',
+ 'vbo/vbo_split_copy.c',
+ 'vbo/vbo_split_inplace.c',
+ 'vbo/vbo_save.c',
+ 'vbo/vbo_save_api.c',
+ 'vbo/vbo_save_draw.c',
+ 'vbo/vbo_save_loopback.c',
+]
+
+VF_SOURCES = [
+ 'vf/vf.c',
+ 'vf/vf_generic.c',
+ 'vf/vf_sse.c',
+]
+
+DRAW_SOURCES = [
+ 'pipe/draw/draw_clip.c',
+ 'pipe/draw/draw_context.c',
+ 'pipe/draw/draw_cull.c',
+ 'pipe/draw/draw_debug.c',
+ 'pipe/draw/draw_flatshade.c',
+ 'pipe/draw/draw_offset.c',
+ 'pipe/draw/draw_prim.c',
+ 'pipe/draw/draw_stipple.c',
+ 'pipe/draw/draw_twoside.c',
+ 'pipe/draw/draw_unfilled.c',
+ 'pipe/draw/draw_validate.c',
+ 'pipe/draw/draw_vbuf.c',
+ 'pipe/draw/draw_vertex.c',
+ 'pipe/draw/draw_vertex_cache.c',
+ 'pipe/draw/draw_vertex_fetch.c',
+ 'pipe/draw/draw_vertex_shader.c',
+ 'pipe/draw/draw_vertex_shader_llvm.c',
+ 'pipe/draw/draw_vf.c',
+ 'pipe/draw/draw_vf_generic.c',
+ 'pipe/draw/draw_vf_sse.c',
+ 'pipe/draw/draw_wide_prims.c',
+]
+
+TGSIEXEC_SOURCES = [
+ 'pipe/tgsi/exec/tgsi_exec.c',
+ 'pipe/tgsi/exec/tgsi_sse2.c',
+]
+
+TGSIUTIL_SOURCES = [
+ 'pipe/tgsi/util/tgsi_build.c',
+ 'pipe/tgsi/util/tgsi_dump.c',
+ 'pipe/tgsi/util/tgsi_parse.c',
+ 'pipe/tgsi/util/tgsi_util.c',
+]
+
+STATECACHE_SOURCES = [
+ 'pipe/cso_cache/cso_hash.c',
+ 'pipe/cso_cache/cso_cache.c',
+]
+
+PIPEUTIL_SOURCES = [
+ 'pipe/util/p_debug.c',
+ 'pipe/util/p_tile.c',
+ 'pipe/util/p_util.c',
+]
+
+STATETRACKER_SOURCES = [
+ 'state_tracker/st_atom.c',
+ 'state_tracker/st_atom_blend.c',
+ 'state_tracker/st_atom_clip.c',
+ 'state_tracker/st_atom_constbuf.c',
+ 'state_tracker/st_atom_depth.c',
+ 'state_tracker/st_atom_framebuffer.c',
+ 'state_tracker/st_atom_pixeltransfer.c',
+ 'state_tracker/st_atom_sampler.c',
+ 'state_tracker/st_atom_scissor.c',
+ 'state_tracker/st_atom_shader.c',
+ 'state_tracker/st_atom_rasterizer.c',
+ 'state_tracker/st_atom_stipple.c',
+ 'state_tracker/st_atom_texture.c',
+ 'state_tracker/st_atom_viewport.c',
+ 'state_tracker/st_cb_accum.c',
+ 'state_tracker/st_cb_bufferobjects.c',
+ 'state_tracker/st_cb_clear.c',
+ 'state_tracker/st_cb_flush.c',
+ 'state_tracker/st_cb_drawpixels.c',
+ 'state_tracker/st_cb_fbo.c',
+ 'state_tracker/st_cb_feedback.c',
+ 'state_tracker/st_cb_program.c',
+ 'state_tracker/st_cb_queryobj.c',
+ 'state_tracker/st_cb_rasterpos.c',
+ 'state_tracker/st_cb_readpixels.c',
+ 'state_tracker/st_cb_strings.c',
+ 'state_tracker/st_cb_texture.c',
+ 'state_tracker/st_cache.c',
+ 'state_tracker/st_context.c',
+ 'state_tracker/st_debug.c',
+ 'state_tracker/st_draw.c',
+ 'state_tracker/st_extensions.c',
+ 'state_tracker/st_format.c',
+ 'state_tracker/st_framebuffer.c',
+ 'state_tracker/st_mesa_to_tgsi.c',
+ 'state_tracker/st_program.c',
+ 'state_tracker/st_texture.c',
+]
+
+SHADER_SOURCES = [
+ 'shader/arbprogparse.c',
+ 'shader/arbprogram.c',
+ 'shader/atifragshader.c',
+ 'shader/grammar/grammar_mesa.c',
+ 'shader/nvfragparse.c',
+ 'shader/nvprogram.c',
+ 'shader/nvvertparse.c',
+ 'shader/program.c',
+ 'shader/prog_cache.c',
+ 'shader/prog_debug.c',
+ 'shader/prog_execute.c',
+ 'shader/prog_instruction.c',
+ 'shader/prog_parameter.c',
+ 'shader/prog_print.c',
+ 'shader/prog_statevars.c',
+ 'shader/programopt.c',
+ 'shader/shader_api.c',
+]
+
+SLANG_SOURCES = [
+ 'shader/slang/slang_builtin.c',
+ 'shader/slang/slang_codegen.c',
+ 'shader/slang/slang_compile.c',
+ 'shader/slang/slang_compile_function.c',
+ 'shader/slang/slang_compile_operation.c',
+ 'shader/slang/slang_compile_struct.c',
+ 'shader/slang/slang_compile_variable.c',
+ 'shader/slang/slang_emit.c',
+ 'shader/slang/slang_ir.c',
+ 'shader/slang/slang_label.c',
+ 'shader/slang/slang_library_noise.c',
+ 'shader/slang/slang_link.c',
+ 'shader/slang/slang_log.c',
+ 'shader/slang/slang_mem.c',
+ 'shader/slang/slang_preprocess.c',
+ 'shader/slang/slang_print.c',
+ 'shader/slang/slang_simplify.c',
+ 'shader/slang/slang_storage.c',
+ 'shader/slang/slang_typeinfo.c',
+ 'shader/slang/slang_vartable.c',
+ 'shader/slang/slang_utility.c',
+]
+
+
+#######################################################################
+# Assembly sources
+
+ASM_C_SOURCES = [
+ 'x86/common_x86.c',
+ 'x86/x86.c',
+ 'x86/3dnow.c',
+ 'x86/sse.c',
+ 'x86/rtasm/x86sse.c',
+ 'sparc/sparc.c',
+ 'ppc/common_ppc.c',
+ 'x86-64/x86-64.c',
+]
+
+X86_SOURCES = [
+ 'x86/common_x86_asm.S',
+ 'x86/x86_xform2.S',
+ 'x86/x86_xform3.S',
+ 'x86/x86_xform4.S',
+ 'x86/x86_cliptest.S',
+ 'x86/mmx_blend.S',
+ 'x86/3dnow_xform1.S',
+ 'x86/3dnow_xform2.S',
+ 'x86/3dnow_xform3.S',
+ 'x86/3dnow_xform4.S',
+ 'x86/3dnow_normal.S',
+ 'x86/sse_xform1.S',
+ 'x86/sse_xform2.S',
+ 'x86/sse_xform3.S',
+ 'x86/sse_xform4.S',
+ 'x86/sse_normal.S',
+ 'x86/read_rgba_span_x86.S',
+]
+
+X86_API = [
+ 'x86/glapi_x86.S',
+]
+
+X86_64_SOURCES = [
+ 'x86-64/xform4.S',
+]
+
+X86_64_API = [
+ 'x86-64/glapi_x86-64.S',
+]
+
+SPARC_SOURCES = [
+ 'sparc/clip.S',
+ 'sparc/norm.S',
+ 'sparc/xform.S',
+]
+
+SPARC_API = [
+ 'sparc/glapi_sparc.S',
+]
+
+if x86:
+ ASM_SOURCES = ASM_C_SOURCES + X86_SOURCES
+ API_SOURCES = X86_API
+else:
+ ASM_SOURCES = []
+ API_SOURCES = []
+
+
+#######################################################################
+# Driver sources
+
+
+X11_DRIVER_SOURCES = [
+ 'pipe/xlib/glxapi.c',
+ 'pipe/xlib/fakeglx.c',
+ 'pipe/xlib/xfonts.c',
+ 'pipe/xlib/xm_api.c',
+ 'pipe/xlib/xm_winsys.c',
+ 'pipe/xlib/xm_winsys_aub.c',
+ 'pipe/xlib/brw_aub.c',
+]
+
+OSMESA_DRIVER_SOURCES = [
+ 'drivers/osmesa/osmesa.c',
+]
+
+GLIDE_DRIVER_SOURCES = [
+ 'drivers/glide/fxapi.c',
+ 'drivers/glide/fxdd.c',
+ 'drivers/glide/fxddspan.c',
+ 'drivers/glide/fxddtex.c',
+ 'drivers/glide/fxsetup.c',
+ 'drivers/glide/fxtexman.c',
+ 'drivers/glide/fxtris.c',
+ 'drivers/glide/fxvb.c',
+ 'drivers/glide/fxglidew.c',
+ 'drivers/glide/fxg.c',
+]
+
+SVGA_DRIVER_SOURCES = [
+ 'drivers/svga/svgamesa.c',
+ 'drivers/svga/svgamesa8.c',
+ 'drivers/svga/svgamesa15.c',
+ 'drivers/svga/svgamesa16.c',
+ 'drivers/svga/svgamesa24.c',
+ 'drivers/svga/svgamesa32.c',
+]
+
+FBDEV_DRIVER_SOURCES = [
+ 'drivers/fbdev/glfbdev.c',
+]
+
+
+### All the core C sources
+
+SOLO_SOURCES = \
+ MAIN_SOURCES + \
+ MATH_SOURCES + \
+ VBO_SOURCES + \
+ VF_SOURCES + \
+ DRAW_SOURCES + \
+ TGSIEXEC_SOURCES + \
+ TGSIUTIL_SOURCES + \
+ PIPEUTIL_SOURCES + \
+ STATECACHE_SOURCES + \
+ STATETRACKER_SOURCES + \
+ SHADER_SOURCES + \
+ ASM_SOURCES + \
+ SLANG_SOURCES
+
+CORE_SOURCES = \
+ GLAPI_SOURCES + API_SOURCES + \
+ SOLO_SOURCES
+
+ALL_SOURCES = \
+ GLAPI_SOURCES + API_SOURCES + \
+ SOLO_SOURCES + \
+ ASM_SOURCES + \
+ X11_DRIVER_SOURCES + \
+ FBDEV_DRIVER_SOURCES + \
+ OSMESA_DRIVER_SOURCES
+
+
+######################################################################
+# Gallium sources
+
+SConscript([
+ 'pipe/SConscript',
+])
+
+
+######################################################################
+# libGL
+
+if not dri:
+ STAND_ALONE_DRIVER_SOURCES = \
+ CORE_SOURCES + \
+ X11_DRIVER_SOURCES
+
+ Import(
+ 'softpipe',
+ 'i915simple',
+ 'i965simple'
+ )
+
+ pipe_drivers = [
+ softpipe,
+ i965simple
+ ]
+
+ env.SharedLibrary(
+ target ='GL',
+ source = STAND_ALONE_DRIVER_SOURCES,
+ LIBS = [softpipe, i965simple] + env['LIBS'],
+ )
+
+
+######################################################################
+# Driver sources
+
+if dri:
+ mesa = env.ConvenienceLibrary(
+ target = 'mesa',
+ source = SOLO_SOURCES,
+ )
+ env.Prepend(LIBS = [mesa])
+
+ SConscript([
+ 'drivers/dri/SConscript',
+ ])
diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c
index 33caf7dae1..b5b383b4e4 100644
--- a/src/mesa/drivers/common/driverfuncs.c
+++ b/src/mesa/drivers/common/driverfuncs.c
@@ -28,6 +28,7 @@
#include "buffers.h"
#include "context.h"
#include "framebuffer.h"
+#include "mipmap.h"
#include "program.h"
#include "prog_execute.h"
#include "queryobj.h"
@@ -99,6 +100,7 @@ _mesa_init_driver_functions(struct dd_function_table *driver)
driver->CopyTexSubImage1D = _swrast_copy_texsubimage1d;
driver->CopyTexSubImage2D = _swrast_copy_texsubimage2d;
driver->CopyTexSubImage3D = _swrast_copy_texsubimage3d;
+ driver->GenerateMipmap = _mesa_generate_mipmap;
driver->TestProxyTexImage = _mesa_test_proxy_teximage;
driver->CompressedTexImage1D = _mesa_store_compressed_teximage1d;
driver->CompressedTexImage2D = _mesa_store_compressed_teximage2d;
diff --git a/src/mesa/drivers/dri/SConscript b/src/mesa/drivers/dri/SConscript
new file mode 100644
index 0000000000..d32bd08669
--- /dev/null
+++ b/src/mesa/drivers/dri/SConscript
@@ -0,0 +1,48 @@
+Import('*')
+
+drienv = env.Clone()
+
+drienv.Replace(CPPPATH = [
+ '#src/mesa/drivers/dri/common',
+ '#include',
+ '#include/GL/internal',
+ '#src/mesa',
+ '#src/mesa/main',
+ '#src/mesa/glapi',
+ '#src/mesa/math',
+ '#src/mesa/transform',
+ '#src/mesa/shader',
+ '#src/mesa/swrast',
+ '#src/mesa/swrast_setup',
+ '#src/egl/main',
+ '#src/egl/drivers/dri',
+])
+
+drienv.ParseConfig('pkg-config --cflags --libs libdrm')
+
+COMMON_GALLIUM_SOURCES = [
+ '../common/utils.c',
+ '../common/vblank.c',
+ '../common/dri_util.c',
+ '../common/xmlconfig.c',
+]
+
+COMMON_BM_SOURCES = [
+ '../common/dri_bufmgr.c',
+ '../common/dri_drmpool.c',
+]
+
+Export([
+ 'drienv',
+ 'COMMON_GALLIUM_SOURCES',
+ 'COMMON_BM_SOURCES',
+])
+
+# TODO: Installation
+#install: $(LIBNAME)
+# $(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR)
+# $(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR)
+
+SConscript([
+ 'intel_winsys/SConscript',
+])
diff --git a/src/mesa/drivers/dri/intel_winsys/SConscript b/src/mesa/drivers/dri/intel_winsys/SConscript
new file mode 100644
index 0000000000..a7cc10450e
--- /dev/null
+++ b/src/mesa/drivers/dri/intel_winsys/SConscript
@@ -0,0 +1,41 @@
+Import('*')
+
+env = drienv.Clone()
+
+env.Append(CPPPATH = [
+ '../intel',
+ 'server'
+])
+
+#MINIGLX_SOURCES = server/intel_dri.c
+
+pipe_drivers = [
+ softpipe,
+ i915simple
+]
+
+DRIVER_SOURCES = [
+ 'intel_winsys_pipe.c',
+ 'intel_winsys_softpipe.c',
+ 'intel_winsys_i915.c',
+ 'intel_batchbuffer.c',
+ 'intel_swapbuffers.c',
+ 'intel_context.c',
+ 'intel_lock.c',
+ 'intel_screen.c',
+ 'intel_batchpool.c',
+]
+
+sources = \
+ COMMON_GALLIUM_SOURCES + \
+ COMMON_BM_SOURCES + \
+ DRIVER_SOURCES
+
+# DRIVER_DEFINES = -I../intel $(shell pkg-config libdrm --atleast-version=2.3.1 \
+# && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP")
+
+env.SharedLibrary(
+ target ='i915tex_dri.so',
+ source = sources,
+ LIBS = pipe_drivers + env['LIBS'],
+)
\ No newline at end of file
diff --git a/src/mesa/drivers/dri/intel_winsys/intel_winsys_pipe.c b/src/mesa/drivers/dri/intel_winsys/intel_winsys_pipe.c
index 910c0d2cc5..789a386500 100644
--- a/src/mesa/drivers/dri/intel_winsys/intel_winsys_pipe.c
+++ b/src/mesa/drivers/dri/intel_winsys/intel_winsys_pipe.c
@@ -224,11 +224,6 @@ intel_i915_surface_alloc_storage(struct pipe_winsys *winsys,
if(!surf->buffer)
return -1;
- if(ret) {
- pipe_buffer_reference(winsys, &surf->buffer, NULL);
- return ret;
- }
-
return 0;
}
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 3bec3bd433..37ef2a865b 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -333,6 +333,12 @@ struct dd_function_table {
GLsizei width, GLsizei height );
/**
+ * Called by glGenerateMipmap() or when GL_GENERATE_MIPMAP_SGIS is enabled.
+ */
+ void (*GenerateMipmap)(GLcontext *ctx, GLenum target,
+ struct gl_texture_object *texObj);
+
+ /**
* Called by glTexImage[123]D when user specifies a proxy texture
* target.
*
diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c
index 963e35d678..6a8cba4d8a 100644
--- a/src/mesa/main/fbobject.c
+++ b/src/mesa/main/fbobject.c
@@ -1560,7 +1560,7 @@ _mesa_GenerateMipmapEXT(GLenum target)
/* XXX this might not handle cube maps correctly */
_mesa_lock_texture(ctx, texObj);
- _mesa_generate_mipmap(ctx, target, texUnit, texObj);
+ ctx->Driver.GenerateMipmap(ctx, target, texObj);
_mesa_unlock_texture(ctx, texObj);
}
diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c
index 9f3db22b75..1e61829e8f 100644
--- a/src/mesa/main/mipmap.c
+++ b/src/mesa/main/mipmap.c
@@ -933,7 +933,6 @@ make_2d_stack_mipmap(const struct gl_texture_format *format, GLint border,
*/
void
_mesa_generate_mipmap(GLcontext *ctx, GLenum target,
- const struct gl_texture_unit *texUnit,
struct gl_texture_object *texObj)
{
const struct gl_texture_image *srcImage;
diff --git a/src/mesa/main/mipmap.h b/src/mesa/main/mipmap.h
index df78603283..46e16902c8 100644
--- a/src/mesa/main/mipmap.h
+++ b/src/mesa/main/mipmap.h
@@ -30,7 +30,6 @@
extern void
_mesa_generate_mipmap(GLcontext *ctx, GLenum target,
- const struct gl_texture_unit *texUnit,
struct gl_texture_object *texObj);
diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index 30be65525e..a6a18910fc 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -2917,9 +2917,7 @@ _mesa_store_teximage1d(GLcontext *ctx, GLenum target, GLint level,
/* GL_SGIS_generate_mipmap */
if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- _mesa_generate_mipmap(ctx, target,
- &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
- texObj);
+ ctx->Driver.GenerateMipmap(ctx, target, texObj);
}
_mesa_unmap_teximage_pbo(ctx, packing);
@@ -3003,9 +3001,7 @@ _mesa_store_teximage2d(GLcontext *ctx, GLenum target, GLint level,
/* GL_SGIS_generate_mipmap */
if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- _mesa_generate_mipmap(ctx, target,
- &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
- texObj);
+ ctx->Driver.GenerateMipmap(ctx, target, texObj);
}
_mesa_unmap_teximage_pbo(ctx, packing);
@@ -3079,9 +3075,7 @@ _mesa_store_teximage3d(GLcontext *ctx, GLenum target, GLint level,
/* GL_SGIS_generate_mipmap */
if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- _mesa_generate_mipmap(ctx, target,
- &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
- texObj);
+ ctx->Driver.GenerateMipmap(ctx, target, texObj);
}
_mesa_unmap_teximage_pbo(ctx, packing);
@@ -3127,9 +3121,7 @@ _mesa_store_texsubimage1d(GLcontext *ctx, GLenum target, GLint level,
/* GL_SGIS_generate_mipmap */
if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- _mesa_generate_mipmap(ctx, target,
- &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
- texObj);
+ ctx->Driver.GenerateMipmap(ctx, target, texObj);
}
_mesa_unmap_teximage_pbo(ctx, packing);
@@ -3182,9 +3174,7 @@ _mesa_store_texsubimage2d(GLcontext *ctx, GLenum target, GLint level,
/* GL_SGIS_generate_mipmap */
if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- _mesa_generate_mipmap(ctx, target,
- &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
- texObj);
+ ctx->Driver.GenerateMipmap(ctx, target, texObj);
}
_mesa_unmap_teximage_pbo(ctx, packing);
@@ -3237,9 +3227,7 @@ _mesa_store_texsubimage3d(GLcontext *ctx, GLenum target, GLint level,
/* GL_SGIS_generate_mipmap */
if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- _mesa_generate_mipmap(ctx, target,
- &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
- texObj);
+ ctx->Driver.GenerateMipmap(ctx, target, texObj);
}
_mesa_unmap_teximage_pbo(ctx, packing);
@@ -3313,9 +3301,7 @@ _mesa_store_compressed_teximage2d(GLcontext *ctx, GLenum target, GLint level,
/* GL_SGIS_generate_mipmap */
if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- _mesa_generate_mipmap(ctx, target,
- &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
- texObj);
+ ctx->Driver.GenerateMipmap(ctx, target, texObj);
}
_mesa_unmap_teximage_pbo(ctx, &ctx->Unpack);
@@ -3425,9 +3411,7 @@ _mesa_store_compressed_texsubimage2d(GLcontext *ctx, GLenum target,
/* GL_SGIS_generate_mipmap */
if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- _mesa_generate_mipmap(ctx, target,
- &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
- texObj);
+ ctx->Driver.GenerateMipmap(ctx, target, texObj);
}
_mesa_unmap_teximage_pbo(ctx, &ctx->Unpack);
diff --git a/src/mesa/pipe/README.portability b/src/mesa/pipe/README.portability
new file mode 100644
index 0000000000..c70ca774da
--- /dev/null
+++ b/src/mesa/pipe/README.portability
@@ -0,0 +1,43 @@
+ CROSS-PLATFORM PORTABILITY GUIDELINES FOR GALLIUM3D
+
+
+= General Considerations =
+
+The state tracker and winsys driver support a rather limited number of
+platforms. However, the pipe drivers are meant to run on a wide range of
+platforms. Hence the pipe drivers, the auxiliary modules, and all public
+headers in general should strictly follow these guidelines for portability.
+
+
+= Compiler Support =
+
+* Include p_compiler.h.
+
+* Don't use the 'inline' keyword, use the INLINE macro in p_compiler.h instead.
+
+* Cast explicitly when converting to integer types of smaller sizes.
+
+* Cast explicitly when converting between float, double and integral types.
+
+* Don't use named struct initializers.
+
+* Don't use macros with a variable number of arguments. Use static inline
+functions instead.
+
+
+= Standard Library =
+
+* Avoid including standard library headers. Most standard library functions are
+not available in Windows Kernel Mode. Use the appropriate p_*.h include.
+
+== Memory Allocation ==
+
+* Use MALLOC, CALLOC, FREE instead of the malloc, calloc, free functions.
+
+* Use the align_pointer() function defined in p_util.h to align pointers in a
+portable way.
+
+== Debugging ==
+
+TODO
+
diff --git a/src/mesa/pipe/SConscript b/src/mesa/pipe/SConscript
new file mode 100644
index 0000000000..d9c20e0100
--- /dev/null
+++ b/src/mesa/pipe/SConscript
@@ -0,0 +1,9 @@
+Import('*')
+
+#env = env.Clone()
+
+SConscript([
+ 'softpipe/SConscript',
+ 'i915simple/SConscript',
+ 'i965simple/SConscript',
+])
diff --git a/src/mesa/pipe/cell/common.h b/src/mesa/pipe/cell/common.h
index 0b63ed39be..4de514c358 100644
--- a/src/mesa/pipe/cell/common.h
+++ b/src/mesa/pipe/cell/common.h
@@ -51,16 +51,21 @@
/** for sanity checking */
#define ASSERT_ALIGN16(ptr) \
- assert((((unsigned long) (ptr)) & 0xf) == 0);
+ ASSERT((((unsigned long) (ptr)) & 0xf) == 0);
/** round up value to next multiple of 4 */
#define ROUNDUP4(k) (((k) + 0x3) & ~0x3)
+/** round up value to next multiple of 8 */
+#define ROUNDUP8(k) (((k) + 0x7) & ~0x7)
+
/** round up value to next multiple of 16 */
#define ROUNDUP16(k) (((k) + 0xf) & ~0xf)
+#define CELL_MAX_SPUS 6
+
#define TILE_SIZE 32
@@ -68,21 +73,27 @@
* The low byte of a mailbox word contains the command opcode.
* Remaining higher bytes are command specific.
*/
-#define CELL_CMD_OPCODE_MASK 0xf
+#define CELL_CMD_OPCODE_MASK 0xff
#define CELL_CMD_EXIT 1
#define CELL_CMD_CLEAR_SURFACE 2
#define CELL_CMD_FINISH 3
#define CELL_CMD_RENDER 4
#define CELL_CMD_BATCH 5
+#define CELL_CMD_RELEASE_VERTS 6
#define CELL_CMD_STATE_FRAMEBUFFER 10
#define CELL_CMD_STATE_DEPTH_STENCIL 11
#define CELL_CMD_STATE_SAMPLER 12
-#define CELL_CMD_STATE_VERTEX_INFO 13
+#define CELL_CMD_STATE_TEXTURE 13
+#define CELL_CMD_STATE_VERTEX_INFO 14
+#define CELL_CMD_STATE_VIEWPORT 15
+#define CELL_CMD_STATE_VS_ARRAY_INFO 16
+#define CELL_CMD_STATE_BLEND 17
+#define CELL_CMD_VS_EXECUTE 18
-#define CELL_NUM_BATCH_BUFFERS 3
-#define CELL_BATCH_BUFFER_SIZE 1024 /**< 16KB would be the max */
+#define CELL_NUM_BUFFERS 4
+#define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */
#define CELL_BUFFER_STATUS_FREE 10
#define CELL_BUFFER_STATUS_USED 20
@@ -94,11 +105,11 @@
*/
struct cell_command_framebuffer
{
- uint opcode;
+ uint64_t opcode; /**< CELL_CMD_FRAMEBUFFER */
int width, height;
void *color_start, *depth_start;
enum pipe_format color_format, depth_format;
-} ALIGN16_ATTRIB;
+};
/**
@@ -106,38 +117,90 @@ struct cell_command_framebuffer
*/
struct cell_command_clear_surface
{
- uint opcode;
+ uint64_t opcode; /**< CELL_CMD_CLEAR_SURFACE */
uint surface; /**< Temporary: 0=color, 1=Z */
uint value;
+};
+
+
+/**
+ * Array info used by the vertex shader's vertex puller.
+ */
+struct cell_array_info
+{
+ uint64_t base; /**< Base address of the 0th element. */
+ uint attr; /**< Attribute that this state is for. */
+ uint pitch; /**< Byte pitch from one entry to the next. */
+ uint format; /**< Pipe format of each entry. */
+} ALIGN16_ATTRIB;
+
+
+struct cell_shader_info
+{
+ unsigned num_outputs;
+
+ uint64_t declarations;
+ unsigned num_declarations;
+ uint64_t instructions;
+ unsigned num_instructions;
+ uint64_t uniforms;
+ uint64_t immediates;
+ unsigned num_immediates;
} ALIGN16_ATTRIB;
-#define CELL_MAX_VBUF_SIZE (16 * 1024)
-#define CELL_MAX_VBUF_INDEXES 1024
+#define SPU_VERTS_PER_BATCH 64
+struct cell_command_vs
+{
+ uint64_t opcode; /**< CELL_CMD_VS_EXECUTE */
+ struct cell_shader_info shader;
+ unsigned num_elts;
+ unsigned elts[SPU_VERTS_PER_BATCH];
+ uint64_t vOut[SPU_VERTS_PER_BATCH];
+ float plane[12][4];
+ unsigned nr_planes;
+ unsigned nr_attrs;
+} ALIGN16_ATTRIB;
struct cell_command_render
{
- uint opcode; /**< CELL_CMD_RENDER */
+ uint64_t opcode; /**< CELL_CMD_RENDER */
uint prim_type; /**< PIPE_PRIM_x */
uint num_verts;
uint vertex_size; /**< bytes per vertex */
- uint dummy; /* XXX this dummy field works around a compiler bug */
uint num_indexes;
- const void *vertex_data;
- const ushort *index_data;
- float xmin, ymin, xmax, ymax;
- boolean inline_indexes;
+ uint vertex_buf; /**< which cell->buffer[] contains the vertex data */
+ float xmin, ymin, xmax, ymax; /* XXX another dummy field */
+ uint min_index;
boolean inline_verts;
-} ALIGN16_ATTRIB;
+};
+
+
+struct cell_command_release_verts
+{
+ uint64_t opcode; /**< CELL_CMD_RELEASE_VERTS */
+ uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */
+};
+
+
+struct cell_command_texture
+{
+ void *start; /**< Address in main memory */
+ uint width, height;
+};
/** XXX unions don't seem to work */
+/* XXX this should go away; all commands should be placed in batch buffers */
struct cell_command
{
+#if 0
struct cell_command_framebuffer fb;
struct cell_command_clear_surface clear;
struct cell_command_render render;
+#endif
+ struct cell_command_vs vs;
} ALIGN16_ATTRIB;
@@ -147,7 +210,9 @@ struct cell_init_info
unsigned id;
unsigned num_spus;
struct cell_command *cmd;
- ubyte *batch_buffers[CELL_NUM_BATCH_BUFFERS];
+
+ /** Buffers for command batches, vertex/index data */
+ ubyte *buffers[CELL_NUM_BUFFERS];
uint *buffer_status; /**< points at cell_context->buffer_status */
} ALIGN16_ATTRIB;
diff --git a/src/mesa/pipe/cell/ppu/Makefile b/src/mesa/pipe/cell/ppu/Makefile
index e7f2562da7..50060f5cd3 100644
--- a/src/mesa/pipe/cell/ppu/Makefile
+++ b/src/mesa/pipe/cell/ppu/Makefile
@@ -34,6 +34,7 @@ SOURCES = \
cell_surface.c \
cell_texture.c \
cell_vbuf.c \
+ cell_vertex_shader.c \
cell_winsys.c
diff --git a/src/mesa/pipe/cell/ppu/cell_batch.c b/src/mesa/pipe/cell/ppu/cell_batch.c
index c894ef8608..f45e5f25b6 100644
--- a/src/mesa/pipe/cell/ppu/cell_batch.c
+++ b/src/mesa/pipe/cell/ppu/cell_batch.c
@@ -31,12 +31,55 @@
#include "cell_spu.h"
+/**
+ * Find a buffer that all SPUs have marked as free, mark it as used for
+ * every SPU and return its index.
+ * The scan starts at buffer 0 and cycles through the CELL_NUM_BUFFERS
+ * buffers; the loop only exits by returning, so this spins until a
+ * buffer is free.
+ */
+uint
+cell_get_empty_buffer(struct cell_context *cell)
+{
+ uint buf = 0, tries = 0;
+
+ /* Find a buffer that's marked as free by all SPUs */
+ while (1) {
+ uint spu, num_free = 0;
+
+ for (spu = 0; spu < cell->num_spus; spu++) {
+ if (cell->buffer_status[spu][buf][0] == CELL_BUFFER_STATUS_FREE) {
+ num_free++;
+
+ if (num_free == cell->num_spus) {
+ /* found a free buffer, now mark status as used */
+ for (spu = 0; spu < cell->num_spus; spu++) {
+ cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED;
+ }
+ /*
+ printf("PPU: ALLOC BUFFER %u\n", buf);
+ */
+ return buf;
+ }
+ }
+ else {
+ break; /* this SPU has not freed the buffer; stop checking it */
+ }
+ }
+
+ /* try next buf */
+ buf = (buf + 1) % CELL_NUM_BUFFERS;
+
+ tries++; /* only feeds the disabled debug message below */
+ if (tries == 100) {
+ /*
+ printf("PPU WAITING for buffer...\n");
+ */
+ }
+ }
+}
+
+
void
cell_batch_flush(struct cell_context *cell)
{
static boolean flushing = FALSE;
uint batch = cell->cur_batch;
- const uint size = cell->batch_buffer_size[batch];
+ const uint size = cell->buffer_size[batch];
uint spu, cmd_word;
assert(!flushing);
@@ -46,7 +89,7 @@ cell_batch_flush(struct cell_context *cell)
flushing = TRUE;
- assert(batch < CELL_NUM_BATCH_BUFFERS);
+ assert(batch < CELL_NUM_BUFFERS);
/*
printf("cell_batch_dispatch: buf %u at %p, size %u\n",
@@ -68,28 +111,9 @@ cell_batch_flush(struct cell_context *cell)
* array indicating that the PPU can re-use the buffer.
*/
+ batch = cell_get_empty_buffer(cell);
- /* Find a buffer that's marked as free by all SPUs */
- while (1) {
- uint num_free = 0;
-
- batch = (batch + 1) % CELL_NUM_BATCH_BUFFERS;
-
- for (spu = 0; spu < cell->num_spus; spu++) {
- if (cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_FREE)
- num_free++;
- }
-
- if (num_free == cell->num_spus) {
- /* found a free buffer, now mark status as used */
- for (spu = 0; spu < cell->num_spus; spu++) {
- cell->buffer_status[spu][batch][0] = CELL_BUFFER_STATUS_USED;
- }
- break;
- }
- }
-
- cell->batch_buffer_size[batch] = 0; /* empty */
+ cell->buffer_size[batch] = 0; /* empty */
cell->cur_batch = batch;
flushing = FALSE;
@@ -99,61 +123,95 @@ cell_batch_flush(struct cell_context *cell)
uint
cell_batch_free_space(const struct cell_context *cell)
{
- uint free = CELL_BATCH_BUFFER_SIZE
- - cell->batch_buffer_size[cell->cur_batch];
+ uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch];
return free;
}
/**
- * \param cmd command to append
- * \param length command size in bytes
+ * Append data to current batch.
*/
void
-cell_batch_append(struct cell_context *cell, const void *cmd, uint length)
+cell_batch_append(struct cell_context *cell, const void *data, uint bytes)
{
uint size;
- assert(length % 4 == 0);
- assert(cell->cur_batch >= 0);
+ ASSERT(bytes % 8 == 0);
+ ASSERT(bytes <= CELL_BUFFER_SIZE);
+ ASSERT(cell->cur_batch >= 0);
+
+#ifdef ASSERT
+ {
+ uint spu;
+ for (spu = 0; spu < cell->num_spus; spu++) {
+ ASSERT(cell->buffer_status[spu][cell->cur_batch][0]
+ == CELL_BUFFER_STATUS_USED);
+ }
+ }
+#endif
- size = cell->batch_buffer_size[cell->cur_batch];
+ size = cell->buffer_size[cell->cur_batch];
- if (size + length > CELL_BATCH_BUFFER_SIZE) {
+ if (size + bytes > CELL_BUFFER_SIZE) {
cell_batch_flush(cell);
size = 0;
}
- assert(size + length <= CELL_BATCH_BUFFER_SIZE);
+ ASSERT(size + bytes <= CELL_BUFFER_SIZE);
- memcpy(cell->batch_buffer[cell->cur_batch] + size, cmd, length);
+ memcpy(cell->buffer[cell->cur_batch] + size, data, bytes);
- cell->batch_buffer_size[cell->cur_batch] = size + length;
+ cell->buffer_size[cell->cur_batch] = size + bytes;
}
void *
cell_batch_alloc(struct cell_context *cell, uint bytes)
{
+ return cell_batch_alloc_aligned(cell, bytes, 1);
+}
+
+
+void *
+cell_batch_alloc_aligned(struct cell_context *cell, uint bytes,
+ uint alignment)
+{
void *pos;
- uint size;
+ uint size, padbytes;
- ASSERT(bytes % 4 == 0);
+ ASSERT(bytes % 8 == 0);
+ ASSERT(bytes <= CELL_BUFFER_SIZE);
+ ASSERT(alignment > 0);
+ ASSERT(cell->cur_batch >= 0);
- assert(cell->cur_batch >= 0);
+#ifdef ASSERT
+ {
+ uint spu;
+ for (spu = 0; spu < cell->num_spus; spu++) {
+ ASSERT(cell->buffer_status[spu][cell->cur_batch][0]
+ == CELL_BUFFER_STATUS_USED);
+ }
+ }
+#endif
- size = cell->batch_buffer_size[cell->cur_batch];
+ size = cell->buffer_size[cell->cur_batch];
- if (size + bytes > CELL_BATCH_BUFFER_SIZE) {
+ padbytes = (alignment - (size % alignment)) % alignment;
+
+ if (padbytes + size + bytes > CELL_BUFFER_SIZE) {
cell_batch_flush(cell);
size = 0;
}
+ else {
+ size += padbytes;
+ }
- assert(size + bytes <= CELL_BATCH_BUFFER_SIZE);
+ ASSERT(size % alignment == 0);
+ ASSERT(size + bytes <= CELL_BUFFER_SIZE);
- pos = (void *) (cell->batch_buffer[cell->cur_batch] + size);
+ pos = (void *) (cell->buffer[cell->cur_batch] + size);
- cell->batch_buffer_size[cell->cur_batch] = size + bytes;
+ cell->buffer_size[cell->cur_batch] = size + bytes;
return pos;
}
diff --git a/src/mesa/pipe/cell/ppu/cell_batch.h b/src/mesa/pipe/cell/ppu/cell_batch.h
index c4ba7feb3d..a6eee0a8b1 100644
--- a/src/mesa/pipe/cell/ppu/cell_batch.h
+++ b/src/mesa/pipe/cell/ppu/cell_batch.h
@@ -35,6 +35,9 @@
struct cell_context;
+extern uint
+cell_get_empty_buffer(struct cell_context *cell);
+
extern void
cell_batch_flush(struct cell_context *cell);
@@ -42,10 +45,14 @@ extern uint
cell_batch_free_space(const struct cell_context *cell);
extern void
-cell_batch_append(struct cell_context *cell, const void *cmd, uint length);
+cell_batch_append(struct cell_context *cell, const void *data, uint bytes);
extern void *
cell_batch_alloc(struct cell_context *cell, uint bytes);
+extern void *
+cell_batch_alloc_aligned(struct cell_context *cell, uint bytes,
+ uint alignment);
+
#endif /* CELL_BATCH_H */
diff --git a/src/mesa/pipe/cell/ppu/cell_clear.c b/src/mesa/pipe/cell/ppu/cell_clear.c
index e01640b994..07b908eec5 100644
--- a/src/mesa/pipe/cell/ppu/cell_clear.c
+++ b/src/mesa/pipe/cell/ppu/cell_clear.c
@@ -48,9 +48,12 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps,
unsigned clearValue)
{
struct cell_context *cell = cell_context(pipe);
- /*uint i;*/
uint surfIndex;
+ if (cell->dirty)
+ cell_update_derived(cell);
+
+
if (!cell->cbuf_map[0])
cell->cbuf_map[0] = pipe_surface_map(ps);
@@ -61,29 +64,7 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps,
surfIndex = 0;
}
-#if 0
- for (i = 0; i < cell->num_spus; i++) {
-#if 1
- uint clr = clearValue;
- if (surfIndex == 0) {
- /* XXX debug: clear color varied per-SPU to visualize tiles */
- if ((clr & 0xff) == 0)
- clr |= 64 + i * 8;
- if ((clr & 0xff00) == 0)
- clr |= (64 + i * 8) << 8;
- if ((clr & 0xff0000) == 0)
- clr |= (64 + i * 8) << 16;
- if ((clr & 0xff000000) == 0)
- clr |= (64 + i * 8) << 24;
- }
- cell_global.command[i].clear.value = clr;
-#else
- cell_global.command[i].clear.value = clearValue;
-#endif
- cell_global.command[i].clear.surface = surfIndex;
- send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_CLEAR_SURFACE);
- }
-#else
+
{
struct cell_command_clear_surface *clr
= (struct cell_command_clear_surface *)
@@ -92,9 +73,4 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps,
clr->surface = surfIndex;
clr->value = clearValue;
}
-#endif
-
- /* XXX temporary */
- cell_flush(&cell->pipe, 0x0);
-
}
diff --git a/src/mesa/pipe/cell/ppu/cell_context.c b/src/mesa/pipe/cell/ppu/cell_context.c
index 8cb0c48f40..bbe1fd7a11 100644
--- a/src/mesa/pipe/cell/ppu/cell_context.c
+++ b/src/mesa/pipe/cell/ppu/cell_context.c
@@ -39,6 +39,7 @@
#include "pipe/p_winsys.h"
#include "pipe/cell/common.h"
#include "pipe/draw/draw_context.h"
+#include "pipe/draw/draw_private.h"
#include "cell_clear.h"
#include "cell_context.h"
#include "cell_draw_arrays.h"
@@ -156,6 +157,19 @@ cell_destroy_context( struct pipe_context *pipe )
}
+static struct draw_context *
+cell_draw_create(struct cell_context *cell)
+{
+ struct draw_context *draw = draw_create(); /* result is not checked for NULL */
+
+ if (getenv("GALLIUM_CELL_VS")) { /* enabled only when this env var is set */
+ /* plug in SPU-based vertex transformation code:
+ * the draw context's shader_queue_flush hook is replaced and
+ * driver_private is pointed at the cell context.
+ */
+ draw->shader_queue_flush = cell_vertex_shader_queue_flush;
+ draw->driver_private = cell;
+ }
+
+ return draw;
+}
struct pipe_context *
@@ -242,7 +256,7 @@ cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws)
cell_init_surface_functions(cell);
- cell->draw = draw_create();
+ cell->draw = cell_draw_create(cell);
cell_init_vbuf(cell);
draw_set_rasterize_stage(cell->draw, cell->vbuf);
@@ -254,8 +268,9 @@ cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws)
cell_start_spus(cell);
- for (buf = 0; buf < CELL_NUM_BATCH_BUFFERS; buf++) {
- cell->batch_buffer_size[buf] = 0;
+ /* init command, vertex/index buffer info */
+ for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) {
+ cell->buffer_size[buf] = 0;
/* init batch buffer status values,
* mark 0th buffer as used, rest as free.
diff --git a/src/mesa/pipe/cell/ppu/cell_context.h b/src/mesa/pipe/cell/ppu/cell_context.h
index 3bd88bfd5b..3b63419b5e 100644
--- a/src/mesa/pipe/cell/ppu/cell_context.h
+++ b/src/mesa/pipe/cell/ppu/cell_context.h
@@ -38,9 +38,6 @@
#include "pipe/cell/common.h"
-#define CELL_MAX_SPUS 6
-
-
struct cell_vbuf_render;
struct cell_vertex_shader_state
@@ -76,7 +73,7 @@ struct cell_context
struct pipe_framebuffer_state framebuffer;
struct pipe_poly_stipple poly_stipple;
struct pipe_scissor_state scissor;
- struct pipe_texture *texture[PIPE_MAX_SAMPLERS];
+ struct cell_texture *texture[PIPE_MAX_SAMPLERS];
struct pipe_viewport_state viewport;
struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX];
struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX];
@@ -84,6 +81,9 @@ struct cell_context
ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS];
ubyte *zsbuf_map;
+ struct pipe_surface *tex_surf;
+ uint *tex_map;
+
uint dirty;
/** The primitive drawing context */
@@ -102,12 +102,14 @@ struct cell_context
uint num_spus;
- uint batch_buffer_size[CELL_NUM_BATCH_BUFFERS];
- ubyte batch_buffer[CELL_NUM_BATCH_BUFFERS][CELL_BATCH_BUFFER_SIZE] ALIGN16_ATTRIB;
- int cur_batch; /**< which batch buffer is being filled */
+ /** Buffers for command batches, vertex/index data */
+ uint buffer_size[CELL_NUM_BUFFERS];
+ ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB;
+
+ int cur_batch; /**< which buffer is being filled w/ commands */
/** [4] to ensure 16-byte alignment for each status word */
- uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BATCH_BUFFERS][4] ALIGN16_ATTRIB;
+ uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB;
};
@@ -124,6 +126,8 @@ cell_context(struct pipe_context *pipe)
extern struct pipe_context *
cell_create_context(struct pipe_winsys *ws, struct cell_winsys *cws);
+extern void
+cell_vertex_shader_queue_flush(struct draw_context *draw);
diff --git a/src/mesa/pipe/cell/ppu/cell_flush.c b/src/mesa/pipe/cell/ppu/cell_flush.c
index b98bb566b1..f62bc4650c 100644
--- a/src/mesa/pipe/cell/ppu/cell_flush.c
+++ b/src/mesa/pipe/cell/ppu/cell_flush.c
@@ -39,6 +39,9 @@ cell_flush(struct pipe_context *pipe, unsigned flags)
{
struct cell_context *cell = cell_context(pipe);
+ if (flags & PIPE_FLUSH_SWAPBUFFERS)
+ flags |= PIPE_FLUSH_WAIT;
+
draw_flush( cell->draw );
cell_flush_int(pipe, flags);
}
@@ -56,7 +59,7 @@ cell_flush_int(struct pipe_context *pipe, unsigned flags)
flushing = TRUE;
if (flags & PIPE_FLUSH_WAIT) {
- uint *cmd = (uint *) cell_batch_alloc(cell, sizeof(uint));
+ uint64_t *cmd = (uint64_t *) cell_batch_alloc(cell, sizeof(uint64_t));
*cmd = CELL_CMD_FINISH;
}
diff --git a/src/mesa/pipe/cell/ppu/cell_spu.c b/src/mesa/pipe/cell/ppu/cell_spu.c
index 4627bc8d1f..7c83a47e57 100644
--- a/src/mesa/pipe/cell/ppu/cell_spu.c
+++ b/src/mesa/pipe/cell/ppu/cell_spu.c
@@ -111,8 +111,8 @@ cell_start_spus(struct cell_context *cell)
cell_global.inits[i].id = i;
cell_global.inits[i].num_spus = cell->num_spus;
cell_global.inits[i].cmd = &cell_global.command[i];
- for (j = 0; j < CELL_NUM_BATCH_BUFFERS; j++) {
- cell_global.inits[i].batch_buffers[j] = cell->batch_buffer[j];
+ for (j = 0; j < CELL_NUM_BUFFERS; j++) {
+ cell_global.inits[i].buffers[j] = cell->buffer[j];
}
cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0];
diff --git a/src/mesa/pipe/cell/ppu/cell_state_blend.c b/src/mesa/pipe/cell/ppu/cell_state_blend.c
index 34ae0128ea..4fc60548c8 100644
--- a/src/mesa/pipe/cell/ppu/cell_state_blend.c
+++ b/src/mesa/pipe/cell/ppu/cell_state_blend.c
@@ -29,6 +29,7 @@
*/
#include "pipe/p_util.h"
+#include "pipe/draw/draw_context.h"
#include "cell_context.h"
#include "cell_state.h"
@@ -38,9 +39,7 @@ void *
cell_create_blend_state(struct pipe_context *pipe,
const struct pipe_blend_state *blend)
{
- struct pipe_blend_state *state = MALLOC(sizeof(struct pipe_blend_state));
- memcpy(state, blend, sizeof(struct pipe_blend_state));
- return state;
+ return mem_dup(blend, sizeof(*blend));
}
@@ -49,6 +48,8 @@ cell_bind_blend_state(struct pipe_context *pipe, void *blend)
{
struct cell_context *cell = cell_context(pipe);
+ draw_flush(cell->draw);
+
cell->blend = (const struct pipe_blend_state *)blend;
cell->dirty |= CELL_NEW_BLEND;
@@ -68,6 +69,8 @@ cell_set_blend_color(struct pipe_context *pipe,
{
struct cell_context *cell = cell_context(pipe);
+ draw_flush(cell->draw);
+
cell->blend_color = *blend_color;
cell->dirty |= CELL_NEW_BLEND;
@@ -80,10 +83,7 @@ void *
cell_create_depth_stencil_alpha_state(struct pipe_context *pipe,
const struct pipe_depth_stencil_alpha_state *depth_stencil)
{
- struct pipe_depth_stencil_alpha_state *state =
- MALLOC(sizeof(struct pipe_depth_stencil_alpha_state));
- memcpy(state, depth_stencil, sizeof(struct pipe_depth_stencil_alpha_state));
- return state;
+ return mem_dup(depth_stencil, sizeof(*depth_stencil));
}
@@ -93,6 +93,8 @@ cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe,
{
struct cell_context *cell = cell_context(pipe);
+ draw_flush(cell->draw);
+
cell->depth_stencil
= (const struct pipe_depth_stencil_alpha_state *) depth_stencil;
diff --git a/src/mesa/pipe/cell/ppu/cell_state_emit.c b/src/mesa/pipe/cell/ppu/cell_state_emit.c
index dbca900c35..5d2a786449 100644
--- a/src/mesa/pipe/cell/ppu/cell_state_emit.c
+++ b/src/mesa/pipe/cell/ppu/cell_state_emit.c
@@ -30,6 +30,18 @@
#include "cell_state.h"
#include "cell_state_emit.h"
#include "cell_batch.h"
+#include "cell_texture.h"
+
+/**
+ * Emit a state command into batch space obtained from cell_batch_alloc():
+ * a 64-bit opcode word followed by a copy of the state data.
+ * The allocation size is rounded up to a multiple of 8 bytes.
+ */
+static void
+emit_state_cmd(struct cell_context *cell, uint cmd,
+ const void *state, uint state_size)
+{
+ uint64_t *dst = (uint64_t *)
+ cell_batch_alloc(cell, ROUNDUP8(sizeof(uint64_t) + state_size));
+ *dst = cmd; /* opcode goes in the first 64-bit word */
+ memcpy(dst + 1, state, state_size); /* state data follows the opcode */
+}
@@ -50,23 +62,42 @@ cell_emit_state(struct cell_context *cell)
fb->height = cell->framebuffer.cbufs[0]->height;
}
+ if (cell->dirty & CELL_NEW_BLEND) {
+ emit_state_cmd(cell, CELL_CMD_STATE_BLEND,
+ cell->blend,
+ sizeof(struct pipe_blend_state));
+ }
+
if (cell->dirty & CELL_NEW_DEPTH_STENCIL) {
- uint cmd = CELL_CMD_STATE_DEPTH_STENCIL;
- cell_batch_append(cell, &cmd, 4);
- cell_batch_append(cell, cell->depth_stencil,
- sizeof(struct pipe_depth_stencil_alpha_state));
+ emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL,
+ cell->depth_stencil,
+ sizeof(struct pipe_depth_stencil_alpha_state));
}
if (cell->dirty & CELL_NEW_SAMPLER) {
- uint cmd = CELL_CMD_STATE_SAMPLER;
- cell_batch_append(cell, &cmd, 4);
- cell_batch_append(cell, cell->sampler[0],
- sizeof(struct pipe_sampler_state));
+ emit_state_cmd(cell, CELL_CMD_STATE_SAMPLER,
+ cell->sampler[0], sizeof(struct pipe_sampler_state));
+ }
+
+ if (cell->dirty & CELL_NEW_TEXTURE) {
+ struct cell_command_texture texture;
+ if (cell->texture[0]) {
+ texture.start = cell->texture[0]->tiled_data;
+ texture.width = cell->texture[0]->base.width[0];
+ texture.height = cell->texture[0]->base.height[0];
+ }
+ else {
+ texture.start = NULL;
+ texture.width = 0;
+ texture.height = 0;
+ }
+
+ emit_state_cmd(cell, CELL_CMD_STATE_TEXTURE,
+ &texture, sizeof(struct cell_command_texture));
}
if (cell->dirty & CELL_NEW_VERTEX_INFO) {
- uint cmd = CELL_CMD_STATE_VERTEX_INFO;
- cell_batch_append(cell, &cmd, 4);
- cell_batch_append(cell, &cell->vertex_info, sizeof(struct vertex_info));
+ emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO,
+ &cell->vertex_info, sizeof(struct vertex_info));
}
}
diff --git a/src/mesa/pipe/cell/ppu/cell_state_fs.c b/src/mesa/pipe/cell/ppu/cell_state_fs.c
index 81c2ac14dd..96a52273b0 100644
--- a/src/mesa/pipe/cell/ppu/cell_state_fs.c
+++ b/src/mesa/pipe/cell/ppu/cell_state_fs.c
@@ -45,7 +45,7 @@ void *
cell_create_fs_state(struct pipe_context *pipe,
const struct pipe_shader_state *templ)
{
- struct cell_context *cell = cell_context(pipe);
+ /*struct cell_context *cell = cell_context(pipe);*/
struct cell_fragment_shader_state *state;
state = CALLOC_STRUCT(cell_fragment_shader_state);
@@ -94,8 +94,6 @@ cell_bind_fs_state(struct pipe_context *pipe, void *fs)
void
cell_delete_fs_state(struct pipe_context *pipe, void *fs)
{
- struct cell_context *cell = cell_context(pipe);
-
struct cell_fragment_shader_state *state =
(struct cell_fragment_shader_state *) fs;
diff --git a/src/mesa/pipe/cell/ppu/cell_state_sampler.c b/src/mesa/pipe/cell/ppu/cell_state_sampler.c
index ae1eeb4620..ade6cc8338 100644
--- a/src/mesa/pipe/cell/ppu/cell_state_sampler.c
+++ b/src/mesa/pipe/cell/ppu/cell_state_sampler.c
@@ -30,21 +30,17 @@
*/
#include "pipe/p_util.h"
+#include "pipe/draw/draw_context.h"
#include "cell_context.h"
#include "cell_state.h"
-#if 0
#include "cell_texture.h"
-#include "cell_tile_cache.h"
-#endif
void *
cell_create_sampler_state(struct pipe_context *pipe,
const struct pipe_sampler_state *sampler)
{
- struct pipe_sampler_state *state = MALLOC( sizeof(struct pipe_sampler_state) );
- memcpy(state, sampler, sizeof(struct pipe_sampler_state));
- return state;
+ return mem_dup(sampler, sizeof(*sampler));
}
void
@@ -53,6 +49,8 @@ cell_bind_sampler_state(struct pipe_context *pipe,
{
struct cell_context *cell = cell_context(pipe);
+ draw_flush(cell->draw);
+
assert(unit < PIPE_MAX_SAMPLERS);
cell->sampler[unit] = (struct pipe_sampler_state *)sampler;
@@ -76,7 +74,11 @@ cell_set_sampler_texture(struct pipe_context *pipe,
{
struct cell_context *cell = cell_context(pipe);
+ draw_flush(cell->draw);
+
cell->texture[sampler] = texture;
+ cell_update_texture_mapping(cell);
+
cell->dirty |= CELL_NEW_TEXTURE;
}
diff --git a/src/mesa/pipe/cell/ppu/cell_texture.c b/src/mesa/pipe/cell/ppu/cell_texture.c
index 0a8190d983..df178d9ca2 100644
--- a/src/mesa/pipe/cell/ppu/cell_texture.c
+++ b/src/mesa/pipe/cell/ppu/cell_texture.c
@@ -79,31 +79,30 @@ cell_texture_layout(struct cell_texture * spt)
}
-void
-cell_texture_create(struct pipe_context *pipe, struct pipe_texture **pt)
+struct pipe_texture *
+cell_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat)
{
- struct cell_texture *spt = REALLOC(*pt, sizeof(struct pipe_texture),
- sizeof(struct cell_texture));
+ struct cell_texture *spt = CALLOC_STRUCT(cell_texture);
+ if (!spt)
+ return NULL;
- if (spt) {
- memset(&spt->base + 1, 0,
- sizeof(struct cell_texture) - sizeof(struct pipe_texture));
+ spt->base = *templat;
- cell_texture_layout(spt);
+ cell_texture_layout(spt);
- spt->buffer = pipe->winsys->buffer_create(pipe->winsys, 32,
- PIPE_BUFFER_USAGE_PIXEL,
- spt->buffer_size);
+ spt->buffer = pipe->winsys->buffer_create(pipe->winsys, 32,
+ PIPE_BUFFER_USAGE_PIXEL,
+ spt->buffer_size);
- if (!spt->buffer) {
- FREE(spt);
- spt = NULL;
- }
+ if (!spt->buffer) {
+ FREE(spt);
+ return NULL;
}
- *pt = &spt->base;
+ return &spt->base;
}
+
void
cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt)
{
@@ -163,3 +162,91 @@ cell_get_tex_surface(struct pipe_context *pipe,
}
return ps;
}
+
+
+
+/**
+ * Copy a linear image into tiled layout.
+ * Tiles are written one after another in row-major tile order, and the
+ * texels inside each tile are written row by row.
+ * Only whole tiles are copied: (w / tile_size) by (h / tile_size) of them.
+ * \param w  image width in texels
+ * \param h  image height in texels
+ * \param tile_size  width and height of a tile, in texels
+ * \param dst  destination for the tiled data
+ * \param src  linear source image, one uint per texel; rows are assumed to
+ *             be tightly packed (w texels per row) -- TODO confirm against
+ *             the surface pitch
+ */
+static void
+tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src)
+{
+   const uint tile_size2 = tile_size * tile_size;
+   const uint h_t = h / tile_size, w_t = w / tile_size;
+
+   uint it, jt;  /* tile counters */
+   uint i, j;    /* intra-tile counters */
+
+   for (it = 0; it < h_t; it++) {
+      for (jt = 0; jt < w_t; jt++) {
+         /* fill in tile (it, jt) */
+         uint *tdst = dst + (it * w_t + jt) * tile_size2;
+         for (i = 0; i < tile_size; i++) {
+            for (j = 0; j < tile_size; j++) {
+               const uint srci = it * tile_size + i;   /* source row */
+               const uint srcj = jt * tile_size + j;   /* source column */
+               /* The row stride of the linear image is its width, not its
+                * height; indexing with h is only right for square images
+                * and reads out of bounds when h > w.
+                */
+               *tdst++ = src[srci * w + srcj];
+            }
+         }
+      }
+   }
+}
+
+
+
+/**
+ * Convert linear texture image data to tiled format for SPU usage.
+ * Only face 0, level 0, zslice 0 of the texture is converted.
+ * Any previous tiled copy is freed and a new 16-byte aligned buffer of
+ * w * h * 4 bytes is allocated and stored in texture->tiled_data.
+ * If that allocation fails, tiled_data is left NULL and nothing is copied.
+ */
+static void
+cell_tile_texture(struct cell_context *cell,
+                  struct cell_texture *texture)
+{
+   uint face = 0, level = 0, zslice = 0;
+   struct pipe_surface *surf;
+   const uint w = texture->base.width[0], h = texture->base.height[0];
+   const uint *src;
+
+   /* temporary restrictions: */
+   assert(w >= TILE_SIZE);
+   assert(h >= TILE_SIZE);
+   assert(w % TILE_SIZE == 0);
+   assert(h % TILE_SIZE == 0);
+
+   surf = cell_get_tex_surface(&cell->pipe, &texture->base, face, level, zslice);
+   ASSERT(surf);
+
+   src = (const uint *) pipe_surface_map(surf);
+
+   if (texture->tiled_data) {
+      align_free(texture->tiled_data);
+   }
+   texture->tiled_data = align_malloc(w * h * 4, 16);
+
+   /* don't write through a NULL pointer if the allocation failed */
+   if (texture->tiled_data) {
+      tile_copy_data(w, h, TILE_SIZE, texture->tiled_data, src);
+   }
+
+   pipe_surface_unmap(surf);
+
+   /* drop the reference obtained from cell_get_tex_surface() */
+   pipe_surface_reference(&surf, NULL);
+}
+
+
+/**
+ * Rebuild the tiled copy of cell->texture[0], if a texture is set there.
+ * The surface map/unmap code that follows is compiled out with #if 0.
+ */
+void
+cell_update_texture_mapping(struct cell_context *cell)
+{
+ uint face = 0, level = 0, zslice = 0; /* only referenced by the disabled code */
+
+ if (cell->texture[0])
+ cell_tile_texture(cell, cell->texture[0]);
+#if 0
+ if (cell->tex_surf && cell->tex_map) {
+ pipe_surface_unmap(cell->tex_surf);
+ cell->tex_map = NULL;
+ }
+
+ /* XXX free old surface */
+
+ cell->tex_surf = cell_get_tex_surface(&cell->pipe,
+ &cell->texture[0]->base,
+ face, level, zslice);
+
+ cell->tex_map = pipe_surface_map(cell->tex_surf);
+#endif
+}
diff --git a/src/mesa/pipe/cell/ppu/cell_texture.h b/src/mesa/pipe/cell/ppu/cell_texture.h
index ef5808c086..0264fed88e 100644
--- a/src/mesa/pipe/cell/ppu/cell_texture.h
+++ b/src/mesa/pipe/cell/ppu/cell_texture.h
@@ -46,6 +46,8 @@ struct cell_texture
*/
struct pipe_buffer *buffer;
unsigned long buffer_size;
+
+ void *tiled_data; /* XXX this may be temporary */ /*ALIGN16*/
};
@@ -58,8 +60,9 @@ cell_texture(struct pipe_texture *pt)
-extern void
-cell_texture_create(struct pipe_context *pipe, struct pipe_texture **pt);
+extern struct pipe_texture *
+cell_texture_create(struct pipe_context *pipe,
+ const struct pipe_texture *templat);
extern void
cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt);
@@ -70,4 +73,8 @@ cell_get_tex_surface(struct pipe_context *pipe,
unsigned face, unsigned level, unsigned zslice);
+extern void
+cell_update_texture_mapping(struct cell_context *cell);
+
+
#endif /* CELL_TEXTURE */
diff --git a/src/mesa/pipe/cell/ppu/cell_vbuf.c b/src/mesa/pipe/cell/ppu/cell_vbuf.c
index ee572b3a51..e9fafe492e 100644
--- a/src/mesa/pipe/cell/ppu/cell_vbuf.c
+++ b/src/mesa/pipe/cell/ppu/cell_vbuf.c
@@ -39,8 +39,7 @@
#include "pipe/draw/draw_vbuf.h"
-/** Allow prim indexes, verts to be inlined after RENDER command */
-#define ALLOW_INLINE_INDEXES 1
+/** Allow vertex data to be inlined after RENDER command */
#define ALLOW_INLINE_VERTS 1
@@ -52,9 +51,10 @@ struct cell_vbuf_render
{
struct vbuf_render base;
struct cell_context *cell;
- uint prim;
- uint vertex_size;
- void *vertex_buffer;
+ uint prim; /**< PIPE_PRIM_x */
+ uint vertex_size; /**< in bytes */
+ void *vertex_buffer; /**< just for debug, really */
+ uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */
};
@@ -81,14 +81,46 @@ cell_vbuf_allocate_vertices(struct vbuf_render *vbr,
{
struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
/*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/
- assert(!cvbr->vertex_buffer);
- cvbr->vertex_buffer = align_malloc(vertex_size * nr_vertices, 16);
+
+ assert(cvbr->vertex_buf == ~0);
+ cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell);
+ cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf];
cvbr->vertex_size = vertex_size;
return cvbr->vertex_buffer;
}
static void
+cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices,
+ unsigned vertex_size, unsigned vertices_used) /* vertex_size and vertices_used are not used by the live code */
+{
+ struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
+ struct cell_context *cell = cvbr->cell;
+
+ /*
+ printf("%s vertex_buf = %u count = %u\n",
+ __FUNCTION__, cvbr->vertex_buf, vertices_used);
+ */
+
+ /* Tell SPUs they can release the vert buf */
+ if (cvbr->vertex_buf != ~0U) { /* ~0 is the "no buffer held" marker, see the reset below */
+ struct cell_command_release_verts *release
+ = (struct cell_command_release_verts *)
+ cell_batch_alloc(cell, sizeof(struct cell_command_release_verts));
+ release->opcode = CELL_CMD_RELEASE_VERTS;
+ release->vertex_buf = cvbr->vertex_buf;
+ }
+
+ cvbr->vertex_buf = ~0; /* mark the slot as free; cell_vbuf_allocate_vertices asserts on this value */
+ cell_flush_int(&cell->pipe, 0x0); /* flags are 0, i.e. PIPE_FLUSH_WAIT is not requested */
+
+ assert(vertices == cvbr->vertex_buffer); /* caller must hand back the pointer returned by allocate_vertices */
+ cvbr->vertex_buffer = NULL;
+}
+
+
+
+static void
cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim)
{
struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
@@ -106,17 +138,24 @@ cell_vbuf_draw(struct vbuf_render *vbr,
struct cell_context *cell = cvbr->cell;
float xmin, ymin, xmax, ymax;
uint i;
- uint nr_vertices = 0;
+ uint nr_vertices = 0, min_index = ~0;
const void *vertices = cvbr->vertex_buffer;
const uint vertex_size = cvbr->vertex_size;
for (i = 0; i < nr_indices; i++) {
if (indices[i] > nr_vertices)
nr_vertices = indices[i];
+ if (indices[i] < min_index)
+ min_index = indices[i];
}
nr_vertices++;
#if 0
+ /*if (min_index > 0)*/
+ printf("%s min_index = %u\n", __FUNCTION__, min_index);
+#endif
+
+#if 0
printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u\n",
nr_indices, nr_vertices);
printf(" ");
@@ -137,7 +176,7 @@ cell_vbuf_draw(struct vbuf_render *vbr,
/* compute x/y bounding box */
xmin = ymin = 1e50;
xmax = ymax = -1e50;
- for (i = 0; i < nr_vertices; i++) {
+ for (i = min_index; i < nr_vertices; i++) {
const float *v = (float *) ((ubyte *) vertices + i * vertex_size);
if (v[0] < xmin)
xmin = v[0];
@@ -148,83 +187,68 @@ cell_vbuf_draw(struct vbuf_render *vbr,
if (v[1] > ymax)
ymax = v[1];
}
+#if 0
+ printf("PPU Bounds %g, %g .. %g, %g\n", xmin, ymin, xmax, ymax);
+ fflush(stdout);
+#endif
if (cvbr->prim != PIPE_PRIM_TRIANGLES)
return; /* only render tris for now */
/* build/insert batch RENDER command */
{
- const uint index_bytes = ROUNDUP4(nr_indices * 2);
+ const uint index_bytes = ROUNDUP8(nr_indices * 2);
const uint vertex_bytes = nr_vertices * 4 * cell->vertex_info.size;
+ const uint batch_size = sizeof(struct cell_command_render) + index_bytes;
struct cell_command_render *render
= (struct cell_command_render *)
- cell_batch_alloc(cell, sizeof(*render));
+ cell_batch_alloc(cell, batch_size);
+
render->opcode = CELL_CMD_RENDER;
render->prim_type = cvbr->prim;
render->num_indexes = nr_indices;
- if (ALLOW_INLINE_INDEXES &&
- index_bytes <= cell_batch_free_space(cell)) {
- /* indices inlined, right after render cmd */
- void *dst = cell_batch_alloc(cell, index_bytes);
- memcpy(dst, indices, nr_indices * 2);
- render->inline_indexes = TRUE;
- render->index_data = NULL;
- }
- else {
- /* indices in separate buffer */
- render->inline_indexes = FALSE;
- render->index_data = indices;
- ASSERT_ALIGN16(render->index_data);
- }
+ render->min_index = min_index;
+
+ /* append indices after render command */
+ memcpy(render + 1, indices, nr_indices * 2);
+ /* if there's room, append vertices after the indices, else leave
+ * vertices in the original/separate buffer.
+ */
render->vertex_size = 4 * cell->vertex_info.size;
render->num_verts = nr_vertices;
if (ALLOW_INLINE_VERTS &&
- render->inline_indexes &&
- vertex_bytes <= cell_batch_free_space(cell)) {
- /* vertex data inlined, after indices */
- void *dst = cell_batch_alloc(cell, vertex_bytes);
+ min_index == 0 &&
+ vertex_bytes + 16 <= cell_batch_free_space(cell)) {
+ /* vertex data inlined, after indices, at 16-byte boundary */
+ void *dst = cell_batch_alloc_aligned(cell, vertex_bytes, 16);
memcpy(dst, vertices, vertex_bytes);
render->inline_verts = TRUE;
- render->vertex_data = NULL;
+ render->vertex_buf = ~0;
}
else {
+ /* vertex data in separate buffer */
render->inline_verts = FALSE;
- render->vertex_data = vertices;
- ASSERT_ALIGN16(render->vertex_data);
+ ASSERT(cvbr->vertex_buf >= 0);
+ render->vertex_buf = cvbr->vertex_buf;
}
-
render->xmin = xmin;
render->ymin = ymin;
render->xmax = xmax;
render->ymax = ymax;
}
-#if 01
- /* XXX this is temporary */
+#if 0
+ /* helpful for debug */
cell_flush_int(&cell->pipe, PIPE_FLUSH_WAIT);
#endif
}
static void
-cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices,
- unsigned vertex_size, unsigned vertices_used)
-{
- struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
-
- /*printf("Free verts %u * %u\n", vertex_size, vertices_used);*/
- align_free(vertices);
-
- assert(vertices == cvbr->vertex_buffer);
- cvbr->vertex_buffer = NULL;
-}
-
-
-static void
cell_vbuf_destroy(struct vbuf_render *vbr)
{
struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr);
@@ -244,8 +268,15 @@ cell_init_vbuf(struct cell_context *cell)
cell->vbuf_render = CALLOC_STRUCT(cell_vbuf_render);
- cell->vbuf_render->base.max_indices = CELL_MAX_VBUF_INDEXES;
- cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_MAX_VBUF_SIZE;
+ /* The max number of indexes is what can fit into a batch buffer,
+ * minus the render and release-verts commands.
+ */
+ cell->vbuf_render->base.max_indices
+ = (CELL_BUFFER_SIZE
+ - sizeof(struct cell_command_render)
+ - sizeof(struct cell_command_release_verts))
+ / sizeof(ushort);
+ cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_BUFFER_SIZE;
cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info;
cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices;
@@ -255,6 +286,9 @@ cell_init_vbuf(struct cell_context *cell)
cell->vbuf_render->base.destroy = cell_vbuf_destroy;
cell->vbuf_render->cell = cell;
+#if 1
+ cell->vbuf_render->vertex_buf = ~0;
+#endif
cell->vbuf = draw_vbuf_stage(cell->draw, &cell->vbuf_render->base);
}
diff --git a/src/mesa/pipe/cell/ppu/cell_vertex_shader.c b/src/mesa/pipe/cell/ppu/cell_vertex_shader.c
new file mode 100644
index 0000000000..80dd500b34
--- /dev/null
+++ b/src/mesa/pipe/cell/ppu/cell_vertex_shader.c
@@ -0,0 +1,120 @@
+/*
+ * (C) Copyright IBM Corporation 2008
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file cell_vertex_shader.c
+ * Vertex shader interface routines for Cell.
+ *
+ * \author Ian Romanick <idr@us.ibm.com>
+ */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_context.h"
+#include "pipe/p_winsys.h"
+
+#include "cell_context.h"
+#include "cell_draw_arrays.h"
+#include "cell_spu.h"
+#include "cell_batch.h"
+
+#include "pipe/cell/common.h"
+#include "pipe/draw/draw_context.h"
+#include "pipe/draw/draw_private.h"
+
+/**
+ * Run the vertex shader on all vertices in the vertex queue.
+ * Called by the draw module when the vertex cache needs to be flushed.
+ */
+void
+cell_vertex_shader_queue_flush(struct draw_context *draw)
+{
+ struct cell_context *const cell =
+ (struct cell_context *) draw->driver_private;
+ struct cell_command_vs *const vs = &cell_global.command[0].vs; /* command slot 0; only spe_contexts[0] is signalled below */
+ uint64_t *batch;
+ struct cell_array_info *array_info;
+ unsigned i, j;
+
+ assert(draw->vs.queue_nr != 0);
+
+ /* XXX: do this on statechange:
+ */
+ draw_update_vertex_fetch(draw);
+
+ for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { /* one ARRAY_INFO batch command per vertex attribute */
+ batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info)); /* 64-bit opcode slot followed by the payload */
+
+ batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO;
+
+ array_info = (struct cell_array_info *) &batch[1];
+ assert(draw->vertex_fetch.src_ptr[i] != NULL);
+ array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i];
+ array_info->attr = i;
+ array_info->pitch = draw->vertex_fetch.pitch[i];
+ array_info->format = draw->vertex_element[i].src_format;
+ }
+
+ batch = cell_batch_alloc(cell, sizeof(batch[0])
+ + sizeof(struct pipe_viewport_state));
+ batch[0] = CELL_CMD_STATE_VIEWPORT;
+ (void) memcpy(&batch[1], &draw->viewport,
+ sizeof(struct pipe_viewport_state));
+
+ cell_batch_flush(cell); /* submit the state commands before the execute command is filled in */
+
+ vs->opcode = CELL_CMD_VS_EXECUTE;
+ vs->shader.num_outputs = draw->num_vs_outputs;
+ vs->shader.declarations = (uintptr_t) draw->machine.Declarations;
+ vs->shader.num_declarations = draw->machine.NumDeclarations;
+ vs->shader.instructions = (uintptr_t) draw->machine.Instructions;
+ vs->shader.num_instructions = draw->machine.NumInstructions;
+ vs->shader.uniforms = (uintptr_t) draw->user.constants;
+ vs->shader.immediates = (uintptr_t) draw->machine.Imms;
+ vs->shader.num_immediates = draw->machine.ImmLimit / 4; /* NOTE(review): presumably ImmLimit counts floats and immediates are vec4s - confirm */
+ vs->nr_attrs = draw->vertex_fetch.nr_attrs;
+
+ (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane));
+ vs->nr_planes = draw->nr_planes;
+
+ for (i = 0; i < draw->vs.queue_nr; i += SPU_VERTS_PER_BATCH) { /* hand the queue over in chunks of SPU_VERTS_PER_BATCH */
+ const unsigned n = MIN2(SPU_VERTS_PER_BATCH, draw->vs.queue_nr - i);
+
+ for (j = 0; j < n; j++) {
+ vs->elts[j] = draw->vs.queue[i + j].elt;
+ vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].dest;
+ }
+
+ for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) { /* pad a short final chunk with copies of entry 0 */
+ vs->elts[j] = vs->elts[0];
+ vs->vOut[j] = vs->vOut[0];
+ }
+
+ vs->num_elts = n;
+ send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE);
+
+ cell_flush_int(& cell->pipe, PIPE_FLUSH_WAIT); /* flush with WAIT before vs is overwritten by the next chunk */
+ }
+
+ draw->vs.queue_nr = 0; /* queue is now empty */
+}
diff --git a/src/mesa/pipe/cell/spu/Makefile b/src/mesa/pipe/cell/spu/Makefile
index 417ae1b072..f202971d73 100644
--- a/src/mesa/pipe/cell/spu/Makefile
+++ b/src/mesa/pipe/cell/spu/Makefile
@@ -17,8 +17,15 @@ PROG_SPU_EMBED_O = $(PROG)_spu-embed.o
SOURCES = \
spu_main.c \
+ spu_blend.c \
+ spu_render.c \
+ spu_texture.c \
spu_tile.c \
- spu_tri.c
+ spu_tri.c \
+ spu_exec.c \
+ spu_util.c \
+ spu_vertex_fetch.c \
+ spu_vertex_shader.c
SPU_OBJECTS = $(SOURCES:.c=.o) \
diff --git a/src/mesa/pipe/cell/spu/spu_blend.c b/src/mesa/pipe/cell/spu/spu_blend.c
new file mode 100644
index 0000000000..23ec0eeb45
--- /dev/null
+++ b/src/mesa/pipe/cell/spu/spu_blend.c
@@ -0,0 +1,62 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "spu_main.h"
+#include "spu_blend.h"
+#include "spu_colorpack.h"
+
+
+void
+blend_quad(uint itx, uint ity, vector float colors[4])
+{
+ /* Simple SRC_ALPHA, ONE_MINUS_SRC_ALPHA blending:
+ * result = src * src.a + dst * (1 - src.a)
+ * colors[] is ordered (x,y), (x+1,y), (x,y+1), (x+1,y+1), so bit 0 of
+ * the quad index is the x offset and bit 1 is the y offset.
+ */
+ const vector float one = spu_splats(1.0f);
+ uint q;
+
+ for (q = 0; q < 4; q++) {
+ const uint x = itx + (q & 1);
+ const uint y = ity + (q >> 1);
+ /* current framebuffer color from the cached color tile */
+ const vector float dst = spu_unpack_color(spu.ctile.ui[y][x]);
+ /* fragment alpha (element 3) replicated to all four channels */
+ const vector float src_alpha = spu_splats(spu_extract(colors[q], 3));
+ const vector float dst_weight = spu_sub(one, src_alpha);
+
+ colors[q] = spu_add(spu_mul(colors[q], src_alpha),
+ spu_mul(dst, dst_weight));
+ }
+}
+
diff --git a/src/mesa/pipe/cell/spu/spu_blend.h b/src/mesa/pipe/cell/spu/spu_blend.h
new file mode 100644
index 0000000000..2b594b578b
--- /dev/null
+++ b/src/mesa/pipe/cell/spu/spu_blend.h
@@ -0,0 +1,37 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef SPU_BLEND_H
+#define SPU_BLEND_H
+
+
+extern void
+blend_quad(uint itx, uint ity, vector float colors[4]);
+
+
+#endif /* SPU_BLEND_H */
diff --git a/src/mesa/pipe/cell/spu/spu_colorpack.h b/src/mesa/pipe/cell/spu/spu_colorpack.h
new file mode 100644
index 0000000000..e9fee8a3a6
--- /dev/null
+++ b/src/mesa/pipe/cell/spu/spu_colorpack.h
@@ -0,0 +1,110 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+
+#ifndef SPU_COLORPACK_H
+#define SPU_COLORPACK_H
+
+
+#include <spu_intrinsics.h>
+
+
+static INLINE unsigned int
+spu_pack_R8G8B8A8(vector float rgba)
+{
+ /* Byte 0 of each converted word, gathered in R, G, B, A order into
+ * the first word of the result.
+ */
+ const vector unsigned char pick_rgba = {
+ 0, 4, 8, 12, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 };
+ /* float -> unsigned int with a scale of 2**32 */
+ const vector unsigned int fixed = spu_convtu(rgba, 32);
+
+ return spu_extract(spu_shuffle(fixed, fixed, pick_rgba), 0);
+}
+
+
+static INLINE unsigned int
+spu_pack_A8R8G8B8(vector float rgba)
+{
+ /* Byte 0 of each converted word, gathered in A, R, G, B order into
+ * the first word of the result.
+ */
+ const vector unsigned char pick_argb = {
+ 12, 0, 4, 8, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 };
+ /* float -> unsigned int with a scale of 2**32 */
+ const vector unsigned int fixed = spu_convtu(rgba, 32);
+
+ return spu_extract(spu_shuffle(fixed, fixed, pick_argb), 0);
+}
+
+
+static INLINE unsigned int
+spu_pack_B8G8R8A8(vector float rgba)
+{
+ /* Byte 0 of each converted word, gathered in B, G, R, A order into
+ * the first word of the result.
+ */
+ const vector unsigned char pick_bgra = {
+ 8, 4, 0, 12, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 };
+ /* float -> unsigned int with a scale of 2**32 */
+ const vector unsigned int fixed = spu_convtu(rgba, 32);
+
+ return spu_extract(spu_shuffle(fixed, fixed, pick_bgra), 0);
+}
+
+
+static INLINE unsigned int
+spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle)
+{
+ /* Convert with a scale of 2**32, then let the caller-supplied pattern
+ * choose which bytes land in the first word of the result.
+ */
+ const vector unsigned int fixed = spu_convtu(rgba, 32);
+
+ return spu_extract(spu_shuffle(fixed, fixed, shuffle), 0);
+}
+
+
+static INLINE vector float
+spu_unpack_color(uint color)
+{
+ /* After the splat every word holds the packed color, so bytes 0, 5, 10
+ * and 15 are its bytes 0, 1, 2 and 3.  Each one is replicated across a
+ * whole word before the conversion (scale 2**-32).
+ */
+ const vector unsigned char spread = {
+ 0, 0, 0, 0,
+ 5, 5, 5, 5,
+ 10, 10, 10, 10,
+ 15, 15, 15, 15 };
+ const vector unsigned int packed = spu_splats(color);
+
+ return spu_convtf(spu_shuffle(packed, packed, spread), 32);
+}
+
+
+static INLINE vector float
+spu_unpack_A8R8G8B8(uint color)
+{
+ /* Same scheme as spu_unpack_color(), but the packed bytes are taken in
+ * the order 1, 2, 3, 0 so that the first byte (alpha) ends up in the
+ * last element of the result.
+ */
+ const vector unsigned char spread = {
+ 5, 5, 5, 5,
+ 10, 10, 10, 10,
+ 15, 15, 15, 15,
+ 0, 0, 0, 0 };
+ const vector unsigned int packed = spu_splats(color);
+
+ return spu_convtf(spu_shuffle(packed, packed, spread), 32);
+}
+
+
+#endif /* SPU_COLORPACK_H */
diff --git a/src/mesa/pipe/cell/spu/spu_exec.c b/src/mesa/pipe/cell/spu/spu_exec.c
new file mode 100644
index 0000000000..e51008b9b3
--- /dev/null
+++ b/src/mesa/pipe/cell/spu/spu_exec.c
@@ -0,0 +1,1948 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * TGSI interpreter/executor.
+ *
+ * Flow control information:
+ *
+ * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
+ * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
+ * care since a condition may be true for some quad components but false
+ * for other components.
+ *
+ * We basically execute all statements (even if they're in the part of
+ * an IF/ELSE clause that's "not taken") and use a special mask to
+ * control writing to destination registers. This is the ExecMask.
+ * See store_dest().
+ *
+ * The ExecMask is computed from four other masks (see UPDATE_EXEC_MASK):
+ * CondMask, LoopMask and ContMask, which are controlled by the flow control
+ * instructions IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT, plus FuncMask.
+ *
+ *
+ * Authors:
+ * Michal Krol
+ * Brian Paul
+ */
+
+#include <libmisc.h>
+#include <spu_mfcio.h>
+#include <transpose_matrix4x4.h>
+#include <simdmath/ceilf4.h>
+#include <simdmath/cosf4.h>
+#include <simdmath/divf4.h>
+#include <simdmath/floorf4.h>
+#include <simdmath/log2f4.h>
+#include <simdmath/powf4.h>
+#include <simdmath/sinf4.h>
+#include <simdmath/sqrtf4.h>
+#include <simdmath/truncf4.h>
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/tgsi/util/tgsi_parse.h"
+#include "pipe/tgsi/util/tgsi_util.h"
+#include "spu_exec.h"
+#include "spu_main.h"
+#include "spu_vertex_shader.h"
+
+#define TILE_TOP_LEFT 0
+#define TILE_TOP_RIGHT 1
+#define TILE_BOTTOM_LEFT 2
+#define TILE_BOTTOM_RIGHT 3
+
+/*
+ * Shorthand locations of various utility registers (_I = Index, _C = Channel)
+ */
+#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I
+#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C
+#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I
+#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C
+#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I
+#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C
+#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I
+#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C
+#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I
+#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C
+#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I
+#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C
+#define TEMP_128_I TGSI_EXEC_TEMP_128_I
+#define TEMP_128_C TGSI_EXEC_TEMP_128_C
+#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I
+#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C
+#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I
+#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C
+#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I
+#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C
+#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I
+#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C
+#define TEMP_R0 TGSI_EXEC_TEMP_R0
+
+#define FOR_EACH_CHANNEL(CHAN)\
+ for (CHAN = 0; CHAN < 4; CHAN++)
+
+#define IS_CHANNEL_ENABLED(INST, CHAN)\
+ ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+
+#define IS_CHANNEL_ENABLED2(INST, CHAN)\
+ ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN)))
+
+#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\
+ FOR_EACH_CHANNEL( CHAN )\
+ if (IS_CHANNEL_ENABLED( INST, CHAN ))
+
+#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\
+ FOR_EACH_CHANNEL( CHAN )\
+ if (IS_CHANNEL_ENABLED2( INST, CHAN ))
+
+
+/** The execution mask is the AND of the conditional, loop, continue and function masks */
+#define UPDATE_EXEC_MASK(MACH) \
+ MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask
+
+
+#define CHAN_X 0
+#define CHAN_Y 1
+#define CHAN_Z 2
+#define CHAN_W 3
+
+
+
+/**
+ * Initialize the shader-independent parts of the machine state: the
+ * sampler and processor bindings, the address register file pointer and
+ * the constant utility temporaries.
+ * After this, we can call spu_exec_machine_run() many times.
+ *
+ * \param mach  machine to initialize
+ * \param numSamplers  currently unused
+ * \param samplers  stored in mach->Samplers
+ * \param processor  stored in mach->Processor
+ */
+void
+spu_exec_machine_init(struct spu_exec_machine *mach,
+ uint numSamplers,
+ struct spu_sampler *samplers,
+ unsigned processor)
+{
+ (void) numSamplers;
+
+ mach->Samplers = samplers;
+ mach->Processor = processor;
+ /* the address registers live directly after the regular temporaries */
+ mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS];
+
+ /* Setup constants.
+ * The bit patterns are splatted directly rather than derived with
+ * immediate-form instructions: xori takes a word-sized immediate (0xff
+ * yields 0x000000ff per word, not all ones), shli with -1 is a shift
+ * count of 63 (result 0), and xor-ing a variable with itself reads it
+ * before it has been initialized.
+ */
+ mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = (qword) spu_splats(0x00000000u);
+ mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = (qword) spu_splats(0xffffffffu);
+ mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = (qword) spu_splats(0x7fffffffu);
+ mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = (qword) spu_splats(0x80000000u);
+
+ mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f);
+ mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f);
+ mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f);
+ mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f);
+}
+
+
+static INLINE qword
+micro_abs(qword src)
+{
+ /* push the sign bit out of the top, then shift a zero back in */
+ const qword sign_dropped = si_shli(src, 1);
+
+ return si_rotmi(sign_dropped, -1);
+}
+
+static INLINE qword
+micro_ceil(qword src)
+{
+ /* per-element ceiling, delegated to simdmath */
+ const vec_float4 values = (vec_float4) src;
+
+ return (qword) _ceilf4(values);
+}
+
+static INLINE qword
+micro_cos(qword src)
+{
+ /* per-element cosine, delegated to simdmath */
+ const vec_float4 angles = (vec_float4) src;
+
+ return (qword) _cosf4(angles);
+}
+
+/**
+ * Build a shufb control pattern that copies 32-bit word WORD of the source
+ * quadword into all four word slots.  shufb control bytes select individual
+ * bytes, so word N is addressed by the values N*4 .. N*4+3 (not N .. N+3).
+ */
+#define SPLAT_WORD_SHUF(WORD) { \
+ (WORD) * 4 + 0, (WORD) * 4 + 1, (WORD) * 4 + 2, (WORD) * 4 + 3, \
+ (WORD) * 4 + 0, (WORD) * 4 + 1, (WORD) * 4 + 2, (WORD) * 4 + 3, \
+ (WORD) * 4 + 0, (WORD) * 4 + 1, (WORD) * 4 + 2, (WORD) * 4 + 3, \
+ (WORD) * 4 + 0, (WORD) * 4 + 1, (WORD) * 4 + 2, (WORD) * 4 + 3, \
+}
+
+/** Replicates the TILE_BOTTOM_RIGHT element of a quad into every slot. */
+static const qword br_shuf = SPLAT_WORD_SHUF(TILE_BOTTOM_RIGHT);
+
+/** Replicates the TILE_BOTTOM_LEFT element of a quad into every slot. */
+static const qword bl_shuf = SPLAT_WORD_SHUF(TILE_BOTTOM_LEFT);
+
+/** Replicates the TILE_TOP_LEFT element of a quad into every slot. */
+static const qword tl_shuf = SPLAT_WORD_SHUF(TILE_TOP_LEFT);
+
+static qword
+micro_ddx(qword src)
+{
+ /* horizontal difference: br_shuf selection minus bl_shuf selection */
+ const qword right = si_shufb(src, src, br_shuf);
+ const qword left = si_shufb(src, src, bl_shuf);
+
+ return si_fs(right, left);
+}
+
+static qword
+micro_ddy(qword src)
+{
+ /* vertical difference: tl_shuf selection minus bl_shuf selection */
+ const qword upper = si_shufb(src, src, tl_shuf);
+ const qword lower = si_shufb(src, src, bl_shuf);
+
+ return si_fs(upper, lower);
+}
+
+static INLINE qword
+micro_div(qword src0, qword src1)
+{
+ /* per-element src0 / src1, delegated to simdmath */
+ const vec_float4 numer = (vec_float4) src0;
+ const vec_float4 denom = (vec_float4) src1;
+
+ return (qword) _divf4(numer, denom);
+}
+
+static qword
+micro_flr(qword src)
+{
+ /* per-element floor, delegated to simdmath */
+ const vec_float4 values = (vec_float4) src;
+
+ return (qword) _floorf4(values);
+}
+
+static qword
+micro_frc(qword src)
+{
+ /* fractional part: x - floor(x) */
+ const qword whole = (qword) _floorf4((vec_float4) src);
+
+ return si_fs(src, whole);
+}
+
+static INLINE qword
+micro_ge(qword src0, qword src1)
+{
+ /* there is no single >= compare used here: combine == and > masks */
+ const qword equal = si_fceq(src0, src1);
+ const qword greater = si_fcgt(src0, src1);
+
+ return si_or(equal, greater);
+}
+
+static qword
+micro_lg2(qword src)
+{
+ /* per-element base-2 logarithm, delegated to simdmath */
+ const vec_float4 values = (vec_float4) src;
+
+ return (qword) _log2f4(values);
+}
+
+static INLINE qword
+micro_lt(qword src0, qword src1)
+{
+ /* src0 < src1 is evaluated as NOT (src0 == src1 OR src0 > src1).
+ * si_nor() complements every bit of the combined compare mask, so each
+ * element comes out as all ones or all zeros.  The earlier
+ * si_xori(tmp, 0xff) only inverted the low 8 bits of each word because
+ * xori applies a word-sized immediate.
+ */
+ return si_nor(si_fceq(src0, src1), si_fcgt(src0, src1));
+}
+
+static INLINE qword
+micro_max(qword src0, qword src1)
+{
+ /* where src0 > src1 take src0, otherwise keep src1 */
+ const qword src0_greater = si_fcgt(src0, src1);
+
+ return si_selb(src1, src0, src0_greater);
+}
+
+static INLINE qword
+micro_min(qword src0, qword src1)
+{
+ /* where src0 > src1 take src1, otherwise keep src0 */
+ const qword src0_greater = si_fcgt(src0, src1);
+
+ return si_selb(src0, src1, src0_greater);
+}
+
+static qword
+micro_neg(qword src)
+{
+ /* negate by toggling bit 31 (the float sign bit) of every element */
+ const qword sign_bits = (qword) spu_splats(0x80000000);
+
+ return si_xor(src, sign_bits);
+}
+
+static qword
+micro_set_sign(qword src)
+{
+ /* force bit 31 (the float sign bit) of every element to 1 */
+ const qword sign_bits = (qword) spu_splats(0x80000000);
+
+ return si_or(src, sign_bits);
+}
+
+static qword
+micro_pow(qword src0, qword src1)
+{
+ /* per-element src0 raised to src1, delegated to simdmath */
+ const vec_float4 base = (vec_float4) src0;
+ const vec_float4 exponent = (vec_float4) src1;
+
+ return (qword) _powf4(base, exponent);
+}
+
+static qword
+micro_rnd(qword src)
+{
+ /* Round as floor(x + 0.5).
+ * May be able to use _roundf4. There may be some difference, though.
+ */
+ const qword biased = si_fa(src, (qword) spu_splats(0.5f));
+
+ return (qword) _floorf4((vec_float4) biased);
+}
+
+static INLINE qword
+micro_ishr(qword src0, qword src1)
+{
+ /* rotma expects the negated shift count, so compute 0 - src1 first */
+ const qword negated_count = si_sfi(src1, 0);
+
+ return si_rotma(src0, negated_count);
+}
+
+static qword
+micro_trunc(qword src)
+{
+ /* per-element truncation toward zero, delegated to simdmath */
+ const vec_float4 values = (vec_float4) src;
+
+ return (qword) _truncf4(values);
+}
+
+static qword
+micro_sin(qword src)
+{
+ /* per-element sine, delegated to simdmath */
+ const vec_float4 angles = (vec_float4) src;
+
+ return (qword) _sinf4(angles);
+}
+
+static INLINE qword
+micro_sqrt(qword src)
+{
+ /* per-element square root, delegated to simdmath */
+ const vec_float4 values = (vec_float4) src;
+
+ return (qword) _sqrtf4(values);
+}
+
+static void /* Reads one swizzled channel of a register, for all four elements, into *chan. */
+fetch_src_file_channel(
+ const struct spu_exec_machine *mach,
+ const uint file,
+ const uint swizzle,
+ const union spu_exec_channel *index,
+ union spu_exec_channel *chan )
+{
+ switch( swizzle ) { /* X/Y/Z/W read a register file; ZERO/ONE read the constant temps */
+ case TGSI_EXTSWIZZLE_X:
+ case TGSI_EXTSWIZZLE_Y:
+ case TGSI_EXTSWIZZLE_Z:
+ case TGSI_EXTSWIZZLE_W:
+ switch( file ) {
+ case TGSI_FILE_CONSTANT: { /* constants are fetched by DMA, one element at a time */
+ unsigned char buffer[32] ALIGN16_ATTRIB; /* two 16-byte lines: enough for a vec4 that straddles an alignment boundary */
+ unsigned i;
+
+ for (i = 0; i < 4; i++) {
+ const float *ptr = mach->Consts[index->i[i]];
+ const uint64_t addr = (uint64_t)(uintptr_t) ptr;
+ const unsigned size = ((addr & 0x0f) == 0) ? 16 : 32; /* unaligned source: transfer both 16-byte lines it touches */
+
+ mfc_get(buffer, addr & ~0x0f, size, TAG_VERTEX_BUFFER, 0, 0); /* source address rounded down to 16 bytes */
+ wait_on_mask(1 << TAG_VERTEX_BUFFER); /* presumably blocks until the transfer tagged above completes - confirm */
+
+ (void) memcpy(& chan->f[i], &buffer[(addr & 0x0f)
+ + (sizeof(float) * swizzle)], sizeof(float));
+ }
+ break;
+ }
+
+ case TGSI_FILE_INPUT: /* element n reads register index->i[n], so elements may address different registers */
+ chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0];
+ chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1];
+ chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2];
+ chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3];
+ break;
+
+ case TGSI_FILE_TEMPORARY:
+ chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0];
+ chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1];
+ chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2];
+ chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3];
+ break;
+
+ case TGSI_FILE_IMMEDIATE: /* the only file whose index is range-checked here (upper bound only) */
+ assert( index->i[0] < (int) mach->ImmLimit );
+ assert( index->i[1] < (int) mach->ImmLimit );
+ assert( index->i[2] < (int) mach->ImmLimit );
+ assert( index->i[3] < (int) mach->ImmLimit );
+
+ chan->f[0] = mach->Imms[index->i[0]][swizzle];
+ chan->f[1] = mach->Imms[index->i[1]][swizzle];
+ chan->f[2] = mach->Imms[index->i[2]][swizzle];
+ chan->f[3] = mach->Imms[index->i[3]][swizzle];
+ break;
+
+ case TGSI_FILE_ADDRESS:
+ chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0];
+ chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1];
+ chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2];
+ chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3];
+ break;
+
+ case TGSI_FILE_OUTPUT:
+ /* vertex/fragment output vars can be read too */
+ chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0];
+ chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1];
+ chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2];
+ chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3];
+ break;
+
+ default:
+ assert( 0 ); /* unsupported register file */
+ }
+ break;
+
+ case TGSI_EXTSWIZZLE_ZERO: /* file and index are ignored for ZERO and ONE */
+ *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C];
+ break;
+
+ case TGSI_EXTSWIZZLE_ONE:
+ *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C];
+ break;
+
+ default:
+ assert( 0 ); /* unknown extended swizzle */
+ }
+}
+
+/**
+ * Fetch channel \p chan_index of a full source register into \p chan.
+ *
+ * The register index is first resolved (base index, optional indirect
+ * offset, optional second dimension with its own optional indirect
+ * offset), then the swizzled component is fetched and the sign mode and
+ * complement modifiers are applied to the result.
+ *
+ * \param mach        interpreter state
+ * \param chan        receives the fetched and modified channel
+ * \param reg         source register description
+ * \param chan_index  which destination channel is being computed
+ */
+static void
+fetch_source(
+   const struct spu_exec_machine *mach,
+   union spu_exec_channel *chan,
+   const struct tgsi_full_src_register *reg,
+   const uint chan_index )
+{
+   union spu_exec_channel index;
+   uint swizzle;
+   unsigned lane;
+
+   /* Every lane starts from the same base register index. */
+   for (lane = 0; lane < 4; lane++) {
+      index.i[lane] = reg->SrcRegister.Index;
+   }
+
+   if (reg->SrcRegister.Indirect) {
+      union spu_exec_channel rel_reg;
+      union spu_exec_channel rel_offset;
+
+      for (lane = 0; lane < 4; lane++) {
+         rel_reg.i[lane] = reg->SrcRegisterInd.Index;
+      }
+
+      swizzle = tgsi_util_get_src_register_swizzle(&reg->SrcRegisterInd,
+                                                   CHAN_X);
+      fetch_src_file_channel(mach, reg->SrcRegisterInd.File, swizzle,
+                             &rel_reg, &rel_offset);
+
+      /* per-lane offset is added to the base index */
+      index.q = si_a(index.q, rel_offset.q);
+   }
+
+   if( reg->SrcRegister.Dimension ) {
+      const uint file = reg->SrcRegister.File;
+
+      if (file == TGSI_FILE_INPUT) {
+         index.q = si_mpyi(index.q, 17);
+      }
+      else if (file == TGSI_FILE_CONSTANT) {
+         index.q = si_shli(index.q, 12);
+      }
+      else {
+         assert( 0 );
+      }
+
+      for (lane = 0; lane < 4; lane++) {
+         index.i[lane] += reg->SrcRegisterDim.Index;
+      }
+
+      if (reg->SrcRegisterDim.Indirect) {
+         union spu_exec_channel rel_reg;
+         union spu_exec_channel rel_offset;
+
+         for (lane = 0; lane < 4; lane++) {
+            rel_reg.i[lane] = reg->SrcRegisterDimInd.Index;
+         }
+
+         swizzle = tgsi_util_get_src_register_swizzle(
+            &reg->SrcRegisterDimInd, CHAN_X );
+         fetch_src_file_channel(mach, reg->SrcRegisterDimInd.File, swizzle,
+                                &rel_reg, &rel_offset);
+
+         index.q = si_a(index.q, rel_offset.q);
+      }
+   }
+
+   /* Fetch the actual operand component. */
+   swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
+   fetch_src_file_channel(mach, reg->SrcRegister.File, swizzle,
+                          &index, chan);
+
+   /* Apply the per-channel sign modifier. */
+   switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) {
+   case TGSI_UTIL_SIGN_KEEP:
+      break;
+
+   case TGSI_UTIL_SIGN_TOGGLE:
+      chan->q = micro_neg(chan->q);
+      break;
+
+   case TGSI_UTIL_SIGN_SET:
+      chan->q = micro_set_sign(chan->q);
+      break;
+
+   case TGSI_UTIL_SIGN_CLEAR:
+      chan->q = micro_abs(chan->q);
+      break;
+   }
+
+   /* Complement: the value of the constant-one temporary minus x. */
+   if (reg->SrcRegisterExtMod.Complement) {
+      chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q);
+   }
+}
+
+/**
+ * Write one channel of an instruction result to its destination register.
+ *
+ * The value is optionally saturated according to the instruction's
+ * Saturate mode and then written lane by lane; only lanes whose bit is
+ * set in mach->ExecMask are modified.  (Previously the ZERO_ONE saturate
+ * path overwrote all four lanes regardless of ExecMask.)
+ *
+ * \param mach        interpreter state
+ * \param chan        value to store
+ * \param reg         destination register description
+ * \param inst        instruction being executed (supplies Saturate mode)
+ * \param chan_index  destination component to write
+ */
+static void
+store_dest(
+   struct spu_exec_machine *mach,
+   const union spu_exec_channel *chan,
+   const struct tgsi_full_dst_register *reg,
+   const struct tgsi_full_instruction *inst,
+   uint chan_index )
+{
+   union spu_exec_channel *dst;
+   union spu_exec_channel value;
+   unsigned lane;
+
+   switch( reg->DstRegister.File ) {
+   case TGSI_FILE_NULL:
+      return;
+
+   case TGSI_FILE_OUTPUT:
+      /* Outputs are offset by the running output base kept in a temp. */
+      dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
+                           + reg->DstRegister.Index].xyzw[chan_index];
+      break;
+
+   case TGSI_FILE_TEMPORARY:
+      dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
+      break;
+
+   case TGSI_FILE_ADDRESS:
+      dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
+      break;
+
+   default:
+      assert( 0 );
+      return;
+   }
+
+   /* Compute the value to store into a local so that the masked write
+    * below is shared by all saturate modes.
+    */
+   switch (inst->Instruction.Saturate) {
+   case TGSI_SAT_NONE:
+      value = *chan;
+      break;
+
+   case TGSI_SAT_ZERO_ONE:
+      value.q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
+      value.q = micro_min(value.q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q);
+      break;
+
+   case TGSI_SAT_MINUS_PLUS_ONE:
+      /* not implemented */
+      assert( 0 );
+      return;
+
+   default:
+      assert( 0 );
+      return;
+   }
+
+   for (lane = 0; lane < 4; lane++) {
+      if (mach->ExecMask & (1 << lane)) {
+         dst->i[lane] = value.i[lane];
+      }
+   }
+}
+
+/*
+ * Shorthands for fetch_source() and store_dest().  Both expand to code
+ * that refers to variables named `mach` and `inst`, so those must be in
+ * scope wherever the macros are used.  INDEX selects the entry of
+ * inst->FullSrcRegisters / inst->FullDstRegisters, CHAN the component.
+ */
+#define FETCH(VAL,INDEX,CHAN)\
+ fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN)
+
+#define STORE(VAL,INDEX,CHAN)\
+ store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN )
+
+
+/**
+ * Execute ARB-style KIL which is predicated by a src register.
+ * A pixel is marked killed if any of the four (swizzled) source values
+ * fetched for it is less than zero.  The resulting bits are OR'ed into
+ * the kill-mask temporary.
+ */
+static void
+exec_kilp(struct spu_exec_machine *mach,
+          const struct tgsi_full_instruction *inst)
+{
+   /* Swizzles that need no (further) testing.  ZERO and ONE can never be
+    * negative, so they are marked as done up front.
+    */
+   uint tested = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
+   uint killed = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
+   uint chan_index = 0;
+
+   while (chan_index < 4) {
+      /* unswizzle channel */
+      const uint swizzle = tgsi_util_get_full_src_register_extswizzle (
+         &inst->FullSrcRegisters[0],
+         chan_index);
+      const uint swizzle_bit = 1 << swizzle;
+
+      /* only look at each source component once */
+      if (!(tested & swizzle_bit)) {
+         union spu_exec_channel value[1];
+         uint lane;
+
+         tested |= swizzle_bit;
+
+         FETCH(&value[0], 0, chan_index);
+         for (lane = 0; lane < 4; lane++) {
+            if (value[0].f[lane] < 0.0f) {
+               killed |= 1 << lane;
+            }
+         }
+      }
+
+      chan_index++;
+   }
+
+   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= killed;
+}
+
+
+/*
+ * Fetch a texel using STR texture coordinates.
+ *
+ * s, t and p hold the coordinates of the four pixels of the quad.  t and
+ * p may be NULL (exec_tex() does this for 1D textures); in that case a
+ * NULL coordinate pointer is handed to the sampler instead of
+ * dereferencing the NULL channel pointer.
+ *
+ * The sampler's output is transposed with _transpose_matrix4x4() before
+ * being split into the r, g, b and a channels.
+ */
+static void
+fetch_texel( struct spu_sampler *sampler,
+             const union spu_exec_channel *s,
+             const union spu_exec_channel *t,
+             const union spu_exec_channel *p,
+             float lodbias, /* XXX should be float[4] */
+             union spu_exec_channel *r,
+             union spu_exec_channel *g,
+             union spu_exec_channel *b,
+             union spu_exec_channel *a )
+{
+   const float *s_coords = s->f;
+   const float *t_coords = t ? t->f : NULL;
+   const float *p_coords = p ? p->f : NULL;
+   qword rgba[4];
+   qword out[4];
+
+   sampler->get_samples(sampler, s_coords, t_coords, p_coords, lodbias,
+                        (float *) rgba);
+
+   _transpose_matrix4x4(out, rgba);
+   r->q = out[0];
+   g->q = out[1];
+   b->q = out[2];
+   a->q = out[3];
+}
+
+
+/**
+ * Execute a texture lookup instruction.
+ *
+ * src[0] supplies the texture coordinate and src[1].Index the sampler
+ * unit.  If src[0]'s ExtDivide is W the coordinates are divided by
+ * src[0].w first.  When \p biasLod is true, lane 0 of src[0].w is passed
+ * to the sampler as the LOD bias; otherwise the bias is 0.
+ *
+ * \param mach     interpreter state
+ * \param inst     the TEX/TXB/TXL instruction
+ * \param biasLod  whether src[0].w carries a LOD bias
+ */
+static void
+exec_tex(struct spu_exec_machine *mach,
+         const struct tgsi_full_instruction *inst,
+         boolean biasLod)
+{
+   const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+   union spu_exec_channel r[8];
+   uint chan_index;
+   float lodBias;
+
+   switch (inst->InstructionExtTexture.Texture) {
+   case TGSI_TEXTURE_1D:
+
+      FETCH(&r[0], 0, CHAN_X);
+
+      switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
+      case TGSI_EXTSWIZZLE_W:
+         FETCH(&r[1], 0, CHAN_W);
+         r[0].q = micro_div(r[0].q, r[1].q);
+         break;
+
+      case TGSI_EXTSWIZZLE_ONE:
+         break;
+
+      default:
+         assert (0);
+      }
+
+      if (biasLod) {
+         FETCH(&r[1], 0, CHAN_W);
+         /* W was fetched into r[1]; reading r[2] here used an
+          * uninitialized value.
+          */
+         lodBias = r[1].f[0];
+      }
+      else
+         lodBias = 0.0;
+
+      fetch_texel(&mach->Samplers[unit],
+                  &r[0], NULL, NULL, lodBias,  /* S, T, P, BIAS */
+                  &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
+      break;
+
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
+      /* All of these targets are handled identically here: fetch S, T
+       * and P, optionally project, then sample.
+       */
+      FETCH(&r[0], 0, CHAN_X);
+      FETCH(&r[1], 0, CHAN_Y);
+      FETCH(&r[2], 0, CHAN_Z);
+
+      switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) {
+      case TGSI_EXTSWIZZLE_W:
+         FETCH(&r[3], 0, CHAN_W);
+         r[0].q = micro_div(r[0].q, r[3].q);
+         r[1].q = micro_div(r[1].q, r[3].q);
+         r[2].q = micro_div(r[2].q, r[3].q);
+         break;
+
+      case TGSI_EXTSWIZZLE_ONE:
+         break;
+
+      default:
+         assert (0);
+      }
+
+      if (biasLod) {
+         FETCH(&r[3], 0, CHAN_W);
+         lodBias = r[3].f[0];
+      }
+      else
+         lodBias = 0.0;
+
+      fetch_texel(&mach->Samplers[unit],
+                  &r[0], &r[1], &r[2], lodBias, /* inputs */
+                  &r[0], &r[1], &r[2], &r[3]);  /* outputs */
+      break;
+
+   default:
+      assert (0);
+      /* nothing was sampled, so there is nothing meaningful to store */
+      return;
+   }
+
+   FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+      STORE( &r[chan_index], 0, chan_index );
+   }
+}
+
+
+
+/**
+ * Constant interpolation: every lane of input \p attrib, channel \p chan
+ * receives the a0 coefficient unchanged.
+ */
+static void
+constant_interpolation(
+   struct spu_exec_machine *mach,
+   unsigned attrib,
+   unsigned chan )
+{
+   union spu_exec_channel *dst = &mach->Inputs[attrib].xyzw[chan];
+   unsigned lane = 0;
+
+   while (lane < QUAD_SIZE) {
+      dst->f[lane] = mach->InterpCoefs[attrib].a0[chan];
+      lane++;
+   }
+}
+
+/**
+ * Linear interpolation of input \p attrib, channel \p chan.
+ *
+ * The plane equation a0 + dadx * x + dady * y is evaluated at the
+ * position taken from lane 0 of QuadPos; the other three lanes are
+ * obtained by adding dadx, dady, and both.
+ */
+static void
+linear_interpolation(
+   struct spu_exec_machine *mach,
+   unsigned attrib,
+   unsigned chan )
+{
+   union spu_exec_channel *dst = &mach->Inputs[attrib].xyzw[chan];
+   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
+   const float dady = mach->InterpCoefs[attrib].dady[chan];
+   const float x = mach->QuadPos.xyzw[0].f[0];
+   const float y = mach->QuadPos.xyzw[1].f[0];
+   const float base = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
+
+   dst->f[0] = base;
+   dst->f[1] = base + dadx;
+   dst->f[2] = base + dady;
+   dst->f[3] = base + dadx + dady;
+}
+
+/**
+ * Perspective interpolation of input \p attrib, channel \p chan.
+ *
+ * Same plane-equation evaluation as linear_interpolation(), but each
+ * lane's value is then divided by that lane's QuadPos W component.
+ */
+static void
+perspective_interpolation(
+   struct spu_exec_machine *mach,
+   unsigned attrib,
+   unsigned chan )
+{
+   union spu_exec_channel *dst = &mach->Inputs[attrib].xyzw[chan];
+   const float *w = mach->QuadPos.xyzw[3].f;
+   const float dadx = mach->InterpCoefs[attrib].dadx[chan];
+   const float dady = mach->InterpCoefs[attrib].dady[chan];
+   const float x = mach->QuadPos.xyzw[0].f[0];
+   const float y = mach->QuadPos.xyzw[1].f[0];
+   const float base = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
+
+   /* divide by W here */
+   dst->f[0] = base / w[0];
+   dst->f[1] = (base + dadx) / w[1];
+   dst->f[2] = (base + dady) / w[2];
+   dst->f[3] = (base + dadx + dady) / w[3];
+}
+
+
+/**
+ * Signature shared by constant_interpolation(), linear_interpolation()
+ * and perspective_interpolation(): compute channel \p chan of input
+ * attribute \p attrib and store it in mach->Inputs.
+ */
+typedef void (* interpolation_func)(
+ struct spu_exec_machine *mach,
+ unsigned attrib,
+ unsigned chan );
+
+/**
+ * Execute a declaration.
+ *
+ * Only input declarations of fragment programs do anything: for each
+ * attribute in the declared range and each channel selected by the usage
+ * mask, the input value is computed with the interpolation function that
+ * matches the declared interpolation mode.
+ *
+ * An unknown interpolation mode asserts; with asserts compiled out the
+ * declaration is skipped rather than calling through an uninitialized
+ * function pointer.
+ *
+ * \param mach  interpreter state
+ * \param decl  the declaration to execute
+ */
+static void
+exec_declaration(struct spu_exec_machine *mach,
+                 const struct tgsi_full_declaration *decl)
+{
+   unsigned first, last, mask;
+   unsigned attrib, chan;
+   interpolation_func interp;
+
+   if( mach->Processor != TGSI_PROCESSOR_FRAGMENT ||
+       decl->Declaration.File != TGSI_FILE_INPUT ) {
+      return;
+   }
+
+   assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE );
+
+   first = decl->u.DeclarationRange.First;
+   last = decl->u.DeclarationRange.Last;
+   mask = decl->Declaration.UsageMask;
+
+   switch( decl->Interpolation.Interpolate ) {
+   case TGSI_INTERPOLATE_CONSTANT:
+      interp = constant_interpolation;
+      break;
+
+   case TGSI_INTERPOLATE_LINEAR:
+      interp = linear_interpolation;
+      break;
+
+   case TGSI_INTERPOLATE_PERSPECTIVE:
+      interp = perspective_interpolation;
+      break;
+
+   default:
+      assert( 0 );
+      return;
+   }
+
+   /* Each (attrib, chan) pair is computed independently of the others,
+    * so a single channel-major loop covers both the full-mask and the
+    * partial-mask case.
+    */
+   for( chan = 0; chan < NUM_CHANNELS; chan++ ) {
+      if( mask == TGSI_WRITEMASK_XYZW || (mask & (1 << chan)) ) {
+         for( attrib = first; attrib <= last; attrib++ ) {
+            interp( mach, attrib, chan );
+         }
+      }
+   }
+}
+
+/**
+ * Execute a single TGSI instruction.
+ *
+ * \param mach  interpreter state
+ * \param inst  the instruction to execute
+ * \param pc    in/out program counter.  It is incremented before the
+ *              instruction is dispatched; CAL, RET and ENDLOOP may then
+ *              overwrite it, and END (or RET with an empty call stack)
+ *              sets it to -1 to stop the interpreter.
+ *
+ * Opcodes that are not implemented hit assert(0).
+ */
+static void
+exec_instruction(
+   struct spu_exec_machine *mach,
+   const struct tgsi_full_instruction *inst,
+   int *pc )
+{
+   uint chan_index;
+   union spu_exec_channel r[8];
+
+   (*pc)++;
+
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_ARL:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         r[0].q = si_cflts(r[0].q, 0);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_MOV:
+      /* TGSI_OPCODE_SWZ */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_LIT:
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+         FETCH( &r[0], 0, CHAN_X );
+         if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+            r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
+            STORE( &r[0], 0, CHAN_Y );
+         }
+
+         if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+            FETCH( &r[1], 0, CHAN_Y );
+            r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
+
+            FETCH( &r[2], 0, CHAN_W );
+            r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q);
+            r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q);
+            r[1].q = micro_pow(r[1].q, r[2].q);
+
+            /* r0 = (r0 > 0.0) ? r1 : 0.0
+             */
+            r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
+            r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q,
+                             r[0].q);
+            STORE( &r[0], 0, CHAN_Z );
+         }
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_RCP:
+      /* TGSI_OPCODE_RECIP */
+      FETCH( &r[0], 0, CHAN_X );
+      r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_RSQ:
+      /* TGSI_OPCODE_RECIPSQRT */
+      FETCH( &r[0], 0, CHAN_X );
+      r[0].q = micro_sqrt(r[0].q);
+      r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q);
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_EXP:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_LOG:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_MUL:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
+      {
+         FETCH(&r[0], 0, chan_index);
+         FETCH(&r[1], 1, chan_index);
+
+         r[0].q = si_fm(r[0].q, r[1].q);
+
+         STORE(&r[0], 0, chan_index);
+      }
+      break;
+
+   case TGSI_OPCODE_ADD:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         r[0].q = si_fa(r[0].q, r[1].q);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DP3:
+      /* TGSI_OPCODE_DOT3 */
+      FETCH( &r[0], 0, CHAN_X );
+      FETCH( &r[1], 1, CHAN_X );
+      r[0].q = si_fm(r[0].q, r[1].q);
+
+      FETCH( &r[1], 0, CHAN_Y );
+      FETCH( &r[2], 1, CHAN_Y );
+      r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
+
+
+      FETCH( &r[1], 0, CHAN_Z );
+      FETCH( &r[2], 1, CHAN_Z );
+      r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DP4:
+      /* TGSI_OPCODE_DOT4 */
+      FETCH(&r[0], 0, CHAN_X);
+      FETCH(&r[1], 1, CHAN_X);
+
+      r[0].q = si_fm(r[0].q, r[1].q);
+
+      FETCH(&r[1], 0, CHAN_Y);
+      FETCH(&r[2], 1, CHAN_Y);
+
+      r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
+
+      FETCH(&r[1], 0, CHAN_Z);
+      FETCH(&r[2], 1, CHAN_Z);
+
+      r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
+
+      FETCH(&r[1], 0, CHAN_W);
+      FETCH(&r[2], 1, CHAN_W);
+
+      r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DST:
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+         FETCH( &r[0], 0, CHAN_Y );
+         FETCH( &r[1], 1, CHAN_Y);
+         r[0].q = si_fm(r[0].q, r[1].q);
+         STORE( &r[0], 0, CHAN_Y );
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+         FETCH( &r[0], 0, CHAN_Z );
+         STORE( &r[0], 0, CHAN_Z );
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+         FETCH( &r[0], 1, CHAN_W );
+         STORE( &r[0], 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_MIN:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH(&r[0], 0, chan_index);
+         FETCH(&r[1], 1, chan_index);
+
+         r[0].q = micro_min(r[0].q, r[1].q);
+
+         STORE(&r[0], 0, chan_index);
+      }
+      break;
+
+   case TGSI_OPCODE_MAX:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH(&r[0], 0, chan_index);
+         FETCH(&r[1], 1, chan_index);
+
+         r[0].q = micro_max(r[0].q, r[1].q);
+
+         STORE(&r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SLT:
+      /* TGSI_OPCODE_SETLT */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+
+         /* (a < b) computed as the inverse of (a >= b).  The xori
+          * immediate is sign-extended, so -1 inverts all 32 bits of each
+          * word; 0xff would only flip the low byte.
+          */
+         r[0].q = micro_ge(r[0].q, r[1].q);
+         r[0].q = si_xori(r[0].q, -1);
+
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SGE:
+      /* TGSI_OPCODE_SETGE */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         r[0].q = micro_ge(r[0].q, r[1].q);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_MAD:
+      /* TGSI_OPCODE_MADD */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         FETCH( &r[2], 2, chan_index );
+         r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SUB:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH(&r[0], 0, chan_index);
+         FETCH(&r[1], 1, chan_index);
+
+         r[0].q = si_fs(r[0].q, r[1].q);
+
+         STORE(&r[0], 0, chan_index);
+      }
+      break;
+
+   case TGSI_OPCODE_LERP:
+      /* TGSI_OPCODE_LRP */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH(&r[0], 0, chan_index);
+         FETCH(&r[1], 1, chan_index);
+         FETCH(&r[2], 2, chan_index);
+
+         /* r0 = r0 * (r1 - r2) + r2 */
+         r[1].q = si_fs(r[1].q, r[2].q);
+         r[0].q = si_fma(r[0].q, r[1].q, r[2].q);
+
+         STORE(&r[0], 0, chan_index);
+      }
+      break;
+
+   case TGSI_OPCODE_CND:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_CND0:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_DOT2ADD:
+      /* TGSI_OPCODE_DP2A */
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_INDEX:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_NEGATE:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_FRAC:
+      /* TGSI_OPCODE_FRC */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         r[0].q = micro_frc(r[0].q);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CLAMP:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_FLOOR:
+      /* TGSI_OPCODE_FLR */
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         r[0].q = micro_flr(r[0].q);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_ROUND:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         r[0].q = micro_rnd(r[0].q);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_EXPBASE2:
+      /* TGSI_OPCODE_EX2 */
+      FETCH(&r[0], 0, CHAN_X);
+
+      r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q);
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_LOGBASE2:
+      /* TGSI_OPCODE_LG2 */
+      FETCH( &r[0], 0, CHAN_X );
+      r[0].q = micro_lg2(r[0].q);
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_POWER:
+      /* TGSI_OPCODE_POW */
+      FETCH(&r[0], 0, CHAN_X);
+      FETCH(&r[1], 1, CHAN_X);
+
+      r[0].q = micro_pow(r[0].q, r[1].q);
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_CROSSPRODUCT:
+      /* TGSI_OPCODE_XPD */
+      FETCH(&r[0], 0, CHAN_Y);
+      FETCH(&r[1], 1, CHAN_Z);
+      FETCH(&r[3], 0, CHAN_Z);
+      FETCH(&r[4], 1, CHAN_Y);
+
+      /* r2 = (r0 * r1) - (r3 * r4)
+       *
+       * r[4] holds src1.y; r[5] has not been fetched yet at this point.
+       */
+      r[2].q = si_fm(r[3].q, r[4].q);
+      r[2].q = si_fms(r[0].q, r[1].q, r[2].q);
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+         STORE( &r[2], 0, CHAN_X );
+      }
+
+      FETCH(&r[2], 1, CHAN_X);
+      FETCH(&r[5], 0, CHAN_X);
+
+      /* r3 = (r3 * r2) - (r1 * r5)
+       */
+      r[1].q = si_fm(r[1].q, r[5].q);
+      r[3].q = si_fms(r[3].q, r[2].q, r[1].q);
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
+         STORE( &r[3], 0, CHAN_Y );
+      }
+
+      /* r5 = (r5 * r4) - (r0 * r2)
+       */
+      r[0].q = si_fm(r[0].q, r[2].q);
+      r[5].q = si_fms(r[5].q, r[4].q, r[0].q);
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
+         STORE( &r[5], 0, CHAN_Z );
+      }
+
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
+         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_MULTIPLYMATRIX:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_ABS:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH(&r[0], 0, chan_index);
+
+         r[0].q = micro_abs(r[0].q);
+
+         STORE(&r[0], 0, chan_index);
+      }
+      break;
+
+   case TGSI_OPCODE_RCC:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_DPH:
+      FETCH(&r[0], 0, CHAN_X);
+      FETCH(&r[1], 1, CHAN_X);
+
+      r[0].q = si_fm(r[0].q, r[1].q);
+
+      FETCH(&r[1], 0, CHAN_Y);
+      FETCH(&r[2], 1, CHAN_Y);
+
+      r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
+
+      FETCH(&r[1], 0, CHAN_Z);
+      FETCH(&r[2], 1, CHAN_Z);
+
+      r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
+
+      FETCH(&r[1], 1, CHAN_W);
+
+      r[0].q = si_fa(r[0].q, r[1].q);
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_COS:
+      FETCH(&r[0], 0, CHAN_X);
+
+      r[0].q = micro_cos(r[0].q);
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DDX:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         r[0].q = micro_ddx(r[0].q);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_DDY:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         r[0].q = micro_ddy(r[0].q);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_KILP:
+      exec_kilp (mach, inst);
+      break;
+
+   case TGSI_OPCODE_KIL:
+      /* for enabled ExecMask bits, set the killed bit */
+      mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask;
+      break;
+
+   case TGSI_OPCODE_PK2H:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_PK2US:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_PK4B:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_PK4UB:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_RFL:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_SEQ:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+
+         r[0].q = si_fceq(r[0].q, r[1].q);
+
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SFL:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_SGT:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         r[0].q = si_fcgt(r[0].q, r[1].q);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SIN:
+      FETCH( &r[0], 0, CHAN_X );
+      r[0].q = micro_sin(r[0].q);
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SLE:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+
+         /* (a <= b) computed as the inverse of (a > b); -1 inverts the
+          * whole 32-bit mask.
+          */
+         r[0].q = si_fcgt(r[0].q, r[1].q);
+         r[0].q = si_xori(r[0].q, -1);
+
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SNE:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+
+         /* (a != b) computed as the inverse of (a == b); -1 inverts the
+          * whole 32-bit mask.
+          */
+         r[0].q = si_fceq(r[0].q, r[1].q);
+         r[0].q = si_xori(r[0].q, -1);
+
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_STR:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_TEX:
+      /* simple texture lookup */
+      /* src[0] = texcoord */
+      /* src[1] = sampler unit */
+      exec_tex(mach, inst, FALSE);
+      break;
+
+   case TGSI_OPCODE_TXB:
+      /* Texture lookup with lod bias */
+      /* src[0] = texcoord (src[0].w = lod bias) */
+      /* src[1] = sampler unit */
+      exec_tex(mach, inst, TRUE);
+      break;
+
+   case TGSI_OPCODE_TXD:
+      /* Texture lookup with explicit partial derivatives */
+      /* src[0] = texcoord */
+      /* src[1] = d[strq]/dx */
+      /* src[2] = d[strq]/dy */
+      /* src[3] = sampler unit */
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_TXL:
+      /* Texture lookup with explicit LOD */
+      /* src[0] = texcoord (src[0].w = lod) */
+      /* src[1] = sampler unit */
+      /* NOTE(review): handled through the same path as TXB, i.e. the
+       * value is passed to the sampler as a bias - confirm intent.
+       */
+      exec_tex(mach, inst, TRUE);
+      break;
+
+   case TGSI_OPCODE_UP2H:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_UP2US:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_UP4B:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_UP4UB:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_X2D:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_ARA:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_ARR:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_BRA:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_CAL:
+      /* skip the call if no execution channels are enabled */
+      if (mach->ExecMask) {
+         /* do the call */
+
+         /* push the Cond, Loop, Cont stacks */
+         assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
+         mach->CondStack[mach->CondStackTop++] = mach->CondMask;
+         assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+         mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
+         assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+         mach->ContStack[mach->ContStackTop++] = mach->ContMask;
+
+         assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
+         mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
+
+         /* note that PC was already incremented above */
+         mach->CallStack[mach->CallStackTop++] = *pc;
+         *pc = inst->InstructionExtLabel.Label;
+      }
+      break;
+
+   case TGSI_OPCODE_RET:
+      mach->FuncMask &= ~mach->ExecMask;
+      UPDATE_EXEC_MASK(mach);
+
+      if (mach->ExecMask == 0x0) {
+         /* really return now (otherwise, keep executing) */
+
+         if (mach->CallStackTop == 0) {
+            /* returning from main() */
+            *pc = -1;
+            return;
+         }
+         *pc = mach->CallStack[--mach->CallStackTop];
+
+         /* pop the Cond, Loop, Cont stacks */
+         assert(mach->CondStackTop > 0);
+         mach->CondMask = mach->CondStack[--mach->CondStackTop];
+         assert(mach->LoopStackTop > 0);
+         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
+         assert(mach->ContStackTop > 0);
+         mach->ContMask = mach->ContStack[--mach->ContStackTop];
+         assert(mach->FuncStackTop > 0);
+         mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
+
+         UPDATE_EXEC_MASK(mach);
+      }
+      break;
+
+   case TGSI_OPCODE_SSG:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_CMP:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH(&r[0], 0, chan_index);
+         FETCH(&r[1], 1, chan_index);
+         FETCH(&r[2], 2, chan_index);
+
+         /* r0 = (r0 < 0.0) ? r1 : r2
+          *
+          * si_selb(a, b, mask) takes bits from b where the mask is set
+          * and from a where it is clear, so r1 must be the second
+          * operand.  Zero comes from the constant-zero temporary rather
+          * than from xor-ing an uninitialized register with itself.
+          */
+         r[0].q = micro_lt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q);
+         r[0].q = si_selb(r[2].q, r[1].q, r[0].q);
+
+         STORE(&r[0], 0, chan_index);
+      }
+      break;
+
+   case TGSI_OPCODE_SCS:
+      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+         FETCH( &r[0], 0, CHAN_X );
+      }
+      if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
+         r[1].q = micro_cos(r[0].q);
+         STORE( &r[1], 0, CHAN_X );
+      }
+      if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
+         r[1].q = micro_sin(r[0].q);
+         STORE( &r[1], 0, CHAN_Y );
+      }
+      if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
+         STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z );
+      }
+      if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
+         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
+      }
+      break;
+
+   case TGSI_OPCODE_NRM:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_DIV:
+      assert( 0 );
+      break;
+
+   case TGSI_OPCODE_DP2:
+      FETCH( &r[0], 0, CHAN_X );
+      FETCH( &r[1], 1, CHAN_X );
+      r[0].q = si_fm(r[0].q, r[1].q);
+
+      FETCH( &r[1], 0, CHAN_Y );
+      FETCH( &r[2], 1, CHAN_Y );
+      r[0].q = si_fma(r[1].q, r[2].q, r[0].q);
+
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_IF:
+      /* push CondMask */
+      assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
+      mach->CondStack[mach->CondStackTop++] = mach->CondMask;
+      FETCH( &r[0], 0, CHAN_X );
+      /* update CondMask */
+      if( ! r[0].u[0] ) {
+         mach->CondMask &= ~0x1;
+      }
+      if( ! r[0].u[1] ) {
+         mach->CondMask &= ~0x2;
+      }
+      if( ! r[0].u[2] ) {
+         mach->CondMask &= ~0x4;
+      }
+      if( ! r[0].u[3] ) {
+         mach->CondMask &= ~0x8;
+      }
+      UPDATE_EXEC_MASK(mach);
+      /* Todo: If CondMask==0, jump to ELSE */
+      break;
+
+   case TGSI_OPCODE_ELSE:
+      /* invert CondMask wrt previous mask */
+      {
+         uint prevMask;
+         assert(mach->CondStackTop > 0);
+         prevMask = mach->CondStack[mach->CondStackTop - 1];
+         mach->CondMask = ~mach->CondMask & prevMask;
+         UPDATE_EXEC_MASK(mach);
+         /* Todo: If CondMask==0, jump to ENDIF */
+      }
+      break;
+
+   case TGSI_OPCODE_ENDIF:
+      /* pop CondMask */
+      assert(mach->CondStackTop > 0);
+      mach->CondMask = mach->CondStack[--mach->CondStackTop];
+      UPDATE_EXEC_MASK(mach);
+      break;
+
+   case TGSI_OPCODE_END:
+      /* halt execution */
+      *pc = -1;
+      break;
+
+   case TGSI_OPCODE_REP:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_ENDREP:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_PUSHA:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_POPA:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_CEIL:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         r[0].q = micro_ceil(r[0].q);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_I2F:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         r[0].q = si_csflt(r[0].q, 0);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_NOT:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         /* byte-wise xor with 0xff inverts every bit */
+         r[0].q = si_xorbi(r[0].q, 0xff);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_TRUNC:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         r[0].q = micro_trunc(r[0].q);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SHL:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+
+         r[0].q = si_shl(r[0].q, r[1].q);
+
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SHR:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         r[0].q = micro_ishr(r[0].q, r[1].q);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_AND:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         r[0].q = si_and(r[0].q, r[1].q);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_OR:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         r[0].q = si_or(r[0].q, r[1].q);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_MOD:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_XOR:
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH( &r[0], 0, chan_index );
+         FETCH( &r[1], 1, chan_index );
+         r[0].q = si_xor(r[0].q, r[1].q);
+         STORE( &r[0], 0, chan_index );
+      }
+      break;
+
+   case TGSI_OPCODE_SAD:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_TXF:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_TXQ:
+      assert (0);
+      break;
+
+   case TGSI_OPCODE_EMIT:
+      mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16;
+      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
+      break;
+
+   case TGSI_OPCODE_ENDPRIM:
+      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++;
+      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
+      break;
+
+   case TGSI_OPCODE_LOOP:
+      /* fall-through (for now) */
+   case TGSI_OPCODE_BGNLOOP2:
+      /* push LoopMask and ContMasks */
+      assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+      mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
+      assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
+      mach->ContStack[mach->ContStackTop++] = mach->ContMask;
+      break;
+
+   case TGSI_OPCODE_ENDLOOP:
+      /* fall-through (for now at least) */
+   case TGSI_OPCODE_ENDLOOP2:
+      /* Restore ContMask, but don't pop */
+      assert(mach->ContStackTop > 0);
+      mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
+      if (mach->LoopMask) {
+         /* repeat loop: jump to instruction just past BGNLOOP */
+         *pc = inst->InstructionExtLabel.Label + 1;
+      }
+      else {
+         /* exit loop: pop LoopMask */
+         assert(mach->LoopStackTop > 0);
+         mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
+         /* pop ContMask */
+         assert(mach->ContStackTop > 0);
+         mach->ContMask = mach->ContStack[--mach->ContStackTop];
+      }
+      UPDATE_EXEC_MASK(mach);
+      break;
+
+   case TGSI_OPCODE_BRK:
+      /* turn off loop channels for each enabled exec channel */
+      mach->LoopMask &= ~mach->ExecMask;
+      /* Todo: if mach->LoopMask == 0, jump to end of loop */
+      UPDATE_EXEC_MASK(mach);
+      break;
+
+   case TGSI_OPCODE_CONT:
+      /* turn off cont channels for each enabled exec channel */
+      mach->ContMask &= ~mach->ExecMask;
+      /* Todo: if mach->LoopMask == 0, jump to end of loop */
+      UPDATE_EXEC_MASK(mach);
+      break;
+
+   case TGSI_OPCODE_BGNSUB:
+      /* no-op */
+      break;
+
+   case TGSI_OPCODE_ENDSUB:
+      /* no-op */
+      break;
+
+   case TGSI_OPCODE_NOISE1:
+      assert( 0 );
+      break;
+
+   case TGSI_OPCODE_NOISE2:
+      assert( 0 );
+      break;
+
+   case TGSI_OPCODE_NOISE3:
+      assert( 0 );
+      break;
+
+   case TGSI_OPCODE_NOISE4:
+      assert( 0 );
+      break;
+
+   case TGSI_OPCODE_NOP:
+      break;
+
+   default:
+      assert( 0 );
+   }
+}
+
+
+/**
+ * Run TGSI interpreter.
+ *
+ * Resets the execution masks and bookkeeping temporaries, executes the
+ * declarations (fragment programs only), then fetches and executes
+ * instructions one at a time until the program counter becomes -1.
+ * Declarations and instructions are copied in with a DMA get before
+ * being executed.
+ *
+ * \return bitmask of "alive" quad components
+ */
+uint
+spu_exec_machine_run( struct spu_exec_machine *mach )
+{
+   uint i;
+   int pc = 0;
+
+   /* all four lanes start out enabled */
+   mach->ExecMask = 0xf;
+   mach->FuncMask = 0xf;
+   mach->ContMask = 0xf;
+   mach->LoopMask = 0xf;
+   mach->CondMask = 0xf;
+
+   mach->CondStackTop = 0; /* temporarily subvert this assertion */
+   assert(mach->CondStackTop == 0);
+   assert(mach->LoopStackTop == 0);
+   assert(mach->ContStackTop == 0);
+   assert(mach->CallStackTop == 0);
+
+   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
+   mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
+
+   if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) {
+      mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
+      mach->Primitives[0] = 0;
+   }
+
+   /* execute declarations (interpolants) */
+   if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
+      i = 0;
+      while (i < mach->NumDeclarations) {
+         uint8_t staging[sizeof(struct tgsi_full_declaration) + 32] ALIGN16_ATTRIB;
+         struct tgsi_full_declaration decl;
+         unsigned long src_addr = (unsigned long) (mach->Declarations+i);
+         /* The transfer starts at the enclosing 16-byte boundary and is
+          * rounded up to a multiple of 16 bytes.
+          */
+         unsigned xfer_size = ((sizeof(decl) + (src_addr & 0x0f) + 0x0f) & ~0x0f);
+
+         mfc_get(staging, src_addr & ~0x0f, xfer_size, TAG_INSTRUCTION_FETCH, 0, 0);
+         wait_on_mask(1 << TAG_INSTRUCTION_FETCH);
+
+         memcpy(& decl, staging + (src_addr & 0x0f), sizeof(decl));
+         exec_declaration( mach, &decl );
+
+         i++;
+      }
+   }
+
+   /* execute instructions, until pc is set to -1 (pc starts at 0, so the
+    * body always runs at least once)
+    */
+   do {
+      uint8_t staging[sizeof(struct tgsi_full_instruction) + 32] ALIGN16_ATTRIB;
+      struct tgsi_full_instruction inst;
+      unsigned long src_addr = (unsigned long) (mach->Instructions + pc);
+      unsigned xfer_size = ((sizeof(inst) + (src_addr & 0x0f) + 0x0f) & ~0x0f);
+
+      assert(pc < mach->NumInstructions);
+      mfc_get(staging, src_addr & ~0x0f, xfer_size, TAG_INSTRUCTION_FETCH, 0, 0);
+      wait_on_mask(1 << TAG_INSTRUCTION_FETCH);
+
+      memcpy(& inst, staging + (src_addr & 0x0f), sizeof(inst));
+      exec_instruction( mach, & inst, &pc );
+   } while (pc != -1);
+
+#if 0
+   /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
+   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
+      /*
+       * Scale back depth component.
+       */
+      for (i = 0; i < 4; i++)
+         mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
+   }
+#endif
+
+   /* the kill mask has a bit set per killed lane; invert for "alive" */
+   return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
+}
+
+
diff --git a/src/mesa/pipe/cell/spu/spu_exec.h b/src/mesa/pipe/cell/spu/spu_exec.h
new file mode 100644
index 0000000000..b4c7661ef6
--- /dev/null
+++ b/src/mesa/pipe/cell/spu/spu_exec.h
@@ -0,0 +1,172 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#if !defined SPU_EXEC_H
+#define SPU_EXEC_H
+
+#include "pipe/p_compiler.h"
+#include "pipe/tgsi/exec/tgsi_exec.h"
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Registers may be treated as float, signed int or unsigned int.
+ */
+union spu_exec_channel
+{
+ float f[QUAD_SIZE];
+ int i[QUAD_SIZE];
+ unsigned u[QUAD_SIZE];
+ qword q;
+};
+
+/**
+ * A vector[RGBA] of channels[4 pixels]
+ */
+struct spu_exec_vector
+{
+ union spu_exec_channel xyzw[NUM_CHANNELS];
+};
+
+/**
+ * For fragment programs, information for computing fragment input
+ * values from plane equation of the triangle/line.
+ */
+struct spu_interp_coef
+{
+ float a0[NUM_CHANNELS]; /* in an xyzw layout */
+ float dadx[NUM_CHANNELS];
+ float dady[NUM_CHANNELS];
+};
+
+
+struct softpipe_tile_cache; /**< Opaque to TGSI */
+
+/**
+ * Information for sampling textures, which must be implemented
+ * by code outside the TGSI executor.
+ */
+struct spu_sampler
+{
+ const struct pipe_sampler_state *state;
+ struct pipe_texture *texture;
+ /** Get samples for four fragments in a quad */
+ void (*get_samples)(struct spu_sampler *sampler,
+ const float s[QUAD_SIZE],
+ const float t[QUAD_SIZE],
+ const float p[QUAD_SIZE],
+ float lodbias,
+ float rgba[NUM_CHANNELS][QUAD_SIZE]);
+ void *pipe; /*XXX temporary*/
+ struct softpipe_tile_cache *cache;
+};
+
+
+/**
+ * Run-time virtual machine state for executing TGSI shader.
+ */
+struct spu_exec_machine
+{
+ /*
+ * 32 program temporaries
+ * 4 internal temporaries
+ * 1 address
+ */
+ struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS
+ + TGSI_EXEC_NUM_ADDRS + 1]
+ ALIGN16_ATTRIB;
+
+ struct spu_exec_vector *Addrs;
+
+ struct spu_sampler *Samplers;
+
+ float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
+ unsigned ImmLimit;
+ float (*Consts)[4];
+ struct spu_exec_vector *Inputs;
+ struct spu_exec_vector *Outputs;
+ unsigned Processor;
+
+ /* GEOMETRY processor only. */
+ unsigned *Primitives;
+
+ /* FRAGMENT processor only. */
+ const struct spu_interp_coef *InterpCoefs;
+ struct spu_exec_vector QuadPos;
+
+ /* Conditional execution masks */
+ uint CondMask; /**< For IF/ELSE/ENDIF */
+ uint LoopMask; /**< For BGNLOOP/ENDLOOP */
+ uint ContMask; /**< For loop CONT statements */
+ uint FuncMask; /**< For function calls */
+ uint ExecMask; /**< = CondMask & LoopMask */
+
+ /** Condition mask stack (for nested conditionals) */
+ uint CondStack[TGSI_EXEC_MAX_COND_NESTING];
+ int CondStackTop;
+
+ /** Loop mask stack (for nested loops) */
+ uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING];
+ int LoopStackTop;
+
+ /** Loop continue mask stack (see comments in tgsi_exec.c) */
+ uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING];
+ int ContStackTop;
+
+ /** Function execution mask stack (for executing subroutine code) */
+ uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING];
+ int FuncStackTop;
+
+ /** Function call stack for saving/restoring the program counter */
+ uint CallStack[TGSI_EXEC_MAX_CALL_NESTING];
+ int CallStackTop;
+
+ struct tgsi_full_instruction *Instructions;
+ uint NumInstructions;
+
+ struct tgsi_full_declaration *Declarations;
+ uint NumDeclarations;
+};
+
+
+extern void
+spu_exec_machine_init(struct spu_exec_machine *mach,
+ uint numSamplers,
+ struct spu_sampler *samplers,
+ unsigned processor);
+
+extern uint
+spu_exec_machine_run( struct spu_exec_machine *mach );
+
+
+#if defined __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* SPU_EXEC_H */
diff --git a/src/mesa/pipe/cell/spu/spu_main.c b/src/mesa/pipe/cell/spu/spu_main.c
index 0c83900a18..e375197fe6 100644
--- a/src/mesa/pipe/cell/spu/spu_main.c
+++ b/src/mesa/pipe/cell/spu/spu_main.c
@@ -31,11 +31,13 @@
#include <stdio.h>
#include <libmisc.h>
-#include <spu_mfcio.h>
#include "spu_main.h"
-#include "spu_tri.h"
+#include "spu_render.h"
+#include "spu_texture.h"
#include "spu_tile.h"
+//#include "spu_test.h"
+#include "spu_vertex_shader.h"
#include "pipe/cell/common.h"
#include "pipe/p_defines.h"
@@ -46,28 +48,37 @@ helpful headers:
/opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h
*/
-static boolean Debug = FALSE;
+boolean Debug = FALSE;
struct spu_global spu;
+struct spu_vs_context draw;
-void
-wait_on_mask(unsigned tagMask)
+/**
+ * Tell the PPU that this SPU has finished copying a buffer to
+ * local store and that it may be reused by the PPU.
+ * This is done by writing a 16-byte batch-buffer-status block back into
+ * main memory (in cell_context->buffer_status[]).
+ */
+static void
+release_buffer(uint buffer)
{
- mfc_write_tag_mask( tagMask );
- /* wait for completion of _any_ DMAs specified by tagMask */
- mfc_read_tag_status_any();
-}
+ /* Evidently, using less than a 16-byte status doesn't work reliably */
+ static const uint status[4] ALIGN16_ATTRIB
+ = {CELL_BUFFER_STATUS_FREE, 0, 0, 0};
+ const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer);
+ uint *dst = spu.init.buffer_status + index;
-static void
-wait_on_mask_all(unsigned tagMask)
-{
- mfc_write_tag_mask( tagMask );
- /* wait for completion of _any_ DMAs specified by tagMask */
- mfc_read_tag_status_all();
-}
+ ASSERT(buffer < CELL_NUM_BUFFERS);
+ mfc_put((void *) &status, /* src in local memory */
+ (unsigned int) dst, /* dst in main memory */
+ sizeof(status), /* size */
+ TAG_MISC, /* tag is unimportant */
+ 0, /* tid */
+ 0 /* rid */);
+}
/**
@@ -81,24 +92,24 @@ really_clear_tiles(uint surfaceIndex)
uint i;
if (surfaceIndex == 0) {
- clear_c_tile(&ctile);
+ clear_c_tile(&spu.ctile);
for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
uint tx = i % spu.fb.width_tiles;
uint ty = i / spu.fb.width_tiles;
- if (tile_status[ty][tx] == TILE_STATUS_CLEAR) {
- put_tile(tx, ty, &ctile, TAG_SURFACE_CLEAR, 0);
+ if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) {
+ put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
}
}
}
else {
- clear_z_tile(&ztile);
+ clear_z_tile(&spu.ztile);
for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
uint tx = i % spu.fb.width_tiles;
uint ty = i / spu.fb.width_tiles;
- if (tile_status_z[ty][tx] == TILE_STATUS_CLEAR)
- put_tile(tx, ty, &ctile, TAG_SURFACE_CLEAR, 1);
+ if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR)
+ put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); /* surface 1 is Z: write the tile just filled by clear_z_tile(), not the color tile */
}
}
@@ -122,11 +133,11 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear)
#if CLEAR_OPT
/* set all tile's status to CLEAR */
if (clear->surface == 0) {
- memset(tile_status, TILE_STATUS_CLEAR, sizeof(tile_status));
+ memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
spu.fb.color_clear_value = clear->value;
}
else {
- memset(tile_status_z, TILE_STATUS_CLEAR, sizeof(tile_status_z));
+ memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
spu.fb.depth_clear_value = clear->value;
}
return;
@@ -134,11 +145,11 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear)
if (clear->surface == 0) {
spu.fb.color_clear_value = clear->value;
- clear_c_tile(&ctile);
+ clear_c_tile(&spu.ctile);
}
else {
spu.fb.depth_clear_value = clear->value;
- clear_z_tile(&ztile);
+ clear_z_tile(&spu.ztile);
}
/*
@@ -150,9 +161,9 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear)
uint tx = i % spu.fb.width_tiles;
uint ty = i / spu.fb.width_tiles;
if (clear->surface == 0)
- put_tile(tx, ty, &ctile, TAG_SURFACE_CLEAR, 0);
+ put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
else
- put_tile(tx, ty, &ztile, TAG_SURFACE_CLEAR, 1);
+ put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
/* XXX we don't want this here, but it fixes bad tile results */
}
@@ -165,229 +176,14 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear)
}
-/**
- * Given a rendering command's bounding box (in pixels) compute the
- * location of the corresponding screen tile bounding box.
- */
-static INLINE void
-tile_bounding_box(const struct cell_command_render *render,
- uint *txmin, uint *tymin,
- uint *box_num_tiles, uint *box_width_tiles)
-{
-#if 1
- /* Debug: full-window bounding box */
- uint txmax = spu.fb.width_tiles - 1;
- uint tymax = spu.fb.height_tiles - 1;
- *txmin = 0;
- *tymin = 0;
- *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
- *box_width_tiles = spu.fb.width_tiles;
- (void) render;
- (void) txmax;
- (void) tymax;
-#else
- uint txmax, tymax, box_height_tiles;
-
- *txmin = (uint) render->xmin / TILE_SIZE;
- *tymin = (uint) render->ymin / TILE_SIZE;
- txmax = (uint) render->xmax / TILE_SIZE;
- tymax = (uint) render->ymax / TILE_SIZE;
- *box_width_tiles = txmax - *txmin + 1;
- box_height_tiles = tymax - *tymin + 1;
- *box_num_tiles = *box_width_tiles * box_height_tiles;
-#endif
-#if 0
- printf("Render bounds: %g, %g ... %g, %g\n",
- render->xmin, render->ymin, render->xmax, render->ymax);
- printf("Render tiles: %u, %u .. %u, %u\n", *txmin, *tymin, txmax, tymax);
-#endif
-}
-
-
-/**
- * Render primitives
- * \param pos_incr returns value indicating how may words to skip after
- * this command in the batch buffer
- */
static void
-cmd_render(const struct cell_command_render *render, uint *pos_incr)
+cmd_release_verts(const struct cell_command_release_verts *release)
{
- /* we'll DMA into these buffers */
- ubyte vertex_data[CELL_MAX_VBUF_SIZE] ALIGN16_ATTRIB;
- ushort index_data[CELL_MAX_VBUF_INDEXES] ALIGN16_ATTRIB;
- const uint vertex_size = render->vertex_size; /* in bytes */
- const uint total_vertex_bytes = render->num_verts * vertex_size;
- const ubyte *vertices;
- const ushort *indexes;
- uint mask;
- uint i, j;
-
-
- if (Debug) {
- printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u "
- "inline_vert=%u inline_ind=%u\n",
- spu.init.id,
- render->prim_type,
- render->num_verts,
- render->num_indexes,
- render->inline_verts,
- render->inline_indexes);
-
- /*
- printf(" bound: %g, %g .. %g, %g\n",
- render->xmin, render->ymin, render->xmax, render->ymax);
- */
- printf("SPU %u: indices at %p vertices at %p\n",
- spu.init.id,
- render->index_data, render->vertex_data);
- }
-
- ASSERT(sizeof(*render) % 4 == 0);
- ASSERT_ALIGN16(render->vertex_data);
- ASSERT_ALIGN16(render->index_data);
-
-
- /**
- ** Get vertex, index buffers if not inlined
- **/
- if (!render->inline_verts) {
- ASSERT(total_vertex_bytes % 16 == 0);
-
- mfc_get(vertex_data, /* dest */
- (unsigned int) render->vertex_data, /* src */
- total_vertex_bytes, /* size */
- TAG_VERTEX_BUFFER,
- 0, /* tid */
- 0 /* rid */);
-
- vertices = vertex_data;
- }
-
- if (!render->inline_indexes) {
- uint total_index_bytes;
-
- *pos_incr = 0;
-
- total_index_bytes = render->num_indexes * sizeof(ushort);
- if (total_index_bytes < 16)
- total_index_bytes = 16;
- else
- total_index_bytes = ROUNDUP16(total_index_bytes);
-
- indexes = index_data;
-
- /* get index data from main memory */
- mfc_get(index_data, /* dest */
- (unsigned int) render->index_data, /* src */
- total_index_bytes,
- TAG_INDEX_BUFFER,
- 0, /* tid */
- 0 /* rid */);
- }
-
-
- /**
- ** Get pointers to inlined indexes, verts, if present
- **/
- if (render->inline_indexes) {
- /* indexes are right after the render command in the batch buffer */
- indexes = (ushort *) (render + 1);
- *pos_incr = (render->num_indexes * 2 + 3) / 4;
-
- if (render->inline_verts) {
- /* vertices are after indexes, if inlined */
- vertices = (const ubyte *) (render + 1) + *pos_incr * 4;
- *pos_incr = *pos_incr + total_vertex_bytes / 4;
- }
- }
-
-
- /* wait for vertex and/or index buffers if not inlined */
- mask = 0x0;
- if (!render->inline_verts)
- mask |= (1 << TAG_VERTEX_BUFFER);
- if (!render->inline_indexes)
- mask |= (1 << TAG_INDEX_BUFFER);
- wait_on_mask_all(mask);
-
-
- /**
- ** find tiles which intersect the prim bounding box
- **/
- uint txmin, tymin, box_width_tiles, box_num_tiles;
-#if 0
- tile_bounding_box(render, &txmin, &tymin,
- &box_num_tiles, &box_width_tiles);
-#else
- txmin = 0;
- tymin = 0;
- box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
- box_width_tiles = spu.fb.width_tiles;
-#endif
-
- /* make sure any pending clears have completed */
- wait_on_mask(1 << TAG_SURFACE_CLEAR);
-
-
- /**
- ** loop over tiles, rendering tris
- **/
- for (i = spu.init.id; i < box_num_tiles; i += spu.init.num_spus) {
- const uint tx = txmin + i % box_width_tiles;
- const uint ty = tymin + i / box_width_tiles;
-
- ASSERT(tx < spu.fb.width_tiles);
- ASSERT(ty < spu.fb.height_tiles);
-
- /* Start fetching color/z tiles. We'll wait for completion when
- * we need read/write to them later in triangle rasterization.
- */
- if (spu.depth_stencil.depth.enabled) {
- if (tile_status_z[ty][tx] != TILE_STATUS_CLEAR) {
- get_tile(tx, ty, &ztile, TAG_READ_TILE_Z, 1);
- }
- }
-
- if (tile_status[ty][tx] != TILE_STATUS_CLEAR) {
- get_tile(tx, ty, &ctile, TAG_READ_TILE_COLOR, 0);
- }
-
- ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES);
- ASSERT(render->num_indexes % 3 == 0);
-
- /* loop over tris */
- for (j = 0; j < render->num_indexes; j += 3) {
- const float *v0, *v1, *v2;
-
- v0 = (const float *) (vertices + indexes[j+0] * vertex_size);
- v1 = (const float *) (vertices + indexes[j+1] * vertex_size);
- v2 = (const float *) (vertices + indexes[j+2] * vertex_size);
-
- tri_draw(v0, v1, v2, tx, ty);
- }
-
- /* write color/z tiles back to main framebuffer, if dirtied */
- if (tile_status[ty][tx] == TILE_STATUS_DIRTY) {
- put_tile(tx, ty, &ctile, TAG_WRITE_TILE_COLOR, 0);
- tile_status[ty][tx] = TILE_STATUS_DEFINED;
- }
- if (spu.depth_stencil.depth.enabled) {
- if (tile_status_z[ty][tx] == TILE_STATUS_DIRTY) {
- put_tile(tx, ty, &ztile, TAG_WRITE_TILE_Z, 1);
- tile_status_z[ty][tx] = TILE_STATUS_DEFINED;
- }
- }
-
- /* XXX move these... */
- wait_on_mask(1 << TAG_WRITE_TILE_COLOR);
- if (spu.depth_stencil.depth.enabled) {
- wait_on_mask(1 << TAG_WRITE_TILE_Z);
- }
- }
-
if (Debug)
- printf("SPU %u: RENDER done\n",
- spu.init.id);
+ printf("SPU %u: RELEASE VERTS %u\n",
+ spu.init.id, release->vertex_buf);
+ ASSERT(release->vertex_buf != ~0U);
+ release_buffer(release->vertex_buf);
}
@@ -421,6 +217,29 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
spu.fb.zsize = 2;
else
spu.fb.zsize = 0;
+
+ if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM)
+ spu.color_shuffle = ((vector unsigned char) {
+ 12, 0, 4, 8, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0});
+ else if (spu.fb.color_format == PIPE_FORMAT_B8G8R8A8_UNORM)
+ spu.color_shuffle = ((vector unsigned char) {
+ 8, 4, 0, 12, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0});
+ else
+ ASSERT(0);
+}
+
+
+static void
+cmd_state_blend(const struct pipe_blend_state *state)
+{
+ if (Debug)
+ printf("SPU %u: BLEND: enabled %d\n",
+ spu.init.id,
+ state->blend_enable);
+
+ memcpy(&spu.blend, state, sizeof(*state));
}
@@ -444,19 +263,53 @@ cmd_state_sampler(const struct pipe_sampler_state *state)
spu.init.id);
memcpy(&spu.sampler[0], state, sizeof(*state));
+ if (spu.sampler[0].min_img_filter == PIPE_TEX_FILTER_LINEAR)
+ spu.sample_texture = sample_texture_bilinear;
+ else
+ spu.sample_texture = sample_texture_nearest;
}
static void
-cmd_state_vertex_info(const struct vertex_info *vinfo)
+cmd_state_texture(const struct cell_command_texture *texture)
{
if (Debug)
+ printf("SPU %u: TEXTURE at %p size %u x %u\n",
+ spu.init.id, texture->start, texture->width, texture->height);
+
+ memcpy(&spu.texture, texture, sizeof(*texture)); /* keep a local copy of the texture command in spu.texture */
+ spu.tex_size = (vector float)
+ { spu.texture.width, spu.texture.height, 0.0, 0.0};
+ spu.tex_size_mask = (vector unsigned int)
+ { spu.texture.width - 1, spu.texture.height - 1, 0, 0 }; /* NOTE(review): size-1 only works as a bit mask for power-of-two sizes -- confirm callers guarantee that */
+}
+
+
+static void
+cmd_state_vertex_info(const struct vertex_info *vinfo)
+{
+ if (Debug) {
printf("SPU %u: VERTEX_INFO num_attribs=%u\n", spu.init.id,
vinfo->num_attribs);
+ }
+ ASSERT(vinfo->num_attribs >= 1);
+ ASSERT(vinfo->num_attribs <= 8);
memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo));
}
+static void
+cmd_state_vs_array_info(const struct cell_array_info *vs_info)
+{
+ const unsigned attr = vs_info->attr; /* index of the vertex attribute this command describes */
+
+ ASSERT(attr < PIPE_ATTRIB_MAX); /* the per-attribute arrays below are indexed by attr */
+ draw.vertex_fetch.src_ptr[attr] = vs_info->base;
+ draw.vertex_fetch.pitch[attr] = vs_info->pitch;
+ draw.vertex_fetch.format[attr] = vs_info->format;
+ draw.vertex_fetch.dirty = 1; /* flag that the vertex fetch state has been modified */
+}
+
static void
cmd_finish(void)
@@ -473,38 +326,6 @@ cmd_finish(void)
/**
- * Tell the PPU that this SPU has finished copying a batch buffer to
- * local store and that it may be reused by the PPU.
- * This is done by writting a 16-byte batch-buffer-status block back into
- * main memory (in cell_contex->buffer_status[]).
- */
-static void
-release_batch_buffer(uint buffer)
-{
- /* Evidently, using less than a 16-byte status doesn't work reliably */
- static const uint status[4] ALIGN16_ATTRIB
- = {CELL_BUFFER_STATUS_FREE, 0, 0, 0};
-
- const uint index = 4 * (spu.init.id * CELL_NUM_BATCH_BUFFERS + buffer);
- uint *dst = spu.init.buffer_status + index;
-
- ASSERT(buffer < CELL_NUM_BATCH_BUFFERS);
-
- /*
- printf("SPU %u: Set batch status buf=%u, index %u, at %p to FREE\n",
- spu.init.id, buffer, index, dst);
- */
-
- mfc_put((void *) &status, /* src in local memory */
- (unsigned int) dst, /* dst in main memory */
- sizeof(status), /* size */
- TAG_MISC, /* tag is unimportant */
- 0, /* tid */
- 0 /* rid */);
-}
-
-
-/**
* Execute a batch of commands
* The opcode param encodes the location of the buffer and its size.
*/
@@ -513,24 +334,24 @@ cmd_batch(uint opcode)
{
const uint buf = (opcode >> 8) & 0xff;
uint size = (opcode >> 16);
- uint buffer[CELL_BATCH_BUFFER_SIZE / 4] ALIGN16_ATTRIB;
- const uint usize = size / sizeof(uint);
+ uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB;
+ const unsigned usize = size / sizeof(buffer[0]);
uint pos;
if (Debug)
printf("SPU %u: BATCH buffer %u, len %u, from %p\n",
- spu.init.id, buf, size, spu.init.batch_buffers[buf]);
+ spu.init.id, buf, size, spu.init.buffers[buf]);
ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH);
- ASSERT_ALIGN16(spu.init.batch_buffers[buf]);
+ ASSERT_ALIGN16(spu.init.buffers[buf]);
size = ROUNDUP16(size);
- ASSERT_ALIGN16(spu.init.batch_buffers[buf]);
+ ASSERT_ALIGN16(spu.init.buffers[buf]);
mfc_get(buffer, /* dest */
- (unsigned int) spu.init.batch_buffers[buf], /* src */
+ (unsigned int) spu.init.buffers[buf], /* src */
size,
TAG_BATCH_BUFFER,
0, /* tid */
@@ -538,7 +359,9 @@ cmd_batch(uint opcode)
wait_on_mask(1 << TAG_BATCH_BUFFER);
/* Tell PPU we're done copying the buffer to local store */
- release_batch_buffer(buf);
+ if (Debug)
+ printf("SPU %u: release batch buf %u\n", spu.init.id, buf);
+ release_buffer(buf);
for (pos = 0; pos < usize; /* no incr */) {
switch (buffer[pos]) {
@@ -547,7 +370,7 @@ cmd_batch(uint opcode)
struct cell_command_framebuffer *fb
= (struct cell_command_framebuffer *) &buffer[pos];
cmd_state_framebuffer(fb);
- pos += sizeof(*fb) / 4;
+ pos += sizeof(*fb) / 8;
}
break;
case CELL_CMD_CLEAR_SURFACE:
@@ -555,7 +378,7 @@ cmd_batch(uint opcode)
struct cell_command_clear_surface *clr
= (struct cell_command_clear_surface *) &buffer[pos];
cmd_clear_surface(clr);
- pos += sizeof(*clr) / 4;
+ pos += sizeof(*clr) / 8;
}
break;
case CELL_CMD_RENDER:
@@ -564,28 +387,54 @@ cmd_batch(uint opcode)
= (struct cell_command_render *) &buffer[pos];
uint pos_incr;
cmd_render(render, &pos_incr);
- pos += sizeof(*render) / 4 + pos_incr;
+ pos += pos_incr;
+ }
+ break;
+ case CELL_CMD_RELEASE_VERTS:
+ {
+ struct cell_command_release_verts *release
+ = (struct cell_command_release_verts *) &buffer[pos];
+ cmd_release_verts(release);
+ pos += sizeof(*release) / 8;
}
break;
case CELL_CMD_FINISH:
cmd_finish();
pos += 1;
break;
+ case CELL_CMD_STATE_BLEND:
+ cmd_state_blend((struct pipe_blend_state *)
+ &buffer[pos+1]);
+ pos += (1 + ROUNDUP8(sizeof(struct pipe_blend_state)) / 8);
+ break;
case CELL_CMD_STATE_DEPTH_STENCIL:
cmd_state_depth_stencil((struct pipe_depth_stencil_alpha_state *)
&buffer[pos+1]);
- pos += (1 + sizeof(struct pipe_depth_stencil_alpha_state) / 4);
+ pos += (1 + ROUNDUP8(sizeof(struct pipe_depth_stencil_alpha_state)) / 8);
break;
case CELL_CMD_STATE_SAMPLER:
cmd_state_sampler((struct pipe_sampler_state *) &buffer[pos+1]);
- pos += (1 + sizeof(struct pipe_sampler_state) / 4);
+ pos += (1 + ROUNDUP8(sizeof(struct pipe_sampler_state)) / 8);
+ break;
+ case CELL_CMD_STATE_TEXTURE:
+ cmd_state_texture((struct cell_command_texture *) &buffer[pos+1]);
+ pos += (1 + ROUNDUP8(sizeof(struct cell_command_texture)) / 8);
break;
case CELL_CMD_STATE_VERTEX_INFO:
cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]);
- pos += (1 + sizeof(struct vertex_info) / 4);
+ pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8);
+ break;
+ case CELL_CMD_STATE_VIEWPORT:
+ (void) memcpy(& draw.viewport, &buffer[pos+1],
+ sizeof(struct pipe_viewport_state));
+ pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8);
+ break;
+ case CELL_CMD_STATE_VS_ARRAY_INFO:
+ cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]);
+ pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8);
break;
default:
- printf("SPU %u: bad opcode: 0x%x\n", spu.init.id, buffer[pos]);
+ printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]);
ASSERT(0);
break;
}
@@ -633,31 +482,22 @@ main_loop(void)
0 /* rid */);
wait_on_mask( 1 << tag );
+ /*
+ * NOTE: most commands should be contained in a batch buffer
+ */
+
switch (opcode & CELL_CMD_OPCODE_MASK) {
case CELL_CMD_EXIT:
if (Debug)
printf("SPU %u: EXIT\n", spu.init.id);
exitFlag = 1;
break;
- case CELL_CMD_STATE_FRAMEBUFFER:
- cmd_state_framebuffer(&cmd.fb);
- break;
- case CELL_CMD_CLEAR_SURFACE:
- cmd_clear_surface(&cmd.clear);
- break;
- case CELL_CMD_RENDER:
- {
- uint pos_incr;
- cmd_render(&cmd.render, &pos_incr);
- assert(pos_incr == 0);
- }
+ case CELL_CMD_VS_EXECUTE:
+ spu_execute_vertex_shader(&draw, &cmd.vs);
break;
case CELL_CMD_BATCH:
cmd_batch(opcode);
break;
- case CELL_CMD_FINISH:
- cmd_finish();
- break;
default:
printf("Bad opcode!\n");
}
@@ -673,11 +513,13 @@ main_loop(void)
static void
one_time_init(void)
{
- memset(tile_status, TILE_STATUS_DEFINED, sizeof(tile_status));
- memset(tile_status_z, TILE_STATUS_DEFINED, sizeof(tile_status_z));
+ memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status));
+ memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status));
+ invalidate_tex_cache();
}
+
/* In some versions of the SDK the SPE main takes 'unsigned long' as a
* parameter. In others it takes 'unsigned long long'. Use a define to
* select between the two.
@@ -698,6 +540,9 @@ main(main_param_t speid, main_param_t argp)
(void) speid;
+ ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4);
+ ASSERT(sizeof(struct cell_command_render) % 8 == 0);
+
one_time_init();
if (Debug)
@@ -711,6 +556,10 @@ main(main_param_t speid, main_param_t argp)
0 /* rid */);
wait_on_mask( 1 << tag );
+#if 0
+ if (spu.init.id==0)
+ spu_test_misc();
+#endif
main_loop();
diff --git a/src/mesa/pipe/cell/spu/spu_main.h b/src/mesa/pipe/cell/spu/spu_main.h
index 5bc5d9fa99..1710a17512 100644
--- a/src/mesa/pipe/cell/spu/spu_main.h
+++ b/src/mesa/pipe/cell/spu/spu_main.h
@@ -29,11 +29,33 @@
#define SPU_MAIN_H
+#include <spu_mfcio.h>
+
#include "pipe/cell/common.h"
#include "pipe/draw/draw_vertex.h"
#include "pipe/p_state.h"
+
+#define MAX_WIDTH 1024
+#define MAX_HEIGHT 1024
+
+
+typedef union {
+ ushort us[TILE_SIZE][TILE_SIZE];
+ uint ui[TILE_SIZE][TILE_SIZE];
+ vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4];
+ vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2];
+} tile_t;
+
+
+#define TILE_STATUS_CLEAR 1
+#define TILE_STATUS_DEFINED 2 /**< defined in FB, but not in local store */
+#define TILE_STATUS_CLEAN 3 /**< in local store, but not changed */
+#define TILE_STATUS_DIRTY 4 /**< modified locally, but not put back yet */
+#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */
+
+
struct spu_framebuffer {
void *color_start; /**< addr of color surface in main memory */
void *depth_start; /**< addr of depth surface in main memory */
@@ -57,18 +79,42 @@ struct spu_global
struct cell_init_info init;
struct spu_framebuffer fb;
+ struct pipe_blend_state blend_stencil;
struct pipe_depth_stencil_alpha_state depth_stencil;
struct pipe_blend_state blend;
struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS];
+ struct cell_command_texture texture;
struct vertex_info vertex_info;
/* XXX more state to come */
+
+ /** current color and Z tiles */
+ tile_t ctile ALIGN16_ATTRIB;
+ tile_t ztile ALIGN16_ATTRIB;
+
+ /** Current tiles' status */
+ ubyte cur_ctile_status, cur_ztile_status;
+
+ /** Status of all tiles in framebuffer */
+ ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
+ ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
+
+
+ /** for converting RGBA to PIPE_FORMAT_x colors */
+ vector unsigned char color_shuffle;
+
+ vector float tex_size;
+ vector unsigned int tex_size_mask; /**< == int(size - 1) */
+
+ vector float (*sample_texture)(vector float texcoord);
+
} ALIGN16_ATTRIB;
extern struct spu_global spu;
+extern boolean Debug;
@@ -84,10 +130,30 @@ extern struct spu_global spu;
#define TAG_INDEX_BUFFER 16
#define TAG_BATCH_BUFFER 17
#define TAG_MISC 18
+#define TAG_TEXTURE_TILE 19
+#define TAG_INSTRUCTION_FETCH 20
+
+
+
+static INLINE void
+wait_on_mask(unsigned tagMask)
+{
+ mfc_write_tag_mask( tagMask );
+ /* wait for completion of _any_ DMAs specified by tagMask */
+ mfc_read_tag_status_any();
+}
+
+
+static INLINE void
+wait_on_mask_all(unsigned tagMask)
+{
+ mfc_write_tag_mask( tagMask );
+ /* wait for completion of _all_ DMAs specified by tagMask */
+ mfc_read_tag_status_all();
+}
+
-extern void
-wait_on_mask(unsigned tag);
static INLINE void
diff --git a/src/mesa/pipe/cell/spu/spu_render.c b/src/mesa/pipe/cell/spu/spu_render.c
new file mode 100644
index 0000000000..932fb500b3
--- /dev/null
+++ b/src/mesa/pipe/cell/spu/spu_render.c
@@ -0,0 +1,301 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <stdio.h>
+#include <libmisc.h>
+#include <spu_mfcio.h>
+
+#include "spu_main.h"
+#include "spu_render.h"
+#include "spu_tri.h"
+#include "spu_tile.h"
+#include "pipe/cell/common.h"
+
+
+
+/**
+ * Given a rendering command's bounding box (in pixels) compute the
+ * location of the corresponding screen tile bounding box.
+ */
+static INLINE void
+tile_bounding_box(const struct cell_command_render *render,
+ uint *txmin, uint *tymin,
+ uint *box_num_tiles, uint *box_width_tiles)
+{
+#if 0
+ /* Debug: full-window bounding box */
+ uint txmax = spu.fb.width_tiles - 1;
+ uint tymax = spu.fb.height_tiles - 1;
+ *txmin = 0;
+ *tymin = 0;
+ *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
+ *box_width_tiles = spu.fb.width_tiles;
+ (void) render;
+ (void) txmax;
+ (void) tymax;
+#else
+ uint txmax, tymax, box_height_tiles;
+
+ *txmin = (uint) render->xmin / TILE_SIZE;
+ *tymin = (uint) render->ymin / TILE_SIZE;
+ txmax = (uint) render->xmax / TILE_SIZE;
+ tymax = (uint) render->ymax / TILE_SIZE;
+ if (txmax >= spu.fb.width_tiles)
+ txmax = spu.fb.width_tiles-1;
+ if (tymax >= spu.fb.height_tiles)
+ tymax = spu.fb.height_tiles-1;
+ *box_width_tiles = txmax - *txmin + 1;
+ box_height_tiles = tymax - *tymin + 1;
+ *box_num_tiles = *box_width_tiles * box_height_tiles;
+#endif
+#if 0
+ printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu.init.id,
+ render->xmin, render->ymin, render->xmax, render->ymax);
+ printf("SPU %u: tiles: %u, %u .. %u, %u\n",
+ spu.init.id, *txmin, *tymin, txmax, tymax);
+ ASSERT(render->xmin <= render->xmax);
+ ASSERT(render->ymin <= render->ymax);
+#endif
+}
+
+
+/** Check if the tile at (tx,ty) belongs to this SPU */
+static INLINE boolean
+my_tile(uint tx, uint ty)
+{
+ return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id;
+}
+
+
+/**
+ * Start fetching non-clear color/Z tiles from main memory
+ */
+static INLINE void
+get_cz_tiles(uint tx, uint ty)
+{
+ if (spu.depth_stencil.depth.enabled) {
+ if (spu.cur_ztile_status != TILE_STATUS_CLEAR) {
+ //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty);
+ get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1);
+ spu.cur_ztile_status = TILE_STATUS_GETTING;
+ }
+ }
+
+ if (spu.cur_ctile_status != TILE_STATUS_CLEAR) {
+ //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty);
+ get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0);
+ spu.cur_ctile_status = TILE_STATUS_GETTING;
+ }
+}
+
+
+/**
+ * Start putting dirty color/Z tiles back to main memory
+ */
+static INLINE void
+put_cz_tiles(uint tx, uint ty)
+{
+ if (spu.cur_ztile_status == TILE_STATUS_DIRTY) {
+ /* tile was modified and needs to be written back */
+ //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty);
+ put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1);
+ spu.cur_ztile_status = TILE_STATUS_DEFINED;
+ }
+ else if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
+ /* tile was never used */
+ spu.cur_ztile_status = TILE_STATUS_DEFINED;
+ //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty);
+ }
+
+ if (spu.cur_ctile_status == TILE_STATUS_DIRTY) {
+ /* tile was modified and needs to be written back */
+ //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty);
+ put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0);
+ spu.cur_ctile_status = TILE_STATUS_DEFINED;
+ }
+ else if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
+ /* tile was never used */
+ spu.cur_ctile_status = TILE_STATUS_DEFINED;
+ //printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty);
+ }
+}
+
+
+/**
+ * Wait for 'put' of color/z tiles to complete.
+ */
+static INLINE void
+wait_put_cz_tiles(void)
+{
+ wait_on_mask(1 << TAG_WRITE_TILE_COLOR);
+ if (spu.depth_stencil.depth.enabled) {
+ wait_on_mask(1 << TAG_WRITE_TILE_Z);
+ }
+}
+
+
+/**
+ * Render primitives
+ * \param pos_incr returns value indicating how many 8-byte words to skip
+ * after this command in the batch buffer
+ */
+void
+cmd_render(const struct cell_command_render *render, uint *pos_incr)
+{
+ /* we'll DMA into these buffers */
+ ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB;
+ const uint vertex_size = render->vertex_size; /* in bytes */
+ /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size;
+ uint index_bytes;
+ const ubyte *vertices;
+ const ushort *indexes;
+ uint i, j;
+
+
+ if (Debug) {
+ printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u "
+ "inline_vert=%u\n",
+ spu.init.id,
+ render->prim_type,
+ render->num_verts,
+ render->num_indexes,
+ render->inline_verts);
+
+ /*
+ printf(" bound: %g, %g .. %g, %g\n",
+ render->xmin, render->ymin, render->xmax, render->ymax);
+ */
+ }
+
+ ASSERT(sizeof(*render) % 4 == 0);
+ ASSERT(total_vertex_bytes % 16 == 0);
+ ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES);
+ ASSERT(render->num_indexes % 3 == 0);
+
+
+ /* indexes are right after the render command in the batch buffer */
+ indexes = (const ushort *) (render + 1);
+ index_bytes = ROUNDUP8(render->num_indexes * 2);
+ *pos_incr = index_bytes / 8 + sizeof(*render) / 8;
+
+
+ if (render->inline_verts) {
+ /* Vertices are after indexes in batch buffer at next 16-byte addr */
+ vertices = (const ubyte *) render + (*pos_incr * 8);
+ vertices = (const ubyte *) align_pointer((void *) vertices, 16);
+ ASSERT_ALIGN16(vertices);
+ *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8;
+ }
+ else {
+ /* Begin DMA fetch of vertex buffer */
+ ubyte *src = spu.init.buffers[render->vertex_buf];
+ ubyte *dest = vertex_data;
+
+ /* skip vertex data we won't use */
+#if 01
+ src += render->min_index * vertex_size;
+ dest += render->min_index * vertex_size;
+ total_vertex_bytes -= render->min_index * vertex_size;
+#endif
+ ASSERT(total_vertex_bytes % 16 == 0);
+ ASSERT_ALIGN16(dest);
+ ASSERT_ALIGN16(src);
+
+ mfc_get(dest, /* in vertex_data[] array */
+ (unsigned int) src, /* src in main memory */
+ total_vertex_bytes, /* size */
+ TAG_VERTEX_BUFFER,
+ 0, /* tid */
+ 0 /* rid */);
+
+ vertices = vertex_data;
+
+ wait_on_mask(1 << TAG_VERTEX_BUFFER);
+ }
+
+
+ /**
+ ** find tiles which intersect the prim bounding box
+ **/
+ uint txmin, tymin, box_width_tiles, box_num_tiles;
+ tile_bounding_box(render, &txmin, &tymin,
+ &box_num_tiles, &box_width_tiles);
+
+
+ /* make sure any pending clears have completed */
+ wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */
+
+
+ /**
+ ** loop over tiles, rendering tris
+ **/
+ for (i = 0; i < box_num_tiles; i++) {
+ const uint tx = txmin + i % box_width_tiles;
+ const uint ty = tymin + i / box_width_tiles;
+
+ ASSERT(tx < spu.fb.width_tiles);
+ ASSERT(ty < spu.fb.height_tiles);
+
+ if (!my_tile(tx, ty))
+ continue;
+
+ spu.cur_ctile_status = spu.ctile_status[ty][tx];
+ spu.cur_ztile_status = spu.ztile_status[ty][tx];
+
+ get_cz_tiles(tx, ty);
+
+ uint drawn = 0;
+
+ /* loop over tris */
+ for (j = 0; j < render->num_indexes; j += 3) {
+ const float *v0, *v1, *v2;
+
+ v0 = (const float *) (vertices + indexes[j+0] * vertex_size);
+ v1 = (const float *) (vertices + indexes[j+1] * vertex_size);
+ v2 = (const float *) (vertices + indexes[j+2] * vertex_size);
+
+ drawn += tri_draw(v0, v1, v2, tx, ty);
+ }
+
+ //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3);
+
+ /* write color/z tiles back to main framebuffer, if dirtied */
+ put_cz_tiles(tx, ty);
+
+ wait_put_cz_tiles(); /* XXX seems unnecessary... */
+
+ spu.ctile_status[ty][tx] = spu.cur_ctile_status;
+ spu.ztile_status[ty][tx] = spu.cur_ztile_status;
+ }
+
+ if (Debug)
+ printf("SPU %u: RENDER done\n",
+ spu.init.id);
+}
+
+
diff --git a/src/mesa/pipe/cell/spu/spu_render.h b/src/mesa/pipe/cell/spu/spu_render.h
new file mode 100644
index 0000000000..fbcdc5ec31
--- /dev/null
+++ b/src/mesa/pipe/cell/spu/spu_render.h
@@ -0,0 +1,38 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef SPU_RENDER_H
+#define SPU_RENDER_H
+
+#include "pipe/cell/common.h"
+
+extern void
+cmd_render(const struct cell_command_render *render, uint *pos_incr);
+
+#endif /* SPU_RENDER_H */
+
diff --git a/src/mesa/pipe/cell/spu/spu_texture.c b/src/mesa/pipe/cell/spu/spu_texture.c
new file mode 100644
index 0000000000..3962aaa4a9
--- /dev/null
+++ b/src/mesa/pipe/cell/spu/spu_texture.c
@@ -0,0 +1,217 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_compiler.h"
+#include "spu_main.h"
+#include "spu_texture.h"
+#include "spu_tile.h"
+#include "spu_colorpack.h"
+
+
+/**
+ * Number of texture tiles to cache.
+ * Note that this will probably be the largest consumer of SPU local store/
+ * memory for this driver!
+ */
+#define CACHE_SIZE 16
+
+static tile_t tex_tiles[CACHE_SIZE] ALIGN16_ATTRIB;
+
+static vector unsigned int tex_tile_xy[CACHE_SIZE];
+
+
+
+/**
+ * Mark all tex cache entries as invalid.
+ */
+void
+invalidate_tex_cache(void)
+{
+ /* XXX memset? */
+ uint i;
+ for (i = 0; i < CACHE_SIZE; i++) {
+ tex_tile_xy[i] = ((vector unsigned int) { ~0U, ~0U, ~0U, ~0U });
+ }
+}
+
+
+/**
+ * Return the cache pos/index which corresponds to tile (tx,ty)
+ */
+static INLINE uint
+cache_pos(vector unsigned int txty)
+{
+ uint pos = (spu_extract(txty,0) + spu_extract(txty,1) * 4) % CACHE_SIZE;
+ return pos;
+}
+
+
+/**
+ * Make sure the tile for texel (i,j) is present, return its position/index
+ * in the cache.  On a miss the tile is fetched from main memory by DMA and
+ * this call waits on TAG_TEXTURE_TILE before returning.
+ */
+static uint
+get_tex_tile(vector unsigned int ij)
+{
+   /* tile address: tx,ty (element 0 = tile column, element 1 = tile row) */
+   const vector unsigned int txty = spu_rlmask(ij, -5); /* divide by 32 */
+   const uint pos = cache_pos(txty);
+
+   if ((spu_extract(tex_tile_xy[pos], 0) != spu_extract(txty, 0)) ||
+       (spu_extract(tex_tile_xy[pos], 1) != spu_extract(txty, 1))) {
+      /* texture cache miss, fetch tile from main memory */
+      const uint tiles_per_row = spu.texture.width / TILE_SIZE;
+      const uint bytes_per_tile = sizeof(tile_t);
+      const void *src = (const ubyte *) spu.texture.start
+         + (spu_extract(txty,1) * tiles_per_row + spu_extract(txty,0)) * bytes_per_tile;
+
+      /* only report misses in Debug mode; this runs for every texture sample */
+      if (Debug) {
+         printf("SPU %u: tex cache miss at %d, %d pos=%u old=%d,%d\n",
+                spu.init.id, spu_extract(txty,0), spu_extract(txty,1), pos,
+                spu_extract(tex_tile_xy[pos],0),
+                spu_extract(tex_tile_xy[pos],1));
+      }
+
+      ASSERT_ALIGN16(tex_tiles[pos].ui);
+      ASSERT_ALIGN16(src);
+
+      mfc_get(tex_tiles[pos].ui,  /* dest */
+              (unsigned int) src,
+              bytes_per_tile,     /* size */
+              TAG_TEXTURE_TILE,
+              0, /* tid */
+              0  /* rid */);
+
+      wait_on_mask(1 << TAG_TEXTURE_TILE);
+      /* record which tile now occupies this cache slot */
+      tex_tile_xy[pos] = txty;
+   }
+   else {
+#if 0
+      printf("SPU %u: tex cache HIT at %d, %d\n",
+             spu.init.id, tx, ty);
+#endif
+   }
+
+   return pos;
+}
+
+
+/**
+ * Get texture sample at texcoord.
+ * XXX this is extremely primitive for now.
+ */
+vector float
+sample_texture_nearest(vector float texcoord)
+{
+ vector float tc = spu_mul(texcoord, spu.tex_size);
+ vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */
+ itc = spu_and(itc, spu.tex_size_mask); /* mask (GL_REPEAT) */
+ vector unsigned int ij = spu_and(itc, TILE_SIZE-1); /* intra tile addr */
+ uint pos = get_tex_tile(itc);
+ uint texel = tex_tiles[pos].ui[spu_extract(ij, 1)][spu_extract(ij, 0)];
+ return spu_unpack_A8R8G8B8(texel);
+}
+
+
+vector float
+sample_texture_bilinear(vector float texcoord)
+{
+ static const vector unsigned int offset10 = {1, 0, 0, 0};
+ static const vector unsigned int offset01 = {0, 1, 0, 0};
+
+ vector float tc = spu_mul(texcoord, spu.tex_size);
+ tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */
+
+ /* integer texcoords S,T: */
+ vector unsigned int itc00 = spu_convtu(tc, 0); /* convert to int */
+ vector unsigned int itc01 = spu_add(itc00, offset01);
+ vector unsigned int itc10 = spu_add(itc00, offset10);
+ vector unsigned int itc11 = spu_add(itc10, offset01);
+
+ /* mask (GL_REPEAT) */
+ itc00 = spu_and(itc00, spu.tex_size_mask);
+ itc01 = spu_and(itc01, spu.tex_size_mask);
+ itc10 = spu_and(itc10, spu.tex_size_mask);
+ itc11 = spu_and(itc11, spu.tex_size_mask);
+
+ /* intra tile addr */
+ vector unsigned int ij00 = spu_and(itc00, TILE_SIZE-1);
+ vector unsigned int ij01 = spu_and(itc01, TILE_SIZE-1);
+ vector unsigned int ij10 = spu_and(itc10, TILE_SIZE-1);
+ vector unsigned int ij11 = spu_and(itc11, TILE_SIZE-1);
+
+ /* get tile cache positions */
+ uint pos00 = get_tex_tile(itc00);
+ uint pos01, pos10, pos11;
+ if ((spu_extract(ij00, 0) < TILE_SIZE-1) &&
+ (spu_extract(ij00, 1) < TILE_SIZE-1)) {
+ /* all texels are in the same tile */
+ pos01 = pos10 = pos11 = pos00;
+ }
+ else {
+ pos01 = get_tex_tile(itc01);
+ pos10 = get_tex_tile(itc10);
+ pos11 = get_tex_tile(itc11);
+ }
+
+ /* get texels from tiles and convert to float[4] */
+ vector float texel00 = spu_unpack_A8R8G8B8(tex_tiles[pos00].ui[spu_extract(ij00, 1)][spu_extract(ij00, 0)]);
+ vector float texel01 = spu_unpack_A8R8G8B8(tex_tiles[pos01].ui[spu_extract(ij01, 1)][spu_extract(ij01, 0)]);
+ vector float texel10 = spu_unpack_A8R8G8B8(tex_tiles[pos10].ui[spu_extract(ij10, 1)][spu_extract(ij10, 0)]);
+ vector float texel11 = spu_unpack_A8R8G8B8(tex_tiles[pos11].ui[spu_extract(ij11, 1)][spu_extract(ij11, 0)]);
+
+ /* Compute weighting factors in [0,1]
+ * Multiply texcoord by 1024, AND with 1023, convert back to float.
+ */
+ vector float tc1024 = spu_mul(tc, spu_splats(1024.0f));
+ vector signed int itc1024 = spu_convts(tc1024, 0);
+ itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1));
+ vector float weight = spu_convtf(itc1024, 10);
+
+ /* smeared frac and 1-frac */
+ vector float sfrac = spu_splats(spu_extract(weight, 0));
+ vector float tfrac = spu_splats(spu_extract(weight, 1));
+ vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac);
+ vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac);
+
+ /* multiply the samples (colors) by the S/T weights */
+ texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1);
+ texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1);
+ texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac );
+ texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac );
+
+ /* compute sum of weighted samples */
+ vector float texel_sum = spu_add(texel00, texel01);
+ texel_sum = spu_add(texel_sum, texel10);
+ texel_sum = spu_add(texel_sum, texel11);
+
+ return texel_sum;
+}
diff --git a/src/mesa/pipe/cell/spu/spu_texture.h b/src/mesa/pipe/cell/spu/spu_texture.h
new file mode 100644
index 0000000000..95eb87080f
--- /dev/null
+++ b/src/mesa/pipe/cell/spu/spu_texture.h
@@ -0,0 +1,47 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef SPU_TEXTURE_H
+#define SPU_TEXTURE_H
+
+
+#include "pipe/p_compiler.h"
+
+
+extern void
+invalidate_tex_cache(void);
+
+
+extern vector float
+sample_texture_nearest(vector float texcoord);
+
+
+extern vector float
+sample_texture_bilinear(vector float texcoord);
+
+
+#endif /* SPU_TEXTURE_H */
diff --git a/src/mesa/pipe/cell/spu/spu_tile.c b/src/mesa/pipe/cell/spu/spu_tile.c
index ca1352f9f8..12dc246328 100644
--- a/src/mesa/pipe/cell/spu/spu_tile.c
+++ b/src/mesa/pipe/cell/spu/spu_tile.c
@@ -28,15 +28,7 @@
#include "spu_tile.h"
-
-
-
-tile_t ctile ALIGN16_ATTRIB;
-tile_t ztile ALIGN16_ATTRIB;
-
-ubyte tile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
-ubyte tile_status_z[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
-
+#include "spu_main.h"
void
@@ -55,7 +47,7 @@ get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf)
printf("get_tile: dest: %p src: 0x%x size: %d\n",
tile, (unsigned int) src, bytesPerTile);
*/
- mfc_get(tile->t32, /* dest in local memory */
+ mfc_get(tile->ui, /* dest in local memory */
(unsigned int) src, /* src in main memory */
bytesPerTile,
tag,
@@ -81,7 +73,7 @@ put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf)
spu.init.id,
tile, (unsigned int) dst, bytesPerTile);
*/
- mfc_put((void *) tile->t32, /* src in local memory */
+ mfc_put((void *) tile->ui, /* src in local memory */
(unsigned int) dst, /* dst in main memory */
bytesPerTile,
tag,
diff --git a/src/mesa/pipe/cell/spu/spu_tile.h b/src/mesa/pipe/cell/spu/spu_tile.h
index f83dc009c2..e53340a55a 100644
--- a/src/mesa/pipe/cell/spu/spu_tile.h
+++ b/src/mesa/pipe/cell/spu/spu_tile.h
@@ -35,27 +35,6 @@
#include "pipe/cell/common.h"
-#define MAX_WIDTH 1024
-#define MAX_HEIGHT 1024
-
-
-typedef union {
- ushort t16[TILE_SIZE][TILE_SIZE];
- uint t32[TILE_SIZE][TILE_SIZE];
-} tile_t;
-
-
-extern tile_t ctile ALIGN16_ATTRIB;
-extern tile_t ztile ALIGN16_ATTRIB;
-
-
-#define TILE_STATUS_CLEAR 1
-#define TILE_STATUS_DEFINED 2 /**< defined pixel data */
-#define TILE_STATUS_DIRTY 3 /**< modified, but not put back yet */
-
-extern ubyte tile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
-extern ubyte tile_status_z[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB;
-
void
get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf);
@@ -68,7 +47,7 @@ put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf);
static INLINE void
clear_c_tile(tile_t *ctile)
{
- memset32((uint*) ctile->t32,
+ memset32((uint*) ctile->ui,
spu.fb.color_clear_value,
TILE_SIZE * TILE_SIZE);
}
@@ -78,12 +57,13 @@ static INLINE void
clear_z_tile(tile_t *ztile)
{
if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) {
- memset16((ushort*) ztile->t16,
+ memset16((ushort*) ztile->us,
spu.fb.depth_clear_value,
TILE_SIZE * TILE_SIZE);
}
else {
- memset32((uint*) ztile->t32,
+ ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM);
+ memset32((uint*) ztile->ui,
spu.fb.depth_clear_value,
TILE_SIZE * TILE_SIZE);
}
diff --git a/src/mesa/pipe/cell/spu/spu_tri.c b/src/mesa/pipe/cell/spu/spu_tri.c
index 3d0d106c10..be9624cf7d 100644
--- a/src/mesa/pipe/cell/spu/spu_tri.c
+++ b/src/mesa/pipe/cell/spu/spu_tri.c
@@ -32,22 +32,33 @@
#include "pipe/p_compiler.h"
#include "pipe/p_format.h"
#include "pipe/p_util.h"
+#include "spu_blend.h"
+#include "spu_colorpack.h"
#include "spu_main.h"
+#include "spu_texture.h"
#include "spu_tile.h"
#include "spu_tri.h"
+#include "spu_ztest.h"
+
+
+/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
+typedef vector unsigned int mask_t;
+
+typedef union
+{
+ vector float v;
+ float f[4];
+} float4;
/**
* Simplified types taken from other parts of Gallium
*/
struct vertex_header {
- float data[0][4];
+ vector float data[1];
};
-struct prim_header {
- struct vertex_header *v[3];
-};
/* XXX fix this */
@@ -82,9 +93,9 @@ struct edge {
struct interp_coef
{
- float a0[4];
- float dadx[4];
- float dady[4];
+ float4 a0;
+ float4 dadx;
+ float4 dady;
};
@@ -133,6 +144,12 @@ struct setup_stage {
};
+
+static struct setup_stage setup;
+
+
+
+
#if 0
/**
* Basically a cast wrapper.
@@ -145,33 +162,33 @@ static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
#if 0
/**
- * Clip setup->quad against the scissor/surface bounds.
+ * Clip setup.quad against the scissor/surface bounds.
*/
static INLINE void
quad_clip(struct setup_stage *setup)
{
- const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect;
+ const struct pipe_scissor_state *cliprect = &setup.softpipe->cliprect;
const int minx = (int) cliprect->minx;
const int maxx = (int) cliprect->maxx;
const int miny = (int) cliprect->miny;
const int maxy = (int) cliprect->maxy;
- if (setup->quad.x0 >= maxx ||
- setup->quad.y0 >= maxy ||
- setup->quad.x0 + 1 < minx ||
- setup->quad.y0 + 1 < miny) {
+ if (setup.quad.x0 >= maxx ||
+ setup.quad.y0 >= maxy ||
+ setup.quad.x0 + 1 < minx ||
+ setup.quad.y0 + 1 < miny) {
/* totally clipped */
- setup->quad.mask = 0x0;
+ setup.quad.mask = 0x0;
return;
}
- if (setup->quad.x0 < minx)
- setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
- if (setup->quad.y0 < miny)
- setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
- if (setup->quad.x0 == maxx - 1)
- setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
- if (setup->quad.y0 == maxy - 1)
- setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
+ if (setup.quad.x0 < minx)
+ setup.quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT);
+ if (setup.quad.y0 < miny)
+ setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT);
+ if (setup.quad.x0 == maxx - 1)
+ setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT);
+ if (setup.quad.y0 == maxy - 1)
+ setup.quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT);
}
#endif
@@ -183,9 +200,9 @@ static INLINE void
clip_emit_quad(struct setup_stage *setup)
{
quad_clip(setup);
- if (setup->quad.mask) {
- struct softpipe_context *sp = setup->softpipe;
- sp->quad.first->run(sp->quad.first, &setup->quad);
+ if (setup.quad.mask) {
+ struct softpipe_context *sp = setup.softpipe;
+ sp->quad.first->run(sp->quad.first, &setup.quad);
}
}
#endif
@@ -196,200 +213,145 @@ clip_emit_quad(struct setup_stage *setup)
* Eg: four colors will be compute.
*/
static INLINE void
-eval_coeff( struct setup_stage *setup, uint slot,
- float x, float y, float result[4][4])
+eval_coeff(uint slot, float x, float y, vector float result[4])
{
- uint i;
- const float *dadx = setup->coef[slot].dadx;
- const float *dady = setup->coef[slot].dady;
-
- /* loop over XYZW comps */
- for (i = 0; i < 4; i++) {
- result[QUAD_TOP_LEFT][i] = setup->coef[slot].a0[i] + x * dadx[i] + y * dady[i];
- result[QUAD_TOP_RIGHT][i] = result[0][i] + dadx[i];
- result[QUAD_BOTTOM_LEFT][i] = result[0][i] + dady[i];
- result[QUAD_BOTTOM_RIGHT][i] = result[0][i] + dadx[i] + dady[i];
+ switch (spu.vertex_info.interp_mode[slot]) {
+ case INTERP_CONSTANT:
+ result[QUAD_TOP_LEFT] =
+ result[QUAD_TOP_RIGHT] =
+ result[QUAD_BOTTOM_LEFT] =
+ result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0.v;
+ break;
+
+ case INTERP_LINEAR:
+ /* fall-through, for now */
+ default:
+ {
+ register vector float dadx = setup.coef[slot].dadx.v;
+ register vector float dady = setup.coef[slot].dady.v;
+ register vector float topLeft
+ = spu_add(setup.coef[slot].a0.v,
+ spu_add(spu_mul(spu_splats(x), dadx),
+ spu_mul(spu_splats(y), dady)));
+
+ result[QUAD_TOP_LEFT] = topLeft;
+ result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx);
+ result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady);
+ result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady);
+ }
}
}
-static INLINE void
-eval_z( struct setup_stage *setup,
- float x, float y, float result[4])
+static INLINE vector float
+eval_z(float x, float y)
{
const uint slot = 0;
- const uint i = 2;
- const float *dadx = setup->coef[slot].dadx;
- const float *dady = setup->coef[slot].dady;
-
- result[QUAD_TOP_LEFT] = setup->coef[slot].a0[i] + x * dadx[i] + y * dady[i];
- result[QUAD_TOP_RIGHT] = result[0] + dadx[i];
- result[QUAD_BOTTOM_LEFT] = result[0] + dady[i];
- result[QUAD_BOTTOM_RIGHT] = result[0] + dadx[i] + dady[i];
+ const float dzdx = setup.coef[slot].dadx.f[2];
+ const float dzdy = setup.coef[slot].dady.f[2];
+ const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy;
+ const vector float topLeftv = spu_splats(topLeft);
+ const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy };
+ return spu_add(topLeftv, derivs);
}
-static INLINE uint
-pack_color(const float color[4])
+static INLINE mask_t
+do_depth_test(int x, int y, mask_t quadmask)
{
- uint r = (uint) (color[0] * 255.0);
- uint g = (uint) (color[1] * 255.0);
- uint b = (uint) (color[2] * 255.0);
- uint a = (uint) (color[3] * 255.0);
- r = MIN2(r, 255);
- g = MIN2(g, 255);
- b = MIN2(b, 255);
- a = MIN2(a, 255);
- switch (spu.fb.color_format) {
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- return (a << 24) | (r << 16) | (g << 8) | b;
- case PIPE_FORMAT_B8G8R8A8_UNORM:
- return (b << 24) | (g << 16) | (r << 8) | a;
- default:
- ASSERT(0);
- return 0;
- }
-}
-
-
-static uint
-do_depth_test(struct setup_stage *setup, int x, int y, unsigned mask)
-{
- int ix = x - setup->cliprect_minx;
- int iy = y - setup->cliprect_miny;
- float zvals[4];
-
- eval_z(setup, (float) x, (float) y, zvals);
-
- if (tile_status_z[setup->ty][setup->tx] == TILE_STATUS_CLEAR) {
- /* now, _really_ clear the tile */
- clear_z_tile(&ztile);
- }
- else {
- /* make sure we've got the tile from main mem */
- wait_on_mask(1 << TAG_READ_TILE_Z);
- }
- tile_status_z[setup->ty][setup->tx] = TILE_STATUS_DIRTY;
+ float4 zvals;
+ mask_t mask;
+ zvals.v = eval_z((float) x, (float) y);
if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) {
- const float zscale = 65535.0;
- if (mask & MASK_TOP_LEFT) {
- uint z = (uint) (zvals[0] * zscale);
- if (z < ztile.t16[iy][ix])
- ztile.t16[iy][ix] = z;
- else
- mask &= ~MASK_TOP_LEFT;
- }
-
- if (mask & MASK_TOP_RIGHT) {
- uint z = (uint) (zvals[1] * zscale);
- if (z < ztile.t16[iy][ix+1])
- ztile.t16[iy][ix+1] = z;
- else
- mask &= ~MASK_TOP_RIGHT;
- }
-
- if (mask & MASK_BOTTOM_LEFT) {
- uint z = (uint) (zvals[2] * zscale);
- if (z < ztile.t16[iy+1][ix])
- ztile.t16[iy+1][ix] = z;
- else
- mask &= ~MASK_BOTTOM_LEFT;
- }
-
- if (mask & MASK_BOTTOM_RIGHT) {
- uint z = (uint) (zvals[3] * zscale);
- if (z < ztile.t16[iy+1][ix+1])
- ztile.t16[iy+1][ix+1] = z;
- else
- mask &= ~MASK_BOTTOM_RIGHT;
- }
+ int ix = (x - setup.cliprect_minx) / 4;
+ int iy = (y - setup.cliprect_miny) / 2;
+ mask = spu_z16_test_less(zvals.v, &spu.ztile.us8[iy][ix], x>>1, quadmask);
}
else {
- const float zscale = (float) 0xffffffff;
- ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM);
- if (mask & MASK_TOP_LEFT) {
- uint z = (uint) (zvals[0] * zscale);
- if (z < ztile.t32[iy][ix])
- ztile.t32[iy][ix] = z;
- else
- mask &= ~MASK_TOP_LEFT;
- }
-
- if (mask & MASK_TOP_RIGHT) {
- uint z = (uint) (zvals[1] * zscale);
- if (z < ztile.t32[iy][ix+1])
- ztile.t32[iy][ix+1] = z;
- else
- mask &= ~MASK_TOP_RIGHT;
- }
-
- if (mask & MASK_BOTTOM_LEFT) {
- uint z = (uint) (zvals[2] * zscale);
- if (z < ztile.t32[iy+1][ix])
- ztile.t32[iy+1][ix] = z;
- else
- mask &= ~MASK_BOTTOM_LEFT;
- }
-
- if (mask & MASK_BOTTOM_RIGHT) {
- uint z = (uint) (zvals[3] * zscale);
- if (z < ztile.t32[iy+1][ix+1])
- ztile.t32[iy+1][ix+1] = z;
- else
- mask &= ~MASK_BOTTOM_RIGHT;
- }
+ int ix = (x - setup.cliprect_minx) / 2;
+ int iy = (y - setup.cliprect_miny) / 2;
+ mask = spu_z32_test_less(zvals.v, &spu.ztile.ui4[iy][ix], quadmask);
}
+ if (spu_extract(spu_orx(mask), 0))
+ spu.cur_ztile_status = TILE_STATUS_DIRTY;
+
return mask;
}
/**
* Emit a quad (pass to next stage). No clipping is done.
+ * Note: about 1/5 to 1/7 of the time, mask is zero and this function
+ * should be skipped. But adding the test for that slows things down
+ * overall.
*/
static INLINE void
-emit_quad( struct setup_stage *setup, int x, int y, unsigned mask )
+emit_quad( int x, int y, mask_t mask )
{
#if 0
- struct softpipe_context *sp = setup->softpipe;
- setup->quad.x0 = x;
- setup->quad.y0 = y;
- setup->quad.mask = mask;
- sp->quad.first->run(sp->quad.first, &setup->quad);
+ struct softpipe_context *sp = setup.softpipe;
+ setup.quad.x0 = x;
+ setup.quad.y0 = y;
+ setup.quad.mask = mask;
+ sp->quad.first->run(sp->quad.first, &setup.quad);
#else
- /* Cell: "write" quad fragments to the tile by setting prim color */
- const int ix = x - setup->cliprect_minx;
- const int iy = y - setup->cliprect_miny;
- float colors[4][4];
-
- eval_coeff(setup, 1, (float) x, (float) y, colors);
if (spu.depth_stencil.depth.enabled) {
- mask &= do_depth_test(setup, x, y, mask);
+ mask = do_depth_test(x, y, mask);
}
- if (mask) {
- if (tile_status[setup->ty][setup->tx] == TILE_STATUS_CLEAR) {
- /* now, _really_ clear the tile */
- clear_c_tile(&ctile);
+ /* If any bits in mask are set... */
+ if (spu_extract(spu_orx(mask), 0)) {
+ const int ix = x - setup.cliprect_minx;
+ const int iy = y - setup.cliprect_miny;
+ const vector unsigned char shuffle = spu.color_shuffle;
+ vector float colors[4];
+
+ spu.cur_ctile_status = TILE_STATUS_DIRTY;
+
+ if (spu.texture.start) {
+ /* texture mapping */
+ vector float texcoords[4];
+ eval_coeff(2, (float) x, (float) y, texcoords);
+
+ if (spu_extract(mask, 0))
+ colors[0] = spu.sample_texture(texcoords[0]);
+ if (spu_extract(mask, 1))
+ colors[1] = spu.sample_texture(texcoords[1]);
+ if (spu_extract(mask, 2))
+ colors[2] = spu.sample_texture(texcoords[2]);
+ if (spu_extract(mask, 3))
+ colors[3] = spu.sample_texture(texcoords[3]);
}
else {
- /* make sure we've got the tile from main mem */
- wait_on_mask(1 << TAG_READ_TILE_COLOR);
+ /* simple shading */
+ eval_coeff(1, (float) x, (float) y, colors);
}
- tile_status[setup->ty][setup->tx] = TILE_STATUS_DIRTY;
-
- if (mask & MASK_TOP_LEFT)
- ctile.t32[iy][ix] = pack_color(colors[QUAD_TOP_LEFT]);
- if (mask & MASK_TOP_RIGHT)
- ctile.t32[iy][ix+1] = pack_color(colors[QUAD_TOP_RIGHT]);
- if (mask & MASK_BOTTOM_LEFT)
- ctile.t32[iy+1][ix] = pack_color(colors[QUAD_BOTTOM_LEFT]);
- if (mask & MASK_BOTTOM_RIGHT)
- ctile.t32[iy+1][ix+1] = pack_color(colors[QUAD_BOTTOM_RIGHT]);
+
+#if 1
+ if (spu.blend.blend_enable)
+ blend_quad(ix % TILE_SIZE, iy % TILE_SIZE, colors);
+#endif
+
+ if (spu_extract(mask, 0))
+ spu.ctile.ui[iy][ix] = spu_pack_color_shuffle(colors[0], shuffle);
+ if (spu_extract(mask, 1))
+ spu.ctile.ui[iy][ix+1] = spu_pack_color_shuffle(colors[1], shuffle);
+ if (spu_extract(mask, 2))
+ spu.ctile.ui[iy+1][ix] = spu_pack_color_shuffle(colors[2], shuffle);
+ if (spu_extract(mask, 3))
+ spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(colors[3], shuffle);
+
+#if 0
+ /* SIMD_Z with swizzled color buffer (someday) */
+ vector unsigned int uicolors = *((vector unsigned int *) &colors);
+ spu.ctile.ui4[iy/2][ix/2] = spu_sel(spu.ctile.ui4[iy/2][ix/2], uicolors, mask);
+#endif
}
+
#endif
}
@@ -407,26 +369,19 @@ static INLINE int block( int x )
/**
* Compute mask which indicates which pixels in the 2x2 quad are actually inside
* the triangle's bounds.
- *
- * this is pretty nasty... may need to rework flush_spans again to
- * fix it, if possible.
+ * The mask is a uint4 vector and each element will be 0 or 0xffffffff.
*/
-static unsigned calculate_mask( struct setup_stage *setup, int x )
+static INLINE mask_t calculate_mask( int x )
{
- unsigned mask = 0x0;
-
- if (x >= setup->span.left[0] && x < setup->span.right[0])
- mask |= MASK_TOP_LEFT;
-
- if (x >= setup->span.left[1] && x < setup->span.right[1])
- mask |= MASK_BOTTOM_LEFT;
-
- if (x+1 >= setup->span.left[0] && x+1 < setup->span.right[0])
- mask |= MASK_TOP_RIGHT;
-
- if (x+1 >= setup->span.left[1] && x+1 < setup->span.right[1])
- mask |= MASK_BOTTOM_RIGHT;
-
+ /* This is a little tricky.
+ * Use & instead of && to avoid branches.
+ * Use negation to convert true/false to ~0/0 values.
+ */
+ mask_t mask;
+ mask = spu_insert(-((x >= setup.span.left[0]) & (x < setup.span.right[0])), mask, 0);
+ mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1);
+ mask = spu_insert(-((x >= setup.span.left[1]) & (x < setup.span.right[1])), mask, 2);
+ mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3);
return mask;
}
@@ -434,144 +389,175 @@ static unsigned calculate_mask( struct setup_stage *setup, int x )
/**
* Render a horizontal span of quads
*/
-static void flush_spans( struct setup_stage *setup )
+static void flush_spans( void )
{
int minleft, maxright;
int x;
- switch (setup->span.y_flags) {
+ switch (setup.span.y_flags) {
case 0x3:
/* both odd and even lines written (both quad rows) */
- minleft = MIN2(setup->span.left[0], setup->span.left[1]);
- maxright = MAX2(setup->span.right[0], setup->span.right[1]);
+ minleft = MIN2(setup.span.left[0], setup.span.left[1]);
+ maxright = MAX2(setup.span.right[0], setup.span.right[1]);
break;
case 0x1:
/* only even line written (quad top row) */
- minleft = setup->span.left[0];
- maxright = setup->span.right[0];
+ minleft = setup.span.left[0];
+ maxright = setup.span.right[0];
break;
case 0x2:
/* only odd line written (quad bottom row) */
- minleft = setup->span.left[1];
- maxright = setup->span.right[1];
+ minleft = setup.span.left[1];
+ maxright = setup.span.right[1];
break;
default:
return;
}
+
+ /* OK, we're very likely to need the tile data now.
+ * clear or finish waiting if needed.
+ */
+ if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
+ /* wait for mfc_get() to complete */
+ //printf("SPU: %u: waiting for ctile\n", spu.init.id);
+ wait_on_mask(1 << TAG_READ_TILE_COLOR);
+ spu.cur_ctile_status = TILE_STATUS_CLEAN;
+ }
+ else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) {
+ //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
+ clear_c_tile(&spu.ctile);
+ spu.cur_ctile_status = TILE_STATUS_DIRTY;
+ }
+ ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
+
+ if (spu.depth_stencil.depth.enabled) {
+ if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
+ /* wait for mfc_get() to complete */
+ //printf("SPU: %u: waiting for ztile\n", spu.init.id);
+ wait_on_mask(1 << TAG_READ_TILE_Z);
+ spu.cur_ztile_status = TILE_STATUS_CLEAN;
+ }
+ else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) {
+ //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
+ clear_z_tile(&spu.ztile);
+ spu.cur_ztile_status = TILE_STATUS_DIRTY;
+ }
+ ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED);
+ }
+
/* XXX this loop could be moved into the above switch cases and
* calculate_mask() could be simplified a bit...
*/
for (x = block(minleft); x <= block(maxright); x += 2) {
- emit_quad( setup, x, setup->span.y,
- calculate_mask( setup, x ) );
+#if 1
+ emit_quad( x, setup.span.y, calculate_mask( x ) );
+#endif
}
- setup->span.y = 0;
- setup->span.y_flags = 0;
- setup->span.right[0] = 0;
- setup->span.right[1] = 0;
+ setup.span.y = 0;
+ setup.span.y_flags = 0;
+ setup.span.right[0] = 0;
+ setup.span.right[1] = 0;
}
#if DEBUG_VERTS
-static void print_vertex(const struct setup_stage *setup,
- const struct vertex_header *v)
+static void print_vertex(const struct vertex_header *v)
{
int i;
fprintf(stderr, "Vertex: (%p)\n", v);
- for (i = 0; i < setup->quad.nr_attrs; i++) {
+ for (i = 0; i < setup.quad.nr_attrs; i++) {
fprintf(stderr, " %d: %f %f %f %f\n", i,
v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]);
}
}
#endif
-static boolean setup_sort_vertices( struct setup_stage *setup,
- const struct prim_header *prim )
+
+static boolean setup_sort_vertices(const struct vertex_header *v0,
+ const struct vertex_header *v1,
+ const struct vertex_header *v2)
{
- const struct vertex_header *v0 = prim->v[0];
- const struct vertex_header *v1 = prim->v[1];
- const struct vertex_header *v2 = prim->v[2];
#if DEBUG_VERTS
fprintf(stderr, "Triangle:\n");
- print_vertex(setup, v0);
- print_vertex(setup, v1);
- print_vertex(setup, v2);
+ print_vertex(v0);
+ print_vertex(v1);
+ print_vertex(v2);
#endif
- setup->vprovoke = v2;
+ setup.vprovoke = v2;
/* determine bottom to top order of vertices */
{
- float y0 = v0->data[0][1];
- float y1 = v1->data[0][1];
- float y2 = v2->data[0][1];
+ float y0 = spu_extract(v0->data[0], 1);
+ float y1 = spu_extract(v1->data[0], 1);
+ float y2 = spu_extract(v2->data[0], 1);
if (y0 <= y1) {
if (y1 <= y2) {
/* y0<=y1<=y2 */
- setup->vmin = v0;
- setup->vmid = v1;
- setup->vmax = v2;
+ setup.vmin = v0;
+ setup.vmid = v1;
+ setup.vmax = v2;
}
else if (y2 <= y0) {
/* y2<=y0<=y1 */
- setup->vmin = v2;
- setup->vmid = v0;
- setup->vmax = v1;
+ setup.vmin = v2;
+ setup.vmid = v0;
+ setup.vmax = v1;
}
else {
/* y0<=y2<=y1 */
- setup->vmin = v0;
- setup->vmid = v2;
- setup->vmax = v1;
+ setup.vmin = v0;
+ setup.vmid = v2;
+ setup.vmax = v1;
}
}
else {
if (y0 <= y2) {
/* y1<=y0<=y2 */
- setup->vmin = v1;
- setup->vmid = v0;
- setup->vmax = v2;
+ setup.vmin = v1;
+ setup.vmid = v0;
+ setup.vmax = v2;
}
else if (y2 <= y1) {
/* y2<=y1<=y0 */
- setup->vmin = v2;
- setup->vmid = v1;
- setup->vmax = v0;
+ setup.vmin = v2;
+ setup.vmid = v1;
+ setup.vmax = v0;
}
else {
/* y1<=y2<=y0 */
- setup->vmin = v1;
- setup->vmid = v2;
- setup->vmax = v0;
+ setup.vmin = v1;
+ setup.vmid = v2;
+ setup.vmax = v0;
}
}
}
/* Check if triangle is completely outside the tile bounds */
- if (setup->vmin->data[0][1] > setup->cliprect_maxy)
+ if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy)
return FALSE;
- if (setup->vmax->data[0][1] < setup->cliprect_miny)
+ if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny)
return FALSE;
- if (setup->vmin->data[0][0] < setup->cliprect_minx &&
- setup->vmid->data[0][0] < setup->cliprect_minx &&
- setup->vmax->data[0][0] < setup->cliprect_minx)
+ if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx &&
+ spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx &&
+ spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx)
return FALSE;
- if (setup->vmin->data[0][0] > setup->cliprect_maxx &&
- setup->vmid->data[0][0] > setup->cliprect_maxx &&
- setup->vmax->data[0][0] > setup->cliprect_maxx)
+ if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx &&
+ spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx &&
+ spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx)
return FALSE;
- setup->ebot.dx = setup->vmid->data[0][0] - setup->vmin->data[0][0];
- setup->ebot.dy = setup->vmid->data[0][1] - setup->vmin->data[0][1];
- setup->emaj.dx = setup->vmax->data[0][0] - setup->vmin->data[0][0];
- setup->emaj.dy = setup->vmax->data[0][1] - setup->vmin->data[0][1];
- setup->etop.dx = setup->vmax->data[0][0] - setup->vmid->data[0][0];
- setup->etop.dy = setup->vmax->data[0][1] - setup->vmid->data[0][1];
+ setup.ebot.dx = spu_extract(setup.vmid->data[0], 0) - spu_extract(setup.vmin->data[0], 0);
+ setup.ebot.dy = spu_extract(setup.vmid->data[0], 1) - spu_extract(setup.vmin->data[0], 1);
+ setup.emaj.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmin->data[0], 0);
+ setup.emaj.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmin->data[0], 1);
+ setup.etop.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmid->data[0], 0);
+ setup.etop.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmid->data[0], 1);
/*
* Compute triangle's area. Use 1/area to compute partial
@@ -584,13 +570,13 @@ static boolean setup_sort_vertices( struct setup_stage *setup,
* use the prim->det value because its sign is correct.
*/
{
- const float area = (setup->emaj.dx * setup->ebot.dy -
- setup->ebot.dx * setup->emaj.dy);
+ const float area = (setup.emaj.dx * setup.ebot.dy -
+ setup.ebot.dx * setup.emaj.dy);
- setup->oneoverarea = 1.0f / area;
+ setup.oneoverarea = 1.0f / area;
/*
_mesa_printf("%s one-over-area %f area %f det %f\n",
- __FUNCTION__, setup->oneoverarea, area, prim->det );
+ __FUNCTION__, setup.oneoverarea, area, prim->det );
*/
}
@@ -599,56 +585,52 @@ static boolean setup_sort_vertices( struct setup_stage *setup,
* - the GLSL gl_FrontFacing fragment attribute (bool)
* - two-sided stencil test
*/
- setup->quad.facing = (prim->det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
+ setup.quad.facing = (prim->det > 0.0) ^ (setup.softpipe->rasterizer->front_winding == PIPE_WINDING_CW);
#endif
return TRUE;
}
-#if 0
/**
* Compute a0 for a constant-valued coefficient (GL_FLAT shading).
- * The value value comes from vertex->data[slot][i].
- * The result will be put into setup->coef[slot].a0[i].
+ * The value comes from vertex->data[slot].
+ * The result will be put into setup.coef[slot].a0.
* \param slot which attribute slot
- * \param i which component of the slot (0..3)
*/
-static void const_coeff( struct setup_stage *setup,
- unsigned slot,
- unsigned i )
+static INLINE void
+const_coeff(uint slot)
{
- assert(slot < PIPE_MAX_SHADER_INPUTS);
- assert(i <= 3);
-
- setup->coef[slot].dadx[i] = 0;
- setup->coef[slot].dady[i] = 0;
-
- /* need provoking vertex info!
- */
- setup->coef[slot].a0[i] = setup->vprovoke->data[slot][i];
+ setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0};
+ setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0};
+ setup.coef[slot].a0.v = setup.vprovoke->data[slot];
}
-#endif
/**
* Compute a0, dadx and dady for a linearly interpolated coefficient,
* for a triangle.
*/
-static void tri_linear_coeff( struct setup_stage *setup,
- uint slot, uint firstComp, uint lastComp )
+static INLINE void
+tri_linear_coeff(uint slot, uint firstComp, uint lastComp)
{
uint i;
+ const float *vmin_d = (float *) &setup.vmin->data[slot];
+ const float *vmid_d = (float *) &setup.vmid->data[slot];
+ const float *vmax_d = (float *) &setup.vmax->data[slot];
+ const float x = spu_extract(setup.vmin->data[0], 0) - 0.5f;
+ const float y = spu_extract(setup.vmin->data[0], 1) - 0.5f;
+
for (i = firstComp; i < lastComp; i++) {
- float botda = setup->vmid->data[slot][i] - setup->vmin->data[slot][i];
- float majda = setup->vmax->data[slot][i] - setup->vmin->data[slot][i];
- float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
- float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
+ float botda = vmid_d[i] - vmin_d[i];
+ float majda = vmax_d[i] - vmin_d[i];
+ float a = setup.ebot.dy * majda - botda * setup.emaj.dy;
+ float b = setup.emaj.dx * botda - majda * setup.ebot.dx;
ASSERT(slot < PIPE_MAX_SHADER_INPUTS);
- setup->coef[slot].dadx[i] = a * setup->oneoverarea;
- setup->coef[slot].dady[i] = b * setup->oneoverarea;
+ setup.coef[slot].dadx.f[i] = a * setup.oneoverarea;
+ setup.coef[slot].dady.f[i] = b * setup.oneoverarea;
/* calculate a0 as the value which would be sampled for the
* fragment at (0,0), taking into account that we want to sample at
@@ -662,21 +644,52 @@ static void tri_linear_coeff( struct setup_stage *setup,
* to define a0 as the sample at a pixel center somewhere near vmin
* instead - i'll switch to this later.
*/
- setup->coef[slot].a0[i] = (setup->vmin->data[slot][i] -
- (setup->coef[slot].dadx[i] * (setup->vmin->data[0][0] - 0.5f) +
- setup->coef[slot].dady[i] * (setup->vmin->data[0][1] - 0.5f)));
+ setup.coef[slot].a0.f[i] = (vmin_d[i] -
+ (setup.coef[slot].dadx.f[i] * x +
+ setup.coef[slot].dady.f[i] * y));
}
/*
_mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n",
slot, "xyzw"[i],
- setup->coef[slot].a0[i],
- setup->coef[slot].dadx[i],
- setup->coef[slot].dady[i]);
+ setup.coef[slot].a0.f[i],
+ setup.coef[slot].dadx.f[i],
+ setup.coef[slot].dady.f[i]);
*/
}
+/**
+ * As above, but set up interpolation for all four vector components at once.
+ */
+static INLINE void
+tri_linear_coeff4(uint slot)
+{
+ const vector float vmin_d = setup.vmin->data[slot];
+ const vector float vmid_d = setup.vmid->data[slot];
+ const vector float vmax_d = setup.vmax->data[slot];
+ const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f);
+ const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f);
+
+ vector float botda = vmid_d - vmin_d;
+ vector float majda = vmax_d - vmin_d;
+
+ vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda),
+ spu_mul(botda, spu_splats(setup.emaj.dy)));
+ vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda),
+ spu_mul(majda, spu_splats(setup.ebot.dx)));
+
+ setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneoverarea));
+ setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneoverarea));
+
+ vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx);
+ vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy);
+
+ setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy));
+}
+
+
+
#if 0
/**
* Compute a0, dadx and dady for a perspective-corrected interpolant,
@@ -686,46 +699,45 @@ static void tri_linear_coeff( struct setup_stage *setup,
* Later, when we compute the value at a particular fragment position we'll
* divide the interpolated value by the interpolated W at that fragment.
*/
-static void tri_persp_coeff( struct setup_stage *setup,
- unsigned slot,
+static void tri_persp_coeff( unsigned slot,
unsigned i )
{
/* premultiply by 1/w:
*/
- float mina = setup->vmin->data[slot][i] * setup->vmin->data[0][3];
- float mida = setup->vmid->data[slot][i] * setup->vmid->data[0][3];
- float maxa = setup->vmax->data[slot][i] * setup->vmax->data[0][3];
+ float mina = setup.vmin->data[slot][i] * setup.vmin->data[0][3];
+ float mida = setup.vmid->data[slot][i] * setup.vmid->data[0][3];
+ float maxa = setup.vmax->data[slot][i] * setup.vmax->data[0][3];
float botda = mida - mina;
float majda = maxa - mina;
- float a = setup->ebot.dy * majda - botda * setup->emaj.dy;
- float b = setup->emaj.dx * botda - majda * setup->ebot.dx;
+ float a = setup.ebot.dy * majda - botda * setup.emaj.dy;
+ float b = setup.emaj.dx * botda - majda * setup.ebot.dx;
/*
printf("tri persp %d,%d: %f %f %f\n", slot, i,
- setup->vmin->data[slot][i],
- setup->vmid->data[slot][i],
- setup->vmax->data[slot][i]
+ setup.vmin->data[slot][i],
+ setup.vmid->data[slot][i],
+ setup.vmax->data[slot][i]
);
*/
assert(slot < PIPE_MAX_SHADER_INPUTS);
assert(i <= 3);
- setup->coef[slot].dadx[i] = a * setup->oneoverarea;
- setup->coef[slot].dady[i] = b * setup->oneoverarea;
- setup->coef[slot].a0[i] = (mina -
- (setup->coef[slot].dadx[i] * (setup->vmin->data[0][0] - 0.5f) +
- setup->coef[slot].dady[i] * (setup->vmin->data[0][1] - 0.5f)));
+ setup.coef[slot].dadx.f[i] = a * setup.oneoverarea;
+ setup.coef[slot].dady.f[i] = b * setup.oneoverarea;
+ setup.coef[slot].a0.f[i] = (mina -
+ (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) +
+ setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f)));
}
#endif
/**
- * Compute the setup->coef[] array dadx, dady, a0 values.
- * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized.
+ * Compute the setup.coef[] array dadx, dady, a0 values.
+ * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized.
*/
-static void setup_tri_coefficients( struct setup_stage *setup )
+static void setup_tri_coefficients(void)
{
#if 1
uint i;
@@ -735,15 +747,18 @@ static void setup_tri_coefficients( struct setup_stage *setup )
case INTERP_NONE:
break;
case INTERP_POS:
- tri_linear_coeff(setup, i, 2, 3); /* slot 0, z */
+ /*tri_linear_coeff(i, 2, 3);*/
/* XXX interp W if PERSPECTIVE... */
+ tri_linear_coeff4(i);
break;
case INTERP_CONSTANT:
- /* fall-through */
+ const_coeff(i);
+ break;
case INTERP_LINEAR:
- tri_linear_coeff(setup, i, 0, 4); /* slot 1, color */
+ tri_linear_coeff4(i);
break;
case INTERP_PERSPECTIVE:
+ tri_linear_coeff4(i); /* temporary */
break;
default:
ASSERT(0);
@@ -753,35 +768,35 @@ static void setup_tri_coefficients( struct setup_stage *setup )
ASSERT(spu.vertex_info.interp_mode[0] == INTERP_POS);
ASSERT(spu.vertex_info.interp_mode[1] == INTERP_LINEAR ||
spu.vertex_info.interp_mode[1] == INTERP_CONSTANT);
- tri_linear_coeff(setup, 0, 2, 3); /* slot 0, z */
- tri_linear_coeff(setup, 1, 0, 4); /* slot 1, color */
+ tri_linear_coeff(0, 2, 3); /* slot 0, z */
+ tri_linear_coeff(1, 0, 4); /* slot 1, color */
#endif
}
-static void setup_tri_edges( struct setup_stage *setup )
+static void setup_tri_edges(void)
{
- float vmin_x = setup->vmin->data[0][0] + 0.5f;
- float vmid_x = setup->vmid->data[0][0] + 0.5f;
-
- float vmin_y = setup->vmin->data[0][1] - 0.5f;
- float vmid_y = setup->vmid->data[0][1] - 0.5f;
- float vmax_y = setup->vmax->data[0][1] - 0.5f;
-
- setup->emaj.sy = CEILF(vmin_y);
- setup->emaj.lines = (int) CEILF(vmax_y - setup->emaj.sy);
- setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy;
- setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy;
-
- setup->etop.sy = CEILF(vmid_y);
- setup->etop.lines = (int) CEILF(vmax_y - setup->etop.sy);
- setup->etop.dxdy = setup->etop.dx / setup->etop.dy;
- setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy;
-
- setup->ebot.sy = CEILF(vmin_y);
- setup->ebot.lines = (int) CEILF(vmid_y - setup->ebot.sy);
- setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy;
- setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy;
+ float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f;
+ float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f;
+
+ float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f;
+ float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f;
+ float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f;
+
+ setup.emaj.sy = CEILF(vmin_y);
+ setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy);
+ setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy;
+ setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy;
+
+ setup.etop.sy = CEILF(vmid_y);
+ setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy);
+ setup.etop.dxdy = setup.etop.dx / setup.etop.dy;
+ setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy;
+
+ setup.ebot.sy = CEILF(vmin_y);
+ setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy);
+ setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy;
+ setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy;
}
@@ -789,15 +804,14 @@ static void setup_tri_edges( struct setup_stage *setup )
* Render the upper or lower half of a triangle.
* Scissoring/cliprect is applied here too.
*/
-static void subtriangle( struct setup_stage *setup,
- struct edge *eleft,
+static void subtriangle( struct edge *eleft,
struct edge *eright,
unsigned lines )
{
- const int minx = setup->cliprect_minx;
- const int maxx = setup->cliprect_maxx;
- const int miny = setup->cliprect_miny;
- const int maxy = setup->cliprect_maxy;
+ const int minx = setup.cliprect_minx;
+ const int maxx = setup.cliprect_maxx;
+ const int miny = setup.cliprect_miny;
+ const int maxy = setup.cliprect_maxy;
int y, start_y, finish_y;
int sy = (int)eleft->sy;
@@ -839,14 +853,14 @@ static void subtriangle( struct setup_stage *setup,
if (left < right) {
int _y = sy + y;
- if (block(_y) != setup->span.y) {
- flush_spans(setup);
- setup->span.y = block(_y);
+ if (block(_y) != setup.span.y) {
+ flush_spans();
+ setup.span.y = block(_y);
}
- setup->span.left[_y&1] = left;
- setup->span.right[_y&1] = right;
- setup->span.y_flags |= 1<<(_y&1);
+ setup.span.left[_y&1] = left;
+ setup.span.right[_y&1] = right;
+ setup.span.y_flags |= 1<<(_y&1);
}
}
@@ -861,70 +875,52 @@ static void subtriangle( struct setup_stage *setup,
/**
- * Do setup for triangle rasterization, then render the triangle.
+ * Draw triangle into tile at (tx, ty) (tile coords)
+ * The tile data should have already been fetched.
*/
-static void
-setup_tri(struct setup_stage *setup, struct prim_header *prim)
+boolean
+tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty)
{
- if (!setup_sort_vertices( setup, prim )) {
- return; /* totally clipped */
- }
+ setup.tx = tx;
+ setup.ty = ty;
- setup_tri_coefficients( setup );
- setup_tri_edges( setup );
+ /* set clipping bounds to tile bounds */
+ setup.cliprect_minx = tx * TILE_SIZE;
+ setup.cliprect_miny = ty * TILE_SIZE;
+ setup.cliprect_maxx = (tx + 1) * TILE_SIZE;
+ setup.cliprect_maxy = (ty + 1) * TILE_SIZE;
-#if 0
- setup->quad.prim = PRIM_TRI;
-#endif
+ if (!setup_sort_vertices((struct vertex_header *) v0,
+ (struct vertex_header *) v1,
+ (struct vertex_header *) v2)) {
+ return FALSE; /* totally clipped */
+ }
+
+ setup_tri_coefficients();
+ setup_tri_edges();
- setup->span.y = 0;
- setup->span.y_flags = 0;
- setup->span.right[0] = 0;
- setup->span.right[1] = 0;
- /* setup->span.z_mode = tri_z_mode( setup->ctx ); */
+ setup.span.y = 0;
+ setup.span.y_flags = 0;
+ setup.span.right[0] = 0;
+ setup.span.right[1] = 0;
+ /* setup.span.z_mode = tri_z_mode( setup.ctx ); */
/* init_constant_attribs( setup ); */
- if (setup->oneoverarea < 0.0) {
+ if (setup.oneoverarea < 0.0) {
/* emaj on left:
*/
- subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines );
- subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines );
+ subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines );
+ subtriangle( &setup.emaj, &setup.etop, setup.etop.lines );
}
else {
/* emaj on right:
*/
- subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines );
- subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines );
+ subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines );
+ subtriangle( &setup.etop, &setup.emaj, setup.etop.lines );
}
- flush_spans( setup );
-}
-
+ flush_spans();
-
-/**
- * Draw triangle into tile at (tx, ty) (tile coords)
- * The tile data should have already been fetched.
- */
-void
-tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty)
-{
- struct prim_header tri;
- struct setup_stage setup;
-
- tri.v[0] = (struct vertex_header *) v0;
- tri.v[1] = (struct vertex_header *) v1;
- tri.v[2] = (struct vertex_header *) v2;
-
- setup.tx = tx;
- setup.ty = ty;
-
- /* set clipping bounds to tile bounds */
- setup.cliprect_minx = tx * TILE_SIZE;
- setup.cliprect_miny = ty * TILE_SIZE;
- setup.cliprect_maxx = (tx + 1) * TILE_SIZE;
- setup.cliprect_maxy = (ty + 1) * TILE_SIZE;
-
- setup_tri(&setup, &tri);
+ return TRUE;
}
diff --git a/src/mesa/pipe/cell/spu/spu_tri.h b/src/mesa/pipe/cell/spu/spu_tri.h
index 86c42b6339..aa694dd7c9 100644
--- a/src/mesa/pipe/cell/spu/spu_tri.h
+++ b/src/mesa/pipe/cell/spu/spu_tri.h
@@ -30,7 +30,7 @@
#define SPU_TRI_H
-extern void
+extern boolean
tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty);
diff --git a/src/mesa/pipe/cell/spu/spu_util.c b/src/mesa/pipe/cell/spu/spu_util.c
new file mode 100644
index 0000000000..ac373240c1
--- /dev/null
+++ b/src/mesa/pipe/cell/spu/spu_util.c
@@ -0,0 +1,165 @@
+#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/tgsi/util/tgsi_parse.h"
+//#include "tgsi_build.h"
+#include "pipe/tgsi/util/tgsi_util.h"
+
+unsigned
+tgsi_util_get_src_register_swizzle(
+ const struct tgsi_src_register *reg,
+ unsigned component )
+{
+ switch( component ) {
+ case 0:
+ return reg->SwizzleX;
+ case 1:
+ return reg->SwizzleY;
+ case 2:
+ return reg->SwizzleZ;
+ case 3:
+ return reg->SwizzleW;
+ default:
+ assert( 0 );
+ }
+ return 0;
+}
+
+unsigned
+tgsi_util_get_src_register_extswizzle(
+ const struct tgsi_src_register_ext_swz *reg,
+ unsigned component )
+{
+ switch( component ) {
+ case 0:
+ return reg->ExtSwizzleX;
+ case 1:
+ return reg->ExtSwizzleY;
+ case 2:
+ return reg->ExtSwizzleZ;
+ case 3:
+ return reg->ExtSwizzleW;
+ default:
+ assert( 0 );
+ }
+ return 0;
+}
+
+unsigned
+tgsi_util_get_full_src_register_extswizzle(
+ const struct tgsi_full_src_register *reg,
+ unsigned component )
+{
+ unsigned swizzle;
+
+ /*
+ * First, calculate the extended swizzle for a given channel. This will give
+ * us either a channel index into the simple swizzle or a constant 1 or 0.
+ */
+ swizzle = tgsi_util_get_src_register_extswizzle(
+ &reg->SrcRegisterExtSwz,
+ component );
+
+ assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X);
+ assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y);
+ assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z);
+ assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W);
+ assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W);
+ assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W);
+
+ /*
+ * Second, calculate the simple swizzle for the unswizzled channel index.
+ * Leave the constants intact, they are not affected by the simple swizzle.
+ */
+ if( swizzle <= TGSI_SWIZZLE_W ) {
+ swizzle = tgsi_util_get_src_register_swizzle(
+ &reg->SrcRegister,
+ component );
+ }
+
+ return swizzle;
+}
+
+unsigned
+tgsi_util_get_src_register_extnegate(
+ const struct tgsi_src_register_ext_swz *reg,
+ unsigned component )
+{
+ switch( component ) {
+ case 0:
+ return reg->NegateX;
+ case 1:
+ return reg->NegateY;
+ case 2:
+ return reg->NegateZ;
+ case 3:
+ return reg->NegateW;
+ default:
+ assert( 0 );
+ }
+ return 0;
+}
+
+void
+tgsi_util_set_src_register_extnegate(
+ struct tgsi_src_register_ext_swz *reg,
+ unsigned negate,
+ unsigned component )
+{
+ switch( component ) {
+ case 0:
+ reg->NegateX = negate;
+ break;
+ case 1:
+ reg->NegateY = negate;
+ break;
+ case 2:
+ reg->NegateZ = negate;
+ break;
+ case 3:
+ reg->NegateW = negate;
+ break;
+ default:
+ assert( 0 );
+ }
+}
+
+unsigned
+tgsi_util_get_full_src_register_sign_mode(
+ const struct tgsi_full_src_register *reg,
+ unsigned component )
+{
+ unsigned sign_mode;
+
+ if( reg->SrcRegisterExtMod.Absolute ) {
+ /* Consider only the post-abs negation. */
+
+ if( reg->SrcRegisterExtMod.Negate ) {
+ sign_mode = TGSI_UTIL_SIGN_SET;
+ }
+ else {
+ sign_mode = TGSI_UTIL_SIGN_CLEAR;
+ }
+ }
+ else {
+ /* Accumulate the three negations. */
+
+ unsigned negate;
+
+ negate = reg->SrcRegister.Negate;
+ if( tgsi_util_get_src_register_extnegate( &reg->SrcRegisterExtSwz, component ) ) {
+ negate = !negate;
+ }
+ if( reg->SrcRegisterExtMod.Negate ) {
+ negate = !negate;
+ }
+
+ if( negate ) {
+ sign_mode = TGSI_UTIL_SIGN_TOGGLE;
+ }
+ else {
+ sign_mode = TGSI_UTIL_SIGN_KEEP;
+ }
+ }
+
+ return sign_mode;
+}
diff --git a/src/mesa/pipe/cell/spu/spu_vertex_fetch.c b/src/mesa/pipe/cell/spu/spu_vertex_fetch.c
new file mode 100644
index 0000000000..6e86a919ce
--- /dev/null
+++ b/src/mesa/pipe/cell/spu/spu_vertex_fetch.c
@@ -0,0 +1,393 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include <spu_mfcio.h>
+#include <transpose_matrix4x4.h>
+
+#include "pipe/p_util.h"
+#include "pipe/p_state.h"
+#include "pipe/p_shader_tokens.h"
+#include "spu_exec.h"
+#include "spu_vertex_shader.h"
+#include "spu_main.h"
+
+
+#define DRAW_DBG 0
+
+
+/* Starting value of every fetched attribute: components the source format
+ * does not supply keep these values (0, 0, 0, 1).
+ */
+static const vec_float4 defaults = { 0.0, 0.0, 0.0, 1.0 };
+
+/**
+ * Fetch a float[4] vertex attribute from memory, doing format/type
+ * conversion as needed.
+ *
+ * FETCH_ATTRIB( NAME, SZ, CVT ) defines the function fetch_NAME, which
+ * starts from 'defaults' and overwrites the first SZ components with the
+ * CVT expression.  CVT is evaluated inside the loop, so it may refer to the
+ * function's 'ptr' argument and to the loop counter 'i' by name.
+ *
+ * This is probably needed/duplicated elsewhere, eg format
+ * conversion, texture sampling etc.
+ */
+#define FETCH_ATTRIB( NAME, SZ, CVT ) \
+static qword \
+fetch_##NAME(const void *ptr) \
+{ \
+ vec_float4 attrib = defaults; \
+ int i; \
+ \
+ for (i = 0; i < SZ; i++) { \
+ attrib = spu_insert(CVT, attrib, i); \
+ } \
+ return (qword) attrib; \
+}
+
+/*
+ * Per-component conversion expressions plugged into FETCH_ATTRIB.  Each one
+ * reads element 'i' of the buffer 'ptr' (both names are supplied by the
+ * function the macro is expanded in) and yields a float.
+ *
+ * The signed 8-bit variants name 'signed char' explicitly: plain 'char' has
+ * implementation-defined signedness, and where it is unsigned, negative
+ * inputs would be read back as 128..255.  Every expansion is parenthesized
+ * so it remains a single operand wherever it is substituted, and the casts
+ * keep the 'const' of the fetch function's 'ptr' argument.
+ */
+#define CVT_64_FLOAT   ((float) ((const double *) ptr)[i])
+#define CVT_32_FLOAT   (((const float *) ptr)[i])
+
+#define CVT_8_USCALED  ((float) ((const unsigned char *) ptr)[i])
+#define CVT_16_USCALED ((float) ((const unsigned short *) ptr)[i])
+#define CVT_32_USCALED ((float) ((const unsigned int *) ptr)[i])
+
+#define CVT_8_SSCALED  ((float) ((const signed char *) ptr)[i])
+#define CVT_16_SSCALED ((float) ((const short *) ptr)[i])
+#define CVT_32_SSCALED ((float) ((const int *) ptr)[i])
+
+#define CVT_8_UNORM    ((float) ((const unsigned char *) ptr)[i] / 255.0f)
+#define CVT_16_UNORM   ((float) ((const unsigned short *) ptr)[i] / 65535.0f)
+#define CVT_32_UNORM   ((float) ((const unsigned int *) ptr)[i] / 4294967295.0f)
+
+#define CVT_8_SNORM    ((float) ((const signed char *) ptr)[i] / 127.0f)
+#define CVT_16_SNORM   ((float) ((const short *) ptr)[i] / 32767.0f)
+#define CVT_32_SNORM   ((float) ((const int *) ptr)[i] / 2147483647.0f)
+
+/*
+ * Instantiate one fetch_<FORMAT> function per supported vertex format.
+ * The second argument is the number of components present in memory, the
+ * third the per-component conversion expression.
+ */
+FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT )
+FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT )
+FETCH_ATTRIB( R64G64_FLOAT, 2, CVT_64_FLOAT )
+FETCH_ATTRIB( R64_FLOAT, 1, CVT_64_FLOAT )
+
+FETCH_ATTRIB( R32G32B32A32_FLOAT, 4, CVT_32_FLOAT )
+FETCH_ATTRIB( R32G32B32_FLOAT, 3, CVT_32_FLOAT )
+FETCH_ATTRIB( R32G32_FLOAT, 2, CVT_32_FLOAT )
+FETCH_ATTRIB( R32_FLOAT, 1, CVT_32_FLOAT )
+
+FETCH_ATTRIB( R32G32B32A32_USCALED, 4, CVT_32_USCALED )
+FETCH_ATTRIB( R32G32B32_USCALED, 3, CVT_32_USCALED )
+FETCH_ATTRIB( R32G32_USCALED, 2, CVT_32_USCALED )
+FETCH_ATTRIB( R32_USCALED, 1, CVT_32_USCALED )
+
+FETCH_ATTRIB( R32G32B32A32_SSCALED, 4, CVT_32_SSCALED )
+FETCH_ATTRIB( R32G32B32_SSCALED, 3, CVT_32_SSCALED )
+FETCH_ATTRIB( R32G32_SSCALED, 2, CVT_32_SSCALED )
+FETCH_ATTRIB( R32_SSCALED, 1, CVT_32_SSCALED )
+
+FETCH_ATTRIB( R32G32B32A32_UNORM, 4, CVT_32_UNORM )
+FETCH_ATTRIB( R32G32B32_UNORM, 3, CVT_32_UNORM )
+FETCH_ATTRIB( R32G32_UNORM, 2, CVT_32_UNORM )
+FETCH_ATTRIB( R32_UNORM, 1, CVT_32_UNORM )
+
+FETCH_ATTRIB( R32G32B32A32_SNORM, 4, CVT_32_SNORM )
+FETCH_ATTRIB( R32G32B32_SNORM, 3, CVT_32_SNORM )
+FETCH_ATTRIB( R32G32_SNORM, 2, CVT_32_SNORM )
+FETCH_ATTRIB( R32_SNORM, 1, CVT_32_SNORM )
+
+FETCH_ATTRIB( R16G16B16A16_USCALED, 4, CVT_16_USCALED )
+FETCH_ATTRIB( R16G16B16_USCALED, 3, CVT_16_USCALED )
+FETCH_ATTRIB( R16G16_USCALED, 2, CVT_16_USCALED )
+FETCH_ATTRIB( R16_USCALED, 1, CVT_16_USCALED )
+
+FETCH_ATTRIB( R16G16B16A16_SSCALED, 4, CVT_16_SSCALED )
+FETCH_ATTRIB( R16G16B16_SSCALED, 3, CVT_16_SSCALED )
+FETCH_ATTRIB( R16G16_SSCALED, 2, CVT_16_SSCALED )
+FETCH_ATTRIB( R16_SSCALED, 1, CVT_16_SSCALED )
+
+FETCH_ATTRIB( R16G16B16A16_UNORM, 4, CVT_16_UNORM )
+FETCH_ATTRIB( R16G16B16_UNORM, 3, CVT_16_UNORM )
+FETCH_ATTRIB( R16G16_UNORM, 2, CVT_16_UNORM )
+FETCH_ATTRIB( R16_UNORM, 1, CVT_16_UNORM )
+
+FETCH_ATTRIB( R16G16B16A16_SNORM, 4, CVT_16_SNORM )
+FETCH_ATTRIB( R16G16B16_SNORM, 3, CVT_16_SNORM )
+FETCH_ATTRIB( R16G16_SNORM, 2, CVT_16_SNORM )
+FETCH_ATTRIB( R16_SNORM, 1, CVT_16_SNORM )
+
+FETCH_ATTRIB( R8G8B8A8_USCALED, 4, CVT_8_USCALED )
+FETCH_ATTRIB( R8G8B8_USCALED, 3, CVT_8_USCALED )
+FETCH_ATTRIB( R8G8_USCALED, 2, CVT_8_USCALED )
+FETCH_ATTRIB( R8_USCALED, 1, CVT_8_USCALED )
+
+FETCH_ATTRIB( R8G8B8A8_SSCALED, 4, CVT_8_SSCALED )
+FETCH_ATTRIB( R8G8B8_SSCALED, 3, CVT_8_SSCALED )
+FETCH_ATTRIB( R8G8_SSCALED, 2, CVT_8_SSCALED )
+FETCH_ATTRIB( R8_SSCALED, 1, CVT_8_SSCALED )
+
+FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM )
+FETCH_ATTRIB( R8G8B8_UNORM, 3, CVT_8_UNORM )
+FETCH_ATTRIB( R8G8_UNORM, 2, CVT_8_UNORM )
+FETCH_ATTRIB( R8_UNORM, 1, CVT_8_UNORM )
+
+FETCH_ATTRIB( R8G8B8A8_SNORM, 4, CVT_8_SNORM )
+FETCH_ATTRIB( R8G8B8_SNORM, 3, CVT_8_SNORM )
+FETCH_ATTRIB( R8G8_SNORM, 2, CVT_8_SNORM )
+FETCH_ATTRIB( R8_SNORM, 1, CVT_8_SNORM )
+
+/* NOTE(review): A8R8G8B8 is expanded with the same size and conversion as
+ * R8G8B8A8_UNORM, so the four bytes are taken in memory order with no
+ * component reordering -- confirm that this is the intended layout.
+ */
+FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM )
+//FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM )
+
+
+
+/**
+ * Map a vertex element format to the fetch_<FORMAT> routine that converts
+ * it into a float[4] qword.
+ *
+ * \param format  format of the vertex element
+ * \return the matching routine, or NULL for format 0; any unhandled format
+ *         trips an assertion and also yields NULL
+ */
+static spu_fetch_func get_fetch_func( enum pipe_format format )
+{
+/* One switch arm: PIPE_FORMAT_<FMT> selects fetch_<FMT>. */
+#define FETCH_CASE( FMT ) \
+   case PIPE_FORMAT_##FMT: \
+      return fetch_##FMT
+
+#if 0
+   {
+      char tmp[80];
+      pf_sprint_name(tmp, format);
+      _mesa_printf("%s: %s\n", __FUNCTION__, tmp);
+   }
+#endif
+
+   switch (format) {
+   FETCH_CASE( R64_FLOAT );
+   FETCH_CASE( R64G64_FLOAT );
+   FETCH_CASE( R64G64B64_FLOAT );
+   FETCH_CASE( R64G64B64A64_FLOAT );
+
+   FETCH_CASE( R32_FLOAT );
+   FETCH_CASE( R32G32_FLOAT );
+   FETCH_CASE( R32G32B32_FLOAT );
+   FETCH_CASE( R32G32B32A32_FLOAT );
+
+   FETCH_CASE( R32_UNORM );
+   FETCH_CASE( R32G32_UNORM );
+   FETCH_CASE( R32G32B32_UNORM );
+   FETCH_CASE( R32G32B32A32_UNORM );
+
+   FETCH_CASE( R32_USCALED );
+   FETCH_CASE( R32G32_USCALED );
+   FETCH_CASE( R32G32B32_USCALED );
+   FETCH_CASE( R32G32B32A32_USCALED );
+
+   FETCH_CASE( R32_SNORM );
+   FETCH_CASE( R32G32_SNORM );
+   FETCH_CASE( R32G32B32_SNORM );
+   FETCH_CASE( R32G32B32A32_SNORM );
+
+   FETCH_CASE( R32_SSCALED );
+   FETCH_CASE( R32G32_SSCALED );
+   FETCH_CASE( R32G32B32_SSCALED );
+   FETCH_CASE( R32G32B32A32_SSCALED );
+
+   FETCH_CASE( R16_UNORM );
+   FETCH_CASE( R16G16_UNORM );
+   FETCH_CASE( R16G16B16_UNORM );
+   FETCH_CASE( R16G16B16A16_UNORM );
+
+   FETCH_CASE( R16_USCALED );
+   FETCH_CASE( R16G16_USCALED );
+   FETCH_CASE( R16G16B16_USCALED );
+   FETCH_CASE( R16G16B16A16_USCALED );
+
+   FETCH_CASE( R16_SNORM );
+   FETCH_CASE( R16G16_SNORM );
+   FETCH_CASE( R16G16B16_SNORM );
+   FETCH_CASE( R16G16B16A16_SNORM );
+
+   FETCH_CASE( R16_SSCALED );
+   FETCH_CASE( R16G16_SSCALED );
+   FETCH_CASE( R16G16B16_SSCALED );
+   FETCH_CASE( R16G16B16A16_SSCALED );
+
+   FETCH_CASE( R8_UNORM );
+   FETCH_CASE( R8G8_UNORM );
+   FETCH_CASE( R8G8B8_UNORM );
+   FETCH_CASE( R8G8B8A8_UNORM );
+
+   FETCH_CASE( R8_USCALED );
+   FETCH_CASE( R8G8_USCALED );
+   FETCH_CASE( R8G8B8_USCALED );
+   FETCH_CASE( R8G8B8A8_USCALED );
+
+   FETCH_CASE( R8_SNORM );
+   FETCH_CASE( R8G8_SNORM );
+   FETCH_CASE( R8G8B8_SNORM );
+   FETCH_CASE( R8G8B8A8_SNORM );
+
+   FETCH_CASE( R8_SSCALED );
+   FETCH_CASE( R8G8_SSCALED );
+   FETCH_CASE( R8G8B8_SSCALED );
+   FETCH_CASE( R8G8B8A8_SSCALED );
+
+   FETCH_CASE( A8R8G8B8_UNORM );
+
+   case 0:
+      return NULL;		/* not sure why this is needed */
+
+   default:
+      assert(0);
+      return NULL;
+   }
+
+#undef FETCH_CASE
+}
+
+
+/**
+ * Fetch vertex attributes for 'count' vertices.
+ *
+ * For every attribute, each vertex's data is DMA'd from main memory into a
+ * local buffer, converted to float[4] by that attribute's fetch routine,
+ * and the (up to) four results are transposed into machine->Inputs[attr].
+ *
+ * \param draw     supplies the per-attribute source address, pitch and
+ *                 fetch routine
+ * \param machine  receives the transposed shader inputs
+ * \param elts     vertex indices, 'count' entries
+ * \param count    number of vertices to fetch, at most 4
+ */
+static void generic_vertex_fetch(struct spu_vs_context *draw,
+                                 struct spu_exec_machine *machine,
+                                 const unsigned *elts,
+                                 unsigned count)
+{
+   unsigned nr_attrs = draw->vertex_fetch.nr_attrs;
+   unsigned attr;
+
+   assert(count <= 4);
+
+   wait_on_mask(1 << TAG_VERTEX_BUFFER);
+
+#if DRAW_DBG
+   printf("SPU: %s count = %u, nr_attrs = %u\n",
+          __FUNCTION__, count, nr_attrs);
+#endif
+
+   /* loop over vertex attributes (vertex shader inputs)
+    */
+   for (attr = 0; attr < nr_attrs; attr++) {
+      const unsigned pitch = draw->vertex_fetch.pitch[attr];
+      const uint64_t src = draw->vertex_fetch.src_ptr[attr];
+      const spu_fetch_func fetch = draw->vertex_fetch.fetch[attr];
+      unsigned i;
+      qword p[4];
+
+
+      /* Fetch four attributes for four vertices.
+       *
+       * Could fetch directly into AOS format, but this is meant to be
+       * a prototype for an sse implementation, which would have
+       * difficulties doing that.
+       */
+      for (i = 0; i < count; i++) {
+         uint8_t buffer[32] ALIGN16_ATTRIB;
+         /* Widen the index before multiplying so that index * pitch is
+          * computed in 64 bits instead of wrapping at 32 bits.
+          */
+         const uint64_t addr = src + ((uint64_t) elts[i] * pitch);
+         /* The DMA starts at the 16-byte boundary at or below 'addr'; an
+          * unaligned attribute gets a second quadword.
+          * NOTE(review): at most 32 bytes are ever fetched, so an
+          * attribute reaching further than that from the aligned start
+          * (e.g. four doubles) would not be fully covered -- confirm such
+          * formats cannot reach this path.
+          */
+         const unsigned size = ((addr & 0x0f) == 0) ? 16 : 32;
+
+#if DRAW_DBG
+         printf("SPU: fetching = 0x%llx\n", addr);
+#endif
+         mfc_get(buffer, addr & ~0x0f, size, TAG_VERTEX_BUFFER, 0, 0);
+         wait_on_mask(1 << TAG_VERTEX_BUFFER);
+
+         p[i] = (*fetch)(buffer + (addr & 0x0f));
+      }
+
+      /* Be nice and zero out any missing vertices.  Store an explicit
+       * zero vector rather than xor-ing the slot with itself, which would
+       * read a value that was never written.
+       */
+      for (/* empty */; i < 4; i++)
+         p[i] = (qword) spu_splats(0.0f);
+
+      /* Transpose/swizzle into vector-friendly format.  Currently
+       * assuming that all vertex shader inputs are float[4], but this
+       * isn't true -- if the vertex shader only wants tex0.xy, we
+       * could optimize for that.
+       *
+       * To do so fully without codegen would probably require an
+       * excessive number of fetch functions, but we could at least
+       * minimize the transpose step:
+       */
+      _transpose_matrix4x4(&machine->Inputs[attr].xyzw[0].q, p);
+   }
+}
+
+
+/**
+ * Re-derive the per-attribute fetch routines from the attribute formats
+ * and install generic_vertex_fetch as the whole-vertex fetch entry point.
+ */
+void spu_update_vertex_fetch( struct spu_vs_context *draw )
+{
+   const unsigned nr_attrs = draw->vertex_fetch.nr_attrs;
+   unsigned attr;
+
+   for (attr = 0; attr != nr_attrs; attr++) {
+      const enum pipe_format format = draw->vertex_fetch.format[attr];
+
+      draw->vertex_fetch.fetch[attr] = get_fetch_func(format);
+   }
+
+   draw->vertex_fetch.fetch_func = generic_vertex_fetch;
+}
diff --git a/src/mesa/pipe/cell/spu/spu_vertex_shader.c b/src/mesa/pipe/cell/spu/spu_vertex_shader.c
new file mode 100644
index 0000000000..c1cbbb6d1e
--- /dev/null
+++ b/src/mesa/pipe/cell/spu/spu_vertex_shader.c
@@ -0,0 +1,231 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Brian Paul
+ * Ian Romanick <idr@us.ibm.com>
+ */
+
+#include <spu_mfcio.h>
+
+#include "pipe/p_util.h"
+#include "pipe/p_state.h"
+#include "pipe/p_shader_tokens.h"
+#include "spu_vertex_shader.h"
+#include "spu_exec.h"
+#include "pipe/draw/draw_private.h"
+#include "pipe/draw/draw_context.h"
+#include "pipe/cell/common.h"
+#include "spu_main.h"
+
+/**
+ * Build the clip mask of one clip-space position.
+ *
+ * \param clip   clip-space position (x, y, z, w)
+ * \param plane  clip plane equations; only entries 6 .. nr-1 are read
+ * \param nr     total number of planes, including the six hardwired ones
+ * \return one bit per plane whose test the position fails
+ */
+static INLINE unsigned
+compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
+{
+   const float x = clip[0];
+   const float y = clip[1];
+   const float z = clip[2];
+   const float w = clip[3];
+   unsigned mask = 0;
+   unsigned i = 6;
+
+   /* The six view-volume planes are tested without a plane equation. */
+   if (-x + w < 0)
+      mask |= CLIP_RIGHT_BIT;
+   if ( x + w < 0)
+      mask |= CLIP_LEFT_BIT;
+   if (-y + w < 0)
+      mask |= CLIP_TOP_BIT;
+   if ( y + w < 0)
+      mask |= CLIP_BOTTOM_BIT;
+   if (-z + w < 0)
+      mask |= CLIP_FAR_BIT;
+   if ( z + w < 0)
+      mask |= CLIP_NEAR_BIT;
+
+   /* Every further plane sets bit i when its dot product is negative. */
+   while (i < nr) {
+      if (dot4(clip, plane[i]) < 0)
+         mask |= (1<<i);
+      i++;
+   }
+
+   return mask;
+}
+
+
+/**
+ * Transform vertices with the current vertex program/shader
+ * Up to four vertices can be shaded at a time.
+ * \param draw   vertex shader context (fetch state, machine, viewport,
+ *               clip planes)
+ * \param elts   indexes of four input vertices
+ * \param count  number of vertices to shade [1..4]
+ * \param vOut   array of main-memory addresses of the output vertices;
+ *               each vertex is read, updated and written back by DMA
+ */
+static void
+run_vertex_program(struct spu_vs_context *draw,
+                   unsigned elts[4], unsigned count,
+                   const uint64_t *vOut)
+{
+   struct spu_exec_machine *machine = &draw->machine;
+   unsigned int j;
+
+   ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_ATTRIB_MAX);
+   ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_ATTRIB_MAX);
+   const float *scale = draw->viewport.scale;
+   const float *trans = draw->viewport.translate;
+
+   assert(count <= 4);
+
+   machine->Processor = TGSI_PROCESSOR_VERTEX;
+
+   ASSERT_ALIGN16(draw->constants);
+   machine->Consts = (float (*)[4]) draw->constants;
+
+   machine->Inputs = ALIGN16_ASSIGN(inputs);
+   machine->Outputs = ALIGN16_ASSIGN(outputs);
+
+   spu_vertex_fetch( draw, machine, elts, count );
+
+   /* run shader */
+   spu_exec_machine_run( machine );
+
+
+   /* store machine results */
+   for (j = 0; j < count; j++) {
+      unsigned slot;
+      float x, y, z, w;
+      unsigned char buffer[sizeof(struct vertex_header)
+                           + MAX_VERTEX_SIZE] ALIGN16_ATTRIB;
+      struct vertex_header *const tmpOut =
+         (struct vertex_header *) buffer;
+      const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header)
+                                           + (sizeof(float) * 4
+                                              * draw->num_vs_outputs));
+
+      /* Read the vertex first so that fields not rewritten below go back
+       * unchanged with the put at the end of the iteration.
+       */
+      mfc_get(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0);
+      wait_on_mask(1 << TAG_VERTEX_BUFFER);
+
+
+      /* Handle attr[0] (position) specially:
+       *
+       * XXX: Computing the clipmask should be done in the vertex
+       * program as a set of DP4 instructions appended to the
+       * user-provided code.
+       */
+      x = tmpOut->clip[0] = machine->Outputs[0].xyzw[0].f[j];
+      y = tmpOut->clip[1] = machine->Outputs[0].xyzw[1].f[j];
+      z = tmpOut->clip[2] = machine->Outputs[0].xyzw[2].f[j];
+      w = tmpOut->clip[3] = machine->Outputs[0].xyzw[3].f[j];
+
+      tmpOut->clipmask = compute_clipmask(tmpOut->clip, draw->plane,
+                                          draw->nr_planes);
+      tmpOut->edgeflag = 1;
+
+      /* divide by w */
+      w = 1.0f / w;
+      x *= w;
+      y *= w;
+      z *= w;
+
+      /* Viewport mapping */
+      tmpOut->data[0][0] = x * scale[0] + trans[0];
+      tmpOut->data[0][1] = y * scale[1] + trans[1];
+      tmpOut->data[0][2] = z * scale[2] + trans[2];
+      tmpOut->data[0][3] = w;
+
+      /* Remaining attributes are packed into sequential post-transform
+       * vertex attrib slots.
+       */
+      for (slot = 1; slot < draw->num_vs_outputs; slot++) {
+         tmpOut->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
+         tmpOut->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
+         tmpOut->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
+         tmpOut->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+      }
+
+      mfc_put(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0);
+
+      /* The put reads 'buffer' asynchronously.  Wait for it to finish
+       * before the buffer is refilled by the next iteration's mfc_get or
+       * goes out of scope; neither command is issued with a fence, so the
+       * tag group alone does not order them.
+       */
+      wait_on_mask(1 << TAG_VERTEX_BUFFER);
+   } /* loop over vertices */
+}
+
+
+/**
+ * Install the per-draw vertex shader state in the context and initialize
+ * the interpreter for a vertex shader.
+ *
+ * \param draw         context to update
+ * \param uniforms     constant buffer, stored as draw->constants
+ * \param planes       clip plane equations, nr_planes entries of float[4],
+ *                     copied into draw->plane
+ * \param nr_planes    number of plane equations; must fit in draw->plane
+ * \param num_outputs  number of vertex shader outputs
+ */
+static void
+spu_bind_vertex_shader(struct spu_vs_context *draw,
+                       void *uniforms,
+                       void *planes,
+                       unsigned nr_planes,
+                       unsigned num_outputs
+                       )
+{
+   draw->constants = (float (*)[4]) uniforms;
+
+   /* draw->plane is a fixed-size array; refuse a copy that would run past
+    * its end.
+    */
+   assert(nr_planes <= sizeof(draw->plane) / sizeof(draw->plane[0]));
+
+   (void) memcpy(draw->plane, planes, sizeof(draw->plane[0]) * nr_planes);
+   draw->nr_planes = nr_planes;
+   draw->num_vs_outputs = num_outputs;
+
+   /* specify the shader to interpret/execute */
+   spu_exec_machine_init(&draw->machine,
+                         PIPE_MAX_SAMPLERS,
+                         NULL /*samplers*/,
+                         PIPE_SHADER_VERTEX);
+}
+
+
+/* Local staging buffer for the shader immediates.  The extra 32 bytes
+ * leave room for the bytes in front of an unaligned source address and for
+ * rounding the transfer size up to a multiple of 16.
+ */
+unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32]
+   ALIGN16_ATTRIB;
+
+/**
+ * Execute one vertex shader command.
+ *
+ * Fetches the shader immediates by DMA, points the interpreter at the
+ * instructions and declarations named by the command, binds constants and
+ * clip planes, then shades the command's vertices in batches of up to
+ * four.
+ *
+ * \param draw  vertex shader context to run in
+ * \param vs    command describing the shader, the vertex indices (elts)
+ *              and the output vertex addresses (vOut)
+ */
+void
+spu_execute_vertex_shader(struct spu_vs_context *draw,
+                          const struct cell_command_vs *vs)
+{
+   unsigned i;
+
+   const uint64_t immediate_addr = vs->shader.immediates;
+   const unsigned immediate_size =
+      ROUNDUP16((sizeof(float) * 4 * vs->shader.num_immediates)
+                + (immediate_addr & 0x0f));
+
+   /* The transfer lands in a fixed-size buffer; catch a command whose
+    * immediates would not fit.
+    */
+   assert(immediate_size <= sizeof(immediates));
+
+   /* Start the transfer now; it is only waited for right before the data
+    * is copied out below.
+    */
+   mfc_get(immediates, immediate_addr & ~0x0f, immediate_size,
+           TAG_VERTEX_BUFFER, 0, 0);
+
+   draw->machine.Instructions = (struct tgsi_full_instruction *)
+      vs->shader.instructions;
+   draw->machine.NumInstructions = vs->shader.num_instructions;
+
+   draw->machine.Declarations = (struct tgsi_full_declaration *)
+      vs->shader.declarations;
+   draw->machine.NumDeclarations = vs->shader.num_declarations;
+
+   draw->vertex_fetch.nr_attrs = vs->nr_attrs;
+
+   wait_on_mask(1 << TAG_VERTEX_BUFFER);
+
+   /* Skip the bytes that precede the unaligned source address. */
+   (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f],
+                 sizeof(float) * 4 * vs->shader.num_immediates);
+
+   spu_bind_vertex_shader(draw, vs->shader.uniforms,
+                          vs->plane, vs->nr_planes,
+                          vs->shader.num_outputs);
+
+   for (i = 0; i < vs->num_elts; i += 4) {
+      const unsigned batch_size = MIN2(vs->num_elts - i, 4);
+
+      run_vertex_program(draw, & vs->elts[i], batch_size, &vs->vOut[i]);
+   }
+}
diff --git a/src/mesa/pipe/cell/spu/spu_vertex_shader.h b/src/mesa/pipe/cell/spu/spu_vertex_shader.h
new file mode 100644
index 0000000000..c96b93ff0a
--- /dev/null
+++ b/src/mesa/pipe/cell/spu/spu_vertex_shader.h
@@ -0,0 +1,61 @@
+#ifndef SPU_VERTEX_SHADER_H
+#define SPU_VERTEX_SHADER_H
+
+#include "pipe/p_format.h"
+#include "spu_exec.h"
+
+struct spu_vs_context;
+
+/** Converts one vertex attribute found at 'ptr' into a float[4] qword. */
+typedef qword (*spu_fetch_func)(const void *ptr);
+/** Fetches all attributes of 'count' vertices, selected by 'elts', into
+ * the inputs of 'machine'.
+ */
+typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw,
+ struct spu_exec_machine *machine,
+ const unsigned *elts,
+ unsigned count );
+
+/**
+ * State used to fetch and shade vertices.
+ */
+struct spu_vs_context {
+ /* scale/translate applied to the position after the divide by w */
+ struct pipe_viewport_state viewport;
+
+ /* Vertex fetch state, one entry per vertex attribute. */
+ struct {
+ uint64_t src_ptr[PIPE_ATTRIB_MAX];
+ unsigned pitch[PIPE_ATTRIB_MAX];
+ enum pipe_format format[PIPE_ATTRIB_MAX];
+ unsigned nr_attrs;
+ /* when set, spu_vertex_fetch() re-derives the fetch functions below */
+ boolean dirty;
+
+ /* filled in by spu_update_vertex_fetch() */
+ spu_fetch_func fetch[PIPE_ATTRIB_MAX];
+ spu_full_fetch_func fetch_func;
+ } vertex_fetch;
+
+ /* Clip derived state:
+ */
+ float plane[12][4];
+ unsigned nr_planes;
+
+ struct spu_exec_machine machine;
+ const float (*constants)[4];
+
+ unsigned num_vs_outputs;
+};
+
+extern void spu_update_vertex_fetch(struct spu_vs_context *draw);
+
+/**
+ * Fetch the shader inputs for up to four vertices.
+ *
+ * If the fetch state is flagged dirty it is refreshed first and the flag
+ * is cleared; the work itself is done by the installed fetch_func.
+ */
+static INLINE void spu_vertex_fetch(struct spu_vs_context *draw,
+                                    struct spu_exec_machine *machine,
+                                    const unsigned *elts,
+                                    unsigned count)
+{
+   spu_full_fetch_func fetch_all;
+
+   if (draw->vertex_fetch.dirty) {
+      spu_update_vertex_fetch(draw);
+      draw->vertex_fetch.dirty = 0;
+   }
+
+   /* Read the pointer only after the refresh above may have replaced it. */
+   fetch_all = draw->vertex_fetch.fetch_func;
+   fetch_all(draw, machine, elts, count);
+}
+
+struct cell_command_vs;
+
+extern void
+spu_execute_vertex_shader(struct spu_vs_context *draw,
+ const struct cell_command_vs *vs);
+
+#endif /* SPU_VERTEX_SHADER_H */
diff --git a/src/mesa/pipe/cell/spu/spu_ztest.h b/src/mesa/pipe/cell/spu/spu_ztest.h
new file mode 100644
index 0000000000..ce8ad00339
--- /dev/null
+++ b/src/mesa/pipe/cell/spu/spu_ztest.h
@@ -0,0 +1,135 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * Zbuffer/depth test code.
+ */
+
+
+#ifndef SPU_ZTEST_H
+#define SPU_ZTEST_H
+
+
+#ifdef __SPU__
+#include <spu_intrinsics.h>
+#endif
+
+
+
+/**
+ * Perform Z testing for a 16-bit/value Z buffer.
+ *
+ * A fragment passes when its Z, converted to 16 bits, is less than the
+ * stored Z and its inMask lane is set.  Passing fragments overwrite the
+ * stored Z; the four Z values of the other quad in *zbuf are left as is.
+ *
+ * \param zvals vector of four fragment zvalues as floats
+ * \param zbuf ptr to vector of ushort[8] zbuffer values. Note that this
+ * contains the Z values for 2 quads, 8 pixels.
+ * \param x x coordinate of quad (only lsbit is significant): odd selects
+ * ushorts 4..7 (bytes 8..15), even selects ushorts 0..3 (bytes 0..7)
+ * \param inMask indicates which fragments in the quad are alive
+ * \return new mask indicating which fragments are alive after ztest
+ */
+static INLINE vector unsigned int
+spu_z16_test_less(vector float zvals, vector unsigned short *zbuf,
+ uint x, vector unsigned int inMask)
+{
+#define ZERO 0x80
+ vector unsigned int zvals_ui4, zbuf_ui4, mask;
+
+ /* convert floats to uints in [0, 65535] */
+ zvals_ui4 = spu_convtu(zvals, 32); /* convert to [0, 2^32] */
+ zvals_ui4 = spu_rlmask(zvals_ui4, -16); /* right shift 16 */
+
+ /* XXX this conditional could be removed with a bit of work */
+ if (x & 1) {
+ /* convert zbuffer values from ushorts to uints */
+ /* gather lower four ushorts (bytes 8..15) */
+ zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf,
+ (vector unsigned int) *zbuf,
+ ((vector unsigned char) {
+ ZERO, ZERO, 8, 9, ZERO, ZERO, 10, 11,
+ ZERO, ZERO, 12, 13, ZERO, ZERO, 14, 15}));
+ /* mask = (zbuf_ui4 > zvals_ui4) ? ~0 : 0, i.e. fragment Z < stored Z */
+ mask = spu_cmpgt(zbuf_ui4, zvals_ui4);
+ /* mask &= inMask */
+ mask = spu_and(mask, inMask);
+ /* zbuf = mask ? zval : zbuf */
+ zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask);
+ /* convert zbuffer values from uints back to ushorts and store them in
+ * the lower 4 (bytes 8..15); the upper 4 (bytes 0..7) are preserved */
+ *zbuf = (vector unsigned short)
+ spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf,
+ ((vector unsigned char) {
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 2, 3, 6, 7, 10, 11, 14, 15}));
+ }
+ else {
+ /* convert zbuffer values from ushorts to uints */
+ /* gather upper four ushorts (bytes 0..7) */
+ zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf,
+ (vector unsigned int) *zbuf,
+ ((vector unsigned char) {
+ ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3,
+ ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7}));
+ /* mask = (zbuf_ui4 > zvals_ui4) ? ~0 : 0, i.e. fragment Z < stored Z */
+ mask = spu_cmpgt(zbuf_ui4, zvals_ui4);
+ /* mask &= inMask */
+ mask = spu_and(mask, inMask);
+ /* zbuf = mask ? zval : zbuf */
+ zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask);
+ /* convert zbuffer values from uints back to ushorts and store them in
+ * the upper 4 (bytes 0..7); the lower 4 (bytes 8..15) are preserved */
+ *zbuf = (vector unsigned short)
+ spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf,
+ ((vector unsigned char) {
+ 2, 3, 6, 7, 10, 11, 14, 15,
+ 24, 25, 26, 27, 28, 29, 30, 31}));
+ }
+ return mask;
+#undef ZERO
+}
+
+
+/**
+ * Z "less" test against four Z buffer values stored as 32-bit uints.
+ *
+ * \param zvals     vector of four fragment zvalues as floats
+ * \param zbuf_ptr  the four stored Z values; lanes that pass are
+ *                  overwritten with the fragment Z
+ * \param inMask    indicates which fragments in the quad are alive
+ * \return new mask indicating which fragments are alive after ztest
+ */
+static INLINE vector unsigned int
+spu_z32_test_less(vector float zvals, vector unsigned int *zbuf_ptr,
+                  vector unsigned int inMask)
+{
+   const vector unsigned int stored = *zbuf_ptr;
+   /* fragment Z: floats converted to uints in [0, 0xffffffff] */
+   const vector unsigned int incoming = spu_convtu(zvals, 32);
+   /* all ones where stored Z > fragment Z, restricted to live fragments */
+   const vector unsigned int pass =
+      spu_and(spu_cmpgt(stored, incoming), inMask);
+
+   /* passing lanes take the fragment Z, the others keep the stored Z */
+   *zbuf_ptr = spu_sel(stored, incoming, pass);
+
+   return pass;
+}
+
+
+#endif /* SPU_ZTEST_H */
diff --git a/src/mesa/pipe/draw/Makefile b/src/mesa/pipe/draw/Makefile
new file mode 100644
index 0000000000..451911a354
--- /dev/null
+++ b/src/mesa/pipe/draw/Makefile
@@ -0,0 +1,2 @@
+# Delegate to the make run in the parent directory.  '&&' keeps make from
+# running in this directory if the cd fails, and $(MAKE) passes the
+# invoking make's flags on to the sub-make.
+default:
+	cd .. && $(MAKE)
diff --git a/src/mesa/pipe/draw/draw_clip.c b/src/mesa/pipe/draw/draw_clip.c
index 2d410e3244..61130c5600 100644
--- a/src/mesa/pipe/draw/draw_clip.c
+++ b/src/mesa/pipe/draw/draw_clip.c
@@ -33,6 +33,8 @@
#include "pipe/p_util.h"
+#include "pipe/p_shader_tokens.h"
+
#include "draw_context.h"
#include "draw_private.h"
@@ -54,6 +56,12 @@
struct clipper {
struct draw_stage stage; /**< base class */
+ /* Basically duplicate some of the flatshading logic here:
+ */
+ boolean flat;
+ uint num_color_attribs;
+ uint color_attribs[4]; /* front/back primary/secondary colors */
+
float (*plane)[4];
};
@@ -82,6 +90,17 @@ static void interp_attr( float *fdst,
fdst[3] = LINTERP( t, fout[3], fin[3] );
}
+/**
+ * Copy every attribute listed in the clipper's color_attribs table from
+ * 'src' to 'dst'.
+ */
+static void copy_colors( struct draw_stage *stage,
+                         struct vertex_header *dst,
+                         const struct vertex_header *src )
+{
+   const struct clipper *clipper = clipper_stage(stage);
+   const uint nr = clipper->num_color_attribs;
+   uint n = 0;
+
+   while (n < nr) {
+      const uint slot = clipper->color_attribs[n];
+
+      COPY_4FV(dst->data[slot], src->data[slot]);
+      n++;
+   }
+}
@@ -134,27 +153,11 @@ static void interp( const struct clipper *clip,
}
}
-#if 0
-static INLINE void do_tri( struct draw_stage *next,
- struct prim_header *header )
-{
- unsigned i;
- for (i = 0; i < 3; i++) {
- float *ndc = header->v[i]->data[0];
- _mesa_printf("ndc %f %f %f\n", ndc[0], ndc[1], ndc[2]);
- assert(ndc[0] >= -1 && ndc[0] <= 641);
- assert(ndc[1] >= 30 && ndc[1] <= 481);
- }
- _mesa_printf("\n");
- next->tri(next, header);
-}
-#endif
-
static void emit_poly( struct draw_stage *stage,
struct vertex_header **inlist,
unsigned n,
- const struct prim_header *origPrim)
+ const struct prim_header *origPrim)
{
struct prim_header header;
unsigned i;
@@ -163,16 +166,16 @@ static void emit_poly( struct draw_stage *stage,
header.det = origPrim->det;
for (i = 2; i < n; i++) {
- header.v[0] = inlist[0];
- header.v[1] = inlist[i-1];
- header.v[2] = inlist[i];
+ header.v[0] = inlist[i-1];
+ header.v[1] = inlist[i];
+ header.v[2] = inlist[0]; /* keep in v[2] for flatshading */
{
- unsigned tmp0 = header.v[0]->edgeflag;
+ unsigned tmp1 = header.v[1]->edgeflag;
unsigned tmp2 = header.v[2]->edgeflag;
- if (i != 2) header.v[0]->edgeflag = 0;
- if (i != n-1) header.v[2]->edgeflag = 0;
+ if (i != n-1) header.v[1]->edgeflag = 0;
+ if (i != 2) header.v[2]->edgeflag = 0;
header.edgeflags = ((header.v[0]->edgeflag << 0) |
(header.v[1]->edgeflag << 1) |
@@ -180,27 +183,13 @@ static void emit_poly( struct draw_stage *stage,
stage->next->tri( stage->next, &header );
- header.v[0]->edgeflag = tmp0;
+ header.v[1]->edgeflag = tmp1;
header.v[2]->edgeflag = tmp2;
}
}
}
-#if 0
-static void emit_poly( struct draw_stage *stage )
-{
- unsigned i;
-
- for (i = 2; i < n; i++) {
- header->v[0] = inlist[0];
- header->v[1] = inlist[i-1];
- header->v[2] = inlist[i];
-
- stage->next->tri( stage->next, header );
- }
-}
-#endif
/* Clip a triangle against the viewport and user clip planes.
@@ -281,6 +270,18 @@ do_clip_tri( struct draw_stage *stage,
}
}
+ /* If flat-shading, copy color to new provoking vertex.
+ */
+ if (clipper->flat && inlist[0] != header->v[2]) {
+ if (1) {
+ inlist[0] = dup_vert(stage, inlist[0], tmpnr++);
+ }
+
+ copy_colors(stage, inlist[0], header->v[2]);
+ }
+
+
+
/* Emit the polygon as triangles to the setup stage:
*/
if (n >= 3)
@@ -328,6 +329,10 @@ do_clip_line( struct draw_stage *stage,
if (v0->clipmask) {
interp( clipper, stage->tmp[0], t0, v0, v1 );
+
+ if (clipper->flat)
+ copy_colors(stage, stage->tmp[0], v0);
+
newprim.v[0] = stage->tmp[0];
}
else {
@@ -393,8 +398,55 @@ clip_tri( struct draw_stage *stage,
}
}
-static void clip_flush( struct draw_stage *stage, unsigned flags )
+/* Capture the state the clipper depends on: whether flat shading is on
+ * and, if so, which vertex shader outputs carry colors.  Afterwards the
+ * real tri/line handlers are installed on the stage.
+ *
+ * Could further delay this until we hit the first primitive that really
+ * requires clipping.
+ */
+static void
+clip_init_state( struct draw_stage *stage )
+{
+   struct clipper *clipper = clipper_stage( stage );
+
+   clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE;
+
+   if (clipper->flat) {
+      const struct pipe_shader_state *vs = stage->draw->vertex_shader->state;
+      uint nr = 0;
+      uint out;
+
+      /* Record the output slots holding front or back colors. */
+      for (out = 0; out < vs->num_outputs; out++) {
+         switch (vs->output_semantic_name[out]) {
+         case TGSI_SEMANTIC_COLOR:
+         case TGSI_SEMANTIC_BCOLOR:
+            clipper->color_attribs[nr++] = out;
+            break;
+         default:
+            break;
+         }
+      }
+      clipper->num_color_attribs = nr;
+   }
+
+   stage->line = clip_line;
+   stage->tri = clip_tri;
+}
+
+
+
+/**
+ * Triangle entry point used until the clipper state has been captured:
+ * initializes the state, which installs the real handlers, then forwards
+ * the triangle to the handler now stored in stage->tri.
+ */
+static void clip_first_tri( struct draw_stage *stage,
+                            struct prim_header *header )
+{
+   clip_init_state( stage );
+
+   /* stage->tri was replaced by the call above */
+   (*stage->tri)( stage, header );
+}
+
+/**
+ * Line entry point used until the clipper state has been captured:
+ * initializes the state, which installs the real handlers, then forwards
+ * the line to the handler now stored in stage->line.
+ */
+static void clip_first_line( struct draw_stage *stage,
+                             struct prim_header *header )
+{
+   clip_init_state( stage );
+
+   /* stage->line was replaced by the call above */
+   (*stage->line)( stage, header );
+}
+
+
+static void clip_flush( struct draw_stage *stage,
+ unsigned flags )
{
+ stage->tri = clip_first_tri;
+ stage->line = clip_first_line;
stage->next->flush( stage->next, flags );
}
@@ -420,12 +472,12 @@ struct draw_stage *draw_clip_stage( struct draw_context *draw )
{
struct clipper *clipper = CALLOC_STRUCT(clipper);
- draw_alloc_tmps( &clipper->stage, MAX_CLIPPED_VERTICES );
+ draw_alloc_tmps( &clipper->stage, MAX_CLIPPED_VERTICES+1 );
clipper->stage.draw = draw;
clipper->stage.point = clip_point;
- clipper->stage.line = clip_line;
- clipper->stage.tri = clip_tri;
+ clipper->stage.line = clip_first_line;
+ clipper->stage.tri = clip_first_tri;
clipper->stage.flush = clip_flush;
clipper->stage.reset_stipple_counter = clip_reset_stipple_counter;
clipper->stage.destroy = clip_destroy;
diff --git a/src/mesa/pipe/draw/draw_context.c b/src/mesa/pipe/draw/draw_context.c
index e8ca1f035b..b15f57c824 100644
--- a/src/mesa/pipe/draw/draw_context.c
+++ b/src/mesa/pipe/draw/draw_context.c
@@ -71,12 +71,15 @@ struct draw_context *draw_create( void )
*/
{
uint i;
- char *tmp = (char*) MALLOC( Elements(draw->vcache.vertex) * MAX_VERTEX_SIZE );
+ const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f;
+ char *tmp = align_malloc(Elements(draw->vcache.vertex) * size, 16);
for (i = 0; i < Elements(draw->vcache.vertex); i++)
- draw->vcache.vertex[i] = (struct vertex_header *)(tmp + i * MAX_VERTEX_SIZE);
+ draw->vcache.vertex[i] = (struct vertex_header *)(tmp + i * size);
}
+ draw->shader_queue_flush = draw_vertex_shader_queue_flush;
+
draw->convert_wide_points = TRUE;
draw->convert_wide_lines = TRUE;
@@ -103,7 +106,7 @@ void draw_destroy( struct draw_context *draw )
if (draw->pipeline.rasterize)
draw->pipeline.rasterize->destroy( draw->pipeline.rasterize );
tgsi_exec_machine_free_data(&draw->machine);
- FREE( draw->vcache.vertex[0] ); /* Frees all the vertices. */
+ align_free( draw->vcache.vertex[0] ); /* Frees all the vertices. */
FREE( draw );
}
diff --git a/src/mesa/pipe/draw/draw_prim.c b/src/mesa/pipe/draw/draw_prim.c
index 243381aec0..51e2242719 100644
--- a/src/mesa/pipe/draw/draw_prim.c
+++ b/src/mesa/pipe/draw/draw_prim.c
@@ -30,6 +30,8 @@
* Keith Whitwell <keith@tungstengraphics.com>
*/
+#include "pipe/p_debug.h"
+
#include "draw_private.h"
#include "draw_context.h"
@@ -60,38 +62,55 @@ static void draw_prim_queue_flush( struct draw_context *draw )
unsigned i;
if (0)
- fprintf(stdout,"Flushing with %d prims, %d verts\n",
- draw->pq.queue_nr, draw->vs.queue_nr);
+ debug_printf("Flushing with %d prims, %d verts\n",
+ draw->pq.queue_nr, draw->vs.queue_nr);
- if (draw->pq.queue_nr == 0)
- return;
+ assert (draw->pq.queue_nr != 0);
/* NOTE: we cannot save draw->pipeline->first in a local var because
* draw->pipeline->first is often changed by the first call to tri(),
* line(), etc.
*/
- switch (draw->reduced_prim) {
- case RP_TRI:
- for (i = 0; i < draw->pq.queue_nr; i++) {
- if (draw->pq.queue[i].reset_line_stipple)
- draw->pipeline.first->reset_stipple_counter( draw->pipeline.first );
-
- draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] );
+ if (draw->rasterizer->line_stipple_enable) {
+ switch (draw->reduced_prim) {
+ case RP_TRI:
+ for (i = 0; i < draw->pq.queue_nr; i++) {
+ if (draw->pq.queue[i].reset_line_stipple)
+ draw->pipeline.first->reset_stipple_counter( draw->pipeline.first );
+
+ draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] );
+ }
+ break;
+ case RP_LINE:
+ for (i = 0; i < draw->pq.queue_nr; i++) {
+ if (draw->pq.queue[i].reset_line_stipple)
+ draw->pipeline.first->reset_stipple_counter( draw->pipeline.first );
+
+ draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] );
+ }
+ break;
+ case RP_POINT:
+ draw->pipeline.first->reset_stipple_counter( draw->pipeline.first );
+ for (i = 0; i < draw->pq.queue_nr; i++)
+ draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] );
+ break;
}
- break;
- case RP_LINE:
- for (i = 0; i < draw->pq.queue_nr; i++) {
- if (draw->pq.queue[i].reset_line_stipple)
- draw->pipeline.first->reset_stipple_counter( draw->pipeline.first );
-
- draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] );
+ }
+ else {
+ switch (draw->reduced_prim) {
+ case RP_TRI:
+ for (i = 0; i < draw->pq.queue_nr; i++)
+ draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] );
+ break;
+ case RP_LINE:
+ for (i = 0; i < draw->pq.queue_nr; i++)
+ draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] );
+ break;
+ case RP_POINT:
+ for (i = 0; i < draw->pq.queue_nr; i++)
+ draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] );
+ break;
}
- break;
- case RP_POINT:
- draw->pipeline.first->reset_stipple_counter( draw->pipeline.first );
- for (i = 0; i < draw->pq.queue_nr; i++)
- draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] );
- break;
}
draw->pq.queue_nr = 0;
@@ -103,16 +122,18 @@ static void draw_prim_queue_flush( struct draw_context *draw )
void draw_do_flush( struct draw_context *draw, unsigned flags )
{
if (0)
- fprintf(stdout,"Flushing with %d verts, %d prims\n",
- draw->vs.queue_nr,
- draw->pq.queue_nr );
+ debug_printf("Flushing with %d verts, %d prims\n",
+ draw->vs.queue_nr,
+ draw->pq.queue_nr );
if (flags >= DRAW_FLUSH_SHADER_QUEUE) {
- draw_vertex_shader_queue_flush(draw);
+ if (draw->vs.queue_nr)
+ (*draw->shader_queue_flush)(draw);
if (flags >= DRAW_FLUSH_PRIM_QUEUE) {
- draw_prim_queue_flush(draw);
+ if (draw->pq.queue_nr)
+ draw_prim_queue_flush(draw);
if (flags >= DRAW_FLUSH_VERTEX_CACHE) {
draw_vertex_cache_invalidate(draw);
@@ -138,11 +159,11 @@ static struct prim_header *get_queued_prim( struct draw_context *draw,
unsigned nr_verts )
{
if (!draw_vertex_cache_check_space( draw, nr_verts )) {
-// fprintf(stderr, "v");
+// debug_printf("v");
draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE );
}
else if (draw->pq.queue_nr == PRIM_QUEUE_LENGTH) {
-// fprintf(stderr, "p");
+// debug_printf("p");
draw_do_flush( draw, DRAW_FLUSH_PRIM_QUEUE );
}
@@ -230,7 +251,7 @@ static void do_ef_triangle( struct draw_context *draw,
}
-static void do_quad( struct draw_context *draw,
+static void do_ef_quad( struct draw_context *draw,
unsigned v0,
unsigned v1,
unsigned v2,
@@ -242,6 +263,16 @@ static void do_quad( struct draw_context *draw,
do_ef_triangle( draw, 0, omitEdge3, v1, v2, v3 );
}
+static void do_quad( struct draw_context *draw, /* emit quad v0..v3 as two triangles via do_triangle() */
+ unsigned v0,
+ unsigned v1,
+ unsigned v2,
+ unsigned v3 )
+{
+ do_triangle( draw, v0, v1, v3 ); /* first half; same vertex order as the do_ef_triangle() calls in do_ef_quad */
+ do_triangle( draw, v1, v2, v3 ); /* second half; both triangles end on v3 */
+}
+
/**
* Main entrypoint to draw some number of points/lines/triangles
@@ -251,8 +282,10 @@ draw_prim( struct draw_context *draw,
unsigned prim, unsigned start, unsigned count )
{
unsigned i;
+ boolean unfilled = (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL ||
+ draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL);
-// _mesa_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count );
+// debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count );
switch (prim) {
case PIPE_PRIM_POINTS:
@@ -288,24 +321,32 @@ draw_prim( struct draw_context *draw,
break;
case PIPE_PRIM_LINE_STRIP:
- if (count >= 2) {
- for (i = 1; i < count; i++) {
- do_line( draw,
- i == 1,
- start + i - 1,
- start + i );
- }
+ for (i = 1; i < count; i++) {
+ do_line( draw,
+ i == 1,
+ start + i - 1,
+ start + i );
}
break;
case PIPE_PRIM_TRIANGLES:
- for (i = 0; i+2 < count; i += 3) {
- do_ef_triangle( draw,
- 1,
- ~0,
+ if (unfilled) {
+ for (i = 0; i+2 < count; i += 3) {
+ do_ef_triangle( draw,
+ 1,
+ ~0,
+ start + i + 0,
+ start + i + 1,
+ start + i + 2 );
+ }
+ }
+ else {
+ for (i = 0; i+2 < count; i += 3) {
+ do_triangle( draw,
start + i + 0,
start + i + 1,
start + i + 2 );
+ }
}
break;
@@ -339,27 +380,49 @@ draw_prim( struct draw_context *draw,
case PIPE_PRIM_QUADS:
- for (i = 0; i+3 < count; i += 4) {
- do_quad( draw,
- start + i + 0,
- start + i + 1,
- start + i + 2,
- start + i + 3);
+ if (unfilled) {
+ for (i = 0; i+3 < count; i += 4) {
+ do_ef_quad( draw,
+ start + i + 0,
+ start + i + 1,
+ start + i + 2,
+ start + i + 3);
+ }
+ }
+ else {
+ for (i = 0; i+3 < count; i += 4) {
+ do_quad( draw,
+ start + i + 0,
+ start + i + 1,
+ start + i + 2,
+ start + i + 3);
+ }
}
break;
case PIPE_PRIM_QUAD_STRIP:
- for (i = 0; i+3 < count; i += 2) {
- do_quad( draw,
- start + i + 2,
- start + i + 0,
- start + i + 1,
- start + i + 3);
+ if (unfilled) {
+ for (i = 0; i+3 < count; i += 2) {
+ do_ef_quad( draw,
+ start + i + 2,
+ start + i + 0,
+ start + i + 1,
+ start + i + 3);
+ }
+ }
+ else {
+ for (i = 0; i+3 < count; i += 2) {
+ do_quad( draw,
+ start + i + 2,
+ start + i + 0,
+ start + i + 1,
+ start + i + 3);
+ }
}
break;
case PIPE_PRIM_POLYGON:
- if (count >= 3) {
+ if (unfilled) {
unsigned ef_mask = (1<<2) | (1<<0);
for (i = 0; i+2 < count; i++) {
@@ -377,6 +440,14 @@ draw_prim( struct draw_context *draw,
ef_mask &= ~(1<<2);
}
}
+ else {
+ for (i = 0; i+2 < count; i++) {
+ do_triangle( draw,
+ start + i + 1,
+ start + i + 2,
+ start + 0);
+ }
+ }
break;
default:
diff --git a/src/mesa/pipe/draw/draw_private.h b/src/mesa/pipe/draw/draw_private.h
index 1e59f5bd8d..7782db0477 100644
--- a/src/mesa/pipe/draw/draw_private.h
+++ b/src/mesa/pipe/draw/draw_private.h
@@ -141,6 +141,10 @@ struct draw_vertex_shader {
/* Internal function for vertex fetch.
*/
typedef void (*fetch_func)(const void *ptr, float *attrib);
+typedef void (*full_fetch_func)( struct draw_context *draw,
+ struct tgsi_exec_machine *machine,
+ const unsigned *elts,
+ unsigned count );
@@ -210,6 +214,7 @@ struct draw_context
unsigned pitch[PIPE_ATTRIB_MAX];
fetch_func fetch[PIPE_ATTRIB_MAX];
unsigned nr_attrs;
+ full_fetch_func fetch_func;
} vertex_fetch;
/* Post-tnl vertex cache:
@@ -235,6 +240,11 @@ struct draw_context
unsigned queue_nr;
} vs;
+ /**
+ * Run the vertex shader on all vertices in the vertex queue.
+ */
+ void (*shader_queue_flush)(struct draw_context *draw);
+
/* Prim pipeline queue:
*/
struct {
@@ -249,6 +259,8 @@ struct draw_context
#ifdef MESA_LLVM
struct gallivm_cpu_engine *engine;
#endif
+
+ void *driver_private;
};
@@ -287,10 +299,6 @@ extern void draw_vertex_shader_queue_flush_llvm( struct draw_context *draw );
struct tgsi_exec_machine;
extern void draw_update_vertex_fetch( struct draw_context *draw );
-extern void draw_vertex_fetch( struct draw_context *draw,
- struct tgsi_exec_machine *machine,
- const unsigned *elts,
- unsigned count );
#define DRAW_FLUSH_SHADER_QUEUE 0x1 /* sized not to overflow, never raised */
diff --git a/src/mesa/pipe/draw/draw_validate.c b/src/mesa/pipe/draw/draw_validate.c
index 86d5a5f814..4375ebabbc 100644
--- a/src/mesa/pipe/draw/draw_validate.c
+++ b/src/mesa/pipe/draw/draw_validate.c
@@ -78,6 +78,11 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
precalc_flat = 1; /* only needed for triangles really */
need_det = 1;
}
+
+ if (draw->rasterizer->flatshade && precalc_flat) {
+ draw->pipeline.flatshade->next = next;
+ next = draw->pipeline.flatshade;
+ }
if (draw->rasterizer->offset_cw ||
draw->rasterizer->offset_ccw) {
@@ -110,13 +115,8 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage )
{
draw->pipeline.clip->next = next;
next = draw->pipeline.clip;
- precalc_flat = 1; /* XXX: FIX ME! Only needed for clipped prims */
}
- if (draw->rasterizer->flatshade && precalc_flat) {
- draw->pipeline.flatshade->next = next;
- next = draw->pipeline.flatshade;
- }
draw->pipeline.first = next;
return next;
diff --git a/src/mesa/pipe/draw/draw_vbuf.c b/src/mesa/pipe/draw/draw_vbuf.c
index 1e260c6156..be96c8fdeb 100644
--- a/src/mesa/pipe/draw/draw_vbuf.c
+++ b/src/mesa/pipe/draw/draw_vbuf.c
@@ -34,13 +34,14 @@
*/
-#include <assert.h>
-
-#include "pipe/draw/draw_vbuf.h"
-#include "pipe/draw/draw_private.h"
-#include "pipe/draw/draw_vertex.h"
+#include "pipe/p_debug.h"
#include "pipe/p_util.h"
+#include "draw_vbuf.h"
+#include "draw_private.h"
+#include "draw_vertex.h"
+#include "draw_vf.h"
+
/**
* Vertex buffer emit stage.
@@ -55,6 +56,8 @@ struct vbuf_stage {
/** Vertex size in bytes */
unsigned vertex_size;
+ struct draw_vertex_fetch *vf;
+
/* FIXME: we have no guarantee that 'unsigned' is 32bit */
/** Vertices in hardware format */
@@ -110,88 +113,175 @@ check_space( struct vbuf_stage *vbuf, unsigned nr )
}
-/**
- * Extract the needed fields from post-transformed vertex and emit
- * a hardware(driver) vertex.
- * Recall that the vertices are constructed by the 'draw' module and
- * have a couple of slots at the beginning (1-dword header, 4-dword
- * clip pos) that we ignore here. We only use the vertex->data[] fields.
- */
-static INLINE void
-emit_vertex( struct vbuf_stage *vbuf,
- struct vertex_header *vertex )
+#if 0
+static INLINE void
+dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data)
{
- const struct vertex_info *vinfo = vbuf->vinfo;
-
- uint i;
- uint count = 0; /* for debug/sanity */
-
assert(vinfo == vbuf->render->get_vertex_info(vbuf->render));
-
-// fprintf(stderr, "emit vertex %d to %p\n",
-// vbuf->nr_vertices, vbuf->vertex_ptr);
-
- if(vertex->vertex_id != UNDEFINED_VERTEX_ID) {
- if(vertex->vertex_id < vbuf->nr_vertices)
- return;
- else
- fprintf(stderr, "Bad vertex id 0x%04x (>= 0x%04x)\n",
- vertex->vertex_id, vbuf->nr_vertices);
- return;
- }
-
- vertex->vertex_id = vbuf->nr_vertices++;
+ unsigned i, j, k;
for (i = 0; i < vinfo->num_attribs; i++) {
- uint j = vinfo->src_index[i];
+ j = vinfo->src_index[i];
switch (vinfo->emit[i]) {
case EMIT_OMIT:
- /* no-op */
+ debug_printf("EMIT_OMIT:");
break;
case EMIT_ALL:
- /* just copy the whole vertex as-is to the vbuf */
assert(i == 0);
- memcpy(vbuf->vertex_ptr, vertex, vinfo->size * 4);
- vbuf->vertex_ptr += vinfo->size;
- return;
+ assert(j == 0);
+ debug_printf("EMIT_ALL:\t");
+ for(k = 0; k < vinfo->size*4; ++k)
+ debug_printf("%02x ", *data++);
+ break;
case EMIT_1F:
- *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
- count++;
+ debug_printf("EMIT_1F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
break;
case EMIT_1F_PSIZE:
- *vbuf->vertex_ptr++ = fui(vbuf->stage.draw->rasterizer->point_size);
- count++;
+ debug_printf("EMIT_1F_PSIZE:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
break;
case EMIT_2F:
- *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][1]);
- count += 2;
+ debug_printf("EMIT_2F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
break;
case EMIT_3F:
- *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][1]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][2]);
- count += 3;
+ debug_printf("EMIT_3F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ data += sizeof(float);
break;
case EMIT_4F:
- *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][1]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][2]);
- *vbuf->vertex_ptr++ = fui(vertex->data[j][3]);
- count += 4;
+ debug_printf("EMIT_4F:\t");
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
+ debug_printf("%f ", *(float *)data); data += sizeof(float);
break;
case EMIT_4UB:
- *vbuf->vertex_ptr++ = pack_ub4(float_to_ubyte( vertex->data[j][2] ),
- float_to_ubyte( vertex->data[j][1] ),
- float_to_ubyte( vertex->data[j][0] ),
- float_to_ubyte( vertex->data[j][3] ));
- count += 1;
+ debug_printf("EMIT_4UB:\t");
+ debug_printf("%u ", *data++);
+ debug_printf("%u ", *data++);
+ debug_printf("%u ", *data++);
+ debug_printf("%u ", *data++);
break;
default:
assert(0);
}
+ debug_printf("\n");
+ }
+ debug_printf("\n");
+}
+#endif
+
+
+/**
+ * Extract the needed fields from post-transformed vertex and emit
+ * a hardware(driver) vertex.
+ * Recall that the vertices are constructed by the 'draw' module and
+ * have a couple of slots at the beginning (1-dword header, 4-dword
+ * clip pos) that we ignore here. We only use the vertex->data[] fields.
+ */
+static INLINE void
+emit_vertex( struct vbuf_stage *vbuf,
+ struct vertex_header *vertex )
+{
+#if 0
+ debug_printf("emit vertex %d to %p\n",
+ vbuf->nr_vertices, vbuf->vertex_ptr);
+#endif
+
+ if(vertex->vertex_id != UNDEFINED_VERTEX_ID) {
+ if(vertex->vertex_id < vbuf->nr_vertices)
+ return;
+ else
+ debug_printf("Bad vertex id 0x%04x (>= 0x%04x)\n",
+ vertex->vertex_id, vbuf->nr_vertices);
+ return;
+ }
+
+ vertex->vertex_id = vbuf->nr_vertices++;
+
+ if(!vbuf->vf) {
+ const struct vertex_info *vinfo = vbuf->vinfo;
+ uint i;
+ uint count = 0; /* for debug/sanity */
+
+ assert(vinfo == vbuf->render->get_vertex_info(vbuf->render));
+
+ for (i = 0; i < vinfo->num_attribs; i++) {
+ uint j = vinfo->src_index[i];
+ switch (vinfo->emit[i]) {
+ case EMIT_OMIT:
+ /* no-op */
+ break;
+ case EMIT_ALL:
+ /* just copy the whole vertex as-is to the vbuf */
+ assert(i == 0);
+ assert(j == 0);
+ memcpy(vbuf->vertex_ptr, vertex, vinfo->size * 4);
+ vbuf->vertex_ptr += vinfo->size;
+ count += vinfo->size;
+ break;
+ case EMIT_1F:
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
+ count++;
+ break;
+ case EMIT_1F_PSIZE:
+ *vbuf->vertex_ptr++ = fui(vbuf->stage.draw->rasterizer->point_size);
+ count++;
+ break;
+ case EMIT_2F:
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][1]);
+ count += 2;
+ break;
+ case EMIT_3F:
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][1]);
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][2]);
+ count += 3;
+ break;
+ case EMIT_4F:
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][0]);
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][1]);
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][2]);
+ *vbuf->vertex_ptr++ = fui(vertex->data[j][3]);
+ count += 4;
+ break;
+ case EMIT_4UB:
+ *vbuf->vertex_ptr++ = pack_ub4(float_to_ubyte( vertex->data[j][2] ),
+ float_to_ubyte( vertex->data[j][1] ),
+ float_to_ubyte( vertex->data[j][0] ),
+ float_to_ubyte( vertex->data[j][3] ));
+ count += 1;
+ break;
+ default:
+ assert(0);
+ }
+ }
+ assert(count == vinfo->size);
+#if 0
+ {
+ static float data[256];
+ draw_vf_emit_vertex(vbuf->vf, vertex, data);
+ if(memcmp((uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size, data, vbuf->vertex_size)) {
+ debug_printf("With VF:\n");
+ dump_emitted_vertex(vbuf->vinfo, (uint8_t *)data);
+ debug_printf("Without VF:\n");
+ dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size);
+ assert(0);
+ }
+ }
+#endif
+ }
+ else {
+ draw_vf_emit_vertex(vbuf->vf, vertex, vbuf->vertex_ptr);
+
+ vbuf->vertex_ptr += vbuf->vertex_size/4;
}
- assert(count == vinfo->size);
}
@@ -269,6 +359,10 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint newprim )
vbuf->vinfo = vinfo;
vbuf->vertex_size = vertex_size;
+ if(vbuf->vf)
+ draw_vf_set_vertex_info(vbuf->vf,
+ vbuf->vinfo,
+ vbuf->stage.draw->rasterizer->point_size);
if (!vbuf->vertices)
vbuf_alloc_vertices(vbuf);
@@ -423,7 +517,12 @@ static void vbuf_destroy( struct draw_stage *stage )
{
struct vbuf_stage *vbuf = vbuf_stage( stage );
- align_free( vbuf->indices );
+ if(vbuf->indices)
+ align_free( vbuf->indices );
+
+ if(vbuf->vf)
+ draw_vf_destroy( vbuf->vf );
+
FREE( stage );
}
@@ -436,6 +535,9 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw,
{
struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage);
+ if(!vbuf)
+ return NULL;
+
vbuf->stage.draw = draw;
vbuf->stage.point = vbuf_first_point;
vbuf->stage.line = vbuf_first_line;
@@ -450,11 +552,16 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw,
vbuf->max_indices = render->max_indices;
vbuf->indices = (ushort *)
align_malloc( vbuf->max_indices * sizeof(vbuf->indices[0]), 16 );
+ if(!vbuf->indices) {
+ /* vbuf_destroy() frees the stage itself, so it must not be touched
+ * or returned afterwards: report the allocation failure instead.
+ */
+ vbuf_destroy(&vbuf->stage);
+ return NULL;
+ }
vbuf->vertices = NULL;
vbuf->vertex_ptr = vbuf->vertices;
vbuf->prim = ~0;
+ if(!GETENV("GALLIUM_NOVF"))
+ vbuf->vf = draw_vf_create();
+
return &vbuf->stage;
}
diff --git a/src/mesa/pipe/draw/draw_vertex_fetch.c b/src/mesa/pipe/draw/draw_vertex_fetch.c
index fb64723a19..e13df04605 100644
--- a/src/mesa/pipe/draw/draw_vertex_fetch.c
+++ b/src/mesa/pipe/draw/draw_vertex_fetch.c
@@ -62,50 +62,244 @@ fetch_##NAME(const void *ptr, float *attrib) \
} \
}
+#define CVT_64_FLOAT (float) ((double *) ptr)[i]
#define CVT_32_FLOAT ((float *) ptr)[i]
+
+#define CVT_8_USCALED (float) ((unsigned char *) ptr)[i]
+#define CVT_16_USCALED (float) ((unsigned short *) ptr)[i]
+#define CVT_32_USCALED (float) ((unsigned int *) ptr)[i]
+
+#define CVT_8_SSCALED (float) ((signed char *) ptr)[i] /* signed char: plain char may be unsigned on some ABIs */
+#define CVT_16_SSCALED (float) ((short *) ptr)[i]
#define CVT_32_SSCALED (float) ((int *) ptr)[i]
+
#define CVT_8_UNORM (float) ((unsigned char *) ptr)[i] / 255.0f
+#define CVT_16_UNORM (float) ((unsigned short *) ptr)[i] / 65535.0f
+#define CVT_32_UNORM (float) ((unsigned int *) ptr)[i] / 4294967295.0f
+
+#define CVT_8_SNORM (float) ((signed char *) ptr)[i] / 127.0f /* signed char: plain char may be unsigned on some ABIs */
+#define CVT_16_SNORM (float) ((short *) ptr)[i] / 32767.0f
+#define CVT_32_SNORM (float) ((int *) ptr)[i] / 2147483647.0f
+
+FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT )
+FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT )
+FETCH_ATTRIB( R64G64_FLOAT, 2, CVT_64_FLOAT )
+FETCH_ATTRIB( R64_FLOAT, 1, CVT_64_FLOAT )
FETCH_ATTRIB( R32G32B32A32_FLOAT, 4, CVT_32_FLOAT )
FETCH_ATTRIB( R32G32B32_FLOAT, 3, CVT_32_FLOAT )
FETCH_ATTRIB( R32G32_FLOAT, 2, CVT_32_FLOAT )
FETCH_ATTRIB( R32_FLOAT, 1, CVT_32_FLOAT )
+
+FETCH_ATTRIB( R32G32B32A32_USCALED, 4, CVT_32_USCALED )
+FETCH_ATTRIB( R32G32B32_USCALED, 3, CVT_32_USCALED )
+FETCH_ATTRIB( R32G32_USCALED, 2, CVT_32_USCALED )
+FETCH_ATTRIB( R32_USCALED, 1, CVT_32_USCALED )
+
FETCH_ATTRIB( R32G32B32A32_SSCALED, 4, CVT_32_SSCALED )
FETCH_ATTRIB( R32G32B32_SSCALED, 3, CVT_32_SSCALED )
FETCH_ATTRIB( R32G32_SSCALED, 2, CVT_32_SSCALED )
FETCH_ATTRIB( R32_SSCALED, 1, CVT_32_SSCALED )
+
+FETCH_ATTRIB( R32G32B32A32_UNORM, 4, CVT_32_UNORM )
+FETCH_ATTRIB( R32G32B32_UNORM, 3, CVT_32_UNORM )
+FETCH_ATTRIB( R32G32_UNORM, 2, CVT_32_UNORM )
+FETCH_ATTRIB( R32_UNORM, 1, CVT_32_UNORM )
+
+FETCH_ATTRIB( R32G32B32A32_SNORM, 4, CVT_32_SNORM )
+FETCH_ATTRIB( R32G32B32_SNORM, 3, CVT_32_SNORM )
+FETCH_ATTRIB( R32G32_SNORM, 2, CVT_32_SNORM )
+FETCH_ATTRIB( R32_SNORM, 1, CVT_32_SNORM )
+
+FETCH_ATTRIB( R16G16B16A16_USCALED, 4, CVT_16_USCALED )
+FETCH_ATTRIB( R16G16B16_USCALED, 3, CVT_16_USCALED )
+FETCH_ATTRIB( R16G16_USCALED, 2, CVT_16_USCALED )
+FETCH_ATTRIB( R16_USCALED, 1, CVT_16_USCALED )
+
+FETCH_ATTRIB( R16G16B16A16_SSCALED, 4, CVT_16_SSCALED )
+FETCH_ATTRIB( R16G16B16_SSCALED, 3, CVT_16_SSCALED )
+FETCH_ATTRIB( R16G16_SSCALED, 2, CVT_16_SSCALED )
+FETCH_ATTRIB( R16_SSCALED, 1, CVT_16_SSCALED )
+
+FETCH_ATTRIB( R16G16B16A16_UNORM, 4, CVT_16_UNORM )
+FETCH_ATTRIB( R16G16B16_UNORM, 3, CVT_16_UNORM )
+FETCH_ATTRIB( R16G16_UNORM, 2, CVT_16_UNORM )
+FETCH_ATTRIB( R16_UNORM, 1, CVT_16_UNORM )
+
+FETCH_ATTRIB( R16G16B16A16_SNORM, 4, CVT_16_SNORM )
+FETCH_ATTRIB( R16G16B16_SNORM, 3, CVT_16_SNORM )
+FETCH_ATTRIB( R16G16_SNORM, 2, CVT_16_SNORM )
+FETCH_ATTRIB( R16_SNORM, 1, CVT_16_SNORM )
+
+FETCH_ATTRIB( R8G8B8A8_USCALED, 4, CVT_8_USCALED )
+FETCH_ATTRIB( R8G8B8_USCALED, 3, CVT_8_USCALED )
+FETCH_ATTRIB( R8G8_USCALED, 2, CVT_8_USCALED )
+FETCH_ATTRIB( R8_USCALED, 1, CVT_8_USCALED )
+
+FETCH_ATTRIB( R8G8B8A8_SSCALED, 4, CVT_8_SSCALED )
+FETCH_ATTRIB( R8G8B8_SSCALED, 3, CVT_8_SSCALED )
+FETCH_ATTRIB( R8G8_SSCALED, 2, CVT_8_SSCALED )
+FETCH_ATTRIB( R8_SSCALED, 1, CVT_8_SSCALED )
+
+FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM )
+FETCH_ATTRIB( R8G8B8_UNORM, 3, CVT_8_UNORM )
+FETCH_ATTRIB( R8G8_UNORM, 2, CVT_8_UNORM )
+FETCH_ATTRIB( R8_UNORM, 1, CVT_8_UNORM )
+
+FETCH_ATTRIB( R8G8B8A8_SNORM, 4, CVT_8_SNORM )
+FETCH_ATTRIB( R8G8B8_SNORM, 3, CVT_8_SNORM )
+FETCH_ATTRIB( R8G8_SNORM, 2, CVT_8_SNORM )
+FETCH_ATTRIB( R8_SNORM, 1, CVT_8_SNORM )
+
FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM )
-FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM )
+//FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM )
static fetch_func get_fetch_func( enum pipe_format format )
{
+#if 0
+ {
+ char tmp[80];
+ pf_sprint_name(tmp, format);
+ debug_printf("%s: %s\n", __FUNCTION__, tmp);
+ }
+#endif
+
switch (format) {
- case PIPE_FORMAT_R32G32B32A32_FLOAT:
- return fetch_R32G32B32A32_FLOAT;
- case PIPE_FORMAT_R32G32B32_FLOAT:
- return fetch_R32G32B32_FLOAT;
- case PIPE_FORMAT_R32G32_FLOAT:
- return fetch_R32G32_FLOAT;
+ case PIPE_FORMAT_R64_FLOAT:
+ return fetch_R64_FLOAT;
+ case PIPE_FORMAT_R64G64_FLOAT:
+ return fetch_R64G64_FLOAT;
+ case PIPE_FORMAT_R64G64B64_FLOAT:
+ return fetch_R64G64B64_FLOAT;
+ case PIPE_FORMAT_R64G64B64A64_FLOAT:
+ return fetch_R64G64B64A64_FLOAT;
+
case PIPE_FORMAT_R32_FLOAT:
return fetch_R32_FLOAT;
- case PIPE_FORMAT_R32G32B32A32_SSCALED:
- return fetch_R32G32B32A32_SSCALED;
- case PIPE_FORMAT_R32G32B32_SSCALED:
- return fetch_R32G32B32_SSCALED;
- case PIPE_FORMAT_R32G32_SSCALED:
- return fetch_R32G32_SSCALED;
+ case PIPE_FORMAT_R32G32_FLOAT:
+ return fetch_R32G32_FLOAT;
+ case PIPE_FORMAT_R32G32B32_FLOAT:
+ return fetch_R32G32B32_FLOAT;
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return fetch_R32G32B32A32_FLOAT;
+
+ case PIPE_FORMAT_R32_UNORM:
+ return fetch_R32_UNORM;
+ case PIPE_FORMAT_R32G32_UNORM:
+ return fetch_R32G32_UNORM;
+ case PIPE_FORMAT_R32G32B32_UNORM:
+ return fetch_R32G32B32_UNORM;
+ case PIPE_FORMAT_R32G32B32A32_UNORM:
+ return fetch_R32G32B32A32_UNORM;
+
+ case PIPE_FORMAT_R32_USCALED:
+ return fetch_R32_USCALED;
+ case PIPE_FORMAT_R32G32_USCALED:
+ return fetch_R32G32_USCALED;
+ case PIPE_FORMAT_R32G32B32_USCALED:
+ return fetch_R32G32B32_USCALED;
+ case PIPE_FORMAT_R32G32B32A32_USCALED:
+ return fetch_R32G32B32A32_USCALED;
+
+ case PIPE_FORMAT_R32_SNORM:
+ return fetch_R32_SNORM;
+ case PIPE_FORMAT_R32G32_SNORM:
+ return fetch_R32G32_SNORM;
+ case PIPE_FORMAT_R32G32B32_SNORM:
+ return fetch_R32G32B32_SNORM;
+ case PIPE_FORMAT_R32G32B32A32_SNORM:
+ return fetch_R32G32B32A32_SNORM;
+
case PIPE_FORMAT_R32_SSCALED:
return fetch_R32_SSCALED;
- case PIPE_FORMAT_A8R8G8B8_UNORM:
- return fetch_A8R8G8B8_UNORM;
+ case PIPE_FORMAT_R32G32_SSCALED:
+ return fetch_R32G32_SSCALED;
+ case PIPE_FORMAT_R32G32B32_SSCALED:
+ return fetch_R32G32B32_SSCALED;
+ case PIPE_FORMAT_R32G32B32A32_SSCALED:
+ return fetch_R32G32B32A32_SSCALED;
+
+ case PIPE_FORMAT_R16_UNORM:
+ return fetch_R16_UNORM;
+ case PIPE_FORMAT_R16G16_UNORM:
+ return fetch_R16G16_UNORM;
+ case PIPE_FORMAT_R16G16B16_UNORM:
+ return fetch_R16G16B16_UNORM;
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ return fetch_R16G16B16A16_UNORM;
+
+ case PIPE_FORMAT_R16_USCALED:
+ return fetch_R16_USCALED;
+ case PIPE_FORMAT_R16G16_USCALED:
+ return fetch_R16G16_USCALED;
+ case PIPE_FORMAT_R16G16B16_USCALED:
+ return fetch_R16G16B16_USCALED;
+ case PIPE_FORMAT_R16G16B16A16_USCALED:
+ return fetch_R16G16B16A16_USCALED;
+
+ case PIPE_FORMAT_R16_SNORM:
+ return fetch_R16_SNORM;
+ case PIPE_FORMAT_R16G16_SNORM:
+ return fetch_R16G16_SNORM;
+ case PIPE_FORMAT_R16G16B16_SNORM:
+ return fetch_R16G16B16_SNORM;
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ return fetch_R16G16B16A16_SNORM;
+
+ case PIPE_FORMAT_R16_SSCALED:
+ return fetch_R16_SSCALED;
+ case PIPE_FORMAT_R16G16_SSCALED:
+ return fetch_R16G16_SSCALED;
+ case PIPE_FORMAT_R16G16B16_SSCALED:
+ return fetch_R16G16B16_SSCALED;
+ case PIPE_FORMAT_R16G16B16A16_SSCALED:
+ return fetch_R16G16B16A16_SSCALED;
+
+ case PIPE_FORMAT_R8_UNORM:
+ return fetch_R8_UNORM;
+ case PIPE_FORMAT_R8G8_UNORM:
+ return fetch_R8G8_UNORM;
+ case PIPE_FORMAT_R8G8B8_UNORM:
+ return fetch_R8G8B8_UNORM;
case PIPE_FORMAT_R8G8B8A8_UNORM:
return fetch_R8G8B8A8_UNORM;
+
+ case PIPE_FORMAT_R8_USCALED:
+ return fetch_R8_USCALED;
+ case PIPE_FORMAT_R8G8_USCALED:
+ return fetch_R8G8_USCALED;
+ case PIPE_FORMAT_R8G8B8_USCALED:
+ return fetch_R8G8B8_USCALED;
+ case PIPE_FORMAT_R8G8B8A8_USCALED:
+ return fetch_R8G8B8A8_USCALED;
+
+ case PIPE_FORMAT_R8_SNORM:
+ return fetch_R8_SNORM;
+ case PIPE_FORMAT_R8G8_SNORM:
+ return fetch_R8G8_SNORM;
+ case PIPE_FORMAT_R8G8B8_SNORM:
+ return fetch_R8G8B8_SNORM;
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ return fetch_R8G8B8A8_SNORM;
+
+ case PIPE_FORMAT_R8_SSCALED:
+ return fetch_R8_SSCALED;
+ case PIPE_FORMAT_R8G8_SSCALED:
+ return fetch_R8G8_SSCALED;
+ case PIPE_FORMAT_R8G8B8_SSCALED:
+ return fetch_R8G8B8_SSCALED;
+ case PIPE_FORMAT_R8G8B8A8_SSCALED:
+ return fetch_R8G8B8A8_SSCALED;
+
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ return fetch_A8R8G8B8_UNORM;
+
case 0:
- return NULL;
+ return NULL; /* not sure why this is needed */
+
default:
- /* Lots of missing cases! */
assert(0);
return NULL;
}
@@ -126,47 +320,108 @@ transpose_4x4( float *out, const float *in )
}
-
-void draw_update_vertex_fetch( struct draw_context *draw )
+
+static void fetch_xyz_rgb( struct draw_context *draw,
+ struct tgsi_exec_machine *machine,
+ const unsigned *elts,
+ unsigned count )
{
- unsigned nr_attrs, i;
+ const unsigned *pitch = draw->vertex_fetch.pitch;
+ const ubyte **src = draw->vertex_fetch.src_ptr;
+ int i;
- /* this may happend during context init */
- if (!draw->vertex_shader)
- return;
+ assert(count <= 4);
- nr_attrs = draw->vertex_shader->state->num_inputs;
+// debug_printf("%s\n", __FUNCTION__);
- for (i = 0; i < nr_attrs; i++) {
- unsigned buf = draw->vertex_element[i].vertex_buffer_index;
- enum pipe_format format = draw->vertex_element[i].src_format;
+ /* loop over vertex attributes (vertex shader inputs)
+ */
- draw->vertex_fetch.src_ptr[i] = (const ubyte *) draw->user.vbuffer[buf] +
- draw->vertex_buffer[buf].buffer_offset +
- draw->vertex_element[i].src_offset;
+ for (i = 0; i < 4; i++) {
+ {
+ const float *in = (const float *)(src[0] + elts[i] * pitch[0]);
+ float *out = &machine->Inputs[0].xyzw[0].f[i];
+ out[0] = in[0];
+ out[4] = in[1];
+ out[8] = in[2];
+ out[12] = 1.0f;
+ }
- draw->vertex_fetch.pitch[i] = draw->vertex_buffer[buf].pitch;
- draw->vertex_fetch.fetch[i] = get_fetch_func( format );
+ {
+ const float *in = (const float *)(src[1] + elts[i] * pitch[1]);
+ float *out = &machine->Inputs[1].xyzw[0].f[i];
+ out[0] = in[0];
+ out[4] = in[1];
+ out[8] = in[2];
+ out[12] = 1.0f;
+ }
}
+}
- draw->vertex_fetch.nr_attrs = nr_attrs;
+
+
+
+static void fetch_xyz_rgb_st( struct draw_context *draw, /* hand-unrolled fetch for exactly three attributes (3 floats, 3 floats, 2 floats) */
+ struct tgsi_exec_machine *machine, /* results are written to machine->Inputs[0..2] */
+ const unsigned *elts, /* vertex indices; elts[0..3] are all read */
+ unsigned count ) /* only checked by the assert below */
+{
+ const unsigned *pitch = draw->vertex_fetch.pitch;
+ const ubyte **src = draw->vertex_fetch.src_ptr;
+ int i;
+
+ assert(count <= 4);
+
+ /* Loop over the four vertex slots; the three attributes are unrolled
+ * by hand in the loop body. Note the loop bound is 4, not count.
+ */
+
+ for (i = 0; i < 4; i++) {
+ { /* attribute 0: three floats read, fourth component forced to 1 */
+ const float *in = (const float *)(src[0] + elts[i] * pitch[0]);
+ float *out = &machine->Inputs[0].xyzw[0].f[i];
+ out[0] = in[0];
+ out[4] = in[1]; /* presumably a stride of 4 floats steps to the next x/y/z/w channel -- TODO confirm layout of xyzw[].f[] */
+ out[8] = in[2];
+ out[12] = 1.0f;
+ }
+
+ { /* attribute 1: three floats read, fourth component forced to 1 */
+ const float *in = (const float *)(src[1] + elts[i] * pitch[1]);
+ float *out = &machine->Inputs[1].xyzw[0].f[i];
+ out[0] = in[0];
+ out[4] = in[1];
+ out[8] = in[2];
+ out[12] = 1.0f;
+ }
+
+ { /* attribute 2: two floats read, remaining components set to 0 and 1 */
+ const float *in = (const float *)(src[2] + elts[i] * pitch[2]);
+ float *out = &machine->Inputs[2].xyzw[0].f[i];
+ out[0] = in[0];
+ out[4] = in[1];
+ out[8] = 0.0f;
+ out[12] = 1.0f;
+ }
+ }
}
+
+
/**
* Fetch vertex attributes for 'count' vertices.
*/
-void draw_vertex_fetch( struct draw_context *draw,
- struct tgsi_exec_machine *machine,
- const unsigned *elts,
- unsigned count )
+static void generic_vertex_fetch( struct draw_context *draw,
+ struct tgsi_exec_machine *machine,
+ const unsigned *elts,
+ unsigned count )
{
unsigned nr_attrs = draw->vertex_fetch.nr_attrs;
unsigned attr;
assert(count <= 4);
-// _mesa_printf("%s %d\n", __FUNCTION__, count);
+// debug_printf("%s %d\n", __FUNCTION__, count);
/* loop over vertex attributes (vertex shader inputs)
*/
@@ -206,3 +461,50 @@ void draw_vertex_fetch( struct draw_context *draw,
}
}
+
+
+void draw_update_vertex_fetch( struct draw_context *draw ) /* rebuild per-attribute fetch state and pick the whole-vertex fetch routine */
+{
+ unsigned nr_attrs, i;
+
+// debug_printf("%s\n", __FUNCTION__);
+
+ /* this may happen during context init */
+ if (!draw->vertex_shader)
+ return;
+
+ nr_attrs = draw->vertex_shader->state->num_inputs; /* one attribute per vertex shader input */
+
+ for (i = 0; i < nr_attrs; i++) {
+ unsigned buf = draw->vertex_element[i].vertex_buffer_index;
+ enum pipe_format format = draw->vertex_element[i].src_format;
+
+ draw->vertex_fetch.src_ptr[i] = (const ubyte *) draw->user.vbuffer[buf] + /* base pointer + buffer offset + element offset */
+ draw->vertex_buffer[buf].buffer_offset +
+ draw->vertex_element[i].src_offset;
+
+ draw->vertex_fetch.pitch[i] = draw->vertex_buffer[buf].pitch;
+ draw->vertex_fetch.fetch[i] = get_fetch_func( format ); /* per-attribute converter; NULL for format 0 */
+ }
+
+ draw->vertex_fetch.nr_attrs = nr_attrs;
+
+ draw->vertex_fetch.fetch_func = generic_vertex_fetch; /* default; may be replaced by a fast path below */
+
+ switch (nr_attrs) { /* fast paths are chosen purely by attribute count and source formats */
+ case 2:
+ if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT &&
+ draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT)
+ draw->vertex_fetch.fetch_func = fetch_xyz_rgb;
+ break;
+ case 3:
+ if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT &&
+ draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT &&
+ draw->vertex_element[2].src_format == PIPE_FORMAT_R32G32_FLOAT)
+ draw->vertex_fetch.fetch_func = fetch_xyz_rgb_st;
+ break;
+ default:
+ break;
+ }
+
+}
diff --git a/src/mesa/pipe/draw/draw_vertex_shader.c b/src/mesa/pipe/draw/draw_vertex_shader.c
index 3041974b9a..5ca93aa615 100644
--- a/src/mesa/pipe/draw/draw_vertex_shader.c
+++ b/src/mesa/pipe/draw/draw_vertex_shader.c
@@ -110,7 +110,7 @@ run_vertex_program(struct draw_context *draw,
machine->Inputs = ALIGN16_ASSIGN(inputs);
machine->Outputs = ALIGN16_ASSIGN(outputs);
- draw_vertex_fetch( draw, machine, elts, count );
+ draw->vertex_fetch.fetch_func( draw, machine, elts, count );
/* run shader */
#if defined(__i386__) || defined(__386__)
@@ -121,11 +121,16 @@ run_vertex_program(struct draw_context *draw,
= (struct draw_vertex_shader *)draw->vertex_shader;
codegen_function func
= (codegen_function) x86_get_func( &shader->sse2_program );
- func(
- machine->Inputs,
- machine->Outputs,
- machine->Consts,
- machine->Temps );
+
+ if (func)
+ func(
+ machine->Inputs,
+ machine->Outputs,
+ machine->Consts,
+ machine->Temps );
+ else
+ /* interpreter */
+ tgsi_exec_machine_run( machine );
}
else
#endif
@@ -166,7 +171,7 @@ run_vertex_program(struct draw_context *draw,
vOut[j]->data[0][3] = w;
#if DBG_VS
- printf("output[%d]win: %f %f %f %f\n", j,
+ debug_printf("output[%d]win: %f %f %f %f\n", j,
vOut[j]->data[0][0],
vOut[j]->data[0][1],
vOut[j]->data[0][2],
@@ -181,7 +186,7 @@ run_vertex_program(struct draw_context *draw,
vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
#if DBG_VS
- printf("output[%d][%d]: %f %f %f %f\n", j, slot,
+ debug_printf("output[%d][%d]: %f %f %f %f\n", j, slot,
vOut[j]->data[slot][0],
vOut[j]->data[slot][1],
vOut[j]->data[slot][2],
@@ -199,13 +204,15 @@ run_vertex_program(struct draw_context *draw,
void
draw_vertex_shader_queue_flush(struct draw_context *draw)
{
- unsigned i, j;
+ unsigned i;
+
+ assert(draw->vs.queue_nr != 0);
/* XXX: do this on statechange:
*/
draw_update_vertex_fetch( draw );
-// fprintf(stderr, " q(%d) ", draw->vs.queue_nr );
+// debug_printf( " q(%d) ", draw->vs.queue_nr );
#ifdef MESA_LLVM
if (draw->vertex_shader->llvm_prog) {
draw_vertex_shader_queue_flush_llvm(draw);
@@ -217,14 +224,18 @@ draw_vertex_shader_queue_flush(struct draw_context *draw)
for (i = 0; i < draw->vs.queue_nr; i += 4) {
struct vertex_header *dests[4];
unsigned elts[4];
- int n;
+ int j, n = MIN2(4, draw->vs.queue_nr - i);
- for (j = 0; j < 4; j++) {
+ for (j = 0; j < n; j++) {
elts[j] = draw->vs.queue[i + j].elt;
dests[j] = draw->vs.queue[i + j].dest;
}
- n = MIN2(4, draw->vs.queue_nr - i);
+ for ( ; j < 4; j++) {
+ elts[j] = elts[0];
+ dests[j] = dests[0];
+ }
+
assert(n > 0);
assert(n <= 4);
@@ -263,7 +274,12 @@ draw_create_vertex_shader(struct draw_context *draw,
struct pipe_shader_state *sh = (struct pipe_shader_state *) shader;
x86_init_func( &vs->sse2_program );
- tgsi_emit_sse2( (struct tgsi_token *) sh->tokens, &vs->sse2_program );
+ if (!tgsi_emit_sse2( (struct tgsi_token *) sh->tokens,
+ &vs->sse2_program )) {
+ x86_release_func( (struct x86_function *) &vs->sse2_program );
+ fprintf(stdout /*err*/,
+ "tgsi_emit_sse2() failed, falling back to interpreter\n");
+ }
}
#endif
diff --git a/src/mesa/pipe/draw/draw_vertex_shader_llvm.c b/src/mesa/pipe/draw/draw_vertex_shader_llvm.c
index 4228c4f388..63551c993e 100644
--- a/src/mesa/pipe/draw/draw_vertex_shader_llvm.c
+++ b/src/mesa/pipe/draw/draw_vertex_shader_llvm.c
@@ -152,7 +152,7 @@ void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw)
z = vOut->clip[2] = dests[0][2];
w = vOut->clip[3] = dests[0][3];
#if DBG
- printf("output %d: %f %f %f %f\n", 0, x, y, z, w);
+ debug_printf("output %d: %f %f %f %f\n", 0, x, y, z, w);
#endif
vOut->clipmask = compute_clipmask(vOut->clip, draw->plane, draw->nr_planes);
@@ -179,7 +179,7 @@ void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw)
vOut->data[slot][3] = dests[slot][3];
#if DBG
- printf("output %d: %f %f %f %f\n", slot,
+ debug_printf("output %d: %f %f %f %f\n", slot,
vOut->data[slot][0],
vOut->data[slot][1],
vOut->data[slot][2],
diff --git a/src/mesa/pipe/draw/draw_vf.c b/src/mesa/pipe/draw/draw_vf.c
new file mode 100644
index 0000000000..f23d7fcec5
--- /dev/null
+++ b/src/mesa/pipe/draw/draw_vf.c
@@ -0,0 +1,428 @@
+/*
+ * Copyright 2003 Tungsten Graphics, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Keith Whitwell <keithw@tungstengraphics.com>
+ */
+
+
+#include <stddef.h>
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_util.h"
+
+#include "draw_vf.h"
+
+
+#define DRAW_VF_DBG 0
+
+
+/* TODO: remove this */
+extern void
+_mesa_exec_free( void *addr );
+
+
+/**
+ * Test whether a registered fastpath describes the layout currently
+ * set on vf.
+ *
+ * The attribute count and, per attribute, the format, input size and
+ * vertex offset must agree.  When the fastpath was registered with
+ * match_strides, the vertex stride and every input stride must agree
+ * as well.
+ *
+ * Returns TRUE on a match, FALSE otherwise.
+ */
+static boolean match_fastpath( struct draw_vertex_fetch *vf,
+                               const struct draw_vf_fastpath *fp)
+{
+   const unsigned nr = vf->attr_count;
+   const boolean check_strides = fp->match_strides;
+   unsigned k;
+
+   if (nr != fp->attr_count)
+      return FALSE;
+
+   if (check_strides && vf->vertex_stride != fp->vertex_stride)
+      return FALSE;
+
+   for (k = 0; k < nr; k++) {
+      const struct draw_vf_attr *have = &vf->attr[k];
+      const struct draw_vf_attr_type *want = &fp->attr[k];
+
+      if (have->format != want->format ||
+          have->inputsize != want->size ||
+          have->vertoffset != want->offset)
+         return FALSE;
+
+      if (check_strides && have->inputstride != want->stride)
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+/**
+ * Walk vf's list of registered fastpaths and install the emit function
+ * of the first one that matches the current layout.
+ *
+ * Returns TRUE when a matching entry was found (vf->emit is then set
+ * to that entry's func, which may itself be NULL), FALSE otherwise.
+ */
+static boolean search_fastpath_emit( struct draw_vertex_fetch *vf )
+{
+   struct draw_vf_fastpath *candidate = vf->fastpath;
+
+   while (candidate) {
+      if (match_fastpath(vf, candidate)) {
+         vf->emit = candidate->func;
+         return TRUE;
+      }
+      candidate = candidate->next;
+   }
+
+   return FALSE;
+}
+
+/**
+ * Remember the current layout of vf together with its current emit
+ * function, so that search_fastpath_emit() can reuse that function the
+ * next time the same layout is set.
+ *
+ * \param vf             vertex fetch object whose layout and emit
+ *                       function are recorded
+ * \param match_strides  when TRUE the entry only matches layouts with
+ *                       identical vertex and input strides
+ *
+ * If memory for the entry cannot be allocated nothing is registered;
+ * the list of fastpaths is left untouched.
+ */
+void draw_vf_register_fastpath( struct draw_vertex_fetch *vf,
+                                boolean match_strides )
+{
+   struct draw_vf_fastpath *fastpath = CALLOC_STRUCT(draw_vf_fastpath);
+   unsigned i;
+
+   if (!fastpath)
+      return;
+
+   fastpath->vertex_stride = vf->vertex_stride;
+   fastpath->attr_count = vf->attr_count;
+   fastpath->match_strides = match_strides;
+   fastpath->func = vf->emit;
+   fastpath->attr = (struct draw_vf_attr_type *)
+      MALLOC(vf->attr_count * sizeof(fastpath->attr[0]));
+
+   /* A NULL result is only an error when we actually asked for storage. */
+   if (!fastpath->attr && vf->attr_count != 0) {
+      FREE(fastpath);
+      return;
+   }
+
+   for (i = 0; i < vf->attr_count; i++) {
+      fastpath->attr[i].format = vf->attr[i].format;
+      fastpath->attr[i].stride = vf->attr[i].inputstride;
+      fastpath->attr[i].size = vf->attr[i].inputsize;
+      fastpath->attr[i].offset = vf->attr[i].vertoffset;
+   }
+
+   /* Push onto the head of the list. */
+   fastpath->next = vf->fastpath;
+   vf->fastpath = fastpath;
+}
+
+
+
+
+/***********************************************************************
+ * Build codegen functions or return generic ones:
+ */
+/**
+ * Lazy emit selector.  draw_vf_set_vertex_attributes() installs this
+ * function as vf->emit, so the first emit after a layout change lands
+ * here; it replaces vf->emit with a concrete implementation and then
+ * forwards the call to it.
+ *
+ * Selection order: a registered fastpath matching the current layout,
+ * else the optional codegen hook, else a hardwired emitter, else
+ * draw_vf_generic_emit.
+ *
+ * vf is the vertex fetch object; count and dest are passed unchanged
+ * to the emitter that gets selected.
+ */
+static void choose_emit_func( struct draw_vertex_fetch *vf,
+ unsigned count,
+ uint8_t *dest)
+{
+ vf->emit = NULL;
+
+ /* Does this match an existing (hardwired, codegen or known-bad)
+ * fastpath?
+ */
+ if (search_fastpath_emit(vf)) {
+ /* Use this result. If it is null, then it is already known
+ * that the current state will fail for codegen and there is no
+ * point trying again.
+ */
+ }
+ else if (vf->codegen_emit) {
+ vf->codegen_emit( vf );
+ }
+
+ if (!vf->emit) {
+ draw_vf_generate_hardwired_emit(vf);
+ }
+
+ /* Otherwise use the generic version:
+ */
+ if (!vf->emit)
+ vf->emit = draw_vf_generic_emit;
+
+ /* Forward the call that triggered the selection. */
+ vf->emit( vf, count, dest );
+}
+
+
+
+
+
+/***********************************************************************
+ * Public entrypoints, mostly dispatch to the above:
+ */
+
+
+
+/**
+ * Build vf->attr[] from an attribute map and compute the layout of the
+ * emitted vertex.
+ *
+ * DRAW_EMIT_PAD entries do not create an attribute; they only advance
+ * the running output offset by map[i].offset bytes.  Every other entry
+ * is placed at the running offset, which then grows by the format's
+ * attrsize.  Entries whose format is flagged isconst get their
+ * constant data copied into the attribute.
+ *
+ * vertex_stride is the stride of the emitted vertex in bytes; passing
+ * 0 selects the tightly packed size computed here.
+ *
+ * Resets vf->emit to choose_emit_func so the emitter is re-selected on
+ * the next emit.  Returns the vertex stride now in effect.
+ */
+static unsigned
+draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf,
+ const struct draw_vf_attr_map *map,
+ unsigned nr,
+ unsigned vertex_stride )
+{
+ unsigned offset = 0;
+ unsigned i, j;
+
+ /* NOTE(review): the strict '<' also rejects nr == PIPE_ATTRIB_MAX,
+ * although vf->attr[] has PIPE_ATTRIB_MAX entries - confirm intent.
+ */
+ assert(nr < PIPE_ATTRIB_MAX);
+
+ /* i walks the input map, j counts the attributes actually stored. */
+ for (j = 0, i = 0; i < nr; i++) {
+ const unsigned format = map[i].format;
+ if (format == DRAW_EMIT_PAD) {
+#if (DRAW_VF_DBG)
+ debug_printf("%d: pad %d, offset %d\n", i,
+ map[i].offset, offset);
+#endif
+
+ offset += map[i].offset;
+
+ }
+ else {
+ vf->attr[j].attrib = map[i].attrib;
+ vf->attr[j].format = format;
+ vf->attr[j].insert = draw_vf_format_info[format].insert;
+ vf->attr[j].vertattrsize = draw_vf_format_info[format].attrsize;
+ vf->attr[j].vertoffset = offset;
+ vf->attr[j].isconst = draw_vf_format_info[format].isconst;
+ if(vf->attr[j].isconst)
+ memcpy(vf->attr[j].data, &map[i].data, vf->attr[j].vertattrsize);
+
+#if (DRAW_VF_DBG)
+ debug_printf("%d: %s, offset %d\n", i,
+ draw_vf_format_info[format].name,
+ vf->attr[j].vertoffset);
+#endif
+
+ offset += draw_vf_format_info[format].attrsize;
+ j++;
+ }
+ }
+
+ vf->attr_count = j;
+ vf->vertex_stride = vertex_stride ? vertex_stride : offset;
+ vf->emit = choose_emit_func;
+
+ /* An explicit stride must be large enough to hold all attributes. */
+ assert(vf->vertex_stride >= offset);
+ return vf->vertex_stride;
+}
+
+
+/**
+ * Append one entry to the attribute map being built by
+ * draw_vf_set_vertex_info().
+ *
+ * Returns the slot that was filled, or NULL (after asserting) when the
+ * map already holds PIPE_MAX_SHADER_INPUTS entries, so the caller's
+ * on-stack array is never written past its end.
+ */
+static struct draw_vf_attr_map *
+draw_vf_push_attr( struct draw_vf_attr_map *attrs,
+                   unsigned *nr_attrs,
+                   unsigned attrib,
+                   enum draw_vf_attr_format format,
+                   unsigned offset )
+{
+   struct draw_vf_attr_map *slot;
+
+   if (*nr_attrs >= PIPE_MAX_SHADER_INPUTS) {
+      assert(0);
+      return NULL;
+   }
+
+   slot = &attrs[(*nr_attrs)++];
+   slot->attrib = attrib;
+   slot->format = format;
+   slot->offset = offset;
+   return slot;
+}
+
+
+/**
+ * Configure vf to emit vertices in the layout described by vinfo.
+ *
+ * Each vinfo->emit[] entry is translated into one or more
+ * draw_vf_attr_map entries:
+ *  - EMIT_OMIT produces nothing;
+ *  - EMIT_ALL produces a pad covering the vertex header followed by
+ *    4-float attributes (plus a shorter tail if the size is not a
+ *    multiple of four floats);
+ *  - EMIT_1F_PSIZE produces a constant attribute holding point_size;
+ *  - the remaining EMIT_* values map to the corresponding DRAW_EMIT_*
+ *    format.
+ *
+ * The emitted vertex stride is vinfo->size floats.  Afterwards every
+ * attribute is set up for 4-component input, and constant attributes
+ * are pointed at their own constant data.
+ *
+ * \param vf          vertex fetch object to configure
+ * \param vinfo       description of the vertex layout to emit
+ * \param point_size  value emitted for EMIT_1F_PSIZE attributes
+ */
+void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf,
+                              const struct vertex_info *vinfo,
+                              float point_size )
+{
+   unsigned i, j, k;
+   struct draw_vf_attr *a = vf->attr;
+   struct draw_vf_attr_map attrs[PIPE_MAX_SHADER_INPUTS];
+   unsigned count = 0; /* for debug/sanity */
+   unsigned nr_attrs = 0;
+
+   for (i = 0; i < vinfo->num_attribs; i++) {
+      j = vinfo->src_index[i];
+      switch (vinfo->emit[i]) {
+      case EMIT_OMIT:
+         /* no-op */
+         break;
+      case EMIT_ALL: {
+         /* just copy the whole vertex as-is to the vbuf */
+         const unsigned header_bytes = offsetof(struct vertex_header, data);
+         unsigned s = vinfo->size;
+         assert(i == 0);
+         assert(j == 0);
+         /* copy the vertex header */
+         /* XXX: we actually don't copy the header, just pad it */
+         draw_vf_push_attr(attrs, &nr_attrs, 0, DRAW_EMIT_PAD, header_bytes);
+         s -= header_bytes/4;
+         count += header_bytes/4;
+         /* copy the vertex data */
+         for(k = 0; k < (s & ~0x3); k += 4) {
+            draw_vf_push_attr(attrs, &nr_attrs, k/4, DRAW_EMIT_4F, 0);
+            count += 4;
+         }
+         /* tail */
+         /* XXX: actually, this shouldn't be needed */
+         /* Only touch a new slot when there really is a tail; writing
+          * one unconditionally could run off the end of attrs[].
+          */
+         switch(s & 0x3) {
+         case 0:
+            break;
+         case 1:
+            draw_vf_push_attr(attrs, &nr_attrs, k/4, DRAW_EMIT_1F, 0);
+            count += 1;
+            break;
+         case 2:
+            draw_vf_push_attr(attrs, &nr_attrs, k/4, DRAW_EMIT_2F, 0);
+            count += 2;
+            break;
+         case 3:
+            draw_vf_push_attr(attrs, &nr_attrs, k/4, DRAW_EMIT_3F, 0);
+            count += 3;
+            break;
+         }
+         break;
+      }
+      case EMIT_1F:
+         draw_vf_push_attr(attrs, &nr_attrs, j, DRAW_EMIT_1F, 0);
+         count++;
+         break;
+      case EMIT_1F_PSIZE: {
+         struct draw_vf_attr_map *slot =
+            draw_vf_push_attr(attrs, &nr_attrs, j, DRAW_EMIT_1F_CONST, 0);
+         if (slot)
+            slot->data.f[0] = point_size;
+         count++;
+         break;
+      }
+      case EMIT_2F:
+         draw_vf_push_attr(attrs, &nr_attrs, j, DRAW_EMIT_2F, 0);
+         count += 2;
+         break;
+      case EMIT_3F:
+         draw_vf_push_attr(attrs, &nr_attrs, j, DRAW_EMIT_3F, 0);
+         count += 3;
+         break;
+      case EMIT_4F:
+         draw_vf_push_attr(attrs, &nr_attrs, j, DRAW_EMIT_4F, 0);
+         count += 4;
+         break;
+      case EMIT_4UB:
+         /* four ubytes occupy one float-sized slot of the vertex */
+         draw_vf_push_attr(attrs, &nr_attrs, j, DRAW_EMIT_4UB_4F_BGRA, 0);
+         count += 1;
+         break;
+      default:
+         assert(0);
+         break;
+      }
+   }
+
+   assert(count == vinfo->size);
+
+   draw_vf_set_vertex_attributes(vf,
+                                 attrs,
+                                 nr_attrs,
+                                 vinfo->size * sizeof(float) );
+
+   /* Inputs are always read as 4-component vectors here, so pick the
+    * 4-component insert function for every attribute.
+    */
+   for (j = 0; j < vf->attr_count; j++) {
+      a[j].inputsize = 4;
+      a[j].do_insert = a[j].insert[4 - 1];
+      if(a[j].isconst) {
+         a[j].inputptr = a[j].data;
+         a[j].inputstride = 0;
+      }
+   }
+}
+
+
+#if 0
+/* Set attribute pointers, adjusted for start position:
+ */
+void draw_vf_set_sources( struct draw_vertex_fetch *vf,
+ GLvector4f * const sources[],
+ unsigned start )
+{
+ struct draw_vf_attr *a = vf->attr;
+ unsigned j;
+
+ for (j = 0; j < vf->attr_count; j++) {
+ const GLvector4f *vptr = sources[a[j].attrib];
+
+ if ((a[j].inputstride != vptr->stride) ||
+ (a[j].inputsize != vptr->size))
+ vf->emit = choose_emit_func;
+
+ a[j].inputstride = vptr->stride;
+ a[j].inputsize = vptr->size;
+ a[j].do_insert = a[j].insert[vptr->size - 1];
+ a[j].inputptr = ((uint8_t *)vptr->data) + start * vptr->stride;
+ }
+}
+#endif
+
+
+/**
+ * Emit a vertex to dest.
+ */
+void draw_vf_emit_vertex( struct draw_vertex_fetch *vf,
+                          struct vertex_header *vertex,
+                          void *dest )
+{
+   const unsigned nr = vf->attr_count;
+   unsigned idx;
+
+   /* Point every non-constant attribute at its slot in this vertex;
+    * constant attributes keep reading from their own data.
+    */
+   for (idx = 0; idx < nr; idx++) {
+      struct draw_vf_attr *attr = &vf->attr[idx];
+
+      if (attr->isconst)
+         continue;
+
+      attr->inputptr = (uint8_t *)&vertex->data[attr->attrib][0];
+      attr->inputstride = 0; /* XXX: one-vertex-max ATM */
+   }
+
+   vf->emit( vf, 1, (uint8_t*) dest );
+}
+
+
+
+/**
+ * Allocate and initialise a vertex fetch object.
+ *
+ * Every attribute gets a back-pointer to the object, the identity
+ * vector is set to (0, 0, 0, 1) and, when built with USE_SSE_ASM and
+ * GALLIUM_NO_CODEGEN is not set in the environment, the SSE code
+ * generator is installed as the codegen hook.
+ *
+ * Returns the new object, or NULL if the allocation failed.  Release
+ * it with draw_vf_destroy().
+ */
+struct draw_vertex_fetch *draw_vf_create( void )
+{
+   struct draw_vertex_fetch *vf = CALLOC_STRUCT(draw_vertex_fetch);
+   unsigned i;
+
+   if (!vf)
+      return NULL;
+
+   for (i = 0; i < PIPE_ATTRIB_MAX; i++)
+      vf->attr[i].vf = vf;
+
+   vf->identity[0] = 0.0;
+   vf->identity[1] = 0.0;
+   vf->identity[2] = 0.0;
+   vf->identity[3] = 1.0;
+
+   vf->codegen_emit = NULL;
+
+#ifdef USE_SSE_ASM
+   if (!GETENV("GALLIUM_NO_CODEGEN"))
+      vf->codegen_emit = draw_vf_generate_sse_emit;
+#endif
+
+   return vf;
+}
+
+
+/**
+ * Free a vertex fetch object together with its list of registered
+ * fastpaths.  Passing NULL is allowed and does nothing.
+ *
+ * Only the fastpath bookkeeping is freed; the emit function stored in
+ * each entry (fp->func) is not released here.
+ */
+void draw_vf_destroy( struct draw_vertex_fetch *vf )
+{
+   struct draw_vf_fastpath *fp, *tmp;
+
+   if (!vf)
+      return;
+
+   for (fp = vf->fastpath ; fp ; fp = tmp) {
+      tmp = fp->next;   /* grab the link before the node is freed */
+      FREE(fp->attr);
+
+      /* KW: At the moment, fp->func is constrained to be allocated by
+       * _mesa_exec_alloc(), as the hardwired fastpaths in
+       * t_vertex_generic.c are handled specially.  It would be nice
+       * to unify them, but this probably won't change until this
+       * module gets another overhaul.
+       */
+      //_mesa_exec_free((void *) fp->func);
+      FREE(fp);
+   }
+
+   vf->fastpath = NULL;
+   FREE(vf);
+}
diff --git a/src/mesa/pipe/draw/draw_vf.h b/src/mesa/pipe/draw/draw_vf.h
new file mode 100644
index 0000000000..e694b98675
--- /dev/null
+++ b/src/mesa/pipe/draw/draw_vf.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright 2003 Tungsten Graphics, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Keith Whitwell <keithw@tungstengraphics.com>
+ */
+
+#ifndef DRAW_VF_H
+#define DRAW_VF_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+
+#include "draw_vertex.h"
+#include "draw_private.h" // for vertex_header
+
+
+/**
+ * Output formats in which an attribute can be emitted.  The values are
+ * used as indices into draw_vf_format_info[], so the order here must
+ * stay in step with that table.
+ */
+enum draw_vf_attr_format {
+ DRAW_EMIT_1F,
+ DRAW_EMIT_2F,
+ DRAW_EMIT_3F,
+ DRAW_EMIT_4F,
+ DRAW_EMIT_3F_XYW, /**< for projective texture */
+ DRAW_EMIT_1UB_1F, /**< for fog coordinate */
+ DRAW_EMIT_3UB_3F_RGB, /**< for specular color */
+ DRAW_EMIT_3UB_3F_BGR, /**< for specular color */
+ DRAW_EMIT_4UB_4F_RGBA, /**< for color */
+ DRAW_EMIT_4UB_4F_BGRA, /**< for color */
+ DRAW_EMIT_4UB_4F_ARGB, /**< for color */
+ DRAW_EMIT_4UB_4F_ABGR, /**< for color */
+ DRAW_EMIT_1F_CONST,
+ DRAW_EMIT_2F_CONST,
+ DRAW_EMIT_3F_CONST,
+ DRAW_EMIT_4F_CONST,
+ DRAW_EMIT_PAD, /**< leave a hole of 'offset' bytes */
+ DRAW_EMIT_MAX /**< number of formats; size of draw_vf_format_info[] */
+};
+
+/**
+ * One entry of the attribute map handed to
+ * draw_vf_set_vertex_attributes(): which input attribute to emit and
+ * in which output format.
+ */
+struct draw_vf_attr_map
+{
+ /** Input attribute number */
+ unsigned attrib;
+
+ /** Output format of the attribute, or DRAW_EMIT_PAD for a hole */
+ enum draw_vf_attr_format format;
+
+ /** Size of the hole in bytes; only read for DRAW_EMIT_PAD entries */
+ unsigned offset;
+
+ /**
+ * Constant data for DRAW_EMIT_*_CONST
+ */
+ union {
+ uint8_t ub[4];
+ float f[4];
+ } data;
+};
+
+struct draw_vertex_fetch;
+
+
+
+#if 0
+unsigned
+draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf,
+ const struct draw_vf_attr_map *map,
+ unsigned nr,
+ unsigned vertex_stride );
+#endif
+
+void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf,
+ const struct vertex_info *vinfo,
+ float point_size );
+
+#if 0
+void
+draw_vf_set_sources( struct draw_vertex_fetch *vf,
+ GLvector4f * const attrib[],
+ unsigned start );
+#endif
+
+void
+draw_vf_emit_vertex( struct draw_vertex_fetch *vf,
+ struct vertex_header *vertex,
+ void *dest );
+
+struct draw_vertex_fetch *
+draw_vf_create( void );
+
+void
+draw_vf_destroy( struct draw_vertex_fetch *vf );
+
+
+
+/***********************************************************************
+ * Internal functions and structs:
+ */
+
+struct draw_vf_attr;
+
+typedef void (*draw_vf_extract_func)( const struct draw_vf_attr *a,
+ float *out,
+ const uint8_t *v );
+
+typedef void (*draw_vf_insert_func)( const struct draw_vf_attr *a,
+ uint8_t *v,
+ const float *in );
+
+typedef void (*draw_vf_emit_func)( struct draw_vertex_fetch *vf,
+ unsigned count,
+ uint8_t *dest );
+
+
+
+/**
+ * Describes how to convert/move a vertex attribute from a vertex
+ * array to a vertex structure.
+ */
+struct draw_vf_attr
+{
+ struct draw_vertex_fetch *vf; /**< owning object, set by draw_vf_create() */
+
+ unsigned format; /**< a draw_vf_attr_format value */
+ unsigned inputsize; /**< number of input components; selects insert[inputsize - 1] */
+ unsigned inputstride; /**< bytes added to inputptr after each emitted vertex */
+ unsigned vertoffset; /**< position of the attrib in the vertex struct */
+
+ boolean isconst; /**< read from const data below */
+ uint8_t data[16];
+
+ unsigned attrib; /**< which vertex attrib (0=position, etc) */
+ unsigned vertattrsize; /**< size of the attribute in bytes */
+
+ uint8_t *inputptr; /**< where the next input value is read from */
+ const draw_vf_insert_func *insert; /**< the format's insert[] table */
+ draw_vf_insert_func do_insert; /**< entry of insert[] chosen for inputsize */
+ draw_vf_extract_func extract; /**< NOTE(review): not assigned in draw_vf.c - confirm whether it is used */
+};
+
+/**
+ * State of one vertex emitter: the attribute layout currently set, the
+ * function that emits vertices in that layout, and the list of
+ * fastpaths registered so far.
+ */
+struct draw_vertex_fetch
+{
+ struct draw_vf_attr attr[PIPE_ATTRIB_MAX];
+ unsigned attr_count; /**< number of valid entries in attr[] */
+ unsigned vertex_stride; /**< size of one emitted vertex in bytes */
+
+ /** Current emitter; choose_emit_func until the first emit after a layout change */
+ draw_vf_emit_func emit;
+
+ /* Parameters and constants for codegen:
+ */
+ float identity[4]; /**< initialised to (0, 0, 0, 1) by draw_vf_create() */
+
+ struct draw_vf_fastpath *fastpath; /**< head of the singly linked fastpath list */
+
+ /** Optional code generator hook; NULL when codegen is unavailable or disabled */
+ void (*codegen_emit)( struct draw_vertex_fetch *vf );
+};
+
+
+/**
+ * Per-attribute part of a fastpath key: a snapshot of the fields of
+ * struct draw_vf_attr that match_fastpath() compares.
+ */
+struct draw_vf_attr_type {
+ unsigned format; /**< copy of draw_vf_attr::format */
+ unsigned size; /**< copy of draw_vf_attr::inputsize */
+ unsigned stride; /**< copy of draw_vf_attr::inputstride */
+ unsigned offset; /**< copy of draw_vf_attr::vertoffset */
+};
+
+/**
+ * A remembered (layout, emit function) pair, kept in a singly linked
+ * list hanging off struct draw_vertex_fetch.
+ */
+struct draw_vf_fastpath {
+ unsigned vertex_stride; /**< vertex stride at registration time */
+ unsigned attr_count; /**< number of entries in attr[] */
+ boolean match_strides; /**< if set, strides must match as well as formats */
+
+ struct draw_vf_attr_type *attr; /**< attr_count entries, owned by this struct */
+
+ draw_vf_emit_func func; /**< emitter to use when the layout matches */
+ struct draw_vf_fastpath *next; /**< next list entry or NULL */
+};
+
+
+void
+draw_vf_register_fastpath( struct draw_vertex_fetch *vtx,
+ boolean match_strides );
+
+void
+draw_vf_generic_emit( struct draw_vertex_fetch *vf,
+ unsigned count,
+ uint8_t *v );
+
+void
+draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf );
+
+void
+draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf );
+
+
+/**
+ * Static description of one draw_vf_attr_format value.
+ */
+struct draw_vf_format_info {
+ const char *name; /**< short name, printed by the DRAW_VF_DBG output */
+ draw_vf_insert_func insert[4]; /**< indexed by (number of input components - 1) */
+ const unsigned attrsize; /**< bytes the attribute occupies in the emitted vertex */
+ const boolean isconst; /**< TRUE when the value comes from constant data */
+};
+
+extern const struct draw_vf_format_info
+draw_vf_format_info[DRAW_EMIT_MAX];
+
+
+#endif
diff --git a/src/mesa/pipe/draw/draw_vf_generic.c b/src/mesa/pipe/draw/draw_vf_generic.c
new file mode 100644
index 0000000000..7a60a9db9c
--- /dev/null
+++ b/src/mesa/pipe/draw/draw_vf_generic.c
@@ -0,0 +1,585 @@
+
+/*
+ * Copyright 2003 Tungsten Graphics, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Keith Whitwell <keithw@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_util.h"
+
+#include "draw_vf.h"
+
+
+
+/*
+ * insert_4f_N: write a 4-float attribute at v from an input that has N
+ * valid components.  Components the input lacks are filled in as
+ * y = 0, z = 0, w = 1.
+ */
+static INLINE void insert_4f_4( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = in[3];
+}
+
+static INLINE void insert_4f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+ out[3] = 1;
+}
+
+static INLINE void insert_4f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = 0;
+ out[3] = 1;
+}
+
+static INLINE void insert_4f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = 0;
+ out[2] = 0;
+ out[3] = 1;
+}
+
+/*
+ * insert_3f_xyw_4 writes components 0, 1 and 3 of a 4-component input
+ * as three consecutive floats; component 2 is dropped.
+ * insert_3f_xyw_err is the format table entry for inputs with fewer
+ * than four components, for which this format is not supported: it
+ * only asserts.
+ */
+static INLINE void insert_3f_xyw_4( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[3];
+}
+
+static INLINE void insert_3f_xyw_err( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ (void) a; (void) v; (void) in;
+ assert(0);
+}
+
+/*
+ * insert_3f_N: write a 3-float attribute at v from an input that has N
+ * valid components; components the input lacks are written as 0.
+ */
+static INLINE void insert_3f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = in[2];
+}
+
+static INLINE void insert_3f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = in[1];
+ out[2] = 0;
+}
+
+static INLINE void insert_3f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = 0;
+ out[2] = 0;
+}
+
+
+/*
+ * insert_2f_N: write a 2-float attribute at v from an input that has N
+ * valid components; a missing second component is written as 0.
+ */
+static INLINE void insert_2f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = in[1];
+}
+
+static INLINE void insert_2f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+ out[1] = 0;
+}
+
+/* Write the first input component at v as a single float. */
+static INLINE void insert_1f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ float *out = (float *)(v);
+ (void) a;
+
+ out[0] = in[0];
+}
+
+/* Do nothing; fills the unused slots of the EMIT2/EMIT3/EMIT4 fastpath macros. */
+static INLINE void insert_null( const struct draw_vf_attr *a, uint8_t *v, const float *in )
+{
+ (void) a; (void) v; (void) in;
+}
+
+/*
+ * insert_4ub_4f_rgba_N: write four bytes at v from an N-component
+ * float input, converting each present component with
+ * UNCLAMPED_FLOAT_TO_UBYTE.  Input component k goes to byte k.  A
+ * missing second or third component is written as 0, a missing fourth
+ * as 0xff.
+ */
+static INLINE void insert_4ub_4f_rgba_4( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]);
+}
+
+static INLINE void insert_4ub_4f_rgba_3( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]);
+ v[3] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_rgba_2( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ v[2] = 0;
+ v[3] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_rgba_1( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+ v[1] = 0;
+ v[2] = 0;
+ v[3] = 0xff;
+}
+
+/*
+ * insert_4ub_4f_bgra_N: as the rgba variants, but with the first and
+ * third bytes swapped: input components 0, 1, 2, 3 go to bytes 2, 1,
+ * 0, 3.  A missing second or third component is written as 0, a
+ * missing fourth as 0xff.
+ */
+static INLINE void insert_4ub_4f_bgra_4( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]);
+}
+
+static INLINE void insert_4ub_4f_bgra_3( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]);
+ v[3] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_bgra_2( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ v[0] = 0;
+ v[3] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_bgra_1( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+ v[1] = 0;
+ v[0] = 0;
+ v[3] = 0xff;
+}
+
+/*
+ * insert_4ub_4f_argb_N: input components 0, 1, 2, 3 go to bytes 1, 2,
+ * 3, 0, converted with UNCLAMPED_FLOAT_TO_UBYTE.  A missing second or
+ * third component is written as 0, a missing fourth (byte 0) as 0xff.
+ */
+static INLINE void insert_4ub_4f_argb_4( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]);
+}
+
+static INLINE void insert_4ub_4f_argb_3( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]);
+ v[0] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_argb_2( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
+ v[3] = 0x00;
+ v[0] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_argb_1( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
+ v[2] = 0x00;
+ v[3] = 0x00;
+ v[0] = 0xff;
+}
+
+/*
+ * insert_4ub_4f_abgr_N: input components 0, 1, 2, 3 go to bytes 3, 2,
+ * 1, 0, converted with UNCLAMPED_FLOAT_TO_UBYTE.  A missing second or
+ * third component is written as 0, a missing fourth (byte 0) as 0xff.
+ */
+static INLINE void insert_4ub_4f_abgr_4( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]);
+}
+
+static INLINE void insert_4ub_4f_abgr_3( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]);
+ v[0] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_abgr_2( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
+ v[1] = 0x00;
+ v[0] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_abgr_1( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
+ v[2] = 0x00;
+ v[1] = 0x00;
+ v[0] = 0xff;
+}
+
+/*
+ * insert_3ub_3f_rgb_N: write three bytes at v from an N-component
+ * float input, converted with UNCLAMPED_FLOAT_TO_UBYTE.  Input
+ * component k goes to byte k; missing components are written as 0.
+ */
+static INLINE void insert_3ub_3f_rgb_3( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]);
+}
+
+static INLINE void insert_3ub_3f_rgb_2( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ v[2] = 0;
+}
+
+static INLINE void insert_3ub_3f_rgb_1( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+ v[1] = 0;
+ v[2] = 0;
+}
+
+/*
+ * insert_3ub_3f_bgr_N: as the rgb variants with the byte order
+ * reversed: input components 0, 1, 2 go to bytes 2, 1, 0.  Missing
+ * components are written as 0.
+ */
+static INLINE void insert_3ub_3f_bgr_3( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]);
+}
+
+static INLINE void insert_3ub_3f_bgr_2( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+ UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+ v[0] = 0;
+}
+
+static INLINE void insert_3ub_3f_bgr_1( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+ v[1] = 0;
+ v[0] = 0;
+}
+
+
+/* Write one byte at v: the first input component converted with UNCLAMPED_FLOAT_TO_UBYTE. */
+static INLINE void insert_1ub_1f_1( const struct draw_vf_attr *a, uint8_t *v,
+ const float *in )
+{
+ (void) a;
+ UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+}
+
+
+/*
+ * Per-format description table, indexed by enum draw_vf_attr_format;
+ * the entries must therefore appear in the same order as the enum
+ * values.
+ *
+ * Each entry gives: the format name, the four insert functions for
+ * inputs with 1, 2, 3 and 4 valid components (in that order), the size
+ * in bytes of the emitted attribute, and whether the value comes from
+ * constant data.
+ */
+const struct draw_vf_format_info draw_vf_format_info[DRAW_EMIT_MAX] =
+{
+ { "1f",
+ { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 },
+ sizeof(float), FALSE },
+
+ { "2f",
+ { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 },
+ 2 * sizeof(float), FALSE },
+
+ { "3f",
+ { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 },
+ 3 * sizeof(float), FALSE },
+
+ { "4f",
+ { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 },
+ 4 * sizeof(float), FALSE },
+
+ { "3f_xyw",
+ { insert_3f_xyw_err, insert_3f_xyw_err, insert_3f_xyw_err,
+ insert_3f_xyw_4 },
+ 3 * sizeof(float), FALSE },
+
+ { "1ub_1f",
+ { insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1 },
+ sizeof(uint8_t), FALSE },
+
+ { "3ub_3f_rgb",
+ { insert_3ub_3f_rgb_1, insert_3ub_3f_rgb_2, insert_3ub_3f_rgb_3,
+ insert_3ub_3f_rgb_3 },
+ 3 * sizeof(uint8_t), FALSE },
+
+ { "3ub_3f_bgr",
+ { insert_3ub_3f_bgr_1, insert_3ub_3f_bgr_2, insert_3ub_3f_bgr_3,
+ insert_3ub_3f_bgr_3 },
+ 3 * sizeof(uint8_t), FALSE },
+
+ { "4ub_4f_rgba",
+ { insert_4ub_4f_rgba_1, insert_4ub_4f_rgba_2, insert_4ub_4f_rgba_3,
+ insert_4ub_4f_rgba_4 },
+ 4 * sizeof(uint8_t), FALSE },
+
+ { "4ub_4f_bgra",
+ { insert_4ub_4f_bgra_1, insert_4ub_4f_bgra_2, insert_4ub_4f_bgra_3,
+ insert_4ub_4f_bgra_4 },
+ 4 * sizeof(uint8_t), FALSE },
+
+ { "4ub_4f_argb",
+ { insert_4ub_4f_argb_1, insert_4ub_4f_argb_2, insert_4ub_4f_argb_3,
+ insert_4ub_4f_argb_4 },
+ 4 * sizeof(uint8_t), FALSE },
+
+ { "4ub_4f_abgr",
+ { insert_4ub_4f_abgr_1, insert_4ub_4f_abgr_2, insert_4ub_4f_abgr_3,
+ insert_4ub_4f_abgr_4 },
+ 4 * sizeof(uint8_t), FALSE },
+
+ { "1f_const",
+ { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 },
+ sizeof(float), TRUE },
+
+ { "2f_const",
+ { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 },
+ 2 * sizeof(float), TRUE },
+
+ { "3f_const",
+ { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 },
+ 3 * sizeof(float), TRUE },
+
+ { "4f_const",
+ { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 },
+ 4 * sizeof(float), TRUE },
+
+ /* Pad entries never become attributes, so no insert function is needed. */
+ { "pad",
+ { NULL, NULL, NULL, NULL },
+ 0, FALSE },
+
+};
+
+
+
+
+/***********************************************************************
+ * Hardwired fastpaths for emitting whole vertices or groups of
+ * vertices
+ */
+/*
+ * EMIT5 defines a static emit function called NAME that handles
+ * vertices with exactly NR attributes (NR <= 5).  F0..F4 are the
+ * insert functions called directly for attributes 0..4; slots at or
+ * beyond NR are never called.  For each of the count vertices the
+ * function inserts every attribute at v + vertoffset, advances that
+ * attribute's inputptr by its inputstride, and then moves v on by
+ * vf->vertex_stride.
+ *
+ * EMIT2, EMIT3 and EMIT4 are shorthands that fill the unused slots
+ * with insert_null.
+ */
+#define EMIT5(NR, F0, F1, F2, F3, F4, NAME) \
+static void NAME( struct draw_vertex_fetch *vf, \
+ unsigned count, \
+ uint8_t *v ) \
+{ \
+ struct draw_vf_attr *a = vf->attr; \
+ unsigned i; \
+ \
+ for (i = 0 ; i < count ; i++, v += vf->vertex_stride) { \
+ if (NR > 0) { \
+ F0( &a[0], v + a[0].vertoffset, (float *)a[0].inputptr ); \
+ a[0].inputptr += a[0].inputstride; \
+ } \
+ \
+ if (NR > 1) { \
+ F1( &a[1], v + a[1].vertoffset, (float *)a[1].inputptr ); \
+ a[1].inputptr += a[1].inputstride; \
+ } \
+ \
+ if (NR > 2) { \
+ F2( &a[2], v + a[2].vertoffset, (float *)a[2].inputptr ); \
+ a[2].inputptr += a[2].inputstride; \
+ } \
+ \
+ if (NR > 3) { \
+ F3( &a[3], v + a[3].vertoffset, (float *)a[3].inputptr ); \
+ a[3].inputptr += a[3].inputstride; \
+ } \
+ \
+ if (NR > 4) { \
+ F4( &a[4], v + a[4].vertoffset, (float *)a[4].inputptr ); \
+ a[4].inputptr += a[4].inputstride; \
+ } \
+ } \
+}
+
+
+#define EMIT2(F0, F1, NAME) EMIT5(2, F0, F1, insert_null, \
+ insert_null, insert_null, NAME)
+
+#define EMIT3(F0, F1, F2, NAME) EMIT5(3, F0, F1, F2, insert_null, \
+ insert_null, NAME)
+
+#define EMIT4(F0, F1, F2, F3, NAME) EMIT5(4, F0, F1, F2, F3, \
+ insert_null, NAME)
+
+
+/* The hardwired emitters that draw_vf_generate_hardwired_emit() can select. */
+EMIT2(insert_3f_3, insert_4ub_4f_rgba_4, emit_xyz3_rgba4)
+
+EMIT3(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, emit_xyzw4_rgba4_st2)
+
+EMIT4(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, insert_2f_2, emit_xyzw4_rgba4_st2_st2)
+
+
+/* Use the codegen paths to select one of a number of hardwired
+ * fastpaths.
+ */
+void draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf )
+{
+   const struct draw_vf_attr *attr = vf->attr;
+   const unsigned nr = vf->attr_count;
+   draw_vf_emit_func chosen = NULL;
+
+   /* Each hardwired emitter is tied to one exact sequence of insert
+    * functions; anything else leaves the result NULL so the caller
+    * falls back to another emitter.
+    */
+   if (nr == 2) {
+      if (attr[0].do_insert == insert_3f_3 &&
+          attr[1].do_insert == insert_4ub_4f_rgba_4)
+         chosen = emit_xyz3_rgba4;
+   }
+   else if (nr == 3) {
+      if (attr[0].do_insert == insert_4f_4 &&
+          attr[1].do_insert == insert_4ub_4f_rgba_4 &&
+          attr[2].do_insert == insert_2f_2)
+         chosen = emit_xyzw4_rgba4_st2;
+   }
+   else if (nr == 4) {
+      if (attr[0].do_insert == insert_4f_4 &&
+          attr[1].do_insert == insert_4ub_4f_rgba_4 &&
+          attr[2].do_insert == insert_2f_2 &&
+          attr[3].do_insert == insert_2f_2)
+         chosen = emit_xyzw4_rgba4_st2_st2;
+   }
+
+   vf->emit = chosen;
+}
+
+/***********************************************************************
+ * Generic (non-codegen) functions for whole vertices or groups of
+ * vertices
+ */
+
+/**
+ * Fallback emitter that works for any layout: for each of the count
+ * vertices, call every attribute's do_insert function on the current
+ * input pointer and advance that pointer by the attribute's input
+ * stride.  v is the destination of the first vertex; successive
+ * vertices are vf->vertex_stride bytes apart.
+ */
+void draw_vf_generic_emit( struct draw_vertex_fetch *vf,
+                           unsigned count,
+                           uint8_t *v )
+{
+   struct draw_vf_attr *const first = vf->attr;
+   struct draw_vf_attr *const last = first + vf->attr_count;
+   const unsigned vertex_size = vf->vertex_stride;
+   unsigned remaining;
+
+   for (remaining = count; remaining > 0; remaining--) {
+      struct draw_vf_attr *attr;
+
+      for (attr = first; attr != last; attr++) {
+         float *src = (float *)attr->inputptr;
+
+         /* advance first, exactly as the input is consumed */
+         attr->inputptr += attr->inputstride;
+         attr->do_insert( attr, v + attr->vertoffset, src );
+      }
+
+      v += vertex_size;
+   }
+}
+
+
diff --git a/src/mesa/pipe/draw/draw_vf_sse.c b/src/mesa/pipe/draw/draw_vf_sse.c
new file mode 100644
index 0000000000..1ad2ae756d
--- /dev/null
+++ b/src/mesa/pipe/draw/draw_vf_sse.c
@@ -0,0 +1,614 @@
+/*
+ * Copyright 2003 Tungsten Graphics, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Keith Whitwell <keithw@tungstengraphics.com>
+ */
+
+
+#include "simple_list.h"
+
+#include "pipe/p_compiler.h"
+
+#include "draw_vf.h"
+
+
+#if defined(USE_SSE_ASM)
+
+#include "x86/rtasm/x86sse.h"
+#include "x86/common_x86_asm.h"
+
+
+#define X 0
+#define Y 1
+#define Z 2
+#define W 3
+
+
+struct x86_program {
+ struct x86_function func; /* function being assembled; passed to every x86_/sse_ emit call */
+
+ struct draw_vertex_fetch *vf; /* vertex fetch state the code is generated for */
+ boolean inputs_safe; /* if set, emit_load3f_3 may read one dword past a 3-float source */
+ boolean outputs_safe; /* if set, emit_store3f may write one dword past a 3-float destination */
+ boolean have_sse2; /* selects the SSE2 pack path over the MMX one in emit_pack_store_4ub */
+
+ struct x86_reg identity; /* register loaded from vf->identity by build_vertex_emit */
+ struct x86_reg chan0; /* register multiplied in before packing to ubytes ("scale by 255.0") */
+};
+
+
+/* Return the register that holds the identity vector. */
+static struct x86_reg get_identity( struct x86_program *p )
+{
+   struct x86_reg identity = p->identity;
+
+   return identity;
+}
+
+/* Four floats wanted, four available: one unaligned 16-byte move. */
+static void emit_load4f_4( struct x86_program *p,
+                           struct x86_reg dest,
+                           struct x86_reg arg0 )
+{
+   struct x86_function *fn = &p->func;
+
+   sse_movups(fn, dest, arg0);
+}
+
+/* Four floats wanted, three available: read a, b, c from arg0 and
+ * fill the remaining lane from the identity register.
+ */
+static void emit_load4f_3( struct x86_program *p,
+                           struct x86_reg dest,
+                           struct x86_reg arg0 )
+{
+   struct x86_function *fn = &p->func;
+   struct x86_reg third = x86_make_disp(arg0, 8);
+
+   /* Have to jump through some hoops; dest after each instruction:
+    *
+    * c 0 0 0
+    * c 0 0 1
+    * 0 0 c 1
+    * a b c 1
+    */
+   sse_movss(fn, dest, third);
+   sse_shufps(fn, dest, get_identity(p), SHUF(X,Y,Z,W) );
+   sse_shufps(fn, dest, dest, SHUF(Y,Z,X,W) );
+   sse_movlps(fn, dest, arg0);
+}
+
+/* Four floats wanted, two available: start from the identity vector
+ * and overwrite the low two words with the source data.
+ */
+static void emit_load4f_2( struct x86_program *p,
+                           struct x86_reg dest,
+                           struct x86_reg arg0 )
+{
+   struct x86_function *fn = &p->func;
+
+   sse_movups(fn, dest, get_identity(p));
+   sse_movlps(fn, dest, arg0);
+}
+
+/* Four floats wanted, one available: pull in the low word, then
+ * swizzle in the identity register.
+ */
+static void emit_load4f_1( struct x86_program *p,
+                           struct x86_reg dest,
+                           struct x86_reg arg0 )
+{
+   struct x86_function *fn = &p->func;
+
+   sse_movss(fn, dest, arg0);
+   sse_shufps(fn, dest, get_identity(p), SHUF(X,Y,Z,W) );
+}
+
+
+
+/* Three floats wanted, three available.  The contents of the fourth
+ * lane of dest differ between the two paths below.
+ */
+static void emit_load3f_3( struct x86_program *p,
+                           struct x86_reg dest,
+                           struct x86_reg arg0 )
+{
+   struct x86_function *fn = &p->func;
+
+   /* A plain 16-byte load over-reads by 1 dword - potential SEGV if
+    * input is a vertex array - so only use it when flagged as safe.
+    */
+   if (p->inputs_safe) {
+      sse_movups(fn, dest, arg0);
+      return;
+   }
+
+   /* Otherwise build the value from a 1-dword and a 2-dword load:
+    *
+    * c 0 0 0
+    * c c c c
+    * a b c c
+    */
+   sse_movss(fn, dest, x86_make_disp(arg0, 8));
+   sse_shufps(fn, dest, dest, SHUF(X,X,X,X));
+   sse_movlps(fn, dest, arg0);
+}
+
+static void emit_load3f_2( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 )
+{
+ emit_load4f_2(p, dest, arg0); /* three wanted, two available: reuse the four-wide
+ * variant (identity fill, then low two words) */
+}
+
+static void emit_load3f_1( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 )
+{
+ emit_load4f_1(p, dest, arg0); /* three wanted, one available: reuse the four-wide
+ * variant (low word, then identity swizzle) */
+}
+
+/* Two floats wanted, two available: load the low two words of dest. */
+static void emit_load2f_2( struct x86_program *p,
+                           struct x86_reg dest,
+                           struct x86_reg arg0 )
+{
+   struct x86_function *fn = &p->func;
+
+   sse_movlps(fn, dest, arg0);
+}
+
+static void emit_load2f_1( struct x86_program *p,
+ struct x86_reg dest,
+ struct x86_reg arg0 )
+{
+ emit_load4f_1(p, dest, arg0); /* two wanted, one available: reuse the four-wide
+ * variant (low word, then identity swizzle) */
+}
+
+/* One float wanted: load a single word into the low lane of dest. */
+static void emit_load1f_1( struct x86_program *p,
+                           struct x86_reg dest,
+                           struct x86_reg arg0 )
+{
+   struct x86_function *fn = &p->func;
+
+   sse_movss(fn, dest, arg0);
+}
+
+static void (*load[4][4])( struct x86_program *p, /* indexed by emit_load as [sz-1][src_sz-1] */
+ struct x86_reg dest,
+ struct x86_reg arg0 ) = {
+ { emit_load1f_1, /* sz == 1: always a single-word load, whatever the source size */
+ emit_load1f_1,
+ emit_load1f_1,
+ emit_load1f_1 },
+
+ { emit_load2f_1, /* sz == 2 */
+ emit_load2f_2,
+ emit_load2f_2,
+ emit_load2f_2 },
+
+ { emit_load3f_1, /* sz == 3 */
+ emit_load3f_2,
+ emit_load3f_3,
+ emit_load3f_3 },
+
+ { emit_load4f_1, /* sz == 4 */
+ emit_load4f_2,
+ emit_load4f_3,
+ emit_load4f_4 }
+};
+
+/* Emit a load of 'sz' floats into dest from a source that provides
+ * 'src_sz' floats, dispatching through the load[][] table.  Both
+ * sizes are used as 1-based indices, so they must lie in 1..4.
+ */
+static void emit_load( struct x86_program *p,
+                       struct x86_reg dest,
+                       unsigned sz,
+                       struct x86_reg src,
+                       unsigned src_sz)
+{
+   void (*emit)( struct x86_program *,
+                 struct x86_reg,
+                 struct x86_reg ) = load[sz-1][src_sz-1];
+
+   emit(p, dest, src);
+}
+
+/* Store all four floats of arg0 with one unaligned 16-byte move. */
+static void emit_store4f( struct x86_program *p,
+                          struct x86_reg dest,
+                          struct x86_reg arg0 )
+{
+   struct x86_function *fn = &p->func;
+
+   sse_movups(fn, dest, arg0);
+}
+
+/* Store the low three floats of arg0 to dest.  On the careful path
+ * arg0 is clobbered by the shuffle.
+ */
+static void emit_store3f( struct x86_program *p,
+                          struct x86_reg dest,
+                          struct x86_reg arg0 )
+{
+   struct x86_function *fn = &p->func;
+
+   if (p->outputs_safe) {
+      /* Emit the extra dword anyway.  This may hurt writecombining,
+       * may cause other problems.
+       */
+      sse_movups(fn, dest, arg0);
+      return;
+   }
+
+   /* Alternate strategy - emit two, shuffle, emit one.
+    */
+   sse_movlps(fn, dest, arg0);
+   sse_shufps(fn, arg0, arg0, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
+   sse_movss(fn, x86_make_disp(dest,8), arg0);
+}
+
+/* Store the low two floats of arg0. */
+static void emit_store2f( struct x86_program *p,
+                          struct x86_reg dest,
+                          struct x86_reg arg0 )
+{
+   struct x86_function *fn = &p->func;
+
+   sse_movlps(fn, dest, arg0);
+}
+
+/* Store the low float of arg0. */
+static void emit_store1f( struct x86_program *p,
+                          struct x86_reg dest,
+                          struct x86_reg arg0 )
+{
+   struct x86_function *fn = &p->func;
+
+   sse_movss(fn, dest, arg0);
+}
+
+
+static void (*store[4])( struct x86_program *p, /* indexed by emit_store as [sz-1] */
+ struct x86_reg dest,
+ struct x86_reg arg0 ) =
+{
+ emit_store1f, /* sz == 1 */
+ emit_store2f, /* sz == 2 */
+ emit_store3f, /* sz == 3; may clobber the source register */
+ emit_store4f /* sz == 4 */
+};
+
+/* Emit a store of the low 'sz' floats of temp to dest, dispatching
+ * through the store[] table.  'sz' is a 1-based index and must lie in
+ * 1..4.
+ */
+static void emit_store( struct x86_program *p,
+                        struct x86_reg dest,
+                        unsigned sz,
+                        struct x86_reg temp )
+{
+   void (*emit)( struct x86_program *,
+                 struct x86_reg,
+                 struct x86_reg ) = store[sz-1];
+
+   emit(p, dest, temp);
+}
+
+/* Scale the four floats in temp by p->chan0, convert them to integers,
+ * pack them down to four unsigned bytes and store the resulting dword
+ * at dest.  temp is clobbered.  Uses SSE2 when available, otherwise
+ * goes through the MMX registers.
+ */
+static void emit_pack_store_4ub( struct x86_program *p,
+                                 struct x86_reg dest,
+                                 struct x86_reg temp )
+{
+   struct x86_function *fn = &p->func;
+
+   /* Scale by 255.0
+    */
+   sse_mulps(fn, temp, p->chan0);
+
+   if (!p->have_sse2) {
+      struct x86_reg lo = x86_make_reg(file_MMX, 0);
+      struct x86_reg hi = x86_make_reg(file_MMX, 1);
+
+      /* Convert the two halves separately, then pack dwords to words
+       * to bytes.
+       */
+      sse_cvtps2pi(fn, lo, temp);
+      sse_movhlps(fn, temp, temp);
+      sse_cvtps2pi(fn, hi, temp);
+      mmx_packssdw(fn, lo, hi);
+      mmx_packuswb(fn, lo, lo);
+      mmx_movd(fn, dest, lo);
+      return;
+   }
+
+   sse2_cvtps2dq(fn, temp, temp);
+   sse2_packssdw(fn, temp, temp);
+   sse2_packuswb(fn, temp, temp);
+   sse_movss(fn, dest, temp);
+}
+
+/* Byte distance from 'a' to 'b' (positive when b lies after a). */
+static int get_offset( const void *a, const void *b )
+{
+   const char *from = (const char *) a;
+   const char *to = (const char *) b;
+
+   return to - from;
+}
+
+/* Emit code that loads attribute a's current inputptr into srcREG,
+ * addressing it relative to the vertex-fetch pointer held in vfREG.
+ *
+ * Not much happens here.  Eventually use this function to try and
+ * avoid saving/reloading the source pointers each vertex (if some of
+ * them can fit in registers).
+ */
+static void get_src_ptr( struct x86_program *p,
+                         struct x86_reg srcREG,
+                         struct x86_reg vfREG,
+                         struct draw_vf_attr *a )
+{
+   const int inputptr_offset = get_offset(p->vf, &a->inputptr);
+
+   /* Load current a[j].inputptr
+    */
+   x86_mov(&p->func, srcREG, x86_make_disp(vfREG, inputptr_offset));
+}
+
+/* Emit code that advances the source pointer in srcREG by attribute
+ * a's inputstride and writes it back to a->inputptr.  Nothing is
+ * emitted when the stride is zero.
+ */
+static void update_src_ptr( struct x86_program *p,
+                            struct x86_reg srcREG,
+                            struct x86_reg vfREG,
+                            struct draw_vf_attr *a )
+{
+   struct x86_reg saved_ptr;
+
+   if (!a->inputstride)
+      return;
+
+   saved_ptr = x86_make_disp(vfREG, get_offset(p->vf, &a->inputptr));
+
+   /* add a[j].inputstride (hardcoded value - could just as easily
+    * pull the stride value from memory each time).
+    */
+   x86_lea(&p->func, srcREG, x86_make_disp(srcREG, a->inputstride));
+
+   /* save new value of a[j].inputptr
+    */
+   x86_mov(&p->func, saved_ptr, srcREG);
+}
+
+
+/* Generate an x86/SSE routine that emits whole vertices for the
+ * attribute layout in p->vf, and install it as vf->emit.
+ *
+ * The generated function reads its arguments as: 1 = vf, 2 = vertex
+ * count, 3 = destination pointer.
+ *
+ * Lots of hardcoding in the register assignment:
+ *
+ * EAX -- pointer to current output vertex
+ * ECX -- pointer to current attribute
+ * EBP -- remaining vertex count
+ * ESI -- pointer to the vertex fetch struct
+ * XMM0 -- scratch register for the attribute being converted
+ *
+ * Returns TRUE on success.  Returns FALSE, leaving vf->emit untouched,
+ * when some attribute cannot be handled; instructions may already have
+ * been emitted into p->func at that point.
+ *
+ * NOTE(review): nothing in this function loads p->chan0, yet
+ * emit_pack_store_4ub() multiplies by it - confirm where the 255.0
+ * scale is meant to be set up.
+ */
+static boolean build_vertex_emit( struct x86_program *p )
+{
+   struct draw_vertex_fetch *vf = p->vf;
+   unsigned j = 0;
+
+   struct x86_reg vertexEAX = x86_make_reg(file_REG32, reg_AX);
+   struct x86_reg srcECX = x86_make_reg(file_REG32, reg_CX);
+   struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP);
+   struct x86_reg vfESI = x86_make_reg(file_REG32, reg_SI);
+   struct x86_reg temp = x86_make_reg(file_XMM, 0);
+   uint8_t *fixup, *label;
+
+   /* Push a few regs?
+    */
+   x86_push(&p->func, countEBP);
+   x86_push(&p->func, vfESI);
+
+
+   /* Get vertex count, compare to zero
+    */
+   x86_xor(&p->func, srcECX, srcECX);
+   x86_mov(&p->func, countEBP, x86_fn_arg(&p->func, 2));
+   x86_cmp(&p->func, countEBP, srcECX);
+   fixup = x86_jcc_forward(&p->func, cc_E);
+
+   /* Initialize destination register.
+    */
+   x86_mov(&p->func, vertexEAX, x86_fn_arg(&p->func, 3));
+
+   /* Move argument 1 (vf) into a reg:
+    */
+   x86_mov(&p->func, vfESI, x86_fn_arg(&p->func, 1));
+
+
+   /* always load, needed or not:
+    */
+   sse_movups(&p->func, p->identity, x86_make_disp(vfESI, get_offset(vf, &vf->identity[0])));
+
+   /* Note address for loop jump */
+   label = x86_get_label(&p->func);
+
+   /* Emit code for each of the attributes.  Currently routes
+    * everything through SSE registers, even when it might be more
+    * efficient to stick with regular old x86.  No optimization or
+    * other tricks - enough new ground to cover here just getting
+    * things working.
+    */
+   while (j < vf->attr_count) {
+      struct draw_vf_attr *a = &vf->attr[j];
+      struct x86_reg dest = x86_make_disp(vertexEAX, a->vertoffset);
+
+      /* Now, load an XMM reg from src, perhaps transform, then save.
+       * Could be shortcircuited in specific cases:
+       */
+      switch (a->format) {
+      case DRAW_EMIT_1F:
+      case DRAW_EMIT_1F_CONST:
+         get_src_ptr(p, srcECX, vfESI, a);
+         emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize);
+         emit_store(p, dest, 1, temp);
+         update_src_ptr(p, srcECX, vfESI, a);
+         break;
+      case DRAW_EMIT_2F:
+      case DRAW_EMIT_2F_CONST:
+         get_src_ptr(p, srcECX, vfESI, a);
+         emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize);
+         emit_store(p, dest, 2, temp);
+         update_src_ptr(p, srcECX, vfESI, a);
+         break;
+      case DRAW_EMIT_3F:
+      case DRAW_EMIT_3F_CONST:
+         /* Potentially the worst case - hardcode 2+1 copying:
+          */
+         if (0) {
+            /* Straightforward 3-wide path, currently disabled. */
+            get_src_ptr(p, srcECX, vfESI, a);
+            emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize);
+            emit_store(p, dest, 3, temp);
+            update_src_ptr(p, srcECX, vfESI, a);
+         }
+         else {
+            get_src_ptr(p, srcECX, vfESI, a);
+            emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize);
+            emit_store(p, dest, 2, temp);
+            if (a->inputsize > 2) {
+               emit_load(p, temp, 1, x86_make_disp(srcECX, 8), 1);
+               emit_store(p, x86_make_disp(dest,8), 1, temp);
+            }
+            else {
+               /* No third source component: store the low word of
+                * the identity register instead.
+                */
+               sse_movss(&p->func, x86_make_disp(dest,8), get_identity(p));
+            }
+            update_src_ptr(p, srcECX, vfESI, a);
+         }
+         break;
+      case DRAW_EMIT_4F:
+      case DRAW_EMIT_4F_CONST:
+         get_src_ptr(p, srcECX, vfESI, a);
+         emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
+         emit_store(p, dest, 4, temp);
+         update_src_ptr(p, srcECX, vfESI, a);
+         break;
+      case DRAW_EMIT_3F_XYW:
+         get_src_ptr(p, srcECX, vfESI, a);
+         emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
+         sse_shufps(&p->func, temp, temp, SHUF(X,Y,W,Z));
+         emit_store(p, dest, 3, temp);
+         update_src_ptr(p, srcECX, vfESI, a);
+         break;
+
+      case DRAW_EMIT_1UB_1F:
+         /* Test for PAD3 + 1UB:
+          */
+         if (j > 0 &&
+             a[-1].vertoffset + a[-1].vertattrsize <= a->vertoffset - 3)
+         {
+            get_src_ptr(p, srcECX, vfESI, a);
+            emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize);
+            sse_shufps(&p->func, temp, temp, SHUF(X,X,X,X));
+            emit_pack_store_4ub(p, x86_make_disp(dest, -3), temp); /* overkill! */
+            update_src_ptr(p, srcECX, vfESI, a);
+         }
+         else {
+            /* a[-1] only exists when this is not the first attribute;
+             * reading it for j == 0 would index before vf->attr[0].
+             */
+            if (j > 0) {
+               debug_printf("Can't emit 1ub %x %x %d\n",
+                            a->vertoffset, a[-1].vertoffset, a[-1].vertattrsize );
+            }
+            else {
+               debug_printf("Can't emit 1ub %x (first attribute)\n",
+                            a->vertoffset );
+            }
+            return FALSE;
+         }
+         break;
+      case DRAW_EMIT_3UB_3F_RGB:
+      case DRAW_EMIT_3UB_3F_BGR:
+         /* Test for 3UB + PAD1:
+          */
+         if (j == vf->attr_count - 1 ||
+             a[1].vertoffset >= a->vertoffset + 4) {
+            get_src_ptr(p, srcECX, vfESI, a);
+            emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize);
+            if (a->format == DRAW_EMIT_3UB_3F_BGR)
+               sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W));
+            emit_pack_store_4ub(p, dest, temp);
+            update_src_ptr(p, srcECX, vfESI, a);
+         }
+         /* Test for 3UB + 1UB:
+          */
+         else if (j < vf->attr_count - 1 &&
+                  a[1].format == DRAW_EMIT_1UB_1F &&
+                  a[1].vertoffset == a->vertoffset + 3) {
+            get_src_ptr(p, srcECX, vfESI, a);
+            emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize);
+            update_src_ptr(p, srcECX, vfESI, a);
+
+            /* Make room for incoming value:
+             */
+            sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z));
+
+            get_src_ptr(p, srcECX, vfESI, &a[1]);
+            emit_load(p, temp, 1, x86_deref(srcECX), a[1].inputsize);
+            update_src_ptr(p, srcECX, vfESI, &a[1]);
+
+            /* Rearrange and possibly do BGR conversion:
+             */
+            if (a->format == DRAW_EMIT_3UB_3F_BGR)
+               sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X));
+            else
+               sse_shufps(&p->func, temp, temp, SHUF(Y,Z,W,X));
+
+            emit_pack_store_4ub(p, dest, temp);
+            j++; /* NOTE: two attrs consumed */
+         }
+         else {
+            debug_printf("Can't emit 3ub\n");
+         }
+         /* The 3UB paths above are not enabled yet: every 3UB layout
+          * is reported as a codegen failure for now.
+          */
+         return FALSE; /* add this later */
+         break;
+
+      case DRAW_EMIT_4UB_4F_RGBA:
+         get_src_ptr(p, srcECX, vfESI, a);
+         emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
+         emit_pack_store_4ub(p, dest, temp);
+         update_src_ptr(p, srcECX, vfESI, a);
+         break;
+      case DRAW_EMIT_4UB_4F_BGRA:
+         get_src_ptr(p, srcECX, vfESI, a);
+         emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
+         sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W));
+         emit_pack_store_4ub(p, dest, temp);
+         update_src_ptr(p, srcECX, vfESI, a);
+         break;
+      case DRAW_EMIT_4UB_4F_ARGB:
+         get_src_ptr(p, srcECX, vfESI, a);
+         emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
+         sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z));
+         emit_pack_store_4ub(p, dest, temp);
+         update_src_ptr(p, srcECX, vfESI, a);
+         break;
+      case DRAW_EMIT_4UB_4F_ABGR:
+         get_src_ptr(p, srcECX, vfESI, a);
+         emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize);
+         sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X));
+         emit_pack_store_4ub(p, dest, temp);
+         update_src_ptr(p, srcECX, vfESI, a);
+         break;
+      default:
+         debug_printf("unknown a[%d].format %d\n", j, a->format);
+         return FALSE; /* catch any new opcodes */
+      }
+
+      /* Increment j by at least 1 - may have been incremented above also:
+       */
+      j++;
+   }
+
+   /* Next vertex:
+    */
+   x86_lea(&p->func, vertexEAX, x86_make_disp(vertexEAX, vf->vertex_stride));
+
+   /* decr count, loop if not zero
+    */
+   x86_dec(&p->func, countEBP);
+   x86_test(&p->func, countEBP, countEBP);
+   x86_jcc(&p->func, cc_NZ, label);
+
+   /* Exit mmx state?
+    */
+   if (p->func.need_emms)
+      mmx_emms(&p->func);
+
+   /* Land forward jump here (taken when the vertex count is zero):
+    */
+   x86_fixup_fwd_jump(&p->func, fixup);
+
+   /* Pop regs and return
+    */
+   x86_pop(&p->func, x86_get_base_reg(vfESI));
+   x86_pop(&p->func, countEBP);
+   x86_ret(&p->func);
+
+   vf->emit = (draw_vf_emit_func)x86_get_func(&p->func);
+   return TRUE;
+}
+
+
+
+/* Try to generate an SSE emit function for vf and record the outcome
+ * with draw_vf_register_fastpath().  Does nothing beyond clearing
+ * vf->codegen_emit when the CPU lacks SSE.
+ *
+ * NOTE(review): build_vertex_emit() installs its result in vf->emit,
+ * not vf->codegen_emit - confirm the field cleared below is the
+ * intended one.
+ */
+void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf )
+{
+   struct x86_program prog;
+   boolean built;
+
+   if (!cpu_has_xmm) {
+      vf->codegen_emit = NULL;
+      return;
+   }
+
+   memset(&prog, 0, sizeof(prog));
+
+   prog.vf = vf;
+   prog.inputs_safe = 0; /* for now */
+   prog.outputs_safe = 1; /* for now */
+   prog.have_sse2 = cpu_has_xmm2;
+   prog.identity = x86_make_reg(file_XMM, 6);
+   prog.chan0 = x86_make_reg(file_XMM, 7);
+
+   x86_init_func(&prog.func);
+
+   built = build_vertex_emit(&prog);
+
+   /* On failure, note it so that we don't keep trying to codegen an
+    * impossible state:
+    */
+   draw_vf_register_fastpath( vf, built );
+
+   if (!built)
+      x86_release_func(&prog.func);
+}
+
+#else
+
+void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf )
+{
+ /* Dummy version for when USE_SSE_ASM not defined: generates no code
+ * and leaves vf untouched. */
+}
+
+#endif
diff --git a/src/mesa/pipe/failover/fo_context.c b/src/mesa/pipe/failover/fo_context.c
index cf6c9fed50..7ce4a7df17 100644
--- a/src/mesa/pipe/failover/fo_context.c
+++ b/src/mesa/pipe/failover/fo_context.c
@@ -114,6 +114,8 @@ struct pipe_context *failover_create( struct pipe_context *hw,
if (failover == NULL)
return NULL;
+ failover->hw = hw;
+ failover->sw = sw;
failover->pipe.winsys = hw->winsys;
failover->pipe.destroy = failover_destroy;
failover->pipe.is_format_supported = hw->is_format_supported;
diff --git a/src/mesa/pipe/failover/fo_state.c b/src/mesa/pipe/failover/fo_state.c
index fa700b9674..0fc5568da1 100644
--- a/src/mesa/pipe/failover/fo_state.c
+++ b/src/mesa/pipe/failover/fo_state.c
@@ -54,8 +54,8 @@ failover_create_blend_state( struct pipe_context *pipe,
struct fo_state *state = malloc(sizeof(struct fo_state));
struct failover_context *failover = failover_context(pipe);
- state->sw_state = failover->sw->create_blend_state(pipe, blend);
- state->hw_state = failover->hw->create_blend_state(pipe, blend);
+ state->sw_state = failover->sw->create_blend_state(failover->sw, blend);
+ state->hw_state = failover->hw->create_blend_state(failover->hw, blend);
return state;
}
@@ -68,6 +68,7 @@ failover_bind_blend_state( struct pipe_context *pipe,
struct fo_state *state = (struct fo_state *)blend;
failover->blend = state;
failover->dirty |= FO_NEW_BLEND;
+ failover->sw->bind_blend_state( failover->sw, state->sw_state );
failover->hw->bind_blend_state( failover->hw, state->hw_state );
}
@@ -78,8 +79,8 @@ failover_delete_blend_state( struct pipe_context *pipe,
struct fo_state *state = (struct fo_state*)blend;
struct failover_context *failover = failover_context(pipe);
- failover->sw->delete_blend_state(pipe, state->sw_state);
- failover->hw->delete_blend_state(pipe, state->hw_state);
+ failover->sw->delete_blend_state(failover->sw, state->sw_state);
+ failover->hw->delete_blend_state(failover->hw, state->hw_state);
state->sw_state = 0;
state->hw_state = 0;
free(state);
@@ -93,6 +94,7 @@ failover_set_blend_color( struct pipe_context *pipe,
failover->blend_color = *blend_color;
failover->dirty |= FO_NEW_BLEND_COLOR;
+ failover->sw->set_blend_color( failover->sw, blend_color );
failover->hw->set_blend_color( failover->hw, blend_color );
}
@@ -104,6 +106,7 @@ failover_set_clip_state( struct pipe_context *pipe,
failover->clip = *clip;
failover->dirty |= FO_NEW_CLIP;
+ failover->sw->set_clip_state( failover->sw, clip );
failover->hw->set_clip_state( failover->hw, clip );
}
@@ -115,8 +118,8 @@ failover_create_depth_stencil_state(struct pipe_context *pipe,
struct fo_state *state = malloc(sizeof(struct fo_state));
struct failover_context *failover = failover_context(pipe);
- state->sw_state = failover->sw->create_depth_stencil_alpha_state(pipe, templ);
- state->hw_state = failover->hw->create_depth_stencil_alpha_state(pipe, templ);
+ state->sw_state = failover->sw->create_depth_stencil_alpha_state(failover->sw, templ);
+ state->hw_state = failover->hw->create_depth_stencil_alpha_state(failover->hw, templ);
return state;
}
@@ -129,6 +132,7 @@ failover_bind_depth_stencil_state(struct pipe_context *pipe,
struct fo_state *state = (struct fo_state *)depth_stencil;
failover->depth_stencil = state;
failover->dirty |= FO_NEW_DEPTH_STENCIL;
+ failover->sw->bind_depth_stencil_alpha_state(failover->sw, state->sw_state);
failover->hw->bind_depth_stencil_alpha_state(failover->hw, state->hw_state);
}
@@ -139,8 +143,8 @@ failover_delete_depth_stencil_state(struct pipe_context *pipe,
struct fo_state *state = (struct fo_state*)ds;
struct failover_context *failover = failover_context(pipe);
- failover->sw->delete_depth_stencil_alpha_state(pipe, state->sw_state);
- failover->hw->delete_depth_stencil_alpha_state(pipe, state->hw_state);
+ failover->sw->delete_depth_stencil_alpha_state(failover->sw, state->sw_state);
+ failover->hw->delete_depth_stencil_alpha_state(failover->hw, state->hw_state);
state->sw_state = 0;
state->hw_state = 0;
free(state);
@@ -154,6 +158,7 @@ failover_set_framebuffer_state(struct pipe_context *pipe,
failover->framebuffer = *framebuffer;
failover->dirty |= FO_NEW_FRAMEBUFFER;
+ failover->sw->set_framebuffer_state( failover->sw, framebuffer );
failover->hw->set_framebuffer_state( failover->hw, framebuffer );
}
@@ -165,8 +170,8 @@ failover_create_fs_state(struct pipe_context *pipe,
struct fo_state *state = malloc(sizeof(struct fo_state));
struct failover_context *failover = failover_context(pipe);
- state->sw_state = failover->sw->create_fs_state(pipe, templ);
- state->hw_state = failover->hw->create_fs_state(pipe, templ);
+ state->sw_state = failover->sw->create_fs_state(failover->sw, templ);
+ state->hw_state = failover->hw->create_fs_state(failover->hw, templ);
return state;
}
@@ -178,6 +183,7 @@ failover_bind_fs_state(struct pipe_context *pipe, void *fs)
struct fo_state *state = (struct fo_state*)fs;
failover->fragment_shader = state;
failover->dirty |= FO_NEW_FRAGMENT_SHADER;
+ failover->sw->bind_fs_state(failover->sw, state->sw_state);
failover->hw->bind_fs_state(failover->hw, state->hw_state);
}
@@ -188,8 +194,8 @@ failover_delete_fs_state(struct pipe_context *pipe,
struct fo_state *state = (struct fo_state*)fs;
struct failover_context *failover = failover_context(pipe);
- failover->sw->delete_fs_state(pipe, state->sw_state);
- failover->hw->delete_fs_state(pipe, state->hw_state);
+ failover->sw->delete_fs_state(failover->sw, state->sw_state);
+ failover->hw->delete_fs_state(failover->hw, state->hw_state);
state->sw_state = 0;
state->hw_state = 0;
free(state);
@@ -202,8 +208,8 @@ failover_create_vs_state(struct pipe_context *pipe,
struct fo_state *state = malloc(sizeof(struct fo_state));
struct failover_context *failover = failover_context(pipe);
- state->sw_state = failover->sw->create_vs_state(pipe, templ);
- state->hw_state = failover->hw->create_vs_state(pipe, templ);
+ state->sw_state = failover->sw->create_vs_state(failover->sw, templ);
+ state->hw_state = failover->hw->create_vs_state(failover->hw, templ);
return state;
}
@@ -217,6 +223,7 @@ failover_bind_vs_state(struct pipe_context *pipe,
struct fo_state *state = (struct fo_state*)vs;
failover->vertex_shader = state;
failover->dirty |= FO_NEW_VERTEX_SHADER;
+ failover->sw->bind_vs_state(failover->sw, state->sw_state);
failover->hw->bind_vs_state(failover->hw, state->hw_state);
}
@@ -227,8 +234,8 @@ failover_delete_vs_state(struct pipe_context *pipe,
struct fo_state *state = (struct fo_state*)vs;
struct failover_context *failover = failover_context(pipe);
- failover->sw->delete_vs_state(pipe, state->sw_state);
- failover->hw->delete_vs_state(pipe, state->hw_state);
+ failover->sw->delete_vs_state(failover->sw, state->sw_state);
+ failover->hw->delete_vs_state(failover->hw, state->hw_state);
state->sw_state = 0;
state->hw_state = 0;
free(state);
@@ -242,6 +249,7 @@ failover_set_polygon_stipple( struct pipe_context *pipe,
failover->poly_stipple = *stipple;
failover->dirty |= FO_NEW_STIPPLE;
+ failover->sw->set_polygon_stipple( failover->sw, stipple );
failover->hw->set_polygon_stipple( failover->hw, stipple );
}
@@ -253,8 +261,8 @@ failover_create_rasterizer_state(struct pipe_context *pipe,
struct fo_state *state = malloc(sizeof(struct fo_state));
struct failover_context *failover = failover_context(pipe);
- state->sw_state = failover->sw->create_rasterizer_state(pipe, templ);
- state->hw_state = failover->hw->create_rasterizer_state(pipe, templ);
+ state->sw_state = failover->sw->create_rasterizer_state(failover->sw, templ);
+ state->hw_state = failover->hw->create_rasterizer_state(failover->hw, templ);
return state;
}
@@ -268,6 +276,7 @@ failover_bind_rasterizer_state(struct pipe_context *pipe,
struct fo_state *state = (struct fo_state*)raster;
failover->rasterizer = state;
failover->dirty |= FO_NEW_RASTERIZER;
+ failover->sw->bind_rasterizer_state(failover->sw, state->sw_state);
failover->hw->bind_rasterizer_state(failover->hw, state->hw_state);
}
@@ -278,8 +287,8 @@ failover_delete_rasterizer_state(struct pipe_context *pipe,
struct fo_state *state = (struct fo_state*)raster;
struct failover_context *failover = failover_context(pipe);
- failover->sw->delete_rasterizer_state(pipe, state->sw_state);
- failover->hw->delete_rasterizer_state(pipe, state->hw_state);
+ failover->sw->delete_rasterizer_state(failover->sw, state->sw_state);
+ failover->hw->delete_rasterizer_state(failover->hw, state->hw_state);
state->sw_state = 0;
state->hw_state = 0;
free(state);
@@ -294,6 +303,7 @@ failover_set_scissor_state( struct pipe_context *pipe,
failover->scissor = *scissor;
failover->dirty |= FO_NEW_SCISSOR;
+ failover->sw->set_scissor_state( failover->sw, scissor );
failover->hw->set_scissor_state( failover->hw, scissor );
}
@@ -305,8 +315,8 @@ failover_create_sampler_state(struct pipe_context *pipe,
struct fo_state *state = malloc(sizeof(struct fo_state));
struct failover_context *failover = failover_context(pipe);
- state->sw_state = failover->sw->create_sampler_state(pipe, templ);
- state->hw_state = failover->hw->create_sampler_state(pipe, templ);
+ state->sw_state = failover->sw->create_sampler_state(failover->sw, templ);
+ state->hw_state = failover->hw->create_sampler_state(failover->hw, templ);
return state;
}
@@ -320,6 +330,8 @@ failover_bind_sampler_state(struct pipe_context *pipe,
failover->sampler[unit] = state;
failover->dirty |= FO_NEW_SAMPLER;
failover->dirty_sampler |= (1<<unit);
+ failover->sw->bind_sampler_state(failover->sw, unit,
+ state->sw_state);
failover->hw->bind_sampler_state(failover->hw, unit,
state->hw_state);
}
@@ -330,8 +342,8 @@ failover_delete_sampler_state(struct pipe_context *pipe, void *sampler)
struct fo_state *state = (struct fo_state*)sampler;
struct failover_context *failover = failover_context(pipe);
- failover->sw->delete_sampler_state(pipe, state->sw_state);
- failover->hw->delete_sampler_state(pipe, state->hw_state);
+ failover->sw->delete_sampler_state(failover->sw, state->sw_state);
+ failover->hw->delete_sampler_state(failover->hw, state->hw_state);
state->sw_state = 0;
state->hw_state = 0;
free(state);
@@ -348,6 +360,7 @@ failover_set_sampler_texture(struct pipe_context *pipe,
failover->texture[unit] = texture;
failover->dirty |= FO_NEW_TEXTURE;
failover->dirty_texture |= (1<<unit);
+ failover->sw->set_sampler_texture( failover->sw, unit, texture );
failover->hw->set_sampler_texture( failover->hw, unit, texture );
}
@@ -360,6 +373,7 @@ failover_set_viewport_state( struct pipe_context *pipe,
failover->viewport = *viewport;
failover->dirty |= FO_NEW_VIEWPORT;
+ failover->sw->set_viewport_state( failover->sw, viewport );
failover->hw->set_viewport_state( failover->hw, viewport );
}
@@ -374,6 +388,7 @@ failover_set_vertex_buffer(struct pipe_context *pipe,
failover->vertex_buffer[unit] = *vertex_buffer;
failover->dirty |= FO_NEW_VERTEX_BUFFER;
failover->dirty_vertex_buffer |= (1<<unit);
+ failover->sw->set_vertex_buffer( failover->sw, unit, vertex_buffer );
failover->hw->set_vertex_buffer( failover->hw, unit, vertex_buffer );
}
@@ -388,9 +403,24 @@ failover_set_vertex_element(struct pipe_context *pipe,
failover->vertex_element[unit] = *vertex_element;
failover->dirty |= FO_NEW_VERTEX_ELEMENT;
failover->dirty_vertex_element |= (1<<unit);
+ failover->sw->set_vertex_element( failover->sw, unit, vertex_element );
failover->hw->set_vertex_element( failover->hw, unit, vertex_element );
}
+/* Forward a constant buffer binding to the software context and then
+ * to the hardware context.  Only buffer index 0 is accepted.
+ */
+void
+failover_set_constant_buffer(struct pipe_context *pipe,
+                             uint shader, uint index,
+                             const struct pipe_constant_buffer *buf)
+{
+   struct failover_context *fo = failover_context(pipe);
+
+   assert(shader < PIPE_SHADER_TYPES);
+   assert(index == 0);
+
+   fo->sw->set_constant_buffer(fo->sw, shader, index, buf);
+   fo->hw->set_constant_buffer(fo->hw, shader, index, buf);
+}
+
void
failover_init_state_functions( struct failover_context *failover )
@@ -423,4 +453,5 @@ failover_init_state_functions( struct failover_context *failover )
failover->pipe.set_viewport_state = failover_set_viewport_state;
failover->pipe.set_vertex_buffer = failover_set_vertex_buffer;
failover->pipe.set_vertex_element = failover_set_vertex_element;
+ failover->pipe.set_constant_buffer = failover_set_constant_buffer;
}
diff --git a/src/mesa/pipe/i915simple/SConscript b/src/mesa/pipe/i915simple/SConscript
new file mode 100644
index 0000000000..f5fb96b995
--- /dev/null
+++ b/src/mesa/pipe/i915simple/SConscript
@@ -0,0 +1,29 @@
+Import('*')
+
+env = env.Clone()  # work on a copy of the imported environment
+
+i915simple = env.ConvenienceLibrary(  # all i915simple driver sources go into one library
+ target = 'i915simple',
+ source = [
+ 'i915_blit.c',
+ 'i915_clear.c',
+ 'i915_context.c',
+ 'i915_debug.c',
+ 'i915_debug_fp.c',
+ 'i915_flush.c',
+ 'i915_fpc_emit.c',
+ 'i915_fpc_translate.c',
+ 'i915_prim_emit.c',
+ 'i915_prim_vbuf.c',
+ 'i915_state.c',
+ 'i915_state_derived.c',
+ 'i915_state_dynamic.c',
+ 'i915_state_emit.c',
+ 'i915_state_immediate.c',
+ 'i915_state_sampler.c',
+ 'i915_strings.c',
+ 'i915_surface.c',
+ 'i915_texture.c',
+ ])
+
+Export('i915simple')  # make the library node available to other SConscripts
diff --git a/src/mesa/pipe/i915simple/i915_fpc_translate.c b/src/mesa/pipe/i915simple/i915_fpc_translate.c
index 0185512aeb..868f0c7e04 100644
--- a/src/mesa/pipe/i915simple/i915_fpc_translate.c
+++ b/src/mesa/pipe/i915simple/i915_fpc_translate.c
@@ -100,7 +100,7 @@ negate(int reg, int x, int y, int z, int w)
static void
i915_use_passthrough_shader(struct i915_context *i915)
{
- fprintf(stderr, "**** Using i915 pass-through fragment shader\n");
+ debug_printf("**** Using i915 pass-through fragment shader\n");
i915->current.program = (uint *) MALLOC(sizeof(passthrough));
if (i915->current.program) {
@@ -119,12 +119,12 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
va_list args;
char buffer[1024];
- fprintf(stderr, "i915_program_error: ");
+ debug_printf("i915_program_error: ");
va_start( args, msg );
vsprintf( buffer, msg, args );
va_end( args );
- fprintf(stderr, buffer);
- fprintf(stderr, "\n");
+ debug_printf(buffer);
+ debug_printf("\n");
p->error = 1;
}
@@ -169,7 +169,7 @@ src_vector(struct i915_fp_compile *p,
switch (sem_name) {
case TGSI_SEMANTIC_POSITION:
- fprintf(stderr, "SKIP SEM POS\n");
+ debug_printf("SKIP SEM POS\n");
/*
assert(p->wpos_tex != -1);
src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL);
@@ -913,7 +913,7 @@ i915_translate_instructions(struct i915_fp_compile *p,
ind = parse.FullToken.FullDeclaration.u.DeclarationRange.First;
sem = parse.FullToken.FullDeclaration.Semantic.SemanticName;
semi = parse.FullToken.FullDeclaration.Semantic.SemanticIndex;
- /*printf("FS Input DECL [%u] sem %u\n", ind, sem);*/
+ /*debug_printf("FS Input DECL [%u] sem %u\n", ind, sem);*/
p->input_semantic_name[ind] = sem;
p->input_semantic_index[ind] = semi;
}
@@ -924,7 +924,7 @@ i915_translate_instructions(struct i915_fp_compile *p,
ind = parse.FullToken.FullDeclaration.u.DeclarationRange.First;
sem = parse.FullToken.FullDeclaration.Semantic.SemanticName;
semi = parse.FullToken.FullDeclaration.Semantic.SemanticIndex;
- /*printf("FS Output DECL [%u] sem %u\n", ind, sem);*/
+ /*debug_printf("FS Output DECL [%u] sem %u\n", ind, sem);*/
p->output_semantic_name[ind] = sem;
p->output_semantic_index[ind] = semi;
}
diff --git a/src/mesa/pipe/i915simple/i915_prim_vbuf.c b/src/mesa/pipe/i915simple/i915_prim_vbuf.c
index 39154b2488..e069773fd4 100644
--- a/src/mesa/pipe/i915simple/i915_prim_vbuf.c
+++ b/src/mesa/pipe/i915simple/i915_prim_vbuf.c
@@ -38,9 +38,8 @@
*/
-#include <assert.h>
-
#include "pipe/draw/draw_vbuf.h"
+#include "pipe/p_debug.h"
#include "pipe/p_util.h"
#include "pipe/p_inlines.h"
#include "pipe/p_winsys.h"
diff --git a/src/mesa/pipe/i915simple/i915_state_derived.c b/src/mesa/pipe/i915simple/i915_state_derived.c
index 62741e30f8..653983e4a9 100644
--- a/src/mesa/pipe/i915simple/i915_state_derived.c
+++ b/src/mesa/pipe/i915simple/i915_state_derived.c
@@ -87,7 +87,7 @@ static void calculate_vertex_layout( struct i915_context *i915 )
}
break;
case TGSI_SEMANTIC_FOG:
- fprintf(stderr, "i915 fogcoord not implemented yet\n");
+ debug_printf("i915 fogcoord not implemented yet\n");
draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src++);
break;
default:
diff --git a/src/mesa/pipe/i915simple/i915_state_emit.c b/src/mesa/pipe/i915simple/i915_state_emit.c
index 657f523893..3339287f49 100644
--- a/src/mesa/pipe/i915simple/i915_state_emit.c
+++ b/src/mesa/pipe/i915simple/i915_state_emit.c
@@ -107,7 +107,7 @@ i915_emit_hardware_state(struct i915_context *i915 )
) * 3/2; /* plus 50% margin */
#if 0
- fprintf (stderr, "i915_emit_hardware_state: %d dwords, %d relocs\n", dwords, relocs);
+ debug_printf("i915_emit_hardware_state: %d dwords, %d relocs\n", dwords, relocs);
#endif
if(!BEGIN_BATCH(dwords, relocs)) {
diff --git a/src/mesa/pipe/i915simple/i915_state_immediate.c b/src/mesa/pipe/i915simple/i915_state_immediate.c
index 752d25f233..07031fc6c5 100644
--- a/src/mesa/pipe/i915simple/i915_state_immediate.c
+++ b/src/mesa/pipe/i915simple/i915_state_immediate.c
@@ -97,7 +97,7 @@ static void upload_S2S4(struct i915_context *i915)
LIS2 = i915->current.vertex_info.hwfmt[1];
LIS4 = i915->current.vertex_info.hwfmt[0];
/*
- printf("LIS2: 0x%x LIS4: 0x%x\n", LIS2, LIS4);
+ debug_printf("LIS2: 0x%x LIS4: 0x%x\n", LIS2, LIS4);
*/
assert(LIS4); /* should never be zero? */
}
diff --git a/src/mesa/pipe/i915simple/i915_state_sampler.c b/src/mesa/pipe/i915simple/i915_state_sampler.c
index 59408b6ba0..0dbbc5241d 100644
--- a/src/mesa/pipe/i915simple/i915_state_sampler.c
+++ b/src/mesa/pipe/i915simple/i915_state_sampler.c
@@ -169,7 +169,7 @@ translate_texture_format(enum pipe_format pipeFormat)
case PIPE_FORMAT_S8Z24_UNORM:
return (MAPSURF_32BIT | MT_32BIT_xL824);
default:
- fprintf(stderr, "i915: translate_texture_format() bad image format %x\n",
+ debug_printf("i915: translate_texture_format() bad image format %x\n",
pipeFormat);
assert(0);
return 0;
diff --git a/src/mesa/pipe/i915simple/i915_texture.c b/src/mesa/pipe/i915simple/i915_texture.c
index 61944fe7d9..6faeab134a 100644
--- a/src/mesa/pipe/i915simple/i915_texture.c
+++ b/src/mesa/pipe/i915simple/i915_texture.c
@@ -477,17 +477,17 @@ i945_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex)
return TRUE;
}
-void
-i915_texture_create(struct pipe_context *pipe, struct pipe_texture **pt)
+
+struct pipe_texture *
+i915_texture_create(struct pipe_context *pipe,
+ const struct pipe_texture *templat)
{
- struct i915_texture *tex = REALLOC(*pt, sizeof(struct pipe_texture),
- sizeof(struct i915_texture));
+ struct i915_texture *tex = CALLOC_STRUCT(i915_texture);
if (tex) {
struct i915_context *i915 = i915_context(pipe);
- memset(&tex->base + 1, 0,
- sizeof(struct i915_texture) - sizeof(struct pipe_texture));
+ tex->base = *templat;
if (i915->flags.is_i945 ? i945_miptree_layout(pipe, tex) :
i915_miptree_layout(pipe, tex))
@@ -498,13 +498,14 @@ i915_texture_create(struct pipe_context *pipe, struct pipe_texture **pt)
if (!tex->buffer) {
FREE(tex);
- tex = NULL;
+ return NULL;
}
}
- *pt = &tex->base;
+ return &tex->base;
}
+
void
i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt)
{
diff --git a/src/mesa/pipe/i915simple/i915_texture.h b/src/mesa/pipe/i915simple/i915_texture.h
index 84a0502e81..330d111dc7 100644
--- a/src/mesa/pipe/i915simple/i915_texture.h
+++ b/src/mesa/pipe/i915simple/i915_texture.h
@@ -6,8 +6,9 @@ struct pipe_context;
struct pipe_texture;
-extern void
-i915_texture_create(struct pipe_context *pipe, struct pipe_texture **pt);
+struct pipe_texture *
+i915_texture_create(struct pipe_context *pipe,
+ const struct pipe_texture *templat);
extern void
i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt);
diff --git a/src/mesa/pipe/i965simple/SConscript b/src/mesa/pipe/i965simple/SConscript
new file mode 100644
index 0000000000..74621de84c
--- /dev/null
+++ b/src/mesa/pipe/i965simple/SConscript
@@ -0,0 +1,55 @@
+Import('*')
+
+env = env.Clone()
+
+i965simple = env.ConvenienceLibrary(
+ target = 'i965simple',
+ source = [
+ 'brw_blit.c',
+ 'brw_cc.c',
+ 'brw_clip.c',
+ 'brw_clip_line.c',
+ 'brw_clip_point.c',
+ 'brw_clip_state.c',
+ 'brw_clip_tri.c',
+ 'brw_clip_util.c',
+ 'brw_context.c',
+ 'brw_curbe.c',
+ 'brw_draw.c',
+ 'brw_draw_upload.c',
+ 'brw_eu.c',
+ 'brw_eu_debug.c',
+ 'brw_eu_emit.c',
+ 'brw_eu_util.c',
+ 'brw_flush.c',
+ 'brw_gs.c',
+ 'brw_gs_emit.c',
+ 'brw_gs_state.c',
+ 'brw_misc_state.c',
+ 'brw_sf.c',
+ 'brw_sf_emit.c',
+ 'brw_sf_state.c',
+ 'brw_shader_info.c',
+ 'brw_state.c',
+ 'brw_state_batch.c',
+ 'brw_state_cache.c',
+ 'brw_state_pool.c',
+ 'brw_state_upload.c',
+ 'brw_strings.c',
+ 'brw_surface.c',
+ 'brw_tex_layout.c',
+ 'brw_urb.c',
+ 'brw_util.c',
+ 'brw_vs.c',
+ 'brw_vs_emit.c',
+ 'brw_vs_state.c',
+ 'brw_wm.c',
+ 'brw_wm_decl.c',
+ 'brw_wm_glsl.c',
+ 'brw_wm_iz.c',
+ 'brw_wm_sampler_state.c',
+ 'brw_wm_state.c',
+ 'brw_wm_surface_state.c',
+ ])
+
+Export('i965simple')
diff --git a/src/mesa/pipe/i965simple/brw_cc.c b/src/mesa/pipe/i965simple/brw_cc.c
index dcee731895..337e4f95f6 100644
--- a/src/mesa/pipe/i965simple/brw_cc.c
+++ b/src/mesa/pipe/i965simple/brw_cc.c
@@ -58,7 +58,7 @@ static int brw_translate_compare_func(int func)
return BRW_COMPAREFUNCTION_ALWAYS;
}
- fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func);
+ debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func);
return BRW_COMPAREFUNCTION_ALWAYS;
}
diff --git a/src/mesa/pipe/i965simple/brw_curbe.c b/src/mesa/pipe/i965simple/brw_curbe.c
index 2733eb4e75..52bbd525c1 100644
--- a/src/mesa/pipe/i965simple/brw_curbe.c
+++ b/src/mesa/pipe/i965simple/brw_curbe.c
@@ -273,10 +273,10 @@ static void upload_constant_buffer(struct brw_context *brw)
if (1) {
for (i = 0; i < sz; i+=4)
- _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
+ debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
- _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
+ debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
brw->curbe.last_buf, buf,
bufsz, brw->curbe.last_bufsz,
brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
@@ -299,7 +299,7 @@ static void upload_constant_buffer(struct brw_context *brw)
bufsz,
1 << 6,
&brw->curbe.gs_offset)) {
- _mesa_printf("out of GS memory for curbe\n");
+ debug_printf("out of GS memory for curbe\n");
assert(0);
return;
}
diff --git a/src/mesa/pipe/i965simple/brw_eu_debug.c b/src/mesa/pipe/i965simple/brw_eu_debug.c
index be692f6502..4a94ddefa6 100644
--- a/src/mesa/pipe/i965simple/brw_eu_debug.c
+++ b/src/mesa/pipe/i965simple/brw_eu_debug.c
@@ -30,6 +30,8 @@
*/
+#include "pipe/p_debug.h"
+
#include "brw_eu.h"
void brw_print_reg( struct brw_reg hwreg )
@@ -52,7 +54,7 @@ void brw_print_reg( struct brw_reg hwreg )
"f"
};
- _mesa_printf("%s%s",
+ debug_printf("%s%s",
hwreg.abs ? "abs/" : "",
hwreg.negate ? "-" : "");
@@ -63,17 +65,17 @@ void brw_print_reg( struct brw_reg hwreg )
hwreg.width == BRW_WIDTH_8 &&
hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
hwreg.type == BRW_REGISTER_TYPE_F) {
- _mesa_printf("vec%d", hwreg.nr);
+ debug_printf("vec%d", hwreg.nr);
}
else if (hwreg.file == BRW_GENERAL_REGISTER_FILE &&
hwreg.vstride == BRW_VERTICAL_STRIDE_0 &&
hwreg.width == BRW_WIDTH_1 &&
hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 &&
hwreg.type == BRW_REGISTER_TYPE_F) {
- _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
+ debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4);
}
else {
- _mesa_printf("%s%d.%d<%d;%d,%d>:%s",
+ debug_printf("%s%d.%d<%d;%d,%d>:%s",
file[hwreg.file],
hwreg.nr,
hwreg.subnr / type_sz(hwreg.type),
diff --git a/src/mesa/pipe/i965simple/brw_eu_emit.c b/src/mesa/pipe/i965simple/brw_eu_emit.c
index 2423536dd1..400a80b6fb 100644
--- a/src/mesa/pipe/i965simple/brw_eu_emit.c
+++ b/src/mesa/pipe/i965simple/brw_eu_emit.c
@@ -953,7 +953,7 @@ void brw_SAMPLE(struct brw_compile *p,
boolean need_stall = 0;
if(writemask == 0) {
-/* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */
+/* debug_printf("%s: zero writemask??\n", __FUNCTION__); */
return;
}
@@ -985,7 +985,7 @@ void brw_SAMPLE(struct brw_compile *p,
if (newmask != writemask) {
need_stall = 1;
-/* _mesa_printf("need stall %x %x\n", newmask , writemask); */
+/* debug_printf("need stall %x %x\n", newmask , writemask); */
}
else {
struct brw_reg m1 = brw_message_reg(msg_reg_nr);
diff --git a/src/mesa/pipe/i965simple/brw_sf.c b/src/mesa/pipe/i965simple/brw_sf.c
index b89b2e4087..7c83b81c85 100644
--- a/src/mesa/pipe/i965simple/brw_sf.c
+++ b/src/mesa/pipe/i965simple/brw_sf.c
@@ -175,7 +175,7 @@ static void upload_sf_prog( struct brw_context *brw )
//int semantic = parse.FullToken.FullDeclaration.Semantic.SemanticName;
//int semantic_index = parse.FullToken.FullDeclaration.Semantic.SemanticIndex;
- fprintf(stderr, "fs input %d..%d interp mode %d\n", first, last, interp_mode);
+ debug_printf("fs input %d..%d interp mode %d\n", first, last, interp_mode);
switch (interp_mode) {
case TGSI_INTERPOLATE_CONSTANT:
@@ -213,9 +213,9 @@ static void upload_sf_prog( struct brw_context *brw )
key.linear_mask |= 1;
key.const_mask <<= 1;
- fprintf(stderr, "key.persp_mask: %x\n", key.persp_mask);
- fprintf(stderr, "key.linear_mask: %x\n", key.linear_mask);
- fprintf(stderr, "key.const_mask: %x\n", key.const_mask);
+ debug_printf("key.persp_mask: %x\n", key.persp_mask);
+ debug_printf("key.linear_mask: %x\n", key.linear_mask);
+ debug_printf("key.const_mask: %x\n", key.const_mask);
// key.do_point_sprite = brw->attribs.Point->PointSprite;
diff --git a/src/mesa/pipe/i965simple/brw_sf_emit.c b/src/mesa/pipe/i965simple/brw_sf_emit.c
index 6ff5254ff7..78d6fa5e9e 100644
--- a/src/mesa/pipe/i965simple/brw_sf_emit.c
+++ b/src/mesa/pipe/i965simple/brw_sf_emit.c
@@ -137,8 +137,8 @@ static boolean calculate_masks( struct brw_sf_compile *c,
unsigned persp_mask = c->key.persp_mask;
unsigned linear_mask = c->key.linear_mask;
- fprintf(stderr, "persp_mask: %x\n", persp_mask);
- fprintf(stderr, "linear_mask: %x\n", linear_mask);
+ debug_printf("persp_mask: %x\n", persp_mask);
+ debug_printf("linear_mask: %x\n", linear_mask);
*pc_persp = 0;
*pc_linear = 0;
@@ -162,9 +162,9 @@ static boolean calculate_masks( struct brw_sf_compile *c,
*pc_linear |= 0xf0;
}
- fprintf(stderr, "pc: %x\n", *pc);
- fprintf(stderr, "pc_persp: %x\n", *pc_persp);
- fprintf(stderr, "pc_linear: %x\n", *pc_linear);
+ debug_printf("pc: %x\n", *pc);
+ debug_printf("pc_persp: %x\n", *pc_persp);
+ debug_printf("pc_linear: %x\n", *pc_linear);
return is_last_attr;
@@ -177,7 +177,7 @@ void brw_emit_tri_setup( struct brw_sf_compile *c )
struct brw_compile *p = &c->func;
unsigned i;
- fprintf(stderr, "%s START ==============\n", __FUNCTION__);
+ debug_printf("%s START ==============\n", __FUNCTION__);
c->nr_verts = 3;
alloc_regs(c);
@@ -250,7 +250,7 @@ void brw_emit_tri_setup( struct brw_sf_compile *c )
}
}
- fprintf(stderr, "%s DONE ==============\n", __FUNCTION__);
+ debug_printf("%s DONE ==============\n", __FUNCTION__);
}
diff --git a/src/mesa/pipe/i965simple/brw_state.c b/src/mesa/pipe/i965simple/brw_state.c
index daf14ff4ff..95dfce88e4 100644
--- a/src/mesa/pipe/i965simple/brw_state.c
+++ b/src/mesa/pipe/i965simple/brw_state.c
@@ -225,7 +225,7 @@ static void brw_bind_vs_state(struct pipe_context *pipe, void *vs)
brw->attribs.VertexProgram = (struct brw_vertex_program *)vs;
brw->state.dirty.brw |= BRW_NEW_VS;
- printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n");
+ debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n");
}
static void brw_delete_vs_state(struct pipe_context *pipe, void *shader)
diff --git a/src/mesa/pipe/i965simple/brw_state_cache.c b/src/mesa/pipe/i965simple/brw_state_cache.c
index c5738733f4..b3a5124461 100644
--- a/src/mesa/pipe/i965simple/brw_state_cache.c
+++ b/src/mesa/pipe/i965simple/brw_state_cache.c
@@ -149,7 +149,7 @@ unsigned brw_upload_cache( struct brw_cache *cache,
if (!brw_pool_alloc(cache->pool, data_size, 1 << 6, &offset)) {
/* Should not be possible:
*/
- printf("brw_pool_alloc failed\n");
+ debug_printf("brw_pool_alloc failed\n");
exit(1);
}
@@ -177,7 +177,7 @@ unsigned brw_upload_cache( struct brw_cache *cache,
}
if (BRW_DEBUG & DEBUG_STATE)
- printf("upload %s: %d bytes to pool buffer %p offset %x\n",
+ debug_printf("upload %s: %d bytes to pool buffer %p offset %x\n",
cache->name,
data_size,
(void*)cache->pool->buffer,
@@ -416,7 +416,7 @@ void brw_clear_all_caches( struct brw_context *brw )
int i;
if (BRW_DEBUG & DEBUG_STATE)
- fprintf(stderr, "%s\n", __FUNCTION__);
+ debug_printf("%s\n", __FUNCTION__);
for (i = 0; i < BRW_MAX_CACHE; i++)
clear_cache(&brw->cache[i]);
diff --git a/src/mesa/pipe/i965simple/brw_state_pool.c b/src/mesa/pipe/i965simple/brw_state_pool.c
index 7c67f0ee25..f3174bfe0a 100644
--- a/src/mesa/pipe/i965simple/brw_state_pool.c
+++ b/src/mesa/pipe/i965simple/brw_state_pool.c
@@ -58,7 +58,7 @@ boolean brw_pool_alloc( struct brw_mem_pool *pool,
size = align(size, 4);
if (pool->offset + fixup + size >= pool->size) {
- printf("%s failed\n", __FUNCTION__);
+ debug_printf("%s failed\n", __FUNCTION__);
assert(0);
exit(0);
}
@@ -74,7 +74,7 @@ static
void brw_invalidate_pool( struct brw_mem_pool *pool )
{
if (BRW_DEBUG & DEBUG_STATE)
- printf("\n\n\n %s \n\n\n", __FUNCTION__);
+ debug_printf("\n\n\n %s \n\n\n", __FUNCTION__);
pool->offset = 0;
diff --git a/src/mesa/pipe/i965simple/brw_tex_layout.c b/src/mesa/pipe/i965simple/brw_tex_layout.c
index b8b6b579e2..405fd1f794 100644
--- a/src/mesa/pipe/i965simple/brw_tex_layout.c
+++ b/src/mesa/pipe/i965simple/brw_tex_layout.c
@@ -299,15 +299,14 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture
return TRUE;
}
-void
-brw_texture_create(struct pipe_context *pipe, struct pipe_texture **pt)
+
+struct pipe_texture *
+brw_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat)
{
- struct brw_texture *tex = REALLOC(*pt, sizeof(struct pipe_texture),
- sizeof(struct brw_texture));
+ struct brw_texture *tex = CALLOC_STRUCT(brw_texture);
if (tex) {
- memset(&tex->base + 1, 0,
- sizeof(struct brw_texture) - sizeof(struct pipe_texture));
+ tex->base = *templat;
if (brw_miptree_layout(pipe, tex))
tex->buffer = pipe->winsys->buffer_create(pipe->winsys, 64,
@@ -317,11 +316,11 @@ brw_texture_create(struct pipe_context *pipe, struct pipe_texture **pt)
if (!tex->buffer) {
FREE(tex);
- tex = NULL;
+ return NULL;
}
}
- *pt = &tex->base;
+ return &tex->base;
}
void
diff --git a/src/mesa/pipe/i965simple/brw_tex_layout.h b/src/mesa/pipe/i965simple/brw_tex_layout.h
index 15e275058a..cfd6b1ef3a 100644
--- a/src/mesa/pipe/i965simple/brw_tex_layout.h
+++ b/src/mesa/pipe/i965simple/brw_tex_layout.h
@@ -6,8 +6,8 @@
struct pipe_context;
struct pipe_texture;
-extern void
-brw_texture_create(struct pipe_context *pipe, struct pipe_texture **pt);
+extern struct pipe_texture *
+brw_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat);
extern void
brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt);
diff --git a/src/mesa/pipe/i965simple/brw_urb.c b/src/mesa/pipe/i965simple/brw_urb.c
index b284526aa6..101a4367b9 100644
--- a/src/mesa/pipe/i965simple/brw_urb.c
+++ b/src/mesa/pipe/i965simple/brw_urb.c
@@ -120,18 +120,18 @@ static void recalculate_urb_fence( struct brw_context *brw )
* entries and the values for minimum nr of entries
* provided above.
*/
- fprintf(stderr, "couldn't calculate URB layout!\n");
+ debug_printf("couldn't calculate URB layout!\n");
exit(1);
}
if (BRW_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS))
- printf("URB CONSTRAINED\n");
+ debug_printf("URB CONSTRAINED\n");
}
else
brw->urb.constrained = 0;
if (BRW_DEBUG & DEBUG_URB)
- printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
+ debug_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
brw->urb.vs_start,
brw->urb.gs_start,
brw->urb.clip_start,
diff --git a/src/mesa/pipe/i965simple/brw_vs_emit.c b/src/mesa/pipe/i965simple/brw_vs_emit.c
index b32c233dd2..98915ba101 100644
--- a/src/mesa/pipe/i965simple/brw_vs_emit.c
+++ b/src/mesa/pipe/i965simple/brw_vs_emit.c
@@ -1228,7 +1228,7 @@ static void process_instruction(struct brw_vs_compile *c,
case TGSI_OPCODE_ENDSUB:
break;
default:
- printf("Unsupport opcode %d in vertex shader\n", inst->Instruction.Opcode);
+ debug_printf("Unsupport opcode %d in vertex shader\n", inst->Instruction.Opcode);
break;
}
diff --git a/src/mesa/pipe/i965simple/brw_wm.c b/src/mesa/pipe/i965simple/brw_wm.c
index 0ee0fbed51..539b170744 100644
--- a/src/mesa/pipe/i965simple/brw_wm.c
+++ b/src/mesa/pipe/i965simple/brw_wm.c
@@ -57,7 +57,7 @@ static void do_wm_prog( struct brw_context *brw,
c->pixel_w = brw_null_reg();
- fprintf(stderr, "XXXXXXXX FP\n");
+ debug_printf("XXXXXXXX FP\n");
brw_wm_glsl_emit(c);
diff --git a/src/mesa/pipe/i965simple/brw_wm_glsl.c b/src/mesa/pipe/i965simple/brw_wm_glsl.c
index f4b5c13c06..d95645d108 100644
--- a/src/mesa/pipe/i965simple/brw_wm_glsl.c
+++ b/src/mesa/pipe/i965simple/brw_wm_glsl.c
@@ -982,7 +982,7 @@ static void brw_wm_emit_instruction( struct brw_wm_compile *c,
break;
default:
- _mesa_printf("unsupported IR in fragment shader %d\n",
+ debug_printf("unsupported IR in fragment shader %d\n",
inst->Instruction.Opcode);
}
#if 0
diff --git a/src/mesa/pipe/i965simple/brw_wm_sampler_state.c b/src/mesa/pipe/i965simple/brw_wm_sampler_state.c
index cfb430eb09..de42ffc5b1 100644
--- a/src/mesa/pipe/i965simple/brw_wm_sampler_state.c
+++ b/src/mesa/pipe/i965simple/brw_wm_sampler_state.c
@@ -71,7 +71,7 @@ static int intel_translate_shadow_compare_func(unsigned func)
return COMPAREFUNC_NEVER;
}
- fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func);
+ debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func);
return COMPAREFUNC_NEVER;
}
diff --git a/src/mesa/pipe/p_compiler.h b/src/mesa/pipe/p_compiler.h
index e939d9cd9b..30cd729c56 100644
--- a/src/mesa/pipe/p_compiler.h
+++ b/src/mesa/pipe/p_compiler.h
@@ -28,10 +28,9 @@
#ifndef P_COMPILER_H
#define P_COMPILER_H
-#include <assert.h>
+
#include <stdlib.h>
#include <string.h>
-#include <stdio.h>
#if defined(_WIN32) && !defined(__WIN32__)
diff --git a/src/mesa/pipe/p_context.h b/src/mesa/pipe/p_context.h
index 0dda06c53b..92a1cd70c4 100644
--- a/src/mesa/pipe/p_context.h
+++ b/src/mesa/pipe/p_context.h
@@ -199,8 +199,8 @@ struct pipe_context {
/*
* Texture functions
*/
- void (*texture_create)(struct pipe_context *pipe,
- struct pipe_texture **pt);
+ struct pipe_texture * (*texture_create)(struct pipe_context *pipe,
+ const struct pipe_texture *templat);
void (*texture_release)(struct pipe_context *pipe,
struct pipe_texture **pt);
diff --git a/src/mesa/pipe/p_debug.h b/src/mesa/pipe/p_debug.h
new file mode 100644
index 0000000000..2a11627b36
--- /dev/null
+++ b/src/mesa/pipe/p_debug.h
@@ -0,0 +1,86 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Cross-platform debugging helpers.
+ *
+ * For now it just has assert and printf replacements, but it might be extended
+ * with stack trace reports and more advanced logging in the near future.
+ *
+ * @author Jose Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+#ifndef P_DEBUG_H_
+#define P_DEBUG_H_
+
+
+#include <stdarg.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#ifdef DBG
+#ifndef DEBUG
+#define DEBUG 1
+#endif
+#else
+#ifndef NDEBUG
+#define NDEBUG 1
+#endif
+#endif
+
+
+void debug_printf(const char *format, ...);
+
+void debug_vprintf(const char *format, va_list ap);
+
+void debug_assert_fail(const char *expr, const char *file, unsigned line);
+
+
+/** Assert macro */
+#ifdef DEBUG
+#define debug_assert(expr) ((expr) ? (void)0 : debug_assert_fail(#expr, __FILE__, __LINE__))
+#else
+#define debug_assert(expr) ((void)0)
+#endif
+
+
+#ifdef assert
+#undef assert
+#endif
+#define assert(expr) debug_assert(expr)
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* P_DEBUG_H_ */
diff --git a/src/mesa/pipe/p_defines.h b/src/mesa/pipe/p_defines.h
index 85adf2d61d..0bf53ecb79 100644
--- a/src/mesa/pipe/p_defines.h
+++ b/src/mesa/pipe/p_defines.h
@@ -265,6 +265,6 @@ enum pipe_texture_target {
#define PIPE_CAP_MAX_POINT_WIDTH_AA 17
#define PIPE_CAP_MAX_TEXTURE_ANISOTROPY 18
#define PIPE_CAP_MAX_TEXTURE_LOD_BIAS 19
-
+#define PIPE_CAP_BITMAP_TEXCOORD_BIAS 20
#endif
diff --git a/src/mesa/pipe/p_format.h b/src/mesa/pipe/p_format.h
index 9f60cdbb04..c9ad324315 100644
--- a/src/mesa/pipe/p_format.h
+++ b/src/mesa/pipe/p_format.h
@@ -28,7 +28,10 @@
#ifndef PIPE_FORMAT_H
#define PIPE_FORMAT_H
+#include <stdio.h> // for sprintf
+
#include "p_compiler.h"
+#include "p_debug.h"
/**
* The PIPE_FORMAT is a 32-bit wide bitfield that encodes all the information
diff --git a/src/mesa/pipe/p_shader_tokens.h b/src/mesa/pipe/p_shader_tokens.h
index e9d1d66bda..3ce35310f6 100644
--- a/src/mesa/pipe/p_shader_tokens.h
+++ b/src/mesa/pipe/p_shader_tokens.h
@@ -626,7 +626,7 @@ struct tgsi_src_register_ext
/*
* If tgsi_src_register_ext::Type is TGSI_SRC_REGISTER_EXT_TYPE_SWZ,
- * it should be cast to tgsi_src_register_ext_extswz.
+ * it should be cast to tgsi_src_register_ext_swz.
*
* If tgsi_src_register_ext::Type is TGSI_SRC_REGISTER_EXT_TYPE_MOD,
* it should be cast to tgsi_src_register_ext_mod.
diff --git a/src/mesa/pipe/p_util.h b/src/mesa/pipe/p_util.h
index 059528787d..469920efee 100644
--- a/src/mesa/pipe/p_util.h
+++ b/src/mesa/pipe/p_util.h
@@ -29,6 +29,7 @@
#define P_UTIL_H
#include "p_compiler.h"
+#include "p_debug.h"
#include <math.h>
@@ -183,6 +184,20 @@ align_free(void *ptr)
+/**
+ * Duplicate of a block of memory
+ */
+static INLINE void *
+mem_dup(const void *src, uint size)
+{
+ void *dup = malloc(size);
+ if (dup)
+ memcpy(dup, src, size);
+ return dup;
+}
+
+
+
#define CLAMP( X, MIN, MAX ) ( (X)<(MIN) ? (MIN) : ((X)>(MAX) ? (MAX) : (X)) )
#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) )
#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) )
@@ -381,10 +396,6 @@ static INLINE int align(int value, int alignment)
return (value + alignment - 1) & ~(alignment - 1);
}
-/* Convenient...
- */
-extern void _mesa_printf(const char *str, ...);
-
/* util/p_util.c
*/
diff --git a/src/mesa/pipe/pipebuffer/pb_buffer.h b/src/mesa/pipe/pipebuffer/pb_buffer.h
index 17551b3b50..97beb5f72a 100644
--- a/src/mesa/pipe/pipebuffer/pb_buffer.h
+++ b/src/mesa/pipe/pipebuffer/pb_buffer.h
@@ -44,10 +44,8 @@
#define PB_BUFFER_H_
-#include <assert.h>
-#include <stdlib.h>
-
#include "pipe/p_compiler.h"
+#include "pipe/p_debug.h"
#include "pipe/p_state.h"
#include "pipe/p_inlines.h"
diff --git a/src/mesa/pipe/pipebuffer/pb_buffer_fenced.c b/src/mesa/pipe/pipebuffer/pb_buffer_fenced.c
index 349647fe6e..f4fc3f6d71 100644
--- a/src/mesa/pipe/pipebuffer/pb_buffer_fenced.c
+++ b/src/mesa/pipe/pipebuffer/pb_buffer_fenced.c
@@ -34,12 +34,10 @@
*/
-#include <assert.h>
-#include <stdlib.h>
-
#include "linked_list.h"
#include "p_compiler.h"
+#include "p_debug.h"
#include "p_winsys.h"
#include "p_thread.h"
#include "p_util.h"
@@ -145,7 +143,7 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
/* Do the delayed destroy:
*/
pb_reference(&fenced_buf->buffer, NULL);
- free(fenced_buf);
+ FREE(fenced_buf);
}
}
@@ -162,7 +160,7 @@ fenced_buffer_destroy(struct pb_buffer *buf)
}
else {
pb_reference(&fenced_buf->buffer, NULL);
- free(fenced_buf);
+ FREE(fenced_buf);
}
if ((fenced_list->numDelayed % fenced_list->checkDelayed) == 0)
diff --git a/src/mesa/pipe/pipebuffer/pb_buffer_fenced.h b/src/mesa/pipe/pipebuffer/pb_buffer_fenced.h
index 09082a5390..c40b9c75e1 100644
--- a/src/mesa/pipe/pipebuffer/pb_buffer_fenced.h
+++ b/src/mesa/pipe/pipebuffer/pb_buffer_fenced.h
@@ -51,7 +51,7 @@
#define PB_BUFFER_FENCED_H_
-#include <assert.h>
+#include "pipe/p_debug.h"
struct pipe_winsys;
diff --git a/src/mesa/pipe/pipebuffer/pb_buffer_malloc.c b/src/mesa/pipe/pipebuffer/pb_buffer_malloc.c
index fc83a00f36..c1b7759874 100644
--- a/src/mesa/pipe/pipebuffer/pb_buffer_malloc.c
+++ b/src/mesa/pipe/pipebuffer/pb_buffer_malloc.c
@@ -34,9 +34,7 @@
*/
-#include <assert.h>
-#include <stdlib.h>
-
+#include "pipe/p_debug.h"
#include "pipe/p_util.h"
#include "pb_buffer.h"
@@ -107,10 +105,9 @@ pb_malloc_buffer_create(size_t size,
{
struct malloc_buffer *buf;
- /* TODO: accept an alignment parameter */
/* TODO: do a single allocation */
- buf = (struct malloc_buffer *)MALLOC(sizeof(struct malloc_buffer));
+ buf = CALLOC_STRUCT(malloc_buffer);
if(!buf)
return NULL;
diff --git a/src/mesa/pipe/pipebuffer/pb_bufmgr_fenced.c b/src/mesa/pipe/pipebuffer/pb_bufmgr_fenced.c
index 3b341c64c2..c535d3276c 100644
--- a/src/mesa/pipe/pipebuffer/pb_bufmgr_fenced.c
+++ b/src/mesa/pipe/pipebuffer/pb_bufmgr_fenced.c
@@ -34,9 +34,7 @@
*/
-#include <assert.h>
-#include <stdlib.h>
-
+#include "p_debug.h"
#include "p_util.h"
#include "pb_buffer.h"
diff --git a/src/mesa/pipe/pipebuffer/pb_bufmgr_mm.c b/src/mesa/pipe/pipebuffer/pb_bufmgr_mm.c
index 2694f57bca..8b1b51c0e2 100644
--- a/src/mesa/pipe/pipebuffer/pb_bufmgr_mm.c
+++ b/src/mesa/pipe/pipebuffer/pb_bufmgr_mm.c
@@ -34,11 +34,10 @@
*/
-#include <assert.h>
-
#include "linked_list.h"
#include "p_defines.h"
+#include "p_debug.h"
#include "p_thread.h"
#include "p_util.h"
#include "pb_buffer.h"
@@ -69,28 +68,28 @@ struct mem_block
static void
mmDumpMemInfo(const struct mem_block *heap)
{
- fprintf(stderr, "Memory heap %p:\n", (void *)heap);
+ debug_printf("Memory heap %p:\n", (void *)heap);
if (heap == 0) {
- fprintf(stderr, " heap == 0\n");
+ debug_printf(" heap == 0\n");
} else {
const struct mem_block *p;
for(p = heap->next; p != heap; p = p->next) {
- fprintf(stderr, " Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size,
+ debug_printf(" Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size,
p->free ? 'F':'.',
p->reserved ? 'R':'.');
}
- fprintf(stderr, "\nFree list:\n");
+ debug_printf("\nFree list:\n");
for(p = heap->next_free; p != heap; p = p->next_free) {
- fprintf(stderr, " FREE Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size,
+ debug_printf(" FREE Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size,
p->free ? 'F':'.',
p->reserved ? 'R':'.');
}
}
- fprintf(stderr, "End of memory blocks\n");
+ debug_printf("End of memory blocks\n");
}
#endif
@@ -308,11 +307,11 @@ mmFreeMem(struct mem_block *b)
return 0;
if (b->free) {
- fprintf(stderr, "block already free\n");
+ debug_printf("block already free\n");
return -1;
}
if (b->reserved) {
- fprintf(stderr, "block is reserved\n");
+ debug_printf("block is reserved\n");
return -1;
}
@@ -367,7 +366,7 @@ struct mm_pb_manager
};
-static inline struct mm_pb_manager *
+static INLINE struct mm_pb_manager *
mm_pb_manager(struct pb_manager *mgr)
{
assert(mgr);
@@ -385,7 +384,7 @@ struct mm_buffer
};
-static inline struct mm_buffer *
+static INLINE struct mm_buffer *
mm_buffer(struct pb_buffer *buf)
{
assert(buf);
@@ -399,6 +398,8 @@ mm_buffer_destroy(struct pb_buffer *buf)
struct mm_buffer *mm_buf = mm_buffer(buf);
struct mm_pb_manager *mm = mm_buf->mgr;
+ assert(buf->base.refcount == 0);
+
_glthread_LOCK_MUTEX(mm->mutex);
mmFreeMem(mm_buf->block);
FREE(buf);
@@ -477,7 +478,7 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr,
mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0);
if(!mm_buf->block) {
- fprintf(stderr, "warning: heap full\n");
+ debug_printf("warning: heap full\n");
#if 0
mmDumpMemInfo(mm->heap);
#endif
diff --git a/src/mesa/pipe/pipebuffer/pb_bufmgr_pool.c b/src/mesa/pipe/pipebuffer/pb_bufmgr_pool.c
index 7c29954112..04477a865a 100644
--- a/src/mesa/pipe/pipebuffer/pb_bufmgr_pool.c
+++ b/src/mesa/pipe/pipebuffer/pb_bufmgr_pool.c
@@ -35,12 +35,10 @@
*/
-#include <assert.h>
-#include <stdlib.h>
-
#include "linked_list.h"
#include "p_compiler.h"
+#include "p_debug.h"
#include "p_thread.h"
#include "p_defines.h"
#include "p_util.h"
@@ -172,13 +170,13 @@ pool_bufmgr_create_buffer(struct pb_manager *mgr,
struct list_head *item;
assert(size == pool->bufSize);
- assert(desc->alignment % pool->bufAlign == 0);
+ assert(pool->bufAlign % desc->alignment == 0);
_glthread_LOCK_MUTEX(pool->mutex);
if (pool->numFree == 0) {
_glthread_UNLOCK_MUTEX(pool->mutex);
- fprintf(stderr, "warning: out of fixed size buffer objects\n");
+ debug_printf("warning: out of fixed size buffer objects\n");
return NULL;
}
@@ -186,7 +184,7 @@ pool_bufmgr_create_buffer(struct pb_manager *mgr,
if (item == &pool->free) {
_glthread_UNLOCK_MUTEX(pool->mutex);
- fprintf(stderr, "error: fixed size buffer pool corruption\n");
+ debug_printf("error: fixed size buffer pool corruption\n");
return NULL;
}
@@ -258,7 +256,7 @@ pool_bufmgr_create(struct pb_manager *provider,
if(!pool->map)
goto failure;
- pool->bufs = (struct pool_buffer *) MALLOC(numBufs * sizeof(*pool->bufs));
+ pool->bufs = (struct pool_buffer *)CALLOC(numBufs, sizeof(*pool->bufs));
if (!pool->bufs)
goto failure;
diff --git a/src/mesa/pipe/softpipe/SConscript b/src/mesa/pipe/softpipe/SConscript
new file mode 100644
index 0000000000..d581ee8d3c
--- /dev/null
+++ b/src/mesa/pipe/softpipe/SConscript
@@ -0,0 +1,42 @@
+Import('*')
+
+env = env.Clone()
+
+softpipe = env.ConvenienceLibrary(
+ target = 'softpipe',
+ source = [
+ 'sp_clear.c',
+ 'sp_context.c',
+ 'sp_draw_arrays.c',
+ 'sp_flush.c',
+ 'sp_prim_setup.c',
+ 'sp_prim_vbuf.c',
+ 'sp_quad_alpha_test.c',
+ 'sp_quad_blend.c',
+ 'sp_quad_bufloop.c',
+ 'sp_quad.c',
+ 'sp_quad_colormask.c',
+ 'sp_quad_coverage.c',
+ 'sp_quad_depth_test.c',
+ 'sp_quad_earlyz.c',
+ 'sp_quad_fs.c',
+ 'sp_quad_occlusion.c',
+ 'sp_quad_output.c',
+ 'sp_quad_stencil.c',
+ 'sp_quad_stipple.c',
+ 'sp_query.c',
+ 'sp_state_blend.c',
+ 'sp_state_clip.c',
+ 'sp_state_derived.c',
+ 'sp_state_fs.c',
+ 'sp_state_rasterizer.c',
+ 'sp_state_sampler.c',
+ 'sp_state_surface.c',
+ 'sp_state_vertex.c',
+ 'sp_surface.c',
+ 'sp_tex_sample.c',
+ 'sp_texture.c',
+ 'sp_tile_cache.c',
+ ])
+
+Export('softpipe')
\ No newline at end of file
diff --git a/src/mesa/pipe/softpipe/sp_clear.c b/src/mesa/pipe/softpipe/sp_clear.c
index 571f64b38d..8d295a30ca 100644
--- a/src/mesa/pipe/softpipe/sp_clear.c
+++ b/src/mesa/pipe/softpipe/sp_clear.c
@@ -55,7 +55,9 @@ softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps,
if (ps == sp_tile_cache_get_surface(softpipe->zsbuf_cache)) {
sp_tile_cache_clear(softpipe->zsbuf_cache, clearValue);
+#if TILE_CLEAR_OPTIMIZATION
return;
+#endif
}
for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) {
diff --git a/src/mesa/pipe/softpipe/sp_prim_setup.c b/src/mesa/pipe/softpipe/sp_prim_setup.c
index b17801d13d..7478b2336b 100644
--- a/src/mesa/pipe/softpipe/sp_prim_setup.c
+++ b/src/mesa/pipe/softpipe/sp_prim_setup.c
@@ -251,9 +251,9 @@ static void print_vertex(const struct setup_stage *setup,
const struct vertex_header *v)
{
int i;
- fprintf(stderr, "Vertex: (%p)\n", v);
+ debug_printf("Vertex: (%p)\n", v);
for (i = 0; i < setup->quad.nr_attrs; i++) {
- fprintf(stderr, " %d: %f %f %f %f\n", i,
+ debug_printf(" %d: %f %f %f %f\n", i,
v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]);
}
}
@@ -267,7 +267,7 @@ static boolean setup_sort_vertices( struct setup_stage *setup,
const struct vertex_header *v2 = prim->v[2];
#if DEBUG_VERTS
- fprintf(stderr, "Triangle:\n");
+ debug_printf("Triangle:\n");
print_vertex(setup, v0);
print_vertex(setup, v1);
print_vertex(setup, v2);
@@ -345,7 +345,7 @@ static boolean setup_sort_vertices( struct setup_stage *setup,
setup->oneoverarea = 1.0f / area;
/*
- _mesa_printf("%s one-over-area %f area %f det %f\n",
+ debug_printf("%s one-over-area %f area %f det %f\n",
__FUNCTION__, setup->oneoverarea, area, prim->det );
*/
}
@@ -419,7 +419,7 @@ static void tri_linear_coeff( struct setup_stage *setup,
dady * (setup->vmin->data[0][1] - 0.5f)));
/*
- _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n",
+ debug_printf("attr[%d].%c: %f dx:%f dy:%f\n",
slot, "xyzw"[i],
setup->coef[slot].a0[i],
setup->coef[slot].dadx[i],
@@ -453,10 +453,10 @@ static void tri_persp_coeff( struct setup_stage *setup,
float dady = b * setup->oneoverarea;
/*
- printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
- setup->vmin->data[vertSlot][i],
- setup->vmid->data[vertSlot][i],
- setup->vmax->data[vertSlot][i]
+ debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i,
+ setup->vmin->data[vertSlot][i],
+ setup->vmid->data[vertSlot][i],
+ setup->vmax->data[vertSlot][i]
);
*/
assert(i <= 3);
@@ -619,7 +619,7 @@ static void subtriangle( struct setup_stage *setup,
finish_y -= sy;
/*
- _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
+ debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y);
*/
for (y = start_y; y < finish_y; y++) {
@@ -671,7 +671,7 @@ static void setup_tri( struct draw_stage *stage,
struct setup_stage *setup = setup_stage( stage );
/*
- _mesa_printf("%s\n", __FUNCTION__ );
+ debug_printf("%s\n", __FUNCTION__ );
*/
setup_sort_vertices( setup, prim );
@@ -1124,7 +1124,7 @@ setup_point(struct draw_stage *stage, struct prim_header *prim)
int ix, iy;
/*
- printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax);
+ debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax);
*/
for (iy = iymin; iy <= iymax; iy += 2) {
uint rowMask = 0xf;
diff --git a/src/mesa/pipe/softpipe/sp_quad_fs.c b/src/mesa/pipe/softpipe/sp_quad_fs.c
index c9cc8afa0c..b5d7dfca1c 100644
--- a/src/mesa/pipe/softpipe/sp_quad_fs.c
+++ b/src/mesa/pipe/softpipe/sp_quad_fs.c
@@ -168,6 +168,11 @@ shade_quad(
sizeof( quad->outputs.color ) );
}
+ /*
+ * XXX the following code for updating quad->outputs.depth
+ * isn't really needed if we did early z testing.
+ */
+
/* store result Z */
if (qss->depthOutSlot >= 0) {
/* output[slot] is new Z */
@@ -181,6 +186,10 @@ shade_quad(
uint i;
for (i = 0; i < 4; i++) {
quad->outputs.depth[i] = machine->Inputs[0].xyzw[2].f[i];
+ /* XXX not sure the above line is always correct. The following
+ * might be better:
+ quad->outputs.depth[i] = machine->QuadPos.xyzw[2].f[i];
+ */
}
}
@@ -214,13 +223,13 @@ shade_quad_llvm(struct quad_stage *qs,
inputs[2][0][1] = fy + 1.0f;
inputs[3][0][1] = fy + 1.0f;
#if DLLVM
- printf("MASK = %d\n", quad->mask);
+ debug_printf("MASK = %d\n", quad->mask);
#endif
gallivm_prog_inputs_interpolate(llvm, inputs, quad->coef);
#if DLLVM
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < 2; ++j) {
- printf("IN(%d,%d) [%f %f %f %f]\n", i, j,
+ debug_printf("IN(%d,%d) [%f %f %f %f]\n", i, j,
inputs[i][j][0], inputs[i][j][1], inputs[i][j][2], inputs[i][j][3]);
}
}
@@ -231,7 +240,7 @@ shade_quad_llvm(struct quad_stage *qs,
softpipe->mapped_constants[PIPE_SHADER_FRAGMENT],
qss->samplers);
#if DLLVM
- printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n",
+ debug_printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n",
dests[0][0][0], dests[0][0][1], dests[0][0][2], dests[0][0][3],
dests[0][1][0], dests[0][1][1], dests[0][1][2], dests[0][1][3]);
#endif
@@ -251,7 +260,7 @@ shade_quad_llvm(struct quad_stage *qs,
}
#if DLLVM
for (int i = 0; i < QUAD_SIZE; ++i) {
- printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot,
+ debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot,
quad->outputs.color[0][i],
quad->outputs.color[1][i],
quad->outputs.color[2][i],
@@ -275,7 +284,7 @@ shade_quad_llvm(struct quad_stage *qs,
}
}
#if DLLVM
- printf("D [%f, %f, %f, %f] mask = %d\n",
+ debug_printf("D [%f, %f, %f, %f] mask = %d\n",
quad->outputs.depth[0],
quad->outputs.depth[1],
quad->outputs.depth[2],
diff --git a/src/mesa/pipe/softpipe/sp_state_blend.c b/src/mesa/pipe/softpipe/sp_state_blend.c
index 160ca5cbc0..2d40d6bd8f 100644
--- a/src/mesa/pipe/softpipe/sp_state_blend.c
+++ b/src/mesa/pipe/softpipe/sp_state_blend.c
@@ -32,13 +32,12 @@
#include "sp_context.h"
#include "sp_state.h"
+
void *
softpipe_create_blend_state(struct pipe_context *pipe,
const struct pipe_blend_state *blend)
{
- struct pipe_blend_state *state = MALLOC( sizeof(struct pipe_blend_state) );
- memcpy(state, blend, sizeof(struct pipe_blend_state));
- return state;
+ return mem_dup(blend, sizeof(*blend));
}
void softpipe_bind_blend_state( struct pipe_context *pipe,
@@ -78,10 +77,7 @@ void *
softpipe_create_depth_stencil_state(struct pipe_context *pipe,
const struct pipe_depth_stencil_alpha_state *depth_stencil)
{
- struct pipe_depth_stencil_alpha_state *state =
- MALLOC( sizeof(struct pipe_depth_stencil_alpha_state) );
- memcpy(state, depth_stencil, sizeof(struct pipe_depth_stencil_alpha_state));
- return state;
+ return mem_dup(depth_stencil, sizeof(*depth_stencil));
}
void
diff --git a/src/mesa/pipe/softpipe/sp_state_rasterizer.c b/src/mesa/pipe/softpipe/sp_state_rasterizer.c
index ce8fa4f2b8..53755099dd 100644
--- a/src/mesa/pipe/softpipe/sp_state_rasterizer.c
+++ b/src/mesa/pipe/softpipe/sp_state_rasterizer.c
@@ -35,12 +35,9 @@
void *
softpipe_create_rasterizer_state(struct pipe_context *pipe,
- const struct pipe_rasterizer_state *setup)
+ const struct pipe_rasterizer_state *rast)
{
- struct pipe_rasterizer_state *state =
- MALLOC( sizeof(struct pipe_rasterizer_state) );
- memcpy(state, setup, sizeof(struct pipe_rasterizer_state));
- return state;
+ return mem_dup(rast, sizeof(*rast));
}
void softpipe_bind_rasterizer_state(struct pipe_context *pipe,
diff --git a/src/mesa/pipe/softpipe/sp_state_sampler.c b/src/mesa/pipe/softpipe/sp_state_sampler.c
index 3842e71503..291bbc40ad 100644
--- a/src/mesa/pipe/softpipe/sp_state_sampler.c
+++ b/src/mesa/pipe/softpipe/sp_state_sampler.c
@@ -40,9 +40,7 @@ void *
softpipe_create_sampler_state(struct pipe_context *pipe,
const struct pipe_sampler_state *sampler)
{
- struct pipe_sampler_state *state = MALLOC( sizeof(struct pipe_sampler_state) );
- memcpy(state, sampler, sizeof(struct pipe_sampler_state));
- return state;
+ return mem_dup(sampler, sizeof(*sampler));
}
void
@@ -51,6 +49,8 @@ softpipe_bind_sampler_state(struct pipe_context *pipe,
{
struct softpipe_context *softpipe = softpipe_context(pipe);
+ draw_flush(softpipe->draw);
+
assert(unit < PIPE_MAX_SAMPLERS);
softpipe->sampler[unit] = (struct pipe_sampler_state *)sampler;
diff --git a/src/mesa/pipe/softpipe/sp_texture.c b/src/mesa/pipe/softpipe/sp_texture.c
index 172234843d..fd2cc3dbbb 100644
--- a/src/mesa/pipe/softpipe/sp_texture.c
+++ b/src/mesa/pipe/softpipe/sp_texture.c
@@ -79,31 +79,30 @@ softpipe_texture_layout(struct softpipe_texture * spt)
}
-void
-softpipe_texture_create(struct pipe_context *pipe, struct pipe_texture **pt)
+struct pipe_texture *
+softpipe_texture_create(struct pipe_context *pipe,
+ const struct pipe_texture *templat)
{
- struct softpipe_texture *spt = REALLOC(*pt, sizeof(struct pipe_texture),
- sizeof(struct softpipe_texture));
-
- if (spt) {
- memset(&spt->base + 1, 0,
- sizeof(struct softpipe_texture) - sizeof(struct pipe_texture));
+ struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture);
+ if (!spt)
+ return NULL;
- softpipe_texture_layout(spt);
+ spt->base = *templat;
- spt->buffer = pipe->winsys->buffer_create(pipe->winsys, 32,
- PIPE_BUFFER_USAGE_PIXEL,
- spt->buffer_size);
+ softpipe_texture_layout(spt);
- if (!spt->buffer) {
- FREE(spt);
- spt = NULL;
- }
+ spt->buffer = pipe->winsys->buffer_create(pipe->winsys, 32,
+ PIPE_BUFFER_USAGE_PIXEL,
+ spt->buffer_size);
+ if (!spt->buffer) {
+ FREE(spt);
+ return NULL;
}
- *pt = &spt->base;
+ return &spt->base;
}
+
void
softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt)
{
diff --git a/src/mesa/pipe/softpipe/sp_texture.h b/src/mesa/pipe/softpipe/sp_texture.h
index c6cf370351..fa646c0de9 100644
--- a/src/mesa/pipe/softpipe/sp_texture.h
+++ b/src/mesa/pipe/softpipe/sp_texture.h
@@ -55,8 +55,9 @@ softpipe_texture(struct pipe_texture *pt)
-extern void
-softpipe_texture_create(struct pipe_context *pipe, struct pipe_texture **pt);
+extern struct pipe_texture *
+softpipe_texture_create(struct pipe_context *pipe,
+ const struct pipe_texture *templat);
extern void
softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt);
diff --git a/src/mesa/pipe/softpipe/sp_tile_cache.c b/src/mesa/pipe/softpipe/sp_tile_cache.c
index 451e157abf..1597361b82 100644
--- a/src/mesa/pipe/softpipe/sp_tile_cache.c
+++ b/src/mesa/pipe/softpipe/sp_tile_cache.c
@@ -341,7 +341,7 @@ sp_tile_cache_flush_clear(struct pipe_context *pipe,
}
}
#if 0
- printf("num cleared: %u\n", numCleared);
+ debug_printf("num cleared: %u\n", numCleared);
#endif
}
@@ -384,7 +384,7 @@ sp_flush_tile_cache(struct softpipe_context *softpipe,
#endif
#if 0
- printf("flushed tiles in use: %d\n", inuse);
+ debug_printf("flushed tiles in use: %d\n", inuse);
#endif
}
@@ -415,8 +415,8 @@ sp_get_cached_tile(struct softpipe_context *softpipe,
/* put dirty tile back in framebuffer */
if (tc->depth_stencil) {
pipe_put_tile_raw(pipe, ps,
- tile->x, tile->y, TILE_SIZE, TILE_SIZE,
- tile->data.depth32, 0/*STRIDE*/);
+ tile->x, tile->y, TILE_SIZE, TILE_SIZE,
+ tile->data.depth32, 0/*STRIDE*/);
}
else {
pipe_put_tile_rgba(pipe, ps,
@@ -441,9 +441,9 @@ sp_get_cached_tile(struct softpipe_context *softpipe,
else {
/* get new tile data from surface */
if (tc->depth_stencil) {
- pipe_put_tile_raw(pipe, ps,
- tile->x, tile->y, TILE_SIZE, TILE_SIZE,
- tile->data.depth32, 0/*STRIDE*/);
+ pipe_get_tile_raw(pipe, ps,
+ tile->x, tile->y, TILE_SIZE, TILE_SIZE,
+ tile->data.depth32, 0/*STRIDE*/);
}
else {
pipe_get_tile_rgba(pipe, ps,
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_exec.c b/src/mesa/pipe/tgsi/exec/tgsi_exec.c
index dcc39362a9..336ae1c8b6 100644
--- a/src/mesa/pipe/tgsi/exec/tgsi_exec.c
+++ b/src/mesa/pipe/tgsi/exec/tgsi_exec.c
@@ -143,7 +143,7 @@ tgsi_exec_prepare( struct tgsi_exec_machine *mach )
k = tgsi_parse_init( &parse, mach->Tokens );
if (k != TGSI_PARSE_OK) {
- fprintf(stderr, "Problem parsing!\n");
+ debug_printf("Problem parsing!\n");
return;
}
@@ -249,7 +249,7 @@ tgsi_exec_machine_init(
k = tgsi_parse_init (&parse, mach->Tokens);
if (k != TGSI_PARSE_OK) {
- fprintf( stderr, "Problem parsing!\n" );
+ debug_printf( "Problem parsing!\n" );
return;
}
@@ -1236,7 +1236,7 @@ exec_tex(struct tgsi_exec_machine *mach,
uint chan_index;
float lodBias;
- /* printf("Sampler %u unit %u\n", sampler, unit); */
+ /* debug_printf("Sampler %u unit %u\n", sampler, unit); */
switch (inst->InstructionExtTexture.Texture) {
case TGSI_TEXTURE_1D:
@@ -2010,7 +2010,7 @@ exec_instruction(
case TGSI_OPCODE_TXB:
/* Texture lookup with lod bias */
- /* src[0] = texcoord (src[0].w = load bias) */
+ /* src[0] = texcoord (src[0].w = LOD bias) */
/* src[1] = sampler unit */
exec_tex(mach, inst, TRUE);
break;
@@ -2026,7 +2026,7 @@ exec_instruction(
case TGSI_OPCODE_TXL:
/* Texture lookup with explit LOD */
- /* src[0] = texcoord (src[0].w = load bias) */
+ /* src[0] = texcoord (src[0].w = LOD) */
/* src[1] = sampler unit */
exec_tex(mach, inst, TRUE);
break;
diff --git a/src/mesa/pipe/tgsi/exec/tgsi_sse2.c b/src/mesa/pipe/tgsi/exec/tgsi_sse2.c
index f8660e7ad1..40bacf8552 100755
--- a/src/mesa/pipe/tgsi/exec/tgsi_sse2.c
+++ b/src/mesa/pipe/tgsi/exec/tgsi_sse2.c
@@ -48,28 +48,28 @@ _print_reg(
case file_REG32:
switch( reg.idx ) {
case reg_AX:
- printf( "EAX" );
+ debug_printf( "EAX" );
break;
case reg_CX:
- printf( "ECX" );
+ debug_printf( "ECX" );
break;
case reg_DX:
- printf( "EDX" );
+ debug_printf( "EDX" );
break;
case reg_BX:
- printf( "EBX" );
+ debug_printf( "EBX" );
break;
case reg_SP:
- printf( "ESP" );
+ debug_printf( "ESP" );
break;
case reg_BP:
- printf( "EBP" );
+ debug_printf( "EBP" );
break;
case reg_SI:
- printf( "ESI" );
+ debug_printf( "ESI" );
break;
case reg_DI:
- printf( "EDI" );
+ debug_printf( "EDI" );
break;
}
break;
@@ -77,7 +77,7 @@ _print_reg(
assert( 0 );
break;
case file_XMM:
- printf( "XMM%u", reg.idx );
+ debug_printf( "XMM%u", reg.idx );
break;
case file_x87:
assert( 0 );
@@ -92,35 +92,35 @@ _fill(
unsigned count = 10 - strlen( op );
while( count-- ) {
- printf( " " );
+ debug_printf( " " );
}
}
-#define DUMP_START() printf( "\nsse-dump start ----------------" )
-#define DUMP_END() printf( "\nsse-dump end ----------------\n" )
-#define DUMP( OP ) printf( "\n%s", OP )
+#define DUMP_START() debug_printf( "\nsse-dump start ----------------" )
+#define DUMP_END() debug_printf( "\nsse-dump end ----------------\n" )
+#define DUMP( OP ) debug_printf( "\n%s", OP )
#define DUMP_I( OP, I ) do {\
- printf( "\n%s", OP );\
+ debug_printf( "\n%s", OP );\
_fill( OP );\
- printf( "%u", I ); } while( 0 )
+ debug_printf( "%u", I ); } while( 0 )
#define DUMP_R( OP, R0 ) do {\
- printf( "\n%s", OP );\
+ debug_printf( "\n%s", OP );\
_fill( OP );\
_print_reg( R0 ); } while( 0 )
#define DUMP_RR( OP, R0, R1 ) do {\
- printf( "\n%s", OP );\
+ debug_printf( "\n%s", OP );\
_fill( OP );\
_print_reg( R0 );\
- printf( ", " );\
+ debug_printf( ", " );\
_print_reg( R1 ); } while( 0 )
#define DUMP_RRI( OP, R0, R1, I ) do {\
- printf( "\n%s", OP );\
+ debug_printf( "\n%s", OP );\
_fill( OP );\
_print_reg( R0 );\
- printf( ", " );\
+ debug_printf( ", " );\
_print_reg( R1 );\
- printf( ", " );\
- printf( "%u", I ); } while( 0 )
+ debug_printf( ", " );\
+ debug_printf( "%u", I ); } while( 0 )
#else
@@ -198,9 +198,15 @@ get_output_base( void )
static struct x86_reg
get_temp_base( void )
{
+#ifdef WIN32
return x86_make_reg(
file_REG32,
reg_BX );
+#else
+ return x86_make_reg(
+ file_REG32,
+ reg_SI );
+#endif
}
static struct x86_reg
@@ -2248,8 +2254,7 @@ tgsi_emit_sse2(
case TGSI_TOKEN_TYPE_IMMEDIATE:
/* XXX implement this */
- assert(0);
- break;
+ return 0;
default:
assert( 0 );
diff --git a/src/mesa/pipe/tgsi/util/tgsi_build.c b/src/mesa/pipe/tgsi/util/tgsi_build.c
index 67f7d2c2c2..a00ff1c2a5 100644
--- a/src/mesa/pipe/tgsi/util/tgsi_build.c
+++ b/src/mesa/pipe/tgsi/util/tgsi_build.c
@@ -1,3 +1,4 @@
+#include "pipe/p_debug.h"
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi_build.h"
diff --git a/src/mesa/pipe/tgsi/util/tgsi_dump.c b/src/mesa/pipe/tgsi/util/tgsi_dump.c
index cdbc0dbc9c..b5c54847e0 100644
--- a/src/mesa/pipe/tgsi/util/tgsi_dump.c
+++ b/src/mesa/pipe/tgsi/util/tgsi_dump.c
@@ -25,6 +25,9 @@
*
**************************************************************************/
+#include <stdio.h>
+
+#include "pipe/p_debug.h"
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi_dump.h"
diff --git a/src/mesa/pipe/tgsi/util/tgsi_parse.c b/src/mesa/pipe/tgsi/util/tgsi_parse.c
index f0f8d44ac2..bf6b89ce56 100644
--- a/src/mesa/pipe/tgsi/util/tgsi_parse.c
+++ b/src/mesa/pipe/tgsi/util/tgsi_parse.c
@@ -25,6 +25,7 @@
*
**************************************************************************/
+#include "pipe/p_debug.h"
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi_parse.h"
diff --git a/src/mesa/pipe/tgsi/util/tgsi_util.c b/src/mesa/pipe/tgsi/util/tgsi_util.c
index 1e76b0f133..4cdd89182a 100644
--- a/src/mesa/pipe/tgsi/util/tgsi_util.c
+++ b/src/mesa/pipe/tgsi/util/tgsi_util.c
@@ -1,3 +1,4 @@
+#include "pipe/p_debug.h"
#include "pipe/p_util.h"
#include "pipe/p_shader_tokens.h"
#include "tgsi_parse.h"
diff --git a/src/mesa/pipe/util/p_debug.c b/src/mesa/pipe/util/p_debug.c
new file mode 100644
index 0000000000..b9607a6ba7
--- /dev/null
+++ b/src/mesa/pipe/util/p_debug.c
@@ -0,0 +1,96 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include <stdarg.h>
+
+#ifdef WIN32
+#include <windows.h>
+#include <winddi.h>
+#else
+#include <stdio.h>
+#include <stdlib.h>
+#endif
+
+#include "pipe/p_debug.h"
+#include "pipe/p_compiler.h"
+
+
+/**
+ * Write a formatted debug message using an already-started va_list.
+ *
+ * WIN32 builds hand the message to EngDebugPrint() with "Gallium3D: " as
+ * the prefix argument; all other builds write it to stderr with vfprintf().
+ */
+void debug_vprintf(const char *format, va_list ap)
+{
+#ifdef WIN32
+   EngDebugPrint("Gallium3D: ", (PCHAR)format, ap);
+#else
+   vfprintf(stderr, format, ap);
+#endif
+}
+
+
+/**
+ * printf-style front end that forwards its arguments to debug_vprintf().
+ */
+void debug_printf(const char *format, ...)
+{
+   va_list ap;
+   va_start(ap, format);
+   debug_vprintf(format, ap);
+   va_end(ap);
+}
+
+
+/**
+ * Stop execution: EngDebugBreak() on WIN32 builds, abort() elsewhere.
+ */
+static INLINE void debug_abort(void)
+{
+#ifdef WIN32
+   EngDebugBreak();
+#else
+   abort();
+#endif
+}
+
+
+/**
+ * Report a failed assertion through debug_printf() and then call
+ * debug_abort().
+ *
+ * \param expr  text of the expression that failed
+ * \param file  source file name reported in the message
+ * \param line  source line number reported in the message
+ */
+void debug_assert_fail(const char *expr, const char *file, unsigned line)
+{
+   debug_printf("%s:%u: Assertion `%s' failed.\n", file, line, expr);
+   debug_abort();
+}
diff --git a/src/mesa/sources b/src/mesa/sources
index 97ef7e1936..84492c91ac 100644
--- a/src/mesa/sources
+++ b/src/mesa/sources
@@ -175,6 +175,9 @@ DRAW_SOURCES = \
pipe/draw/draw_vertex_fetch.c \
pipe/draw/draw_vertex_shader.c \
pipe/draw/draw_vertex_shader_llvm.c \
+ pipe/draw/draw_vf.c \
+ pipe/draw/draw_vf_generic.c \
+ pipe/draw/draw_vf_sse.c \
pipe/draw/draw_wide_prims.c
TGSIEXEC_SOURCES = \
@@ -192,6 +195,7 @@ STATECACHE_SOURCES = \
pipe/cso_cache/cso_cache.c
PIPEUTIL_SOURCES = \
+ pipe/util/p_debug.c \
pipe/util/p_tile.c \
pipe/util/p_util.c
@@ -230,6 +234,7 @@ STATETRACKER_SOURCES = \
state_tracker/st_extensions.c \
state_tracker/st_format.c \
state_tracker/st_framebuffer.c \
+ state_tracker/st_gen_mipmap.c \
state_tracker/st_mesa_to_tgsi.c \
state_tracker/st_program.c \
state_tracker/st_texture.c
diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c
index 1ed9333556..9196918509 100644
--- a/src/mesa/state_tracker/st_atom_shader.c
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -226,9 +226,11 @@ find_translated_vp(struct st_context *st,
GLint fpInAttrib = vp_out_to_fp_in(outAttr);
if (fpInAttrib >= 0) {
GLuint fpInSlot = stfp->input_to_slot[fpInAttrib];
- GLuint vpOutSlot = stfp->fs->state.input_map[fpInSlot];
- xvp->output_to_slot[outAttr] = vpOutSlot;
- numVpOuts++;
+ if (fpInSlot != ~0) {
+ GLuint vpOutSlot = stfp->fs->state.input_map[fpInSlot];
+ xvp->output_to_slot[outAttr] = vpOutSlot;
+ numVpOuts++;
+ }
}
else if (outAttr == VERT_RESULT_PSIZ ||
outAttr == VERT_RESULT_BFC0 ||
@@ -247,7 +249,7 @@ find_translated_vp(struct st_context *st,
* We could use this info to do dead code elimination in the
* vertex program.
*/
- dummySlot = stfp->num_input_slots;
+ dummySlot = numVpOuts;
/* Map vert program outputs that aren't used to the dummy slot */
for (outAttr = 0; outAttr < VERT_RESULT_MAX; outAttr++) {
diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c
index fb21d29c40..2a836d630b 100644
--- a/src/mesa/state_tracker/st_atom_texture.c
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -36,7 +36,6 @@
#include "st_atom.h"
#include "st_cb_texture.h"
#include "pipe/p_context.h"
-#include "pipe/p_defines.h"
/**
@@ -46,24 +45,21 @@
static void
update_textures(struct st_context *st)
{
- GLuint s;
-
/* ST_NEW_FRAGMENT_PROGRAM
*/
struct gl_fragment_program *fprog = st->ctx->FragmentProgram._Current;
+ GLuint unit;
- for (s = 0; s < st->ctx->Const.MaxTextureCoordUnits; s++) {
- GLuint su = fprog->Base.SamplerUnits[s];
-
- struct gl_texture_object *texObj
- = st->ctx->Texture.Unit[su]._Current;
-
+ for (unit = 0; unit < st->ctx->Const.MaxTextureCoordUnits; unit++) {
+ const GLuint su = fprog->Base.SamplerUnits[unit];
+ struct gl_texture_object *texObj = st->ctx->Texture.Unit[su]._Current;
struct pipe_texture *pt;
if (texObj) {
GLboolean flush, retval;
retval = st_finalize_texture(st->ctx, st->pipe, texObj, &flush);
+ /* XXX retval indicates whether there's a texture border */
pt = st_get_texobj_texture(texObj);
}
@@ -75,9 +71,9 @@ update_textures(struct st_context *st)
* this table before being deleted, otherwise the pointer
* comparison below could fail.
*/
- if (st->state.sampler_texture[s] != pt) {
- st->state.sampler_texture[s] = pt;
- st->pipe->set_sampler_texture(st->pipe, s, pt);
+ if (st->state.sampler_texture[unit] != pt) {
+ st->state.sampler_texture[unit] = pt;
+ st->pipe->set_sampler_texture(st->pipe, unit, pt);
}
}
}
diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c
index 758d4a4086..ab98b54bab 100644
--- a/src/mesa/state_tracker/st_cb_clear.c
+++ b/src/mesa/state_tracker/st_cb_clear.c
@@ -251,7 +251,7 @@ draw_quad(GLcontext *ctx,
verts[i][1][3] = color[3];
}
- st_draw_vertices(ctx, PIPE_PRIM_QUADS, 4, (float *) verts, 2);
+ st_draw_vertices(ctx, PIPE_PRIM_QUADS, 4, (float *) verts, 2, GL_FALSE);
}
@@ -408,7 +408,9 @@ check_clear_depth_with_quad(GLcontext *ctx, struct gl_renderbuffer *rb)
const struct st_renderbuffer *strb = st_renderbuffer(rb);
const GLboolean isDS = is_depth_stencil_format(strb->surface->format);
return ctx->Scissor.Enabled
- || (isDS && ctx->DrawBuffer->Visual.stencilBits > 0);
+ || (isDS &&
+ strb->surface->status == PIPE_SURFACE_STATUS_DEFINED &&
+ ctx->DrawBuffer->Visual.stencilBits > 0);
}
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 6b44cba2e4..07886e7982 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -355,8 +355,8 @@ make_fragment_shader_z(struct st_context *st)
* Create a simple vertex shader that just passes through the
* vertex position and texcoord (and optionally, color).
*/
-static struct st_vertex_program *
-make_vertex_shader(struct st_context *st, GLboolean passColor)
+struct st_vertex_program *
+st_make_passthrough_vertex_shader(struct st_context *st, GLboolean passColor)
{
/* only make programs once and re-use */
static struct st_vertex_program *progs[2] = { NULL, NULL };
@@ -572,7 +572,7 @@ draw_quad(GLcontext *ctx, GLfloat x0, GLfloat y0, GLfloat z,
verts[i][1][3] = 1.0; /*Q*/
}
- st_draw_vertices(ctx, PIPE_PRIM_QUADS, 4, (float *) verts, 2);
+ st_draw_vertices(ctx, PIPE_PRIM_QUADS, 4, (float *) verts, 2, GL_FALSE);
}
@@ -581,10 +581,13 @@ draw_quad_colored(GLcontext *ctx, GLfloat x0, GLfloat y0, GLfloat z,
GLfloat x1, GLfloat y1, const GLfloat *color,
GLboolean invertTex)
{
+ GLfloat bias = ctx->st->bitmap_texcoord_bias;
GLfloat verts[4][3][4]; /* four verts, three attribs, XYZW */
GLuint i;
- GLfloat sLeft = 0.0, sRight = 1.0;
- GLfloat tTop = invertTex, tBot = 1.0 - tTop;
+ GLfloat xBias = bias / (x1-x0);
+ GLfloat yBias = bias / (y1-y0);
+ GLfloat sLeft = 0.0 + xBias, sRight = 1.0 + xBias;
+ GLfloat tTop = invertTex - yBias, tBot = 1.0 - tTop - yBias;
/* upper-left */
verts[0][0][0] = x0; /* attr[0].x */
@@ -622,7 +625,7 @@ draw_quad_colored(GLcontext *ctx, GLfloat x0, GLfloat y0, GLfloat z,
verts[i][2][3] = 1.0; /*Q*/
}
- st_draw_vertices(ctx, PIPE_PRIM_QUADS, 4, (float *) verts, 3);
+ st_draw_vertices(ctx, PIPE_PRIM_QUADS, 4, (float *) verts, 3, GL_FALSE);
}
@@ -942,7 +945,7 @@ st_DrawPixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
if (format == GL_DEPTH_COMPONENT) {
ps = st->state.framebuffer.zsbuf;
stfp = make_fragment_shader_z(ctx->st);
- stvp = make_vertex_shader(ctx->st, GL_TRUE);
+ stvp = st_make_passthrough_vertex_shader(ctx->st, GL_TRUE);
color = ctx->Current.RasterColor;
}
else if (format == GL_STENCIL_INDEX) {
@@ -953,7 +956,7 @@ st_DrawPixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
else {
ps = st->state.framebuffer.cbufs[0];
stfp = combined_drawpix_fragment_program(ctx);
- stvp = make_vertex_shader(ctx->st, GL_FALSE);
+ stvp = st_make_passthrough_vertex_shader(ctx->st, GL_FALSE);
color = NULL;
}
@@ -1108,7 +1111,7 @@ st_Bitmap(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
struct st_context *st = ctx->st;
struct pipe_texture *pt;
- stvp = make_vertex_shader(ctx->st, GL_TRUE);
+ stvp = st_make_passthrough_vertex_shader(ctx->st, GL_TRUE);
stfp = combined_bitmap_fragment_program(ctx);
st_validate_state(st);
@@ -1226,13 +1229,13 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
rbRead = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer);
color = NULL;
stfp = combined_drawpix_fragment_program(ctx);
- stvp = make_vertex_shader(ctx->st, GL_FALSE);
+ stvp = st_make_passthrough_vertex_shader(ctx->st, GL_FALSE);
}
else {
rbRead = st_renderbuffer(ctx->ReadBuffer->_DepthBuffer);
color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
stfp = make_fragment_shader_z(ctx->st);
- stvp = make_vertex_shader(ctx->st, GL_TRUE);
+ stvp = st_make_passthrough_vertex_shader(ctx->st, GL_TRUE);
}
psRead = rbRead->surface;
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.h b/src/mesa/state_tracker/st_cb_drawpixels.h
index 71ba487020..b8b906f06b 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.h
+++ b/src/mesa/state_tracker/st_cb_drawpixels.h
@@ -30,6 +30,10 @@
#define ST_CB_DRAWPIXELS_H
+extern struct st_vertex_program *
+st_make_passthrough_vertex_shader(struct st_context *st, GLboolean passColor);
+
+
extern void st_init_drawpixels_functions(struct dd_function_table *functions);
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index ba0950e295..3350254654 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -30,6 +30,7 @@
#include "main/enums.h"
#include "main/image.h"
#include "main/macros.h"
+#include "main/mipmap.h"
#include "main/texcompress.h"
#include "main/texformat.h"
#include "main/teximage.h"
@@ -41,6 +42,7 @@
#include "state_tracker/st_cb_texture.h"
#include "state_tracker/st_format.h"
#include "state_tracker/st_texture.h"
+#include "state_tracker/st_gen_mipmap.h"
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
@@ -76,13 +78,13 @@ struct st_texture_object
-
static INLINE struct st_texture_object *
st_texture_object(struct gl_texture_object *obj)
{
return (struct st_texture_object *) obj;
}
+
static INLINE struct st_texture_image *
st_texture_image(struct gl_texture_image *img)
{
@@ -122,32 +124,28 @@ gl_target_to_pipe(GLenum target)
}
+/**
+ * Return nominal bytes per texel for a compressed format, 0 for non-compressed
+ * format.
+ */
static int
compressed_num_bytes(GLuint mesaFormat)
{
- int bytes = 0;
switch(mesaFormat) {
-
case MESA_FORMAT_RGB_FXT1:
case MESA_FORMAT_RGBA_FXT1:
case MESA_FORMAT_RGB_DXT1:
case MESA_FORMAT_RGBA_DXT1:
- bytes = 2;
- break;
-
+ return 2;
case MESA_FORMAT_RGBA_DXT3:
case MESA_FORMAT_RGBA_DXT5:
- bytes = 4;
+ return 4;
default:
- break;
+ return 0;
}
-
- return bytes;
}
-
-
static GLboolean
st_IsTextureResident(GLcontext * ctx, struct gl_texture_object *texObj)
{
@@ -164,7 +162,6 @@ st_IsTextureResident(GLcontext * ctx, struct gl_texture_object *texObj)
}
-
static struct gl_texture_image *
st_NewTextureImage(GLcontext * ctx)
{
@@ -216,8 +213,6 @@ st_FreeTextureImageData(GLcontext * ctx, struct gl_texture_image *texImage)
}
-
-
/* ================================================================
* From linux kernel i386 header files, copes with odd sizes better
* than COPY_DWORDS would:
@@ -290,7 +285,12 @@ logbase2(int n)
}
-/* Otherwise, store it in memory if (Border != 0) or (any dimension ==
+/**
+ * Allocate a pipe_texture object for the given st_texture_object using
+ * the given st_texture_image to guess the mipmap size/levels.
+ *
+ * [comments...]
+ * Otherwise, store it in memory if (Border != 0) or (any dimension ==
* 1).
*
* Otherwise, if max_level >= level >= min_level, create texture with
@@ -302,18 +302,19 @@ logbase2(int n)
static void
guess_and_alloc_texture(struct st_context *st,
struct st_texture_object *stObj,
- struct st_texture_image *stImage)
+ const struct st_texture_image *stImage)
{
GLuint firstLevel;
GLuint lastLevel;
GLuint width = stImage->base.Width;
GLuint height = stImage->base.Height;
GLuint depth = stImage->base.Depth;
- GLuint l2width, l2height, l2depth;
GLuint i, comp_byte = 0;
DBG("%s\n", __FUNCTION__);
+ assert(!stObj->pt);
+
if (stImage->base.Border)
return;
@@ -355,15 +356,15 @@ guess_and_alloc_texture(struct st_context *st,
lastLevel = firstLevel;
}
else {
- l2width = logbase2(width);
- l2height = logbase2(height);
- l2depth = logbase2(depth);
+ GLuint l2width = logbase2(width);
+ GLuint l2height = logbase2(height);
+ GLuint l2depth = logbase2(depth);
lastLevel = firstLevel + MAX2(MAX2(l2width, l2height), l2depth);
}
- assert(!stObj->pt);
if (stImage->base.IsCompressed)
comp_byte = compressed_num_bytes(stImage->base.TexFormat->MesaFormat);
+
stObj->pt = st_texture_create(st,
gl_target_to_pipe(stObj->base.Target),
st_mesa_format_to_pipe_format(stImage->base.TexFormat->MesaFormat),
@@ -487,21 +488,18 @@ try_pbo_upload(GLcontext *ctx,
-
-
-
-
static void
st_TexImage(GLcontext * ctx,
- GLint dims,
- GLenum target, GLint level,
- GLint internalFormat,
- GLint width, GLint height, GLint depth,
- GLint border,
- GLenum format, GLenum type, const void *pixels,
- const struct gl_pixelstore_attrib *unpack,
- struct gl_texture_object *texObj,
- struct gl_texture_image *texImage, GLsizei imageSize, int compressed)
+ GLint dims,
+ GLenum target, GLint level,
+ GLint internalFormat,
+ GLint width, GLint height, GLint depth,
+ GLint border,
+ GLenum format, GLenum type, const void *pixels,
+ const struct gl_pixelstore_attrib *unpack,
+ struct gl_texture_object *texObj,
+ struct gl_texture_image *texImage,
+ GLsizei imageSize, int compressed)
{
struct st_texture_object *stObj = st_texture_object(texObj);
struct st_texture_image *stImage = st_texture_image(texImage);
@@ -524,7 +522,7 @@ st_TexImage(GLcontext * ctx,
/* choose the texture format */
texImage->TexFormat = st_ChooseTextureFormat(ctx, internalFormat,
- format, type);
+ format, type);
_mesa_set_fetch_functions(texImage, dims);
@@ -536,7 +534,8 @@ st_TexImage(GLcontext * ctx,
ctx->Driver.CompressedTextureSize(ctx, texImage->Width,
texImage->Height, texImage->Depth,
texImage->TexFormat->MesaFormat);
- } else {
+ }
+ else {
texelBytes = texImage->TexFormat->TexelBytes;
/* Minimum pitch of 32 bytes */
@@ -669,7 +668,7 @@ st_TexImage(GLcontext * ctx,
* conversion and copy:
*/
if (compressed) {
- memcpy(texImage->Data, pixels, imageSize);
+ memcpy(texImage->Data, pixels, imageSize);
}
else {
GLuint srcImageStride = _mesa_image_image_stride(unpack, width, height,
@@ -705,13 +704,9 @@ st_TexImage(GLcontext * ctx,
texImage->Data = NULL;
}
-#if 0
- /* GL_SGIS_generate_mipmap -- this can be accelerated now.
- */
+#if 01
if (level == texObj->BaseLevel && texObj->GenerateMipmap) {
- intel_generate_mipmap(ctx, target,
- &ctx->Texture.Unit[ctx->Texture.CurrentUnit],
- texObj);
+ ctx->Driver.GenerateMipmap(ctx, target, texObj);
}
#endif
}
@@ -1401,7 +1396,10 @@ copy_image_data_to_texture(struct st_context *st,
}
-/*
+/**
+ * Called during state validation. When this function is finished,
+ * the texture object should be ready for rendering.
+ * \return GL_FALSE if a texture border is present, GL_TRUE otherwise
*/
GLboolean
st_finalize_texture(GLcontext *ctx,
@@ -1410,11 +1408,10 @@ st_finalize_texture(GLcontext *ctx,
GLboolean *needFlush)
{
struct st_texture_object *stObj = st_texture_object(tObj);
+ const GLuint nr_faces = (stObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
int comp_byte = 0;
int cpp;
-
GLuint face, i;
- GLuint nr_faces = 0;
struct st_texture_image *firstImage;
*needFlush = GL_FALSE;
@@ -1426,8 +1423,7 @@ st_finalize_texture(GLcontext *ctx,
/* What levels must the texture include at a minimum?
*/
calculate_first_last_level(stObj);
- firstImage =
- st_texture_image(stObj->base.Image[0][stObj->firstLevel]);
+ firstImage = st_texture_image(stObj->base.Image[0][stObj->firstLevel]);
/* Fallback case:
*/
@@ -1503,7 +1499,6 @@ st_finalize_texture(GLcontext *ctx,
/* Pull in any images not in the object's texture:
*/
- nr_faces = (stObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1;
for (face = 0; face < nr_faces; face++) {
for (i = stObj->firstLevel; i <= stObj->lastLevel; i++) {
struct st_texture_image *stImage =
@@ -1540,6 +1535,7 @@ st_init_texture_functions(struct dd_function_table *functions)
functions->CopyTexSubImage1D = st_CopyTexSubImage1D;
functions->CopyTexSubImage2D = st_CopyTexSubImage2D;
functions->CopyTexSubImage3D = st_CopyTexSubImage3D;
+ functions->GenerateMipmap = st_generate_mipmap;
functions->GetTexImage = st_GetTexImage;
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index 9c206c057a..bf4618bed8 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -49,6 +49,7 @@
#include "st_atom.h"
#include "st_draw.h"
#include "st_extensions.h"
+#include "st_gen_mipmap.h"
#include "st_program.h"
#include "pipe/p_context.h"
#include "pipe/p_winsys.h"
@@ -96,6 +97,7 @@ st_create_context_priv( GLcontext *ctx, struct pipe_context *pipe )
st_init_atoms( st );
st_init_draw( st );
+ st_init_generate_mipmap(st);
/* we want all vertex data to be placed in buffer objects */
vbo_use_buffer_objects(ctx);
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index 2b6f8743f3..a756055898 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -143,6 +143,8 @@ struct st_context
GLfloat polygon_offset_scale; /* ?? */
+ GLfloat bitmap_texcoord_bias;
+
/** Mapping from VERT_RESULT_x to post-transformed vertex slot */
const GLuint *vertex_result_to_slot;
diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c
index 8ef50ee768..ae9f5c8b11 100644
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -298,6 +298,7 @@ st_draw_vbo(GLcontext *ctx,
break;
default:
assert(0);
+ return;
}
/* get/create the index buffer object */
@@ -353,7 +354,8 @@ st_draw_vbo(GLcontext *ctx,
void
st_draw_vertices(GLcontext *ctx, unsigned prim,
unsigned numVertex, float *verts,
- unsigned numAttribs)
+ unsigned numAttribs,
+ GLboolean inClipCoords)
{
const float width = ctx->DrawBuffer->Width;
const float height = ctx->DrawBuffer->Height;
@@ -366,14 +368,16 @@ st_draw_vertices(GLcontext *ctx, unsigned prim,
assert(numAttribs > 0);
- /* convert to clip coords */
- for (i = 0; i < numVertex; i++) {
- float x = verts[i * numAttribs * 4 + 0];
- float y = verts[i * numAttribs * 4 + 1];
- x = x / width * 2.0 - 1.0;
- y = y / height * 2.0 - 1.0;
- verts[i * numAttribs * 4 + 0] = x;
- verts[i * numAttribs * 4 + 1] = y;
+ if (!inClipCoords) {
+ /* convert to clip coords */
+ for (i = 0; i < numVertex; i++) {
+ float x = verts[i * numAttribs * 4 + 0];
+ float y = verts[i * numAttribs * 4 + 1];
+ x = x / width * 2.0 - 1.0;
+ y = y / height * 2.0 - 1.0;
+ verts[i * numAttribs * 4 + 0] = x;
+ verts[i * numAttribs * 4 + 1] = y;
+ }
}
/* XXX create one-time */
@@ -570,6 +574,7 @@ st_feedback_draw_vbo(GLcontext *ctx,
break;
default:
assert(0);
+ return;
}
map = pipe->winsys->buffer_map(pipe->winsys,
diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h
index 89ee790c57..171bde57e5 100644
--- a/src/mesa/state_tracker/st_draw.h
+++ b/src/mesa/state_tracker/st_draw.h
@@ -62,7 +62,8 @@ st_feedback_draw_vbo(GLcontext *ctx,
void
st_draw_vertices(GLcontext *ctx, unsigned prim,
unsigned numVertex, float *verts,
- unsigned numAttribs);
+ unsigned numAttribs,
+ GLboolean inClipCoords);
#endif
diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
index 0157bdd6b3..97d28d77c4 100644
--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -106,6 +106,9 @@ void st_init_limits(struct st_context *st)
c->MaxTextureLodBias
= pipe->get_paramf(pipe, PIPE_CAP_MAX_TEXTURE_LOD_BIAS);
+
+ st->bitmap_texcoord_bias
+ = pipe->get_paramf(pipe, PIPE_CAP_BITMAP_TEXCOORD_BIAS);
}
diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c
new file mode 100644
index 0000000000..a6ac9a55fb
--- /dev/null
+++ b/src/mesa/state_tracker/st_gen_mipmap.c
@@ -0,0 +1,363 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "main/imports.h"
+#include "main/mipmap.h"
+#include "main/teximage.h"
+
+#include "shader/prog_instruction.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/cso_cache/cso_cache.h"
+
+#include "st_context.h"
+#include "st_draw.h"
+#include "st_gen_mipmap.h"
+#include "st_program.h"
+#include "st_cb_drawpixels.h"
+#include "st_cb_texture.h"
+
+
+
+static void *blend_cso = NULL; /* set by st_init_generate_mipmap() via create_blend_state() */
+static void *depthstencil_cso = NULL; /* set via create_depth_stencil_alpha_state() */
+static void *rasterizer_cso = NULL; /* set via create_rasterizer_state() */
+static void *sampler_cso = NULL; /* set via create_sampler_state() */
+
+static struct st_fragment_program *stfp = NULL; /* from make_tex_fragment_program() */
+static struct st_vertex_program *stvp = NULL; /* from st_make_passthrough_vertex_shader() */
+
+
+
+/**
+ * Build the two-instruction fragment program used for mipmap rendering:
+ * a 2D TEX from texture unit 0 at texcoord 0 into the color output,
+ * followed by END.  The program is translated with
+ * st_translate_fragment_program() before it is returned.
+ * \return the new program, or NULL if ctx->Driver.NewProgram() or
+ *         _mesa_alloc_instructions() fails
+ */
+static struct st_fragment_program *
+make_tex_fragment_program(GLcontext *ctx)
+{
+   struct gl_program *prog;
+   struct st_fragment_program *texProg;
+   GLuint n = 0;
+
+   prog = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
+   if (prog == NULL) {
+      return NULL;
+   }
+
+   prog->NumInstructions = 2;
+   prog->Instructions = _mesa_alloc_instructions(prog->NumInstructions);
+   if (prog->Instructions == NULL) {
+      ctx->Driver.DeleteProgram(ctx, prog);
+      return NULL;
+   }
+   _mesa_init_instructions(prog->Instructions, prog->NumInstructions);
+
+   /* TEX result.color, fragment.texcoord[0], texture[0], 2D; */
+   prog->Instructions[n].Opcode = OPCODE_TEX;
+   prog->Instructions[n].DstReg.File = PROGRAM_OUTPUT;
+   prog->Instructions[n].DstReg.Index = FRAG_RESULT_COLR;
+   prog->Instructions[n].SrcReg[0].File = PROGRAM_INPUT;
+   prog->Instructions[n].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
+   prog->Instructions[n].TexSrcUnit = 0;
+   prog->Instructions[n].TexSrcTarget = TEXTURE_2D_INDEX;
+   n++;
+
+   /* END; */
+   prog->Instructions[n].Opcode = OPCODE_END;
+   n++;
+
+   assert(n == prog->NumInstructions);
+
+   prog->InputsRead = FRAG_BIT_TEX0;
+   prog->OutputsWritten = (1 << FRAG_RESULT_COLR);
+
+   /* NOTE(review): presumably the NewProgram hook allocates a full
+    * st_fragment_program for GL_FRAGMENT_PROGRAM_ARB -- confirm.
+    */
+   texProg = (struct st_fragment_program *) prog;
+
+   st_translate_fragment_program(ctx->st, texProg, NULL,
+                                 texProg->tokens, ST_MAX_SHADER_TOKENS);
+
+   return texProg;
+}
+
+
+
+
+/**
+ * One-time init for generate mipmap: creates the blend, depth/stencil,
+ * rasterizer and sampler state objects and the fragment/vertex programs
+ * that st_render_mipmap() binds, and stores them in file-scope variables.
+ * XXX Note: there may be other times we need no-op/simple state like this.
+ * In that case, some code refactoring would be good.
+ * NOTE(review): the results live in file-scope statics and the assert
+ * below requires blend_cso to still be NULL, so a second call without an
+ * intervening st_destroy_generate_mipmpap() trips the assert -- confirm
+ * how this behaves when more than one context is created.
+ * \param st  state tracker context whose pipe creates the state objects
+ */
+void
+st_init_generate_mipmap(struct st_context *st)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_blend_state blend;
+ struct pipe_rasterizer_state rasterizer;
+ struct pipe_sampler_state sampler;
+ struct pipe_depth_stencil_alpha_state depthstencil;
+
+ assert(!blend_cso); /* must not already be initialized */
+
+ memset(&blend, 0, sizeof(blend)); /* everything zero except colormask */
+ blend.colormask = PIPE_MASK_RGBA;
+ blend_cso = pipe->create_blend_state(pipe, &blend);
+
+ memset(&depthstencil, 0, sizeof(depthstencil)); /* all-zero state */
+ depthstencil_cso = pipe->create_depth_stencil_alpha_state(pipe, &depthstencil);
+
+ memset(&rasterizer, 0, sizeof(rasterizer)); /* all-zero state */
+ rasterizer_cso = pipe->create_rasterizer_state(pipe, &rasterizer);
+
+ memset(&sampler, 0, sizeof(sampler));
+ sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; /* no filtering between mipmap levels */
+ sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR;
+ sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
+ sampler.normalized_coords = 1;
+ sampler_cso = pipe->create_sampler_state(pipe, &sampler);
+
+ stfp = make_tex_fragment_program(st->ctx); /* may be NULL on allocation failure; not checked here */
+ stvp = st_make_passthrough_vertex_shader(st, GL_FALSE);
+}
+
+
+/**
+ * Delete the state objects created by st_init_generate_mipmap() and
+ * reset the cached handles to NULL.  The fragment and vertex programs
+ * are not released here (see the XXX below).
+ */
+void
+st_destroy_generate_mipmpap(struct st_context *st)
+{
+   struct pipe_context *const ctxPipe = st->pipe;
+
+   ctxPipe->delete_blend_state(ctxPipe, blend_cso);
+   blend_cso = NULL;
+
+   ctxPipe->delete_depth_stencil_alpha_state(ctxPipe, depthstencil_cso);
+   depthstencil_cso = NULL;
+
+   ctxPipe->delete_rasterizer_state(ctxPipe, rasterizer_cso);
+   rasterizer_cso = NULL;
+
+   ctxPipe->delete_sampler_state(ctxPipe, sampler_cso);
+   sampler_cso = NULL;
+
+   /* XXX free stfp, stvp */
+}
+
+
+/**
+ * Set a viewport sized for a width x height render target:
+ * X scale and translate are 0.5 * width, Y translate is 0.5 * height
+ * and Y scale is -0.5 * height (i.e. Y is negated).  Z/W use scale 1,
+ * translate 0.
+ */
+static void
+simple_viewport(struct pipe_context *pipe, uint width, uint height)
+{
+   /* doubles, so the stored values match the original expressions exactly */
+   const double halfWidth = 0.5 * width;
+   const double halfHeight = 0.5 * height;
+   struct pipe_viewport_state viewport;
+
+   viewport.scale[0] = halfWidth;
+   viewport.scale[1] = -halfHeight;
+   viewport.scale[2] = 1.0;
+   viewport.scale[3] = 1.0;
+
+   viewport.translate[0] = halfWidth;
+   viewport.translate[1] = halfHeight;
+   viewport.translate[2] = 0.0;
+   viewport.translate[3] = 0.0;
+
+   pipe->set_viewport_state(pipe, &viewport);
+}
+
+
+
+/**
+ * Draw a quad covering [-1,1]x[-1,1] with texcoords spanning [0,1]x[0,1].
+ * The positions are handed to st_draw_vertices() as clip coordinates
+ * (inClipCoords = GL_TRUE), so no window-to-clip conversion is applied.
+ */
+static void
+draw_quad(GLcontext *ctx)
+{
+   /* per-vertex { x, y, s, t }, in the order the vertices are emitted */
+   static const GLfloat corners[4][4] = {
+      { -1.0, -1.0, 0.0, 1.0 },   /* x0, y0, sLeft,  tTop */
+      {  1.0, -1.0, 1.0, 1.0 },   /* x1, y0, sRight, tTop */
+      {  1.0,  1.0, 1.0, 0.0 },   /* x1, y1, sRight, tBot */
+      { -1.0,  1.0, 0.0, 0.0 }    /* x0, y1, sLeft,  tBot */
+   };
+   GLfloat quad[4][2][4];   /* four verts, two attribs, XYZW */
+   GLuint v;
+
+   for (v = 0; v < 4; v++) {
+      /* attr[0]: position */
+      quad[v][0][0] = corners[v][0];
+      quad[v][0][1] = corners[v][1];
+      quad[v][0][2] = 0.0;   /*Z*/
+      quad[v][0][3] = 1.0;   /*W*/
+
+      /* attr[1]: texcoord */
+      quad[v][1][0] = corners[v][2];
+      quad[v][1][1] = corners[v][3];
+      quad[v][1][2] = 0.0;   /*R*/
+      quad[v][1][3] = 1.0;   /*Q*/
+   }
+
+   st_draw_vertices(ctx, PIPE_PRIM_QUADS, 4, (float *) quad, 2, GL_TRUE);
+}
+
+
+
+/**
+ * Generate mipmap levels using hardware rendering.
+ * Each level from baseLevel+1 to lastLevel is produced by binding that
+ * level as the color buffer and drawing a textured quad that samples the
+ * previous level.  Only face 0 / zslice 0 is processed.
+ * \param st         state tracker context
+ * \param pt         texture to fill; pt->first_level is temporarily
+ *                   overridden and restored before returning TRUE
+ * \param baseLevel  level that already holds the source image
+ * \param lastLevel  last level to generate
+ * \return TRUE if successful, FALSE if not possible
+ */
+static boolean
+st_render_mipmap(struct st_context *st,
+ struct pipe_texture *pt,
+ uint baseLevel, uint lastLevel)
+{
+ struct pipe_context *pipe = st->pipe;
+ struct pipe_framebuffer_state fb;
+ const uint face = 0, zslice = 0;
+ const uint first_level_save = pt->first_level;
+ uint dstLevel;
+
+ /* check if we can render in the texture's format */
+ if (!pipe->is_format_supported(pipe, pt->format, PIPE_SURFACE)) {
+ return FALSE;
+ }
+
+ /* init framebuffer state */
+ memset(&fb, 0, sizeof(fb));
+ fb.num_cbufs = 1;
+
+ /* bind CSOs */
+ pipe->bind_blend_state(pipe, blend_cso);
+ pipe->bind_depth_stencil_alpha_state(pipe, depthstencil_cso);
+ pipe->bind_rasterizer_state(pipe, rasterizer_cso);
+ pipe->bind_sampler_state(pipe, 0, sampler_cso);
+
+ /* bind shaders */
+ pipe->bind_fs_state(pipe, stfp->fs->data);
+ pipe->bind_vs_state(pipe, stvp->cso->data);
+
+ for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
+ const uint srcLevel = dstLevel - 1;
+
+ /*
+ * Setup framebuffer / dest surface
+ * NOTE(review): the surface returned by get_tex_surface() is never
+ * passed to pipe_surface_reference(..., NULL) in this function --
+ * confirm whether a reference is leaked per level.
+ */
+ fb.cbufs[0] = pipe->get_tex_surface(pipe, pt, face, dstLevel, zslice);
+ pipe->set_framebuffer_state(pipe, &fb);
+
+ simple_viewport(pipe, pt->width[dstLevel], pt->height[dstLevel]);
+
+ /*
+ * Setup src texture, override pt->first_level so we sample from
+ * the right mipmap level.
+ */
+ pt->first_level = srcLevel;
+ pipe->set_sampler_texture(pipe, 0, pt);
+
+ draw_quad(st->ctx);
+ }
+
+ /* restore first_level */
+ pt->first_level = first_level_save;
+
+ /* restore pipe state
+ * NOTE(review): only rasterizer, shaders, sampler 0, sampler texture 0
+ * and viewport are re-bound here; the blend, depth/stencil and
+ * framebuffer state set above are left in place -- confirm that later
+ * state validation re-binds them.
+ */
+ if (st->state.rasterizer)
+ pipe->bind_rasterizer_state(pipe, st->state.rasterizer->data);
+ if (st->state.fs)
+ pipe->bind_fs_state(pipe, st->state.fs->data);
+ if (st->state.vs)
+ pipe->bind_vs_state(pipe, st->state.vs->cso->data);
+ if (st->state.sampler[0])
+ pipe->bind_sampler_state(pipe, 0, st->state.sampler[0]->data);
+ pipe->set_sampler_texture(pipe, 0, st->state.sampler_texture[0]);
+ pipe->set_viewport_state(pipe, &st->state.viewport);
+
+ return TRUE;
+}
+
+
+
+/**
+ * Generate the mipmap levels above texObj->BaseLevel.
+ * The levels are rendered with st_render_mipmap(); afterwards the
+ * gl_texture_image for each generated level is re-initialized with the
+ * level's dimensions and the source image's border, internal format and
+ * TexFormat, and its st_texture_image is pointed at the texture.
+ *
+ * \param ctx     GL context
+ * \param target  texture target, passed on to _mesa_get_tex_image()
+ * \param texObj  texture object whose base level holds the source image
+ */
+void
+st_generate_mipmap(GLcontext *ctx, GLenum target,
+                   struct gl_texture_object *texObj)
+{
+   struct st_context *st = ctx->st;
+   /* NOTE(review): pt is dereferenced without a NULL check; presumably
+    * texObj always has a pipe texture by now -- confirm against callers.
+    */
+   struct pipe_texture *pt = st_get_texobj_texture(texObj);
+   const uint baseLevel = texObj->BaseLevel;
+   const uint lastLevel = pt->last_level;
+   uint dstLevel;
+
+   if (!st_render_mipmap(st, pt, baseLevel, lastLevel)) {
+      /* deliberate hard stop: the software fallback below is not usable yet */
+      abort();
+      /* XXX the following won't really work at this time */
+      _mesa_generate_mipmap(ctx, target, texObj);
+      return;
+   }
+
+   for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
+      const uint srcLevel = dstLevel - 1;
+      const struct gl_texture_image *srcImage
+         = _mesa_get_tex_image(ctx, texObj, target, srcLevel);
+      struct gl_texture_image *dstImage;
+      struct st_texture_image *stImage;
+      uint dstWidth = pt->width[dstLevel];
+      uint dstHeight = pt->height[dstLevel];
+      uint dstDepth = pt->depth[dstLevel];
+      uint border;
+
+      /* srcImage is read below, so it needs the same NULL check that
+       * dstImage gets.
+       */
+      if (!srcImage) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps");
+         return;
+      }
+      border = srcImage->Border;
+
+      dstImage = _mesa_get_tex_image(ctx, texObj, target, dstLevel);
+      if (!dstImage) {
+         _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps");
+         return;
+      }
+
+      if (dstImage->ImageOffsets)
+         _mesa_free(dstImage->ImageOffsets);
+
+      /* Free old image data */
+      if (dstImage->Data)
+         ctx->Driver.FreeTexImageData(ctx, dstImage);
+
+      /* initialize new image */
+      _mesa_init_teximage_fields(ctx, target, dstImage, dstWidth, dstHeight,
+                                 dstDepth, border, srcImage->InternalFormat);
+
+      dstImage->TexFormat = srcImage->TexFormat;
+
+      /* the generated level's data lives in the pipe texture itself */
+      stImage = (struct st_texture_image *) dstImage;
+      stImage->pt = pt;
+   }
+
+}
diff --git a/src/mesa/state_tracker/st_gen_mipmap.h b/src/mesa/state_tracker/st_gen_mipmap.h
new file mode 100644
index 0000000000..7668c1e44e
--- /dev/null
+++ b/src/mesa/state_tracker/st_gen_mipmap.h
@@ -0,0 +1,46 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#ifndef ST_GEN_MIPMAP_H
+#define ST_GEN_MIPMAP_H
+
+
+/** One-time creation of the state objects and programs used for mipmap rendering. */
+extern void
+st_init_generate_mipmap(struct st_context *st);
+
+
+/**
+ * Delete the state objects created by st_init_generate_mipmap().
+ * NOTE(review): the name is spelled "mipmpap"; it matches the definition.
+ */
+extern void
+st_destroy_generate_mipmpap(struct st_context *st);
+
+
+/** Generate the mipmap levels above texObj->BaseLevel for the given target. */
+extern void
+st_generate_mipmap(GLcontext *ctx, GLenum target,
+ struct gl_texture_object *texObj);
+
+
+#endif /* ST_GEN_MIPMAP_H */
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 1f1e6500e0..84a9094001 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -33,6 +33,7 @@
#include "main/imports.h"
#include "main/mtypes.h"
+#include "shader/prog_print.h"
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
@@ -68,6 +69,7 @@ st_translate_vertex_program(struct st_context *st,
struct pipe_shader_state vs;
const struct cso_vertex_shader *cso;
GLuint attr, i;
+ GLuint num_generic = 0;
memset(&vs, 0, sizeof(vs));
@@ -117,7 +119,7 @@ st_translate_vertex_program(struct st_context *st,
case VERT_ATTRIB_TEX6:
case VERT_ATTRIB_TEX7:
vs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
- vs.input_semantic_index[slot] = attr - VERT_ATTRIB_TEX0;
+ vs.input_semantic_index[slot] = num_generic++;
break;
case VERT_ATTRIB_GENERIC0:
case VERT_ATTRIB_GENERIC1:
@@ -129,7 +131,7 @@ st_translate_vertex_program(struct st_context *st,
case VERT_ATTRIB_GENERIC7:
assert(attr < VERT_ATTRIB_MAX);
vs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
- vs.input_semantic_index[slot] = attr - VERT_ATTRIB_GENERIC0;
+ vs.input_semantic_index[slot] = num_generic++;
break;
default:
assert(0);
@@ -143,6 +145,7 @@ st_translate_vertex_program(struct st_context *st,
vs.output_semantic_index[i] = 0;
}
+ num_generic = 0;
/*
* Determine number of outputs, the (default) output register
* mapping and the semantic information for each output.
@@ -207,14 +210,14 @@ st_translate_vertex_program(struct st_context *st,
case VERT_RESULT_TEX6:
case VERT_RESULT_TEX7:
vs.output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
- vs.output_semantic_index[slot] = attr - VERT_RESULT_TEX0;
+ vs.output_semantic_index[slot] = num_generic++;
break;
case VERT_RESULT_VAR0:
/* fall-through */
default:
assert(attr - VERT_RESULT_VAR0 < MAX_VARYING);
vs.output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
- vs.output_semantic_index[slot] = attr - VERT_RESULT_VAR0;
+ vs.output_semantic_index[slot] = num_generic++;
}
}
}
@@ -258,6 +261,9 @@ st_translate_vertex_program(struct st_context *st,
cso = st_cached_vs_state(st, &vs);
stvp->cso = cso;
+ if (0)
+ _mesa_print_program(&stvp->Base.Base);
+
if (TGSI_DEBUG)
tgsi_dump( tokensOut, 0 );
}
@@ -286,6 +292,7 @@ st_translate_fragment_program(struct st_context *st,
GLuint attr;
const GLbitfield inputsRead = stfp->Base.Base.InputsRead;
GLuint vslot = 0;
+ GLuint num_generic = 0;
memset(&fs, 0, sizeof(fs));
@@ -338,14 +345,14 @@ st_translate_fragment_program(struct st_context *st,
case FRAG_ATTRIB_TEX6:
case FRAG_ATTRIB_TEX7:
fs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
- fs.input_semantic_index[slot] = attr - FRAG_ATTRIB_TEX0;
+ fs.input_semantic_index[slot] = num_generic++;
interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
break;
case FRAG_ATTRIB_VAR0:
/* fall-through */
default:
fs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC;
- fs.input_semantic_index[slot] = attr - FRAG_ATTRIB_VAR0;
+ fs.input_semantic_index[slot] = num_generic++;
interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE;
}
}
@@ -415,6 +422,9 @@ st_translate_fragment_program(struct st_context *st,
cso = st_cached_fs_state(st, &fs);
stfp->fs = cso;
+ if (0)
+ _mesa_print_program(&stfp->Base.Base);
+
if (TGSI_DEBUG)
tgsi_dump( tokensOut, 0/*TGSI_DUMP_VERBOSE*/ );
diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c
index 15cc458be8..844a9f80d8 100644
--- a/src/mesa/state_tracker/st_texture.c
+++ b/src/mesa/state_tracker/st_texture.c
@@ -59,6 +59,10 @@ target_to_target(GLenum target)
}
#endif
+
+/**
+ * Allocate a new pipe_texture object
+ */
struct pipe_texture *
st_texture_create(struct st_context *st,
enum pipe_texture_target target,
@@ -70,7 +74,7 @@ st_texture_create(struct st_context *st,
GLuint depth0,
GLuint compress_byte)
{
- struct pipe_texture *pt = CALLOC_STRUCT(pipe_texture);
+ struct pipe_texture pt;
assert(target <= PIPE_TEXTURE_CUBE);
@@ -78,39 +82,33 @@ st_texture_create(struct st_context *st,
_mesa_lookup_enum_by_nr(target),
_mesa_lookup_enum_by_nr(format), first_level, last_level);
- if (!pt)
- return NULL;
-
assert(format);
- pt->target = target;
- pt->format = format;
- pt->first_level = first_level;
- pt->last_level = last_level;
- pt->width[0] = width0;
- pt->height[0] = height0;
- pt->depth[0] = depth0;
- pt->compressed = compress_byte ? 1 : 0;
- pt->cpp = pt->compressed ? compress_byte : st_sizeof_format(format);
- pt->refcount = 1;
-
- st->pipe->texture_create(st->pipe, &pt);
-
- return pt;
+ pt.target = target;
+ pt.format = format;
+ pt.first_level = first_level;
+ pt.last_level = last_level;
+ pt.width[0] = width0;
+ pt.height[0] = height0;
+ pt.depth[0] = depth0;
+ pt.compressed = compress_byte ? 1 : 0;
+ pt.cpp = pt.compressed ? compress_byte : st_sizeof_format(format);
+ pt.refcount = 1;
+
+ return st->pipe->texture_create(st->pipe, &pt);
}
-
-
-/* Can the image be pulled into a unified mipmap texture. This mirrors
- * the completeness test in a lot of ways.
+/**
+ * Check if a texture image can be pulled into a unified mipmap texture.
+ * This mirrors the completeness test in a lot of ways.
*
* Not sure whether I want to pass gl_texture_image here.
*/
GLboolean
-st_texture_match_image(struct pipe_texture *pt,
- struct gl_texture_image *image,
- GLuint face, GLuint level)
+st_texture_match_image(const struct pipe_texture *pt,
+ const struct gl_texture_image *image,
+ GLuint face, GLuint level)
{
/* Images with borders are never pulled into mipmap textures.
*/
@@ -189,6 +187,7 @@ st_texture_image_map(struct st_context *st, struct st_texture_image *stImage,
return pipe_surface_map(stImage->surface);
}
+
void
st_texture_image_unmap(struct st_texture_image *stImage)
{
@@ -201,7 +200,8 @@ st_texture_image_unmap(struct st_texture_image *stImage)
-/* Upload data to a rectangular sub-region. Lots of choices how to do this:
+/**
+ * Upload data to a rectangular sub-region. Lots of choices how to do this:
*
* - memcpy by span to current destination
* - upload data as new buffer and blit
@@ -261,13 +261,14 @@ st_texture_image_data(struct pipe_context *pipe,
}
}
+
/* Copy mipmap image between textures
*/
void
st_texture_image_copy(struct pipe_context *pipe,
- struct pipe_texture *dst,
- GLuint face, GLuint level,
- struct pipe_texture *src)
+ struct pipe_texture *dst,
+ GLuint face, GLuint level,
+ struct pipe_texture *src)
{
GLuint width = src->width[level];
GLuint height = src->height[level];
@@ -278,6 +279,7 @@ st_texture_image_copy(struct pipe_context *pipe,
if (dst->compressed)
height /= 4;
+
for (i = 0; i < depth; i++) {
dst_surface = pipe->get_tex_surface(pipe, dst, face, level, i);
src_surface = pipe->get_tex_surface(pipe, src, face, level, i);
@@ -292,5 +294,4 @@ st_texture_image_copy(struct pipe_context *pipe,
pipe_surface_reference(&dst_surface, NULL);
pipe_surface_reference(&src_surface, NULL);
}
-
}
diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h
index d8b1bcad9d..0b87a494c3 100644
--- a/src/mesa/state_tracker/st_texture.h
+++ b/src/mesa/state_tracker/st_texture.h
@@ -47,11 +47,11 @@ st_texture_create(struct st_context *st,
GLuint compress_byte);
-/* Check if an image fits an existing texture
+/* Check if an image fits into an existing texture object.
*/
extern GLboolean
-st_texture_match_image(struct pipe_texture *pt,
- struct gl_texture_image *image,
+st_texture_match_image(const struct pipe_texture *pt,
+ const struct gl_texture_image *image,
GLuint face, GLuint level);
/* Return a pointer to an image within a texture. Return image stride as
@@ -73,7 +73,7 @@ extern const GLuint *
st_texture_depth_offsets(struct pipe_texture *pt, GLuint level);
-/* Return the linear offset of an image relative to the start of its region:
+/* Return the linear offset of an image relative to the start of its region.
*/
extern GLuint
st_texture_image_offset(const struct pipe_texture *pt,
diff --git a/src/mesa/x86/rtasm/x86sse.c b/src/mesa/x86/rtasm/x86sse.c
index 56c211eee0..f8da6e405f 100644
--- a/src/mesa/x86/rtasm/x86sse.c
+++ b/src/mesa/x86/rtasm/x86sse.c
@@ -1137,6 +1137,7 @@ void x86_init_func_size( struct x86_function *p, unsigned code_size )
void x86_release_func( struct x86_function *p )
{
_mesa_exec_free(p->store);
+ p->store = NULL;
}