diff options
146 files changed, 9812 insertions, 1582 deletions
diff --git a/.gitignore b/.gitignore index 033e6e10bd..b5e59dfc3e 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,6 @@ depend depend.bak lib lib64 +.sconsign* +config.py +build diff --git a/SConstruct b/SConstruct new file mode 100644 index 0000000000..22a4072c93 --- /dev/null +++ b/SConstruct @@ -0,0 +1,228 @@ +####################################################################### +# Top-level SConstruct + +import os +import os.path +import sys + + +####################################################################### +# Configuration options +# +# For example, invoke scons as +# +# scons debug=1 dri=0 machine=x86 +# +# to set configuration variables. Or you can write those options to a file +# named config.py: +# +# # config.py +# debug=1 +# dri=0 +# machine='x86' +# +# Invoke +# +# scons -h +# +# to get the full list of options. See scons manpage for more info. +# + +# TODO: auto-detect defaults +opts = Options('config.py') +opts.Add(BoolOption('debug', 'build debug version', False)) +opts.Add(BoolOption('dri', 'build dri drivers', False)) +opts.Add(EnumOption('machine', 'use machine-specific assembly code', 'x86', + allowed_values=('generic', 'x86', 'x86-64'))) + +env = Environment( + options = opts, + ENV = os.environ) +Help(opts.GenerateHelpText(env)) + +# for debugging +#print env.Dump() + +if 0: + # platform will be typically 'posix' or 'win32' + platform = env['PLATFORM'] +else: + # platform will be one of 'linux', 'freebsd', 'win32', 'darwin', etc. + platform = sys.platform + if platform == 'linux2': + platform = 'linux' + +# replicate options values in local variables +debug = env['debug'] +dri = env['dri'] +machine = env['machine'] + +# derived options +x86 = machine == 'x86' +gcc = platform in ('posix', 'linux', 'freebsd', 'darwin') +msvc = platform == 'win32' + +Export([ + 'debug', + 'x86', + 'dri', + 'platform', + 'gcc', + 'msvc', +]) + + +####################################################################### +# Environment setup +# +# TODO: put the compiler specific settings in seperate files +# TODO: auto-detect as much as possible + + +# Optimization flags +if gcc: + if debug: + env.Append(CFLAGS = '-O0 -g3') + env.Append(CXXFLAGS = '-O0 -g3') + else: + env.Append(CFLAGS = '-O3 -g3') + env.Append(CXXFLAGS = '-O3 -g3') + + env.Append(CFLAGS = '-Wall -Wmissing-prototypes -std=c99 -ffast-math -pedantic') + env.Append(CXXFLAGS = '-Wall -pedantic') + + # Be nice to Eclipse + env.Append(CFLAGS = '-fmessage-length=0') + env.Append(CXXFLAGS = '-fmessage-length=0') + + +# Defines +if debug: + env.Append(CPPDEFINES = ['DEBUG']) +else: + env.Append(CPPDEFINES = ['NDEBUG']) + + +# Includes +env.Append(CPPPATH = [ + '#/include', + '#/src/mesa', + '#/src/mesa/main', + '#/src/mesa/pipe', +]) + + +# x86 assembly +if x86: + env.Append(CPPDEFINES = [ + 'USE_X86_ASM', + 'USE_MMX_ASM', + 'USE_3DNOW_ASM', + 'USE_SSE_ASM', + ]) + if gcc: + env.Append(CFLAGS = '-m32') + env.Append(CXXFLAGS = '-m32') + + +# Posix +if platform in ('posix', 'linux', 'freebsd', 'darwin'): + env.Append(CPPDEFINES = [ + '_POSIX_SOURCE', + ('_POSIX_C_SOURCE', '199309L'), + '_SVID_SOURCE', + '_BSD_SOURCE', + '_GNU_SOURCE', + + 'PTHREADS', + 'HAVE_POSIX_MEMALIGN', + ]) + env.Append(CPPPATH = ['/usr/X11R6/include']) + env.Append(LIBPATH = ['/usr/X11R6/lib']) + env.Append(LIBS = [ + 'm', + 'pthread', + 'expat', + 'dl', + ]) + + +# DRI +if dri: + env.ParseConfig('pkg-config --cflags --libs libdrm') + env.Append(CPPDEFINES = [ + ('USE_EXTERNAL_DXTN_LIB', '1'), + 'IN_DRI_DRIVER', + 'GLX_DIRECT_RENDERING', + 'GLX_INDIRECT_RENDERING', + ]) + +# libGL +if 1: + env.Append(LIBS = [ + 'X11', + 'Xext', + 'Xxf86vm', + 'Xdamage', + 'Xfixes', + ]) + +Export('env') + + +####################################################################### +# Convenience Library Builder +# based on the stock StaticLibrary and SharedLibrary builders + +def createConvenienceLibBuilder(env): + """This is a utility function that creates the ConvenienceLibrary + Builder in an Environment if it is not there already. + + If it is already there, we return the existing one. + """ + + try: + convenience_lib = env['BUILDERS']['ConvenienceLibrary'] + except KeyError: + action_list = [ Action("$ARCOM", "$ARCOMSTR") ] + if env.Detect('ranlib'): + ranlib_action = Action("$RANLIBCOM", "$RANLIBCOMSTR") + action_list.append(ranlib_action) + + convenience_lib = Builder(action = action_list, + emitter = '$LIBEMITTER', + prefix = '$LIBPREFIX', + suffix = '$LIBSUFFIX', + src_suffix = '$SHOBJSUFFIX', + src_builder = 'SharedObject') + env['BUILDERS']['ConvenienceLibrary'] = convenience_lib + env['BUILDERS']['Library'] = convenience_lib + + return convenience_lib + +createConvenienceLibBuilder(env) + + +####################################################################### +# Invoke SConscripts + +# Put build output in a separate dir, which depends on the current configuration +# See also http://www.scons.org/wiki/AdvancedBuildExample +build_topdir = 'build' +build_subdir = platform +if dri: + build_subdir += "-dri" +if x86: + build_subdir += "-x86" +if debug: + build_subdir += "-debug" +build_dir = os.path.join(build_topdir, build_subdir) + +# TODO: Build several variants at the same time? +# http://www.scons.org/wiki/SimultaneousVariantBuilds + +SConscript( + 'src/mesa/SConscript', + build_dir = build_dir, + duplicate = 0 # http://www.scons.org/doc/0.97/HTML/scons-user/x2261.html +) diff --git a/configs/linux-cell b/configs/linux-cell index 4f0086cc1f..3d874491e4 100644 --- a/configs/linux-cell +++ b/configs/linux-cell @@ -10,11 +10,13 @@ CC = ppu32-gcc CXX = ppu32-g++ HOST_CC = gcc +OPT_FLAGS = -g + # Cell SDK location SDK = /opt/ibm/cell-sdk/prototype/sysroot/usr -CFLAGS = -g -Wall -Winline -fPIC -m32 -mabi=altivec -maltivec -I. -I$(SDK)/include -DGALLIUM_CELL +CFLAGS = $(OPT_FLAGS) -Wall -Winline -fPIC -m32 -mabi=altivec -maltivec -I. -I$(SDK)/include -DGALLIUM_CELL CXXFLAGS = $(CFLAGS) @@ -34,7 +36,7 @@ GL_LIB_DEPS = $(EXTRA_LIB_PATH) -lX11 -lXext -lm -lpthread \ SPU_CC = spu-gcc -SPU_CFLAGS = -g -W -Wall -Winline -Wmissing-prototypes -Wno-main -I. -I $(SDK)/spu/include -include spu_intrinsics.h -I $(TOP)/src/mesa/ +SPU_CFLAGS = $(OPT_FLAGS) -W -Wall -Winline -Wmissing-prototypes -Wno-main -I. -I $(SDK)/spu/include -include spu_intrinsics.h -I $(TOP)/src/mesa/ SPU_LFLAGS = -L$(SDK)/spu/lib -Wl,-N -lmisc diff --git a/progs/demos/gears.c b/progs/demos/gears.c index ab9bc00742..2a9fefefb5 100644 --- a/progs/demos/gears.c +++ b/progs/demos/gears.c @@ -27,6 +27,9 @@ static GLint T0 = 0; static GLint Frames = 0; static GLint autoexit = 0; static GLint win = 0; +static GLboolean Visible = GL_TRUE; +static GLboolean Animate = GL_TRUE; +static GLfloat viewDist = 40.0; /** @@ -179,6 +182,9 @@ draw(void) glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glPushMatrix(); + + glTranslatef(0.0, 0.0, -viewDist); + glRotatef(view_rotx, 1.0, 0.0, 0.0); glRotatef(view_roty, 0.0, 1.0, 0.0); glRotatef(view_rotz, 0.0, 0.0, 1.0); @@ -240,6 +246,15 @@ idle(void) glutPostRedisplay(); } +static void +update_idle_func(void) +{ + if (Visible && Animate) + glutIdleFunc(idle); + else + glutIdleFunc(NULL); +} + /* change view angle, exit upon ESC */ /* ARGSUSED1 */ static void @@ -252,6 +267,16 @@ key(unsigned char k, int x, int y) case 'Z': view_rotz -= 5.0; break; + case 'd': + viewDist += 1.0; + break; + case 'D': + viewDist -= 1.0; + break; + case 'a': + Animate = !Animate; + update_idle_func(); + break; case 27: /* Escape */ cleanup(); exit(0); @@ -295,10 +320,8 @@ reshape(int width, int height) glViewport(0, 0, (GLint) width, (GLint) height); glMatrixMode(GL_PROJECTION); glLoadIdentity(); - glFrustum(-1.0, 1.0, -h, h, 5.0, 60.0); + glFrustum(-1.0, 1.0, -h, h, 5.0, 200.0); glMatrixMode(GL_MODELVIEW); - glLoadIdentity(); - glTranslatef(0.0, 0.0, -40.0); } static void @@ -351,13 +374,12 @@ init(int argc, char *argv[]) } } + static void visible(int vis) { - if (vis == GLUT_VISIBLE) - glutIdleFunc(idle); - else - glutIdleFunc(NULL); + Visible = vis; + update_idle_func(); } int main(int argc, char *argv[]) @@ -375,6 +397,7 @@ int main(int argc, char *argv[]) glutKeyboardFunc(key); glutSpecialFunc(special); glutVisibilityFunc(visible); + update_idle_func(); glutMainLoop(); return 0; /* ANSI C requires main to return int. */ diff --git a/src/mesa/Makefile b/src/mesa/Makefile index b16d74bf49..720f1b2e02 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -125,24 +125,25 @@ osmesa-only: depend subdirs $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME) # Make the GL library $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(STAND_ALONE_OBJECTS) $(PIPE_LIB) $(CELL_LIB) $(CELL_LIB_SPU) $(LLVM_LIB) @ $(TOP)/bin/mklib -o $(GL_LIB) \ - -linker $(CC) \ + -linker "$(CC)" \ -major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \ -install $(TOP)/$(LIB_DIR) \ $(MKLIB_OPTIONS) $(STAND_ALONE_OBJECTS) \ $(PIPE_LIB) $(CELL_LIB) $(CELL_LIB_SPU) $(LLVM_LIB) $(GL_LIB_DEPS) # Make the OSMesa library -$(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME): $(OSMESA_DRIVER_OBJECTS) $(OSMESA16_OBJECTS) +$(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME): $(OSMESA_DRIVER_OBJECTS) \ + $(OSMESA16_OBJECTS) $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) @ if [ "${DRIVER_DIRS}" = "osmesa" ] ; then \ $(TOP)/bin/mklib -o $(OSMESA_LIB) \ - -linker $(CC) \ + -linker "$(CC)" \ -major $(MESA_MAJOR) \ -minor $(MESA_MINOR) -patch $(MESA_TINY) \ -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ $(OSMESA_LIB_DEPS) $(OSMESA16_OBJECTS) ; \ else \ $(TOP)/bin/mklib -o $(OSMESA_LIB) \ - -linker $(CC) \ + -linker "$(CC)" \ -major $(MESA_MAJOR) \ -minor $(MESA_MINOR) -patch $(GL_TINY) \ -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ diff --git a/src/mesa/SConscript b/src/mesa/SConscript new file mode 100644 index 0000000000..faf8c84872 --- /dev/null +++ b/src/mesa/SConscript @@ -0,0 +1,436 @@ +####################################################################### +# SConscript for mesa +# +# TODO: Split this into per-module SConscripts + + +Import('*') + + +####################################################################### +# Core sources + +MAIN_SOURCES = [ + 'main/api_arrayelt.c', + 'main/api_loopback.c', + 'main/api_noop.c', + 'main/api_validate.c', + 'main/accum.c', + 'main/attrib.c', + 'main/arrayobj.c', + 'main/blend.c', + 'main/bufferobj.c', + 'main/buffers.c', + 'main/clip.c', + 'main/colortab.c', + 'main/context.c', + 'main/convolve.c', + 'main/debug.c', + 'main/depth.c', + 'main/depthstencil.c', + 'main/dlist.c', + 'main/drawpix.c', + 'main/enable.c', + 'main/enums.c', + 'main/eval.c', + 'main/execmem.c', + 'main/extensions.c', + 'main/fbobject.c', + 'main/feedback.c', + 'main/ffvertex_prog.c', + 'main/fog.c', + 'main/framebuffer.c', + 'main/get.c', + 'main/getstring.c', + 'main/hash.c', + 'main/hint.c', + 'main/histogram.c', + 'main/image.c', + 'main/imports.c', + 'main/light.c', + 'main/lines.c', + 'main/matrix.c', + 'main/mipmap.c', + 'main/mm.c', + 'main/pixel.c', + 'main/points.c', + 'main/polygon.c', + 'main/queryobj.c', + 'main/rastpos.c', + 'main/rbadaptors.c', + 'main/renderbuffer.c', + 'main/shaders.c', + 'main/state.c', + 'main/stencil.c', + 'main/texcompress.c', + 'main/texcompress_s3tc.c', + 'main/texcompress_fxt1.c', + 'main/texenvprogram.c', + 'main/texformat.c', + 'main/teximage.c', + 'main/texobj.c', + 'main/texrender.c', + 'main/texstate.c', + 'main/texstore.c', + 'main/varray.c', + 'main/vtxfmt.c', +] + +GLAPI_SOURCES = [ + 'main/dispatch.c', + 'glapi/glapi.c', + 'glapi/glthread.c', +] + +MATH_SOURCES = [ + 'math/m_debug_clip.c', + 'math/m_debug_norm.c', + 'math/m_debug_xform.c', + 'math/m_eval.c', + 'math/m_matrix.c', + 'math/m_translate.c', + 'math/m_vector.c', + 'math/m_xform.c', +] + +VBO_SOURCES = [ + 'vbo/vbo_context.c', + 'vbo/vbo_exec.c', + 'vbo/vbo_exec_api.c', + 'vbo/vbo_exec_array.c', + 'vbo/vbo_exec_draw.c', + 'vbo/vbo_exec_eval.c', + 'vbo/vbo_rebase.c', + 'vbo/vbo_split.c', + 'vbo/vbo_split_copy.c', + 'vbo/vbo_split_inplace.c', + 'vbo/vbo_save.c', + 'vbo/vbo_save_api.c', + 'vbo/vbo_save_draw.c', + 'vbo/vbo_save_loopback.c', +] + +VF_SOURCES = [ + 'vf/vf.c', + 'vf/vf_generic.c', + 'vf/vf_sse.c', +] + +DRAW_SOURCES = [ + 'pipe/draw/draw_clip.c', + 'pipe/draw/draw_context.c', + 'pipe/draw/draw_cull.c', + 'pipe/draw/draw_debug.c', + 'pipe/draw/draw_flatshade.c', + 'pipe/draw/draw_offset.c', + 'pipe/draw/draw_prim.c', + 'pipe/draw/draw_stipple.c', + 'pipe/draw/draw_twoside.c', + 'pipe/draw/draw_unfilled.c', + 'pipe/draw/draw_validate.c', + 'pipe/draw/draw_vbuf.c', + 'pipe/draw/draw_vertex.c', + 'pipe/draw/draw_vertex_cache.c', + 'pipe/draw/draw_vertex_fetch.c', + 'pipe/draw/draw_vertex_shader.c', + 'pipe/draw/draw_vertex_shader_llvm.c', + 'pipe/draw/draw_vf.c', + 'pipe/draw/draw_vf_generic.c', + 'pipe/draw/draw_vf_sse.c', + 'pipe/draw/draw_wide_prims.c', +] + +TGSIEXEC_SOURCES = [ + 'pipe/tgsi/exec/tgsi_exec.c', + 'pipe/tgsi/exec/tgsi_sse2.c', +] + +TGSIUTIL_SOURCES = [ + 'pipe/tgsi/util/tgsi_build.c', + 'pipe/tgsi/util/tgsi_dump.c', + 'pipe/tgsi/util/tgsi_parse.c', + 'pipe/tgsi/util/tgsi_util.c', +] + +STATECACHE_SOURCES = [ + 'pipe/cso_cache/cso_hash.c', + 'pipe/cso_cache/cso_cache.c', +] + +PIPEUTIL_SOURCES = [ + 'pipe/util/p_debug.c', + 'pipe/util/p_tile.c', + 'pipe/util/p_util.c', +] + +STATETRACKER_SOURCES = [ + 'state_tracker/st_atom.c', + 'state_tracker/st_atom_blend.c', + 'state_tracker/st_atom_clip.c', + 'state_tracker/st_atom_constbuf.c', + 'state_tracker/st_atom_depth.c', + 'state_tracker/st_atom_framebuffer.c', + 'state_tracker/st_atom_pixeltransfer.c', + 'state_tracker/st_atom_sampler.c', + 'state_tracker/st_atom_scissor.c', + 'state_tracker/st_atom_shader.c', + 'state_tracker/st_atom_rasterizer.c', + 'state_tracker/st_atom_stipple.c', + 'state_tracker/st_atom_texture.c', + 'state_tracker/st_atom_viewport.c', + 'state_tracker/st_cb_accum.c', + 'state_tracker/st_cb_bufferobjects.c', + 'state_tracker/st_cb_clear.c', + 'state_tracker/st_cb_flush.c', + 'state_tracker/st_cb_drawpixels.c', + 'state_tracker/st_cb_fbo.c', + 'state_tracker/st_cb_feedback.c', + 'state_tracker/st_cb_program.c', + 'state_tracker/st_cb_queryobj.c', + 'state_tracker/st_cb_rasterpos.c', + 'state_tracker/st_cb_readpixels.c', + 'state_tracker/st_cb_strings.c', + 'state_tracker/st_cb_texture.c', + 'state_tracker/st_cache.c', + 'state_tracker/st_context.c', + 'state_tracker/st_debug.c', + 'state_tracker/st_draw.c', + 'state_tracker/st_extensions.c', + 'state_tracker/st_format.c', + 'state_tracker/st_framebuffer.c', + 'state_tracker/st_mesa_to_tgsi.c', + 'state_tracker/st_program.c', + 'state_tracker/st_texture.c', +] + +SHADER_SOURCES = [ + 'shader/arbprogparse.c', + 'shader/arbprogram.c', + 'shader/atifragshader.c', + 'shader/grammar/grammar_mesa.c', + 'shader/nvfragparse.c', + 'shader/nvprogram.c', + 'shader/nvvertparse.c', + 'shader/program.c', + 'shader/prog_cache.c', + 'shader/prog_debug.c', + 'shader/prog_execute.c', + 'shader/prog_instruction.c', + 'shader/prog_parameter.c', + 'shader/prog_print.c', + 'shader/prog_statevars.c', + 'shader/programopt.c', + 'shader/shader_api.c', +] + +SLANG_SOURCES = [ + 'shader/slang/slang_builtin.c', + 'shader/slang/slang_codegen.c', + 'shader/slang/slang_compile.c', + 'shader/slang/slang_compile_function.c', + 'shader/slang/slang_compile_operation.c', + 'shader/slang/slang_compile_struct.c', + 'shader/slang/slang_compile_variable.c', + 'shader/slang/slang_emit.c', + 'shader/slang/slang_ir.c', + 'shader/slang/slang_label.c', + 'shader/slang/slang_library_noise.c', + 'shader/slang/slang_link.c', + 'shader/slang/slang_log.c', + 'shader/slang/slang_mem.c', + 'shader/slang/slang_preprocess.c', + 'shader/slang/slang_print.c', + 'shader/slang/slang_simplify.c', + 'shader/slang/slang_storage.c', + 'shader/slang/slang_typeinfo.c', + 'shader/slang/slang_vartable.c', + 'shader/slang/slang_utility.c', +] + + +####################################################################### +# Assembly sources + +ASM_C_SOURCES = [ + 'x86/common_x86.c', + 'x86/x86.c', + 'x86/3dnow.c', + 'x86/sse.c', + 'x86/rtasm/x86sse.c', + 'sparc/sparc.c', + 'ppc/common_ppc.c', + 'x86-64/x86-64.c', +] + +X86_SOURCES = [ + 'x86/common_x86_asm.S', + 'x86/x86_xform2.S', + 'x86/x86_xform3.S', + 'x86/x86_xform4.S', + 'x86/x86_cliptest.S', + 'x86/mmx_blend.S', + 'x86/3dnow_xform1.S', + 'x86/3dnow_xform2.S', + 'x86/3dnow_xform3.S', + 'x86/3dnow_xform4.S', + 'x86/3dnow_normal.S', + 'x86/sse_xform1.S', + 'x86/sse_xform2.S', + 'x86/sse_xform3.S', + 'x86/sse_xform4.S', + 'x86/sse_normal.S', + 'x86/read_rgba_span_x86.S', +] + +X86_API = [ + 'x86/glapi_x86.S', +] + +X86_64_SOURCES = [ + 'x86-64/xform4.S', +] + +X86_64_API = [ + 'x86-64/glapi_x86-64.S', +] + +SPARC_SOURCES = [ + 'sparc/clip.S', + 'sparc/norm.S', + 'sparc/xform.S', +] + +SPARC_API = [ + 'sparc/glapi_sparc.S', +] + +if x86: + ASM_SOURCES = ASM_C_SOURCES + X86_SOURCES + API_SOURCES = X86_API +else: + ASM_SOURCES = [] + API_SOURCES = [] + + +####################################################################### +# Driver sources + + +X11_DRIVER_SOURCES = [ + 'pipe/xlib/glxapi.c', + 'pipe/xlib/fakeglx.c', + 'pipe/xlib/xfonts.c', + 'pipe/xlib/xm_api.c', + 'pipe/xlib/xm_winsys.c', + 'pipe/xlib/xm_winsys_aub.c', + 'pipe/xlib/brw_aub.c', +] + +OSMESA_DRIVER_SOURCES = [ + 'drivers/osmesa/osmesa.c', +] + +GLIDE_DRIVER_SOURCES = [ + 'drivers/glide/fxapi.c', + 'drivers/glide/fxdd.c', + 'drivers/glide/fxddspan.c', + 'drivers/glide/fxddtex.c', + 'drivers/glide/fxsetup.c', + 'drivers/glide/fxtexman.c', + 'drivers/glide/fxtris.c', + 'drivers/glide/fxvb.c', + 'drivers/glide/fxglidew.c', + 'drivers/glide/fxg.c', +] + +SVGA_DRIVER_SOURCES = [ + 'drivers/svga/svgamesa.c', + 'drivers/svga/svgamesa8.c', + 'drivers/svga/svgamesa15.c', + 'drivers/svga/svgamesa16.c', + 'drivers/svga/svgamesa24.c', + 'drivers/svga/svgamesa32.c', +] + +FBDEV_DRIVER_SOURCES = [ + 'drivers/fbdev/glfbdev.c', +] + + +### All the core C sources + +SOLO_SOURCES = \ + MAIN_SOURCES + \ + MATH_SOURCES + \ + VBO_SOURCES + \ + VF_SOURCES + \ + DRAW_SOURCES + \ + TGSIEXEC_SOURCES + \ + TGSIUTIL_SOURCES + \ + PIPEUTIL_SOURCES + \ + STATECACHE_SOURCES + \ + STATETRACKER_SOURCES + \ + SHADER_SOURCES + \ + ASM_SOURCES + \ + SLANG_SOURCES + +CORE_SOURCES = \ + GLAPI_SOURCES + API_SOURCES + \ + SOLO_SOURCES + +ALL_SOURCES = \ + GLAPI_SOURCES + API_SOURCES + \ + SOLO_SOURCES + \ + ASM_SOURCES + \ + X11_DRIVER_SOURCES + \ + FBDEV_DRIVER_SOURCES + \ + OSMESA_DRIVER_SOURCES + + +###################################################################### +# Gallium sources + +SConscript([ + 'pipe/SConscript', +]) + + +###################################################################### +# libGL + +if not dri: + STAND_ALONE_DRIVER_SOURCES = \ + CORE_SOURCES + \ + X11_DRIVER_SOURCES + + Import( + 'softpipe', + 'i915simple', + 'i965simple' + ) + + pipe_drivers = [ + softpipe, + i965simple + ] + + env.SharedLibrary( + target ='GL', + source = STAND_ALONE_DRIVER_SOURCES, + LIBS = [softpipe, i965simple] + env['LIBS'], + ) + + +###################################################################### +# Driver sources + +if dri: + mesa = env.ConvenienceLibrary( + target = 'mesa', + source = SOLO_SOURCES, + ) + env.Prepend(LIBS = [mesa]) + + SConscript([ + 'drivers/dri/SConscript', + ]) diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index 33caf7dae1..b5b383b4e4 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -28,6 +28,7 @@ #include "buffers.h" #include "context.h" #include "framebuffer.h" +#include "mipmap.h" #include "program.h" #include "prog_execute.h" #include "queryobj.h" @@ -99,6 +100,7 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->CopyTexSubImage1D = _swrast_copy_texsubimage1d; driver->CopyTexSubImage2D = _swrast_copy_texsubimage2d; driver->CopyTexSubImage3D = _swrast_copy_texsubimage3d; + driver->GenerateMipmap = _mesa_generate_mipmap; driver->TestProxyTexImage = _mesa_test_proxy_teximage; driver->CompressedTexImage1D = _mesa_store_compressed_teximage1d; driver->CompressedTexImage2D = _mesa_store_compressed_teximage2d; diff --git a/src/mesa/drivers/dri/SConscript b/src/mesa/drivers/dri/SConscript new file mode 100644 index 0000000000..d32bd08669 --- /dev/null +++ b/src/mesa/drivers/dri/SConscript @@ -0,0 +1,48 @@ +Import('*') + +drienv = env.Clone() + +drienv.Replace(CPPPATH = [ + '#src/mesa/drivers/dri/common', + '#include', + '#include/GL/internal', + '#src/mesa', + '#src/mesa/main', + '#src/mesa/glapi', + '#src/mesa/math', + '#src/mesa/transform', + '#src/mesa/shader', + '#src/mesa/swrast', + '#src/mesa/swrast_setup', + '#src/egl/main', + '#src/egl/drivers/dri', +]) + +drienv.ParseConfig('pkg-config --cflags --libs libdrm') + +COMMON_GALLIUM_SOURCES = [ + '../common/utils.c', + '../common/vblank.c', + '../common/dri_util.c', + '../common/xmlconfig.c', +] + +COMMON_BM_SOURCES = [ + '../common/dri_bufmgr.c', + '../common/dri_drmpool.c', +] + +Export([ + 'drienv', + 'COMMON_GALLIUM_SOURCES', + 'COMMON_BM_SOURCES', +]) + +# TODO: Installation +#install: $(LIBNAME) +# $(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR) +# $(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR) + +SConscript([ + 'intel_winsys/SConscript', +]) diff --git a/src/mesa/drivers/dri/intel_winsys/SConscript b/src/mesa/drivers/dri/intel_winsys/SConscript new file mode 100644 index 0000000000..a7cc10450e --- /dev/null +++ b/src/mesa/drivers/dri/intel_winsys/SConscript @@ -0,0 +1,41 @@ +Import('*') + +env = drienv.Clone() + +env.Append(CPPPATH = [ + '../intel', + 'server' +]) + +#MINIGLX_SOURCES = server/intel_dri.c + +pipe_drivers = [ + softpipe, + i915simple +] + +DRIVER_SOURCES = [ + 'intel_winsys_pipe.c', + 'intel_winsys_softpipe.c', + 'intel_winsys_i915.c', + 'intel_batchbuffer.c', + 'intel_swapbuffers.c', + 'intel_context.c', + 'intel_lock.c', + 'intel_screen.c', + 'intel_batchpool.c', +] + +sources = \ + COMMON_GALLIUM_SOURCES + \ + COMMON_BM_SOURCES + \ + DRIVER_SOURCES + +# DRIVER_DEFINES = -I../intel $(shell pkg-config libdrm --atleast-version=2.3.1 \ +# && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") + +env.SharedLibrary( + target ='i915tex_dri.so', + source = sources, + LIBS = pipe_drivers + env['LIBS'], +)
\ No newline at end of file diff --git a/src/mesa/drivers/dri/intel_winsys/intel_winsys_pipe.c b/src/mesa/drivers/dri/intel_winsys/intel_winsys_pipe.c index 910c0d2cc5..789a386500 100644 --- a/src/mesa/drivers/dri/intel_winsys/intel_winsys_pipe.c +++ b/src/mesa/drivers/dri/intel_winsys/intel_winsys_pipe.c @@ -224,11 +224,6 @@ intel_i915_surface_alloc_storage(struct pipe_winsys *winsys, if(!surf->buffer) return -1; - if(ret) { - pipe_buffer_reference(winsys, &surf->buffer, NULL); - return ret; - } - return 0; } diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 3bec3bd433..37ef2a865b 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ -333,6 +333,12 @@ struct dd_function_table { GLsizei width, GLsizei height ); /** + * Called by glGenerateMipmap() or when GL_GENERATE_MIPMAP_SGIS is enabled. + */ + void (*GenerateMipmap)(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj); + + /** * Called by glTexImage[123]D when user specifies a proxy texture * target. * diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 963e35d678..6a8cba4d8a 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -1560,7 +1560,7 @@ _mesa_GenerateMipmapEXT(GLenum target) /* XXX this might not handle cube maps correctly */ _mesa_lock_texture(ctx, texObj); - _mesa_generate_mipmap(ctx, target, texUnit, texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); _mesa_unlock_texture(ctx, texObj); } diff --git a/src/mesa/main/mipmap.c b/src/mesa/main/mipmap.c index 9f3db22b75..1e61829e8f 100644 --- a/src/mesa/main/mipmap.c +++ b/src/mesa/main/mipmap.c @@ -933,7 +933,6 @@ make_2d_stack_mipmap(const struct gl_texture_format *format, GLint border, */ void _mesa_generate_mipmap(GLcontext *ctx, GLenum target, - const struct gl_texture_unit *texUnit, struct gl_texture_object *texObj) { const struct gl_texture_image *srcImage; diff --git a/src/mesa/main/mipmap.h b/src/mesa/main/mipmap.h index df78603283..46e16902c8 100644 --- a/src/mesa/main/mipmap.h +++ b/src/mesa/main/mipmap.h @@ -30,7 +30,6 @@ extern void _mesa_generate_mipmap(GLcontext *ctx, GLenum target, - const struct gl_texture_unit *texUnit, struct gl_texture_object *texObj); diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 30be65525e..a6a18910fc 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -2917,9 +2917,7 @@ _mesa_store_teximage1d(GLcontext *ctx, GLenum target, GLint level, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, packing); @@ -3003,9 +3001,7 @@ _mesa_store_teximage2d(GLcontext *ctx, GLenum target, GLint level, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, packing); @@ -3079,9 +3075,7 @@ _mesa_store_teximage3d(GLcontext *ctx, GLenum target, GLint level, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, packing); @@ -3127,9 +3121,7 @@ _mesa_store_texsubimage1d(GLcontext *ctx, GLenum target, GLint level, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, packing); @@ -3182,9 +3174,7 @@ _mesa_store_texsubimage2d(GLcontext *ctx, GLenum target, GLint level, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, packing); @@ -3237,9 +3227,7 @@ _mesa_store_texsubimage3d(GLcontext *ctx, GLenum target, GLint level, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, packing); @@ -3313,9 +3301,7 @@ _mesa_store_compressed_teximage2d(GLcontext *ctx, GLenum target, GLint level, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, &ctx->Unpack); @@ -3425,9 +3411,7 @@ _mesa_store_compressed_texsubimage2d(GLcontext *ctx, GLenum target, /* GL_SGIS_generate_mipmap */ if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - _mesa_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } _mesa_unmap_teximage_pbo(ctx, &ctx->Unpack); diff --git a/src/mesa/pipe/README.portability b/src/mesa/pipe/README.portability new file mode 100644 index 0000000000..c70ca774da --- /dev/null +++ b/src/mesa/pipe/README.portability @@ -0,0 +1,43 @@ + CROSS-PLATFORM PORTABILITY GUIDELINES FOR GALLIUM3D + + += General Considerations = + +The state tracker and winsys driver support a rather limited number of +platforms. However, the pipe drivers are meant to run in a wide number of +platforms. Hence the pipe drivers, the auxiliary modules, and all public +headers in general, should stricly follow these guidelines to ensure + + += Compiler Support = + +* Include the p_compiler.h. + +* Don't use the 'inline' keyword, use the INLINE macro in p_compiler.h instead. + +* Cast explicitly when converting to integer types of smaller sizes. + +* Cast explicitly when converting between float, double and integral types. + +* Don't use named struct initializers. + +* Don't use variable number of macro arguments. Use static inline functions +instead. + + += Standard Library = + +* Avoid including standard library headers. Most standard library functions are +not available in Windows Kernel Mode. Use the appropriate p_*.h include. + +== Memory Allocation == + +* Use MALLOC, CALLOC, FREE instead of the malloc, calloc, free functions. + +* Use align_pointer() function defined in p_util.h for aligning pointers in a +portable way. + +== Debugging == + +TODO + diff --git a/src/mesa/pipe/SConscript b/src/mesa/pipe/SConscript new file mode 100644 index 0000000000..d9c20e0100 --- /dev/null +++ b/src/mesa/pipe/SConscript @@ -0,0 +1,9 @@ +Import('*') + +#env = env.Clone() + +SConscript([ + 'softpipe/SConscript', + 'i915simple/SConscript', + 'i965simple/SConscript', +]) diff --git a/src/mesa/pipe/cell/common.h b/src/mesa/pipe/cell/common.h index 0b63ed39be..4de514c358 100644 --- a/src/mesa/pipe/cell/common.h +++ b/src/mesa/pipe/cell/common.h @@ -51,16 +51,21 @@ /** for sanity checking */ #define ASSERT_ALIGN16(ptr) \ - assert((((unsigned long) (ptr)) & 0xf) == 0); + ASSERT((((unsigned long) (ptr)) & 0xf) == 0); /** round up value to next multiple of 4 */ #define ROUNDUP4(k) (((k) + 0x3) & ~0x3) +/** round up value to next multiple of 8 */ +#define ROUNDUP8(k) (((k) + 0x7) & ~0x7) + /** round up value to next multiple of 16 */ #define ROUNDUP16(k) (((k) + 0xf) & ~0xf) +#define CELL_MAX_SPUS 6 + #define TILE_SIZE 32 @@ -68,21 +73,27 @@ * The low byte of a mailbox word contains the command opcode. * Remaining higher bytes are command specific. */ -#define CELL_CMD_OPCODE_MASK 0xf +#define CELL_CMD_OPCODE_MASK 0xff #define CELL_CMD_EXIT 1 #define CELL_CMD_CLEAR_SURFACE 2 #define CELL_CMD_FINISH 3 #define CELL_CMD_RENDER 4 #define CELL_CMD_BATCH 5 +#define CELL_CMD_RELEASE_VERTS 6 #define CELL_CMD_STATE_FRAMEBUFFER 10 #define CELL_CMD_STATE_DEPTH_STENCIL 11 #define CELL_CMD_STATE_SAMPLER 12 -#define CELL_CMD_STATE_VERTEX_INFO 13 +#define CELL_CMD_STATE_TEXTURE 13 +#define CELL_CMD_STATE_VERTEX_INFO 14 +#define CELL_CMD_STATE_VIEWPORT 15 +#define CELL_CMD_STATE_VS_ARRAY_INFO 16 +#define CELL_CMD_STATE_BLEND 17 +#define CELL_CMD_VS_EXECUTE 18 -#define CELL_NUM_BATCH_BUFFERS 3 -#define CELL_BATCH_BUFFER_SIZE 1024 /**< 16KB would be the max */ +#define CELL_NUM_BUFFERS 4 +#define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */ #define CELL_BUFFER_STATUS_FREE 10 #define CELL_BUFFER_STATUS_USED 20 @@ -94,11 +105,11 @@ */ struct cell_command_framebuffer { - uint opcode; + uint64_t opcode; /**< CELL_CMD_FRAMEBUFFER */ int width, height; void *color_start, *depth_start; enum pipe_format color_format, depth_format; -} ALIGN16_ATTRIB; +}; /** @@ -106,38 +117,90 @@ struct cell_command_framebuffer */ struct cell_command_clear_surface { - uint opcode; + uint64_t opcode; /**< CELL_CMD_CLEAR_SURFACE */ uint surface; /**< Temporary: 0=color, 1=Z */ uint value; +}; + + +/** + * Array info used by the vertex shader's vertex puller. + */ +struct cell_array_info +{ + uint64_t base; /**< Base address of the 0th element. */ + uint attr; /**< Attribute that this state is for. */ + uint pitch; /**< Byte pitch from one entry to the next. */ + uint format; /**< Pipe format of each entry. */ +} ALIGN16_ATTRIB; + + +struct cell_shader_info +{ + unsigned num_outputs; + + uint64_t declarations; + unsigned num_declarations; + uint64_t instructions; + unsigned num_instructions; + uint64_t uniforms; + uint64_t immediates; + unsigned num_immediates; } ALIGN16_ATTRIB; -#define CELL_MAX_VBUF_SIZE (16 * 1024) -#define CELL_MAX_VBUF_INDEXES 1024 +#define SPU_VERTS_PER_BATCH 64 +struct cell_command_vs +{ + uint64_t opcode; /**< CELL_CMD_VS_EXECUTE */ + struct cell_shader_info shader; + unsigned num_elts; + unsigned elts[SPU_VERTS_PER_BATCH]; + uint64_t vOut[SPU_VERTS_PER_BATCH]; + float plane[12][4]; + unsigned nr_planes; + unsigned nr_attrs; +} ALIGN16_ATTRIB; struct cell_command_render { - uint opcode; /**< CELL_CMD_RENDER */ + uint64_t opcode; /**< CELL_CMD_RENDER */ uint prim_type; /**< PIPE_PRIM_x */ uint num_verts; uint vertex_size; /**< bytes per vertex */ - uint dummy; /* XXX this dummy field works around a compiler bug */ uint num_indexes; - const void *vertex_data; - const ushort *index_data; - float xmin, ymin, xmax, ymax; - boolean inline_indexes; + uint vertex_buf; /**< which cell->buffer[] contains the vertex data */ + float xmin, ymin, xmax, ymax; /* XXX another dummy field */ + uint min_index; boolean inline_verts; -} ALIGN16_ATTRIB; +}; + + +struct cell_command_release_verts +{ + uint64_t opcode; /**< CELL_CMD_RELEASE_VERTS */ + uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ +}; + + +struct cell_command_texture +{ + void *start; /**< Address in main memory */ + uint width, height; +}; /** XXX unions don't seem to work */ +/* XXX this should go away; all commands should be placed in batch buffers */ struct cell_command { +#if 0 struct cell_command_framebuffer fb; struct cell_command_clear_surface clear; struct cell_command_render render; +#endif + struct cell_command_vs vs; } ALIGN16_ATTRIB; @@ -147,7 +210,9 @@ struct cell_init_info unsigned id; unsigned num_spus; struct cell_command *cmd; - ubyte *batch_buffers[CELL_NUM_BATCH_BUFFERS]; + + /** Buffers for command batches, vertex/index data */ + ubyte *buffers[CELL_NUM_BUFFERS]; uint *buffer_status; /**< points at cell_context->buffer_status */ } ALIGN16_ATTRIB; diff --git a/src/mesa/pipe/cell/ppu/Makefile b/src/mesa/pipe/cell/ppu/Makefile index e7f2562da7..50060f5cd3 100644 --- a/src/mesa/pipe/cell/ppu/Makefile +++ b/src/mesa/pipe/cell/ppu/Makefile @@ -34,6 +34,7 @@ SOURCES = \ cell_surface.c \ cell_texture.c \ cell_vbuf.c \ + cell_vertex_shader.c \ cell_winsys.c diff --git a/src/mesa/pipe/cell/ppu/cell_batch.c b/src/mesa/pipe/cell/ppu/cell_batch.c index c894ef8608..f45e5f25b6 100644 --- a/src/mesa/pipe/cell/ppu/cell_batch.c +++ b/src/mesa/pipe/cell/ppu/cell_batch.c @@ -31,12 +31,55 @@ #include "cell_spu.h" + +uint +cell_get_empty_buffer(struct cell_context *cell) +{ + uint buf = 0, tries = 0; + + /* Find a buffer that's marked as free by all SPUs */ + while (1) { + uint spu, num_free = 0; + + for (spu = 0; spu < cell->num_spus; spu++) { + if (cell->buffer_status[spu][buf][0] == CELL_BUFFER_STATUS_FREE) { + num_free++; + + if (num_free == cell->num_spus) { + /* found a free buffer, now mark status as used */ + for (spu = 0; spu < cell->num_spus; spu++) { + cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; + } + /* + printf("PPU: ALLOC BUFFER %u\n", buf); + */ + return buf; + } + } + else { + break; + } + } + + /* try next buf */ + buf = (buf + 1) % CELL_NUM_BUFFERS; + + tries++; + if (tries == 100) { + /* + printf("PPU WAITING for buffer...\n"); + */ + } + } +} + + void cell_batch_flush(struct cell_context *cell) { static boolean flushing = FALSE; uint batch = cell->cur_batch; - const uint size = cell->batch_buffer_size[batch]; + const uint size = cell->buffer_size[batch]; uint spu, cmd_word; assert(!flushing); @@ -46,7 +89,7 @@ cell_batch_flush(struct cell_context *cell) flushing = TRUE; - assert(batch < CELL_NUM_BATCH_BUFFERS); + assert(batch < CELL_NUM_BUFFERS); /* printf("cell_batch_dispatch: buf %u at %p, size %u\n", @@ -68,28 +111,9 @@ cell_batch_flush(struct cell_context *cell) * array indicating that the PPU can re-use the buffer. */ + batch = cell_get_empty_buffer(cell); - /* Find a buffer that's marked as free by all SPUs */ - while (1) { - uint num_free = 0; - - batch = (batch + 1) % CELL_NUM_BATCH_BUFFERS; - - for (spu = 0; spu < cell->num_spus; spu++) { - if (cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_FREE) - num_free++; - } - - if (num_free == cell->num_spus) { - /* found a free buffer, now mark status as used */ - for (spu = 0; spu < cell->num_spus; spu++) { - cell->buffer_status[spu][batch][0] = CELL_BUFFER_STATUS_USED; - } - break; - } - } - - cell->batch_buffer_size[batch] = 0; /* empty */ + cell->buffer_size[batch] = 0; /* empty */ cell->cur_batch = batch; flushing = FALSE; @@ -99,61 +123,95 @@ cell_batch_flush(struct cell_context *cell) uint cell_batch_free_space(const struct cell_context *cell) { - uint free = CELL_BATCH_BUFFER_SIZE - - cell->batch_buffer_size[cell->cur_batch]; + uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch]; return free; } /** - * \param cmd command to append - * \param length command size in bytes + * Append data to current batch. */ void -cell_batch_append(struct cell_context *cell, const void *cmd, uint length) +cell_batch_append(struct cell_context *cell, const void *data, uint bytes) { uint size; - assert(length % 4 == 0); - assert(cell->cur_batch >= 0); + ASSERT(bytes % 8 == 0); + ASSERT(bytes <= CELL_BUFFER_SIZE); + ASSERT(cell->cur_batch >= 0); + +#ifdef ASSERT + { + uint spu; + for (spu = 0; spu < cell->num_spus; spu++) { + ASSERT(cell->buffer_status[spu][cell->cur_batch][0] + == CELL_BUFFER_STATUS_USED); + } + } +#endif - size = cell->batch_buffer_size[cell->cur_batch]; + size = cell->buffer_size[cell->cur_batch]; - if (size + length > CELL_BATCH_BUFFER_SIZE) { + if (size + bytes > CELL_BUFFER_SIZE) { cell_batch_flush(cell); size = 0; } - assert(size + length <= CELL_BATCH_BUFFER_SIZE); + ASSERT(size + bytes <= CELL_BUFFER_SIZE); - memcpy(cell->batch_buffer[cell->cur_batch] + size, cmd, length); + memcpy(cell->buffer[cell->cur_batch] + size, data, bytes); - cell->batch_buffer_size[cell->cur_batch] = size + length; + cell->buffer_size[cell->cur_batch] = size + bytes; } void * cell_batch_alloc(struct cell_context *cell, uint bytes) { + return cell_batch_alloc_aligned(cell, bytes, 1); +} + + +void * +cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, + uint alignment) +{ void *pos; - uint size; + uint size, padbytes; - ASSERT(bytes % 4 == 0); + ASSERT(bytes % 8 == 0); + ASSERT(bytes <= CELL_BUFFER_SIZE); + ASSERT(alignment > 0); + ASSERT(cell->cur_batch >= 0); - assert(cell->cur_batch >= 0); +#ifdef ASSERT + { + uint spu; + for (spu = 0; spu < cell->num_spus; spu++) { + ASSERT(cell->buffer_status[spu][cell->cur_batch][0] + == CELL_BUFFER_STATUS_USED); + } + } +#endif - size = cell->batch_buffer_size[cell->cur_batch]; + size = cell->buffer_size[cell->cur_batch]; - if (size + bytes > CELL_BATCH_BUFFER_SIZE) { + padbytes = (alignment - (size % alignment)) % alignment; + + if (padbytes + size + bytes > CELL_BUFFER_SIZE) { cell_batch_flush(cell); size = 0; } + else { + size += padbytes; + } - assert(size + bytes <= CELL_BATCH_BUFFER_SIZE); + ASSERT(size % alignment == 0); + ASSERT(size + bytes <= CELL_BUFFER_SIZE); - pos = (void *) (cell->batch_buffer[cell->cur_batch] + size); + pos = (void *) (cell->buffer[cell->cur_batch] + size); - cell->batch_buffer_size[cell->cur_batch] = size + bytes; + cell->buffer_size[cell->cur_batch] = size + bytes; return pos; } diff --git a/src/mesa/pipe/cell/ppu/cell_batch.h b/src/mesa/pipe/cell/ppu/cell_batch.h index c4ba7feb3d..a6eee0a8b1 100644 --- a/src/mesa/pipe/cell/ppu/cell_batch.h +++ b/src/mesa/pipe/cell/ppu/cell_batch.h @@ -35,6 +35,9 @@ struct cell_context; +extern uint +cell_get_empty_buffer(struct cell_context *cell); + extern void cell_batch_flush(struct cell_context *cell); @@ -42,10 +45,14 @@ extern uint cell_batch_free_space(const struct cell_context *cell); extern void -cell_batch_append(struct cell_context *cell, const void *cmd, uint length); +cell_batch_append(struct cell_context *cell, const void *data, uint bytes); extern void * cell_batch_alloc(struct cell_context *cell, uint bytes); +extern void * +cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, + uint alignment); + #endif /* CELL_BATCH_H */ diff --git a/src/mesa/pipe/cell/ppu/cell_clear.c b/src/mesa/pipe/cell/ppu/cell_clear.c index e01640b994..07b908eec5 100644 --- a/src/mesa/pipe/cell/ppu/cell_clear.c +++ b/src/mesa/pipe/cell/ppu/cell_clear.c @@ -48,9 +48,12 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, unsigned clearValue) { struct cell_context *cell = cell_context(pipe); - /*uint i;*/ uint surfIndex; + if (cell->dirty) + cell_update_derived(cell); + + if (!cell->cbuf_map[0]) cell->cbuf_map[0] = pipe_surface_map(ps); @@ -61,29 +64,7 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, surfIndex = 0; } -#if 0 - for (i = 0; i < cell->num_spus; i++) { -#if 1 - uint clr = clearValue; - if (surfIndex == 0) { - /* XXX debug: clear color varied per-SPU to visualize tiles */ - if ((clr & 0xff) == 0) - clr |= 64 + i * 8; - if ((clr & 0xff00) == 0) - clr |= (64 + i * 8) << 8; - if ((clr & 0xff0000) == 0) - clr |= (64 + i * 8) << 16; - if ((clr & 0xff000000) == 0) - clr |= (64 + i * 8) << 24; - } - cell_global.command[i].clear.value = clr; -#else - cell_global.command[i].clear.value = clearValue; -#endif - cell_global.command[i].clear.surface = surfIndex; - send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_CLEAR_SURFACE); - } -#else + { struct cell_command_clear_surface *clr = (struct cell_command_clear_surface *) @@ -92,9 +73,4 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, clr->surface = surfIndex; clr->value = clearValue; } -#endif - - /* XXX temporary */ - cell_flush(&cell->pipe, 0x0); - } diff --git a/src/mesa/pipe/cell/ppu/cell_context.c b/src/mesa/pipe/cell/ppu/cell_context.c index 8cb0c48f40..bbe1fd7a11 100644 --- a/src/mesa/pipe/cell/ppu/cell_context.c +++ b/src/mesa/pipe/cell/ppu/cell_context.c @@ -39,6 +39,7 @@ #include "pipe/p_winsys.h" #include "pipe/cell/common.h" #include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_private.h" #include "cell_clear.h" #include "cell_context.h" #include "cell_draw_arrays.h" @@ -156,6 +157,19 @@ cell_destroy_context( struct pipe_context *pipe ) } +static struct draw_context * +cell_draw_create(struct cell_context *cell) +{ + struct draw_context *draw = draw_create(); + + if (getenv("GALLIUM_CELL_VS")) { + /* plug in SPU-based vertex transformation code */ + draw->shader_queue_flush = cell_vertex_shader_queue_flush; + draw->driver_private = cell; + } + + return draw; +} struct pipe_context * @@ -242,7 +256,7 @@ cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws) cell_init_surface_functions(cell); - cell->draw = draw_create(); + cell->draw = cell_draw_create(cell); cell_init_vbuf(cell); draw_set_rasterize_stage(cell->draw, cell->vbuf); @@ -254,8 +268,9 @@ cell_create_context(struct pipe_winsys *winsys, struct cell_winsys *cws) cell_start_spus(cell); - for (buf = 0; buf < CELL_NUM_BATCH_BUFFERS; buf++) { - cell->batch_buffer_size[buf] = 0; + /* init command, vertex/index buffer info */ + for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) { + cell->buffer_size[buf] = 0; /* init batch buffer status values, * mark 0th buffer as used, rest as free. diff --git a/src/mesa/pipe/cell/ppu/cell_context.h b/src/mesa/pipe/cell/ppu/cell_context.h index 3bd88bfd5b..3b63419b5e 100644 --- a/src/mesa/pipe/cell/ppu/cell_context.h +++ b/src/mesa/pipe/cell/ppu/cell_context.h @@ -38,9 +38,6 @@ #include "pipe/cell/common.h" -#define CELL_MAX_SPUS 6 - - struct cell_vbuf_render; struct cell_vertex_shader_state @@ -76,7 +73,7 @@ struct cell_context struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; - struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct cell_texture *texture[PIPE_MAX_SAMPLERS]; struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; @@ -84,6 +81,9 @@ struct cell_context ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS]; ubyte *zsbuf_map; + struct pipe_surface *tex_surf; + uint *tex_map; + uint dirty; /** The primitive drawing context */ @@ -102,12 +102,14 @@ struct cell_context uint num_spus; - uint batch_buffer_size[CELL_NUM_BATCH_BUFFERS]; - ubyte batch_buffer[CELL_NUM_BATCH_BUFFERS][CELL_BATCH_BUFFER_SIZE] ALIGN16_ATTRIB; - int cur_batch; /**< which batch buffer is being filled */ + /** Buffers for command batches, vertex/index data */ + uint buffer_size[CELL_NUM_BUFFERS]; + ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + + int cur_batch; /**< which buffer is being filled w/ commands */ /** [4] to ensure 16-byte alignment for each status word */ - uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BATCH_BUFFERS][4] ALIGN16_ATTRIB; + uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; }; @@ -124,6 +126,8 @@ cell_context(struct pipe_context *pipe) extern struct pipe_context * cell_create_context(struct pipe_winsys *ws, struct cell_winsys *cws); +extern void +cell_vertex_shader_queue_flush(struct draw_context *draw); diff --git a/src/mesa/pipe/cell/ppu/cell_flush.c b/src/mesa/pipe/cell/ppu/cell_flush.c index b98bb566b1..f62bc4650c 100644 --- a/src/mesa/pipe/cell/ppu/cell_flush.c +++ b/src/mesa/pipe/cell/ppu/cell_flush.c @@ -39,6 +39,9 @@ cell_flush(struct pipe_context *pipe, unsigned flags) { struct cell_context *cell = cell_context(pipe); + if (flags & PIPE_FLUSH_SWAPBUFFERS) + flags |= PIPE_FLUSH_WAIT; + draw_flush( cell->draw ); cell_flush_int(pipe, flags); } @@ -56,7 +59,7 @@ cell_flush_int(struct pipe_context *pipe, unsigned flags) flushing = TRUE; if (flags & PIPE_FLUSH_WAIT) { - uint *cmd = (uint *) cell_batch_alloc(cell, sizeof(uint)); + uint64_t *cmd = (uint64_t *) cell_batch_alloc(cell, sizeof(uint64_t)); *cmd = CELL_CMD_FINISH; } diff --git a/src/mesa/pipe/cell/ppu/cell_spu.c b/src/mesa/pipe/cell/ppu/cell_spu.c index 4627bc8d1f..7c83a47e57 100644 --- a/src/mesa/pipe/cell/ppu/cell_spu.c +++ b/src/mesa/pipe/cell/ppu/cell_spu.c @@ -111,8 +111,8 @@ cell_start_spus(struct cell_context *cell) cell_global.inits[i].id = i; cell_global.inits[i].num_spus = cell->num_spus; cell_global.inits[i].cmd = &cell_global.command[i]; - for (j = 0; j < CELL_NUM_BATCH_BUFFERS; j++) { - cell_global.inits[i].batch_buffers[j] = cell->batch_buffer[j]; + for (j = 0; j < CELL_NUM_BUFFERS; j++) { + cell_global.inits[i].buffers[j] = cell->buffer[j]; } cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0]; diff --git a/src/mesa/pipe/cell/ppu/cell_state_blend.c b/src/mesa/pipe/cell/ppu/cell_state_blend.c index 34ae0128ea..4fc60548c8 100644 --- a/src/mesa/pipe/cell/ppu/cell_state_blend.c +++ b/src/mesa/pipe/cell/ppu/cell_state_blend.c @@ -29,6 +29,7 @@ */ #include "pipe/p_util.h" +#include "pipe/draw/draw_context.h" #include "cell_context.h" #include "cell_state.h" @@ -38,9 +39,7 @@ void * cell_create_blend_state(struct pipe_context *pipe, const struct pipe_blend_state *blend) { - struct pipe_blend_state *state = MALLOC(sizeof(struct pipe_blend_state)); - memcpy(state, blend, sizeof(struct pipe_blend_state)); - return state; + return mem_dup(blend, sizeof(*blend)); } @@ -49,6 +48,8 @@ cell_bind_blend_state(struct pipe_context *pipe, void *blend) { struct cell_context *cell = cell_context(pipe); + draw_flush(cell->draw); + cell->blend = (const struct pipe_blend_state *)blend; cell->dirty |= CELL_NEW_BLEND; @@ -68,6 +69,8 @@ cell_set_blend_color(struct pipe_context *pipe, { struct cell_context *cell = cell_context(pipe); + draw_flush(cell->draw); + cell->blend_color = *blend_color; cell->dirty |= CELL_NEW_BLEND; @@ -80,10 +83,7 @@ void * cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *depth_stencil) { - struct pipe_depth_stencil_alpha_state *state = - MALLOC(sizeof(struct pipe_depth_stencil_alpha_state)); - memcpy(state, depth_stencil, sizeof(struct pipe_depth_stencil_alpha_state)); - return state; + return mem_dup(depth_stencil, sizeof(*depth_stencil)); } @@ -93,6 +93,8 @@ cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, { struct cell_context *cell = cell_context(pipe); + draw_flush(cell->draw); + cell->depth_stencil = (const struct pipe_depth_stencil_alpha_state *) depth_stencil; diff --git a/src/mesa/pipe/cell/ppu/cell_state_emit.c b/src/mesa/pipe/cell/ppu/cell_state_emit.c index dbca900c35..5d2a786449 100644 --- a/src/mesa/pipe/cell/ppu/cell_state_emit.c +++ b/src/mesa/pipe/cell/ppu/cell_state_emit.c @@ -30,6 +30,18 @@ #include "cell_state.h" #include "cell_state_emit.h" #include "cell_batch.h" +#include "cell_texture.h" + + +static void +emit_state_cmd(struct cell_context *cell, uint cmd, + const void *state, uint state_size) +{ + uint64_t *dst = (uint64_t *) + cell_batch_alloc(cell, ROUNDUP8(sizeof(uint64_t) + state_size)); + *dst = cmd; + memcpy(dst + 1, state, state_size); +} @@ -50,23 +62,42 @@ cell_emit_state(struct cell_context *cell) fb->height = cell->framebuffer.cbufs[0]->height; } + if (cell->dirty & CELL_NEW_BLEND) { + emit_state_cmd(cell, CELL_CMD_STATE_BLEND, + cell->blend, + sizeof(struct pipe_blend_state)); + } + if (cell->dirty & CELL_NEW_DEPTH_STENCIL) { - uint cmd = CELL_CMD_STATE_DEPTH_STENCIL; - cell_batch_append(cell, &cmd, 4); - cell_batch_append(cell, cell->depth_stencil, - sizeof(struct pipe_depth_stencil_alpha_state)); + emit_state_cmd(cell, CELL_CMD_STATE_DEPTH_STENCIL, + cell->depth_stencil, + sizeof(struct pipe_depth_stencil_alpha_state)); } if (cell->dirty & CELL_NEW_SAMPLER) { - uint cmd = CELL_CMD_STATE_SAMPLER; - cell_batch_append(cell, &cmd, 4); - cell_batch_append(cell, cell->sampler[0], - sizeof(struct pipe_sampler_state)); + emit_state_cmd(cell, CELL_CMD_STATE_SAMPLER, + cell->sampler[0], sizeof(struct pipe_sampler_state)); + } + + if (cell->dirty & CELL_NEW_TEXTURE) { + struct cell_command_texture texture; + if (cell->texture[0]) { + texture.start = cell->texture[0]->tiled_data; + texture.width = cell->texture[0]->base.width[0]; + texture.height = cell->texture[0]->base.height[0]; + } + else { + texture.start = NULL; + texture.width = 0; + texture.height = 0; + } + + emit_state_cmd(cell, CELL_CMD_STATE_TEXTURE, + &texture, sizeof(struct cell_command_texture)); } if (cell->dirty & CELL_NEW_VERTEX_INFO) { - uint cmd = CELL_CMD_STATE_VERTEX_INFO; - cell_batch_append(cell, &cmd, 4); - cell_batch_append(cell, &cell->vertex_info, sizeof(struct vertex_info)); + emit_state_cmd(cell, CELL_CMD_STATE_VERTEX_INFO, + &cell->vertex_info, sizeof(struct vertex_info)); } } diff --git a/src/mesa/pipe/cell/ppu/cell_state_fs.c b/src/mesa/pipe/cell/ppu/cell_state_fs.c index 81c2ac14dd..96a52273b0 100644 --- a/src/mesa/pipe/cell/ppu/cell_state_fs.c +++ b/src/mesa/pipe/cell/ppu/cell_state_fs.c @@ -45,7 +45,7 @@ void * cell_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { - struct cell_context *cell = cell_context(pipe); + /*struct cell_context *cell = cell_context(pipe);*/ struct cell_fragment_shader_state *state; state = CALLOC_STRUCT(cell_fragment_shader_state); @@ -94,8 +94,6 @@ cell_bind_fs_state(struct pipe_context *pipe, void *fs) void cell_delete_fs_state(struct pipe_context *pipe, void *fs) { - struct cell_context *cell = cell_context(pipe); - struct cell_fragment_shader_state *state = (struct cell_fragment_shader_state *) fs; diff --git a/src/mesa/pipe/cell/ppu/cell_state_sampler.c b/src/mesa/pipe/cell/ppu/cell_state_sampler.c index ae1eeb4620..ade6cc8338 100644 --- a/src/mesa/pipe/cell/ppu/cell_state_sampler.c +++ b/src/mesa/pipe/cell/ppu/cell_state_sampler.c @@ -30,21 +30,17 @@ */ #include "pipe/p_util.h" +#include "pipe/draw/draw_context.h" #include "cell_context.h" #include "cell_state.h" -#if 0 #include "cell_texture.h" -#include "cell_tile_cache.h" -#endif void * cell_create_sampler_state(struct pipe_context *pipe, const struct pipe_sampler_state *sampler) { - struct pipe_sampler_state *state = MALLOC( sizeof(struct pipe_sampler_state) ); - memcpy(state, sampler, sizeof(struct pipe_sampler_state)); - return state; + return mem_dup(sampler, sizeof(*sampler)); } void @@ -53,6 +49,8 @@ cell_bind_sampler_state(struct pipe_context *pipe, { struct cell_context *cell = cell_context(pipe); + draw_flush(cell->draw); + assert(unit < PIPE_MAX_SAMPLERS); cell->sampler[unit] = (struct pipe_sampler_state *)sampler; @@ -76,7 +74,11 @@ cell_set_sampler_texture(struct pipe_context *pipe, { struct cell_context *cell = cell_context(pipe); + draw_flush(cell->draw); + cell->texture[sampler] = texture; + cell_update_texture_mapping(cell); + cell->dirty |= CELL_NEW_TEXTURE; } diff --git a/src/mesa/pipe/cell/ppu/cell_texture.c b/src/mesa/pipe/cell/ppu/cell_texture.c index 0a8190d983..df178d9ca2 100644 --- a/src/mesa/pipe/cell/ppu/cell_texture.c +++ b/src/mesa/pipe/cell/ppu/cell_texture.c @@ -79,31 +79,30 @@ cell_texture_layout(struct cell_texture * spt) } -void -cell_texture_create(struct pipe_context *pipe, struct pipe_texture **pt) +struct pipe_texture * +cell_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat) { - struct cell_texture *spt = REALLOC(*pt, sizeof(struct pipe_texture), - sizeof(struct cell_texture)); + struct cell_texture *spt = CALLOC_STRUCT(cell_texture); + if (!spt) + return NULL; - if (spt) { - memset(&spt->base + 1, 0, - sizeof(struct cell_texture) - sizeof(struct pipe_texture)); + spt->base = *templat; - cell_texture_layout(spt); + cell_texture_layout(spt); - spt->buffer = pipe->winsys->buffer_create(pipe->winsys, 32, - PIPE_BUFFER_USAGE_PIXEL, - spt->buffer_size); + spt->buffer = pipe->winsys->buffer_create(pipe->winsys, 32, + PIPE_BUFFER_USAGE_PIXEL, + spt->buffer_size); - if (!spt->buffer) { - FREE(spt); - spt = NULL; - } + if (!spt->buffer) { + FREE(spt); + return NULL; } - *pt = &spt->base; + return &spt->base; } + void cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) { @@ -163,3 +162,91 @@ cell_get_tex_surface(struct pipe_context *pipe, } return ps; } + + + +static void +tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src) +{ + const uint tile_size2 = tile_size * tile_size; + const uint h_t = h / tile_size, w_t = w / tile_size; + + uint it, jt; /* tile counters */ + uint i, j; /* intra-tile counters */ + + for (it = 0; it < h_t; it++) { + for (jt = 0; jt < w_t; jt++) { + /* fill in tile (i, j) */ + uint *tdst = dst + (it * w_t + jt) * tile_size2; + for (i = 0; i < tile_size; i++) { + for (j = 0; j < tile_size; j++) { + const uint srci = it * tile_size + i; + const uint srcj = jt * tile_size + j; + *tdst++ = src[srci * h + srcj]; + } + } + } + } +} + + + +/** + * Convert linear texture image data to tiled format for SPU usage. + */ +static void +cell_tile_texture(struct cell_context *cell, + struct cell_texture *texture) +{ + uint face = 0, level = 0, zslice = 0; + struct pipe_surface *surf; + const uint w = texture->base.width[0], h = texture->base.height[0]; + const uint *src; + + /* temporary restrictions: */ + assert(w >= TILE_SIZE); + assert(h >= TILE_SIZE); + assert(w % TILE_SIZE == 0); + assert(h % TILE_SIZE == 0); + + surf = cell_get_tex_surface(&cell->pipe, &texture->base, face, level, zslice); + ASSERT(surf); + + src = (const uint *) pipe_surface_map(surf); + + if (texture->tiled_data) { + align_free(texture->tiled_data); + } + texture->tiled_data = align_malloc(w * h * 4, 16); + + tile_copy_data(w, h, TILE_SIZE, texture->tiled_data, src); + + pipe_surface_unmap(surf); + + pipe_surface_reference(&surf, NULL); +} + + + +void +cell_update_texture_mapping(struct cell_context *cell) +{ + uint face = 0, level = 0, zslice = 0; + + if (cell->texture[0]) + cell_tile_texture(cell, cell->texture[0]); +#if 0 + if (cell->tex_surf && cell->tex_map) { + pipe_surface_unmap(cell->tex_surf); + cell->tex_map = NULL; + } + + /* XXX free old surface */ + + cell->tex_surf = cell_get_tex_surface(&cell->pipe, + &cell->texture[0]->base, + face, level, zslice); + + cell->tex_map = pipe_surface_map(cell->tex_surf); +#endif +} diff --git a/src/mesa/pipe/cell/ppu/cell_texture.h b/src/mesa/pipe/cell/ppu/cell_texture.h index ef5808c086..0264fed88e 100644 --- a/src/mesa/pipe/cell/ppu/cell_texture.h +++ b/src/mesa/pipe/cell/ppu/cell_texture.h @@ -46,6 +46,8 @@ struct cell_texture */ struct pipe_buffer *buffer; unsigned long buffer_size; + + void *tiled_data; /* XXX this may be temporary */ /*ALIGN16*/ }; @@ -58,8 +60,9 @@ cell_texture(struct pipe_texture *pt) -extern void -cell_texture_create(struct pipe_context *pipe, struct pipe_texture **pt); +extern struct pipe_texture * +cell_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat); extern void cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); @@ -70,4 +73,8 @@ cell_get_tex_surface(struct pipe_context *pipe, unsigned face, unsigned level, unsigned zslice); +extern void +cell_update_texture_mapping(struct cell_context *cell); + + #endif /* CELL_TEXTURE */ diff --git a/src/mesa/pipe/cell/ppu/cell_vbuf.c b/src/mesa/pipe/cell/ppu/cell_vbuf.c index ee572b3a51..e9fafe492e 100644 --- a/src/mesa/pipe/cell/ppu/cell_vbuf.c +++ b/src/mesa/pipe/cell/ppu/cell_vbuf.c @@ -39,8 +39,7 @@ #include "pipe/draw/draw_vbuf.h" -/** Allow prim indexes, verts to be inlined after RENDER command */ -#define ALLOW_INLINE_INDEXES 1 +/** Allow vertex data to be inlined after RENDER command */ #define ALLOW_INLINE_VERTS 1 @@ -52,9 +51,10 @@ struct cell_vbuf_render { struct vbuf_render base; struct cell_context *cell; - uint prim; - uint vertex_size; - void *vertex_buffer; + uint prim; /**< PIPE_PRIM_x */ + uint vertex_size; /**< in bytes */ + void *vertex_buffer; /**< just for debug, really */ + uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ }; @@ -81,14 +81,46 @@ cell_vbuf_allocate_vertices(struct vbuf_render *vbr, { struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); /*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/ - assert(!cvbr->vertex_buffer); - cvbr->vertex_buffer = align_malloc(vertex_size * nr_vertices, 16); + + assert(cvbr->vertex_buf == ~0); + cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell); + cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf]; cvbr->vertex_size = vertex_size; return cvbr->vertex_buffer; } static void +cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, + unsigned vertex_size, unsigned vertices_used) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + struct cell_context *cell = cvbr->cell; + + /* + printf("%s vertex_buf = %u count = %u\n", + __FUNCTION__, cvbr->vertex_buf, vertices_used); + */ + + /* Tell SPUs they can release the vert buf */ + if (cvbr->vertex_buf != ~0U) { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) + cell_batch_alloc(cell, sizeof(struct cell_command_release_verts)); + release->opcode = CELL_CMD_RELEASE_VERTS; + release->vertex_buf = cvbr->vertex_buf; + } + + cvbr->vertex_buf = ~0; + cell_flush_int(&cell->pipe, 0x0); + + assert(vertices == cvbr->vertex_buffer); + cvbr->vertex_buffer = NULL; +} + + + +static void cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); @@ -106,17 +138,24 @@ cell_vbuf_draw(struct vbuf_render *vbr, struct cell_context *cell = cvbr->cell; float xmin, ymin, xmax, ymax; uint i; - uint nr_vertices = 0; + uint nr_vertices = 0, min_index = ~0; const void *vertices = cvbr->vertex_buffer; const uint vertex_size = cvbr->vertex_size; for (i = 0; i < nr_indices; i++) { if (indices[i] > nr_vertices) nr_vertices = indices[i]; + if (indices[i] < min_index) + min_index = indices[i]; } nr_vertices++; #if 0 + /*if (min_index > 0)*/ + printf("%s min_index = %u\n", __FUNCTION__, min_index); +#endif + +#if 0 printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u\n", nr_indices, nr_vertices); printf(" "); @@ -137,7 +176,7 @@ cell_vbuf_draw(struct vbuf_render *vbr, /* compute x/y bounding box */ xmin = ymin = 1e50; xmax = ymax = -1e50; - for (i = 0; i < nr_vertices; i++) { + for (i = min_index; i < nr_vertices; i++) { const float *v = (float *) ((ubyte *) vertices + i * vertex_size); if (v[0] < xmin) xmin = v[0]; @@ -148,83 +187,68 @@ cell_vbuf_draw(struct vbuf_render *vbr, if (v[1] > ymax) ymax = v[1]; } +#if 0 + printf("PPU Bounds %g, %g .. %g, %g\n", xmin, ymin, xmax, ymax); + fflush(stdout); +#endif if (cvbr->prim != PIPE_PRIM_TRIANGLES) return; /* only render tris for now */ /* build/insert batch RENDER command */ { - const uint index_bytes = ROUNDUP4(nr_indices * 2); + const uint index_bytes = ROUNDUP8(nr_indices * 2); const uint vertex_bytes = nr_vertices * 4 * cell->vertex_info.size; + const uint batch_size = sizeof(struct cell_command_render) + index_bytes; struct cell_command_render *render = (struct cell_command_render *) - cell_batch_alloc(cell, sizeof(*render)); + cell_batch_alloc(cell, batch_size); + render->opcode = CELL_CMD_RENDER; render->prim_type = cvbr->prim; render->num_indexes = nr_indices; - if (ALLOW_INLINE_INDEXES && - index_bytes <= cell_batch_free_space(cell)) { - /* indices inlined, right after render cmd */ - void *dst = cell_batch_alloc(cell, index_bytes); - memcpy(dst, indices, nr_indices * 2); - render->inline_indexes = TRUE; - render->index_data = NULL; - } - else { - /* indices in separate buffer */ - render->inline_indexes = FALSE; - render->index_data = indices; - ASSERT_ALIGN16(render->index_data); - } + render->min_index = min_index; + + /* append indices after render command */ + memcpy(render + 1, indices, nr_indices * 2); + /* if there's room, append vertices after the indices, else leave + * vertices in the original/separate buffer. + */ render->vertex_size = 4 * cell->vertex_info.size; render->num_verts = nr_vertices; if (ALLOW_INLINE_VERTS && - render->inline_indexes && - vertex_bytes <= cell_batch_free_space(cell)) { - /* vertex data inlined, after indices */ - void *dst = cell_batch_alloc(cell, vertex_bytes); + min_index == 0 && + vertex_bytes + 16 <= cell_batch_free_space(cell)) { + /* vertex data inlined, after indices, at 16-byte boundary */ + void *dst = cell_batch_alloc_aligned(cell, vertex_bytes, 16); memcpy(dst, vertices, vertex_bytes); render->inline_verts = TRUE; - render->vertex_data = NULL; + render->vertex_buf = ~0; } else { + /* vertex data in separate buffer */ render->inline_verts = FALSE; - render->vertex_data = vertices; - ASSERT_ALIGN16(render->vertex_data); + ASSERT(cvbr->vertex_buf >= 0); + render->vertex_buf = cvbr->vertex_buf; } - render->xmin = xmin; render->ymin = ymin; render->xmax = xmax; render->ymax = ymax; } -#if 01 - /* XXX this is temporary */ +#if 0 + /* helpful for debug */ cell_flush_int(&cell->pipe, PIPE_FLUSH_WAIT); #endif } static void -cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, - unsigned vertex_size, unsigned vertices_used) -{ - struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); - - /*printf("Free verts %u * %u\n", vertex_size, vertices_used);*/ - align_free(vertices); - - assert(vertices == cvbr->vertex_buffer); - cvbr->vertex_buffer = NULL; -} - - -static void cell_vbuf_destroy(struct vbuf_render *vbr) { struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); @@ -244,8 +268,15 @@ cell_init_vbuf(struct cell_context *cell) cell->vbuf_render = CALLOC_STRUCT(cell_vbuf_render); - cell->vbuf_render->base.max_indices = CELL_MAX_VBUF_INDEXES; - cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_MAX_VBUF_SIZE; + /* The max number of indexes is what can fix into a batch buffer, + * minus the render and release-verts commands. + */ + cell->vbuf_render->base.max_indices + = (CELL_BUFFER_SIZE + - sizeof(struct cell_command_render) + - sizeof(struct cell_command_release_verts)) + / sizeof(ushort); + cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_BUFFER_SIZE; cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info; cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices; @@ -255,6 +286,9 @@ cell_init_vbuf(struct cell_context *cell) cell->vbuf_render->base.destroy = cell_vbuf_destroy; cell->vbuf_render->cell = cell; +#if 1 + cell->vbuf_render->vertex_buf = ~0; +#endif cell->vbuf = draw_vbuf_stage(cell->draw, &cell->vbuf_render->base); } diff --git a/src/mesa/pipe/cell/ppu/cell_vertex_shader.c b/src/mesa/pipe/cell/ppu/cell_vertex_shader.c new file mode 100644 index 0000000000..80dd500b34 --- /dev/null +++ b/src/mesa/pipe/cell/ppu/cell_vertex_shader.c @@ -0,0 +1,120 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file cell_vertex_shader.c + * Vertex shader interface routines for Cell. + * + * \author Ian Romanick <idr@us.ibm.com> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" + +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_spu.h" +#include "cell_batch.h" + +#include "pipe/cell/common.h" +#include "pipe/draw/draw_context.h" +#include "pipe/draw/draw_private.h" + +/** + * Run the vertex shader on all vertices in the vertex queue. + * Called by the draw module when the vertx cache needs to be flushed. + */ +void +cell_vertex_shader_queue_flush(struct draw_context *draw) +{ + struct cell_context *const cell = + (struct cell_context *) draw->driver_private; + struct cell_command_vs *const vs = &cell_global.command[0].vs; + uint64_t *batch; + struct cell_array_info *array_info; + unsigned i, j; + + assert(draw->vs.queue_nr != 0); + + /* XXX: do this on statechange: + */ + draw_update_vertex_fetch(draw); + + for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { + batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info)); + + batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO; + + array_info = (struct cell_array_info *) &batch[1]; + assert(draw->vertex_fetch.src_ptr[i] != NULL); + array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i]; + array_info->attr = i; + array_info->pitch = draw->vertex_fetch.pitch[i]; + array_info->format = draw->vertex_element[i].src_format; + } + + batch = cell_batch_alloc(cell, sizeof(batch[0]) + + sizeof(struct pipe_viewport_state)); + batch[0] = CELL_CMD_STATE_VIEWPORT; + (void) memcpy(&batch[1], &draw->viewport, + sizeof(struct pipe_viewport_state)); + + cell_batch_flush(cell); + + vs->opcode = CELL_CMD_VS_EXECUTE; + vs->shader.num_outputs = draw->num_vs_outputs; + vs->shader.declarations = (uintptr_t) draw->machine.Declarations; + vs->shader.num_declarations = draw->machine.NumDeclarations; + vs->shader.instructions = (uintptr_t) draw->machine.Instructions; + vs->shader.num_instructions = draw->machine.NumInstructions; + vs->shader.uniforms = (uintptr_t) draw->user.constants; + vs->shader.immediates = (uintptr_t) draw->machine.Imms; + vs->shader.num_immediates = draw->machine.ImmLimit / 4; + vs->nr_attrs = draw->vertex_fetch.nr_attrs; + + (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane)); + vs->nr_planes = draw->nr_planes; + + for (i = 0; i < draw->vs.queue_nr; i += SPU_VERTS_PER_BATCH) { + const unsigned n = MIN2(SPU_VERTS_PER_BATCH, draw->vs.queue_nr - i); + + for (j = 0; j < n; j++) { + vs->elts[j] = draw->vs.queue[i + j].elt; + vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].dest; + } + + for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) { + vs->elts[j] = vs->elts[0]; + vs->vOut[j] = vs->vOut[0]; + } + + vs->num_elts = n; + send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE); + + cell_flush_int(& cell->pipe, PIPE_FLUSH_WAIT); + } + + draw->vs.queue_nr = 0; +} diff --git a/src/mesa/pipe/cell/spu/Makefile b/src/mesa/pipe/cell/spu/Makefile index 417ae1b072..f202971d73 100644 --- a/src/mesa/pipe/cell/spu/Makefile +++ b/src/mesa/pipe/cell/spu/Makefile @@ -17,8 +17,15 @@ PROG_SPU_EMBED_O = $(PROG)_spu-embed.o SOURCES = \ spu_main.c \ + spu_blend.c \ + spu_render.c \ + spu_texture.c \ spu_tile.c \ - spu_tri.c + spu_tri.c \ + spu_exec.c \ + spu_util.c \ + spu_vertex_fetch.c \ + spu_vertex_shader.c SPU_OBJECTS = $(SOURCES:.c=.o) \ diff --git a/src/mesa/pipe/cell/spu/spu_blend.c b/src/mesa/pipe/cell/spu/spu_blend.c new file mode 100644 index 0000000000..23ec0eeb45 --- /dev/null +++ b/src/mesa/pipe/cell/spu/spu_blend.c @@ -0,0 +1,62 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "spu_main.h" +#include "spu_blend.h" +#include "spu_colorpack.h" + + +void +blend_quad(uint itx, uint ity, vector float colors[4]) +{ + /* simple SRC_ALPHA, ONE_MINUS_SRC_ALPHA blending */ + vector float fbc00 = spu_unpack_color(spu.ctile.ui[ity][itx]); + vector float fbc01 = spu_unpack_color(spu.ctile.ui[ity][itx+1]); + vector float fbc10 = spu_unpack_color(spu.ctile.ui[ity+1][itx]); + vector float fbc11 = spu_unpack_color(spu.ctile.ui[ity+1][itx+1]); + + vector float alpha00 = spu_splats(spu_extract(colors[0], 3)); + vector float alpha01 = spu_splats(spu_extract(colors[1], 3)); + vector float alpha10 = spu_splats(spu_extract(colors[2], 3)); + vector float alpha11 = spu_splats(spu_extract(colors[3], 3)); + + vector float one_minus_alpha00 = spu_sub(spu_splats(1.0f), alpha00); + vector float one_minus_alpha01 = spu_sub(spu_splats(1.0f), alpha01); + vector float one_minus_alpha10 = spu_sub(spu_splats(1.0f), alpha10); + vector float one_minus_alpha11 = spu_sub(spu_splats(1.0f), alpha11); + + colors[0] = spu_add(spu_mul(colors[0], alpha00), + spu_mul(fbc00, one_minus_alpha00)); + colors[1] = spu_add(spu_mul(colors[1], alpha01), + spu_mul(fbc01, one_minus_alpha01)); + colors[2] = spu_add(spu_mul(colors[2], alpha10), + spu_mul(fbc10, one_minus_alpha10)); + colors[3] = spu_add(spu_mul(colors[3], alpha11), + spu_mul(fbc11, one_minus_alpha11)); +} + diff --git a/src/mesa/pipe/cell/spu/spu_blend.h b/src/mesa/pipe/cell/spu/spu_blend.h new file mode 100644 index 0000000000..2b594b578b --- /dev/null +++ b/src/mesa/pipe/cell/spu/spu_blend.h @@ -0,0 +1,37 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SPU_BLEND_H +#define SPU_BLEND_H + + +extern void +blend_quad(uint itx, uint ity, vector float colors[4]); + + +#endif /* SPU_BLEND_H */ diff --git a/src/mesa/pipe/cell/spu/spu_colorpack.h b/src/mesa/pipe/cell/spu/spu_colorpack.h new file mode 100644 index 0000000000..e9fee8a3a6 --- /dev/null +++ b/src/mesa/pipe/cell/spu/spu_colorpack.h @@ -0,0 +1,110 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +#ifndef SPU_COLORPACK_H +#define SPU_COLORPACK_H + + +#include <spu_intrinsics.h> + + +static INLINE unsigned int +spu_pack_R8G8B8A8(vector float rgba) +{ + vector unsigned int out = spu_convtu(rgba, 32); + + out = spu_shuffle(out, out, ((vector unsigned char) { + 0, 4, 8, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }) ); + + return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_A8R8G8B8(vector float rgba) +{ + vector unsigned int out = spu_convtu(rgba, 32); + out = spu_shuffle(out, out, ((vector unsigned char) { + 12, 0, 4, 8, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}) ); + return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_B8G8R8A8(vector float rgba) +{ + vector unsigned int out = spu_convtu(rgba, 32); + out = spu_shuffle(out, out, ((vector unsigned char) { + 8, 4, 0, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}) ); + return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle) +{ + vector unsigned int out = spu_convtu(rgba, 32); + out = spu_shuffle(out, out, shuffle); + return spu_extract(out, 0); +} + + +static INLINE vector float +spu_unpack_color(uint color) +{ + vector unsigned int color_u4 = spu_splats(color); + color_u4 = spu_shuffle(color_u4, color_u4, + ((vector unsigned char) { + 0, 0, 0, 0, + 5, 5, 5, 5, + 10, 10, 10, 10, + 15, 15, 15, 15}) ); + return spu_convtf(color_u4, 32); +} + + +static INLINE vector float +spu_unpack_A8R8G8B8(uint color) +{ + vector unsigned int color_u4 = spu_splats(color); + color_u4 = spu_shuffle(color_u4, color_u4, + ((vector unsigned char) { + 5, 5, 5, 5, + 10, 10, 10, 10, + 15, 15, 15, 15, + 0, 0, 0, 0}) ); + + return spu_convtf(color_u4, 32); +} + + +#endif /* SPU_COLORPACK_H */ diff --git a/src/mesa/pipe/cell/spu/spu_exec.c b/src/mesa/pipe/cell/spu/spu_exec.c new file mode 100644 index 0000000000..e51008b9b3 --- /dev/null +++ b/src/mesa/pipe/cell/spu/spu_exec.c @@ -0,0 +1,1948 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI interpretor/executor. + * + * Flow control information: + * + * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) + * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special + * care since a condition may be true for some quad components but false + * for other components. + * + * We basically execute all statements (even if they're in the part of + * an IF/ELSE clause that's "not taken") and use a special mask to + * control writing to destination registers. This is the ExecMask. + * See store_dest(). + * + * The ExecMask is computed from three other masks (CondMask, LoopMask and + * ContMask) which are controlled by the flow control instructions (namely: + * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). + * + * + * Authors: + * Michal Krol + * Brian Paul + */ + +#include <libmisc.h> +#include <spu_mfcio.h> +#include <transpose_matrix4x4.h> +#include <simdmath/ceilf4.h> +#include <simdmath/cosf4.h> +#include <simdmath/divf4.h> +#include <simdmath/floorf4.h> +#include <simdmath/log2f4.h> +#include <simdmath/powf4.h> +#include <simdmath/sinf4.h> +#include <simdmath/sqrtf4.h> +#include <simdmath/truncf4.h> + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" +#include "pipe/tgsi/util/tgsi_util.h" +#include "spu_exec.h" +#include "spu_main.h" +#include "spu_vertex_shader.h" + +#define TILE_TOP_LEFT 0 +#define TILE_TOP_RIGHT 1 +#define TILE_BOTTOM_LEFT 2 +#define TILE_BOTTOM_RIGHT 3 + +/* + * Shorthand locations of various utility registers (_I = Index, _C = Channel) + */ +#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I +#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C +#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I +#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C +#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I +#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C +#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I +#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C +#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I +#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C +#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I +#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C +#define TEMP_128_I TGSI_EXEC_TEMP_128_I +#define TEMP_128_C TGSI_EXEC_TEMP_128_C +#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I +#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C +#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I +#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C +#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I +#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C +#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I +#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C +#define TEMP_R0 TGSI_EXEC_TEMP_R0 + +#define FOR_EACH_CHANNEL(CHAN)\ + for (CHAN = 0; CHAN < 4; CHAN++) + +#define IS_CHANNEL_ENABLED(INST, CHAN)\ + ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IS_CHANNEL_ENABLED2(INST, CHAN)\ + ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + +#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED2( INST, CHAN )) + + +/** The execution mask depends on the conditional mask and the loop mask */ +#define UPDATE_EXEC_MASK(MACH) \ + MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask + + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + + + +/** + * Initialize machine state by expanding tokens to full instructions, + * allocating temporary storage, setting up constants, etc. + * After this, we can call spu_exec_machine_run() many times. + */ +void +spu_exec_machine_init(struct spu_exec_machine *mach, + uint numSamplers, + struct spu_sampler *samplers, + unsigned processor) +{ + qword zero; + qword not_zero; + uint i; + + mach->Samplers = samplers; + mach->Processor = processor; + mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; + + zero = si_xor(zero, zero); + not_zero = si_xori(zero, 0xff); + + /* Setup constants. */ + mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero; + mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero; + mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1); + mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31); + + mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f); + mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f); + mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f); + mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f); +} + + +static INLINE qword +micro_abs(qword src) +{ + return si_rotmi(si_shli(src, 1), -1); +} + +static INLINE qword +micro_ceil(qword src) +{ + return (qword) _ceilf4((vec_float4) src); +} + +static INLINE qword +micro_cos(qword src) +{ + return (qword) _cosf4((vec_float4) src); +} + +static const qword br_shuf = { + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, +}; + +static const qword bl_shuf = { + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, +}; + +static const qword tl_shuf = { + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, +}; + +static qword +micro_ddx(qword src) +{ + qword bottom_right = si_shufb(src, src, br_shuf); + qword bottom_left = si_shufb(src, src, bl_shuf); + + return si_fs(bottom_right, bottom_left); +} + +static qword +micro_ddy(qword src) +{ + qword top_left = si_shufb(src, src, tl_shuf); + qword bottom_left = si_shufb(src, src, bl_shuf); + + return si_fs(top_left, bottom_left); +} + +static INLINE qword +micro_div(qword src0, qword src1) +{ + return (qword) _divf4((vec_float4) src0, (vec_float4) src1); +} + +static qword +micro_flr(qword src) +{ + return (qword) _floorf4((vec_float4) src); +} + +static qword +micro_frc(qword src) +{ + return si_fs(src, (qword) _floorf4((vec_float4) src)); +} + +static INLINE qword +micro_ge(qword src0, qword src1) +{ + return si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); +} + +static qword +micro_lg2(qword src) +{ + return (qword) _log2f4((vec_float4) src); +} + +static INLINE qword +micro_lt(qword src0, qword src1) +{ + const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); + + return si_xori(tmp, 0xff); +} + +static INLINE qword +micro_max(qword src0, qword src1) +{ + return si_selb(src1, src0, si_fcgt(src0, src1)); +} + +static INLINE qword +micro_min(qword src0, qword src1) +{ + return si_selb(src0, src1, si_fcgt(src0, src1)); +} + +static qword +micro_neg(qword src) +{ + return si_xor(src, (qword) spu_splats(0x80000000)); +} + +static qword +micro_set_sign(qword src) +{ + return si_or(src, (qword) spu_splats(0x80000000)); +} + +static qword +micro_pow(qword src0, qword src1) +{ + return (qword) _powf4((vec_float4) src0, (vec_float4) src1); +} + +static qword +micro_rnd(qword src) +{ + const qword half = (qword) spu_splats(0.5f); + + /* May be able to use _roundf4. There may be some difference, though. + */ + return (qword) _floorf4((vec_float4) si_fa(src, half)); +} + +static INLINE qword +micro_ishr(qword src0, qword src1) +{ + return si_rotma(src0, si_sfi(src1, 0)); +} + +static qword +micro_trunc(qword src) +{ + return (qword) _truncf4((vec_float4) src); +} + +static qword +micro_sin(qword src) +{ + return (qword) _sinf4((vec_float4) src); +} + +static INLINE qword +micro_sqrt(qword src) +{ + return (qword) _sqrtf4((vec_float4) src); +} + +static void +fetch_src_file_channel( + const struct spu_exec_machine *mach, + const uint file, + const uint swizzle, + const union spu_exec_channel *index, + union spu_exec_channel *chan ) +{ + switch( swizzle ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch( file ) { + case TGSI_FILE_CONSTANT: { + unsigned char buffer[32] ALIGN16_ATTRIB; + unsigned i; + + for (i = 0; i < 4; i++) { + const float *ptr = mach->Consts[index->i[i]]; + const uint64_t addr = (uint64_t)(uintptr_t) ptr; + const unsigned size = ((addr & 0x0f) == 0) ? 16 : 32; + + mfc_get(buffer, addr & ~0x0f, size, TAG_VERTEX_BUFFER, 0, 0); + wait_on_mask(1 << TAG_VERTEX_BUFFER); + + (void) memcpy(& chan->f[i], &buffer[(addr & 0x0f) + + (sizeof(float) * swizzle)], sizeof(float)); + } + break; + } + + case TGSI_FILE_INPUT: + chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_TEMPORARY: + chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_IMMEDIATE: + assert( index->i[0] < (int) mach->ImmLimit ); + assert( index->i[1] < (int) mach->ImmLimit ); + assert( index->i[2] < (int) mach->ImmLimit ); + assert( index->i[3] < (int) mach->ImmLimit ); + + chan->f[0] = mach->Imms[index->i[0]][swizzle]; + chan->f[1] = mach->Imms[index->i[1]][swizzle]; + chan->f[2] = mach->Imms[index->i[2]][swizzle]; + chan->f[3] = mach->Imms[index->i[3]][swizzle]; + break; + + case TGSI_FILE_ADDRESS: + chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_OUTPUT: + /* vertex/fragment output vars can be read too */ + chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + default: + assert( 0 ); + } + break; + + case TGSI_EXTSWIZZLE_ZERO: + *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; + break; + + case TGSI_EXTSWIZZLE_ONE: + *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; + break; + + default: + assert( 0 ); + } +} + +static void +fetch_source( + const struct spu_exec_machine *mach, + union spu_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const uint chan_index ) +{ + union spu_exec_channel index; + uint swizzle; + + index.i[0] = + index.i[1] = + index.i[2] = + index.i[3] = reg->SrcRegister.Index; + + if (reg->SrcRegister.Indirect) { + union spu_exec_channel index2; + union spu_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle(®->SrcRegisterInd, + CHAN_X); + fetch_src_file_channel( + mach, + reg->SrcRegisterInd.File, + swizzle, + &index2, + &indir_index ); + + index.q = si_a(index.q, indir_index.q); + } + + if( reg->SrcRegister.Dimension ) { + switch( reg->SrcRegister.File ) { + case TGSI_FILE_INPUT: + index.q = si_mpyi(index.q, 17); + break; + case TGSI_FILE_CONSTANT: + index.q = si_shli(index.q, 12); + break; + default: + assert( 0 ); + } + + index.i[0] += reg->SrcRegisterDim.Index; + index.i[1] += reg->SrcRegisterDim.Index; + index.i[2] += reg->SrcRegisterDim.Index; + index.i[3] += reg->SrcRegisterDim.Index; + + if (reg->SrcRegisterDim.Indirect) { + union spu_exec_channel index2; + union spu_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterDimInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); + fetch_src_file_channel( + mach, + reg->SrcRegisterDimInd.File, + swizzle, + &index2, + &indir_index ); + + index.q = si_a(index.q, indir_index.q); + } + } + + swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + fetch_src_file_channel( + mach, + reg->SrcRegister.File, + swizzle, + &index, + chan ); + + switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { + case TGSI_UTIL_SIGN_CLEAR: + chan->q = micro_abs(chan->q); + break; + + case TGSI_UTIL_SIGN_SET: + chan->q = micro_set_sign(chan->q); + break; + + case TGSI_UTIL_SIGN_TOGGLE: + chan->q = micro_neg(chan->q); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + } + + if (reg->SrcRegisterExtMod.Complement) { + chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q); + } +} + +static void +store_dest( + struct spu_exec_machine *mach, + const union spu_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + uint chan_index ) +{ + union spu_exec_channel *dst; + + switch( reg->DstRegister.File ) { + case TGSI_FILE_NULL: + return; + + case TGSI_FILE_OUTPUT: + dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] + + reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_TEMPORARY: + dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_ADDRESS: + dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; + break; + + default: + assert( 0 ); + return; + } + + switch (inst->Instruction.Saturate) + { + case TGSI_SAT_NONE: + if (mach->ExecMask & 0x1) + dst->i[0] = chan->i[0]; + if (mach->ExecMask & 0x2) + dst->i[1] = chan->i[1]; + if (mach->ExecMask & 0x4) + dst->i[2] = chan->i[2]; + if (mach->ExecMask & 0x8) + dst->i[3] = chan->i[3]; + break; + + case TGSI_SAT_ZERO_ONE: + /* XXX need to obey ExecMask here */ + dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q); + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + + default: + assert( 0 ); + } +} + +#define FETCH(VAL,INDEX,CHAN)\ + fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + +#define STORE(VAL,INDEX,CHAN)\ + store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) + + +/** + * Execute ARB-style KIL which is predicated by a src register. + * Kill fragment if any of the four values is less than zero. + */ +static void +exec_kilp(struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint uniquemask; + uint chan_index; + uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + union spu_exec_channel r[1]; + + /* This mask stores component bits that were already tested. Note that + * we test if the value is less than zero, so 1.0 and 0.0 need not to be + * tested. */ + uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + + for (chan_index = 0; chan_index < 4; chan_index++) + { + uint swizzle; + uint i; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_extswizzle ( + &inst->FullSrcRegisters[0], + chan_index); + + /* check if the component has not been already tested */ + if (uniquemask & (1 << swizzle)) + continue; + uniquemask |= 1 << swizzle; + + FETCH(&r[0], 0, chan_index); + for (i = 0; i < 4; i++) + if (r[0].f[i] < 0.0f) + kilmask |= 1 << i; + } + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} + + +/* + * Fetch a texel using STR texture coordinates. + */ +static void +fetch_texel( struct spu_sampler *sampler, + const union spu_exec_channel *s, + const union spu_exec_channel *t, + const union spu_exec_channel *p, + float lodbias, /* XXX should be float[4] */ + union spu_exec_channel *r, + union spu_exec_channel *g, + union spu_exec_channel *b, + union spu_exec_channel *a ) +{ + qword rgba[4]; + qword out[4]; + + sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, (float *) rgba); + + _transpose_matrix4x4(out, rgba); + r->q = out[0]; + g->q = out[1]; + b->q = out[2]; + a->q = out[3]; +} + + +static void +exec_tex(struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst, + boolean biasLod) +{ + const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + union spu_exec_channel r[8]; + uint chan_index; + float lodBias; + + /* printf("Sampler %u unit %u\n", sampler, unit); */ + + switch (inst->InstructionExtTexture.Texture) { + case TGSI_TEXTURE_1D: + + FETCH(&r[0], 0, CHAN_X); + + switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { + case TGSI_EXTSWIZZLE_W: + FETCH(&r[1], 0, CHAN_W); + r[0].q = micro_div(r[0].q, r[1].q); + break; + + case TGSI_EXTSWIZZLE_ONE: + break; + + default: + assert (0); + } + + if (biasLod) { + FETCH(&r[1], 0, CHAN_W); + lodBias = r[2].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { + case TGSI_EXTSWIZZLE_W: + FETCH(&r[3], 0, CHAN_W); + r[0].q = micro_div(r[0].q, r[3].q); + r[1].q = micro_div(r[1].q, r[3].q); + r[2].q = micro_div(r[2].q, r[3].q); + break; + + case TGSI_EXTSWIZZLE_ONE: + break; + + default: + assert (0); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, /* inputs */ + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { + case TGSI_EXTSWIZZLE_W: + FETCH(&r[3], 0, CHAN_W); + r[0].q = micro_div(r[0].q, r[3].q); + r[1].q = micro_div(r[1].q, r[3].q); + r[2].q = micro_div(r[2].q, r[3].q); + break; + + case TGSI_EXTSWIZZLE_ONE: + break; + + default: + assert (0); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert (0); + } + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[chan_index], 0, chan_index ); + } +} + + + +static void +constant_interpolation( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; + } +} + +static void +linear_interpolation( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + mach->Inputs[attrib].xyzw[chan].f[0] = a0; + mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; + mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; + mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; +} + +static void +perspective_interpolation( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + const float *w = mach->QuadPos.xyzw[3].f; + /* divide by W here */ + mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; + mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; + mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; + mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; +} + + +typedef void (* interpolation_func)( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ); + +static void +exec_declaration(struct spu_exec_machine *mach, + const struct tgsi_full_declaration *decl) +{ + if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + interpolation_func interp; + + assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); + + first = decl->u.DeclarationRange.First; + last = decl->u.DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + switch( decl->Interpolation.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + interp = constant_interpolation; + break; + + case TGSI_INTERPOLATE_LINEAR: + interp = linear_interpolation; + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + interp = perspective_interpolation; + break; + + default: + assert( 0 ); + } + + if( mask == TGSI_WRITEMASK_XYZW ) { + unsigned i, j; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + interp( mach, i, j ); + } + } + } + else { + unsigned i, j; + + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + for( i = first; i <= last; i++ ) { + interp( mach, i, j ); + } + } + } + } + } + } +} + +static void +exec_instruction( + struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst, + int *pc ) +{ + uint chan_index; + union spu_exec_channel r[8]; + + (*pc)++; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = si_cflts(r[0].q, 0); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOV: + /* TGSI_OPCODE_SWZ */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_X ); + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[1], 0, CHAN_Y ); + r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + + FETCH( &r[2], 0, CHAN_W ); + r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q); + r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q); + r[1].q = micro_pow(r[1].q, r[2].q); + + /* r0 = (r0 > 0.0) ? r1 : 0.0 + */ + r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q, + r[0].q); + STORE( &r[0], 0, CHAN_Z ); + } + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_sqrt(r[0].q); + r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXP: + assert (0); + break; + + case TGSI_OPCODE_LOG: + assert (0); + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) + { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = si_fm(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_fa(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + + FETCH( &r[1], 0, CHAN_Z ); + FETCH( &r[2], 1, CHAN_Z ); + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 0, CHAN_W); + FETCH(&r[2], 1, CHAN_W); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DST: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + FETCH( &r[0], 0, CHAN_Y ); + FETCH( &r[1], 1, CHAN_Y); + r[0].q = si_fm(r[0].q, r[1].q); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_Z ); + STORE( &r[0], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + FETCH( &r[0], 1, CHAN_W ); + STORE( &r[0], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = micro_min(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = micro_max(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = micro_ge(r[0].q, r[1].q); + r[0].q = si_xori(r[0].q, 0xff); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = micro_ge(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + FETCH( &r[2], 2, chan_index ); + r[0].q = si_fma(r[0].q, r[1].q, r[2].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = si_fs(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_LERP: + /* TGSI_OPCODE_LRP */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + r[1].q = si_fs(r[1].q, r[2].q); + r[0].q = si_fma(r[0].q, r[1].q, r[2].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_CND: + assert (0); + break; + + case TGSI_OPCODE_CND0: + assert (0); + break; + + case TGSI_OPCODE_DOT2ADD: + /* TGSI_OPCODE_DP2A */ + assert (0); + break; + + case TGSI_OPCODE_INDEX: + assert (0); + break; + + case TGSI_OPCODE_NEGATE: + assert (0); + break; + + case TGSI_OPCODE_FRAC: + /* TGSI_OPCODE_FRC */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_frc(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CLAMP: + assert (0); + break; + + case TGSI_OPCODE_FLOOR: + /* TGSI_OPCODE_FLR */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_flr(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_ROUND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_rnd(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXPBASE2: + /* TGSI_OPCODE_EX2 */ + FETCH(&r[0], 0, CHAN_X); + + r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LOGBASE2: + /* TGSI_OPCODE_LG2 */ + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_lg2(r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_POWER: + /* TGSI_OPCODE_POW */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + r[0].q = micro_pow(r[0].q, r[1].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CROSSPRODUCT: + /* TGSI_OPCODE_XPD */ + FETCH(&r[0], 0, CHAN_Y); + FETCH(&r[1], 1, CHAN_Z); + FETCH(&r[3], 0, CHAN_Z); + FETCH(&r[4], 1, CHAN_Y); + + /* r2 = (r0 * r1) - (r3 * r5) + */ + r[2].q = si_fm(r[3].q, r[5].q); + r[2].q = si_fms(r[0].q, r[1].q, r[2].q); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &r[2], 0, CHAN_X ); + } + + FETCH(&r[2], 1, CHAN_X); + FETCH(&r[5], 0, CHAN_X); + + /* r3 = (r3 * r2) - (r1 * r5) + */ + r[1].q = si_fm(r[1].q, r[5].q); + r[3].q = si_fms(r[3].q, r[2].q, r[1].q); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + STORE( &r[3], 0, CHAN_Y ); + } + + /* r5 = (r5 * r4) - (r0 * r2) + */ + r[0].q = si_fm(r[0].q, r[2].q); + r[5].q = si_fms(r[5].q, r[4].q, r[0].q); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( &r[5], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MULTIPLYMATRIX: + assert (0); + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + + r[0].q = micro_abs(r[0].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_RCC: + assert (0); + break; + + case TGSI_OPCODE_DPH: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 1, CHAN_W); + + r[0].q = si_fa(r[0].q, r[1].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_COS: + FETCH(&r[0], 0, CHAN_X); + + r[0].q = micro_cos(r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_ddx(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDY: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_ddy(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_KILP: + exec_kilp (mach, inst); + break; + + case TGSI_OPCODE_KIL: + /* for enabled ExecMask bits, set the killed bit */ + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask; + break; + + case TGSI_OPCODE_PK2H: + assert (0); + break; + + case TGSI_OPCODE_PK2US: + assert (0); + break; + + case TGSI_OPCODE_PK4B: + assert (0); + break; + + case TGSI_OPCODE_PK4UB: + assert (0); + break; + + case TGSI_OPCODE_RFL: + assert (0); + break; + + case TGSI_OPCODE_SEQ: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_fceq(r[0].q, r[1].q); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SFL: + assert (0); + break; + + case TGSI_OPCODE_SGT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_fcgt(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SIN: + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_sin(r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_fcgt(r[0].q, r[1].q); + r[0].q = si_xori(r[0].q, 0xff); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SNE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_fceq(r[0].q, r[1].q); + r[0].q = si_xori(r[0].q, 0xff); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_STR: + assert (0); + break; + + case TGSI_OPCODE_TEX: + /* simple texture lookup */ + /* src[0] = texcoord */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, FALSE); + break; + + case TGSI_OPCODE_TXB: + /* Texture lookup with lod bias */ + /* src[0] = texcoord (src[0].w = load bias) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE); + break; + + case TGSI_OPCODE_TXD: + /* Texture lookup with explict partial derivatives */ + /* src[0] = texcoord */ + /* src[1] = d[strq]/dx */ + /* src[2] = d[strq]/dy */ + /* src[3] = sampler unit */ + assert (0); + break; + + case TGSI_OPCODE_TXL: + /* Texture lookup with explit LOD */ + /* src[0] = texcoord (src[0].w = load bias) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE); + break; + + case TGSI_OPCODE_UP2H: + assert (0); + break; + + case TGSI_OPCODE_UP2US: + assert (0); + break; + + case TGSI_OPCODE_UP4B: + assert (0); + break; + + case TGSI_OPCODE_UP4UB: + assert (0); + break; + + case TGSI_OPCODE_X2D: + assert (0); + break; + + case TGSI_OPCODE_ARA: + assert (0); + break; + + case TGSI_OPCODE_ARR: + assert (0); + break; + + case TGSI_OPCODE_BRA: + assert (0); + break; + + case TGSI_OPCODE_CAL: + /* skip the call if no execution channels are enabled */ + if (mach->ExecMask) { + /* do the call */ + + /* push the Cond, Loop, Cont stacks */ + assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + + assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; + + /* note that PC was already incremented above */ + mach->CallStack[mach->CallStackTop++] = *pc; + *pc = inst->InstructionExtLabel.Label; + } + break; + + case TGSI_OPCODE_RET: + mach->FuncMask &= ~mach->ExecMask; + UPDATE_EXEC_MASK(mach); + + if (mach->ExecMask == 0x0) { + /* really return now (otherwise, keep executing */ + + if (mach->CallStackTop == 0) { + /* returning from main() */ + *pc = -1; + return; + } + *pc = mach->CallStack[--mach->CallStackTop]; + + /* pop the Cond, Loop, Cont stacks */ + assert(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->FuncStackTop > 0); + mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; + + UPDATE_EXEC_MASK(mach); + } + break; + + case TGSI_OPCODE_SSG: + assert (0); + break; + + case TGSI_OPCODE_CMP: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + /* r0 = (r0 < 0.0) ? r1 : r2 + */ + r[3].q = si_xor(r[3].q, r[3].q); + r[0].q = micro_lt(r[0].q, r[3].q); + r[0].q = si_selb(r[1].q, r[2].q, r[0].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_SCS: + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( &r[0], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { + r[1].q = micro_cos(r[0].q); + STORE( &r[1], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + r[1].q = micro_sin(r[0].q); + STORE( &r[1], 0, CHAN_Y ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_NRM: + assert (0); + break; + + case TGSI_OPCODE_DIV: + assert( 0 ); + break; + + case TGSI_OPCODE_DP2: + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_IF: + /* push CondMask */ + assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + FETCH( &r[0], 0, CHAN_X ); + /* update CondMask */ + if( ! r[0].u[0] ) { + mach->CondMask &= ~0x1; + } + if( ! r[0].u[1] ) { + mach->CondMask &= ~0x2; + } + if( ! r[0].u[2] ) { + mach->CondMask &= ~0x4; + } + if( ! r[0].u[3] ) { + mach->CondMask &= ~0x8; + } + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ELSE */ + break; + + case TGSI_OPCODE_ELSE: + /* invert CondMask wrt previous mask */ + { + uint prevMask; + assert(mach->CondStackTop > 0); + prevMask = mach->CondStack[mach->CondStackTop - 1]; + mach->CondMask = ~mach->CondMask & prevMask; + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ENDIF */ + } + break; + + case TGSI_OPCODE_ENDIF: + /* pop CondMask */ + assert(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_END: + /* halt execution */ + *pc = -1; + break; + + case TGSI_OPCODE_REP: + assert (0); + break; + + case TGSI_OPCODE_ENDREP: + assert (0); + break; + + case TGSI_OPCODE_PUSHA: + assert (0); + break; + + case TGSI_OPCODE_POPA: + assert (0); + break; + + case TGSI_OPCODE_CEIL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_ceil(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_I2F: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = si_csflt(r[0].q, 0); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_NOT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = si_xorbi(r[0].q, 0xff); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_TRUNC: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_trunc(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_shl(r[0].q, r[1].q); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = micro_ishr(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_AND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_and(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_OR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_or(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOD: + assert (0); + break; + + case TGSI_OPCODE_XOR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_xor(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SAD: + assert (0); + break; + + case TGSI_OPCODE_TXF: + assert (0); + break; + + case TGSI_OPCODE_TXQ: + assert (0); + break; + + case TGSI_OPCODE_EMIT: + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + break; + + case TGSI_OPCODE_ENDPRIM: + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; + break; + + case TGSI_OPCODE_LOOP: + /* fall-through (for now) */ + case TGSI_OPCODE_BGNLOOP2: + /* push LoopMask and ContMasks */ + assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + break; + + case TGSI_OPCODE_ENDLOOP: + /* fall-through (for now at least) */ + case TGSI_OPCODE_ENDLOOP2: + /* Restore ContMask, but don't pop */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; + if (mach->LoopMask) { + /* repeat loop: jump to instruction just past BGNLOOP */ + *pc = inst->InstructionExtLabel.Label + 1; + } + else { + /* exit loop: pop LoopMask */ + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + /* pop ContMask */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + } + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BRK: + /* turn off loop channels for each enabled exec channel */ + mach->LoopMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_CONT: + /* turn off cont channels for each enabled exec channel */ + mach->ContMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BGNSUB: + /* no-op */ + break; + + case TGSI_OPCODE_ENDSUB: + /* no-op */ + break; + + case TGSI_OPCODE_NOISE1: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE2: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE3: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE4: + assert( 0 ); + break; + + case TGSI_OPCODE_NOP: + break; + + default: + assert( 0 ); + } +} + + +/** + * Run TGSI interpreter. + * \return bitmask of "alive" quad components + */ +uint +spu_exec_machine_run( struct spu_exec_machine *mach ) +{ + uint i; + int pc = 0; + + mach->CondMask = 0xf; + mach->LoopMask = 0xf; + mach->ContMask = 0xf; + mach->FuncMask = 0xf; + mach->ExecMask = 0xf; + + mach->CondStackTop = 0; /* temporarily subvert this assertion */ + assert(mach->CondStackTop == 0); + assert(mach->LoopStackTop == 0); + assert(mach->ContStackTop == 0); + assert(mach->CallStackTop == 0); + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; + + if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; + mach->Primitives[0] = 0; + } + + + /* execute declarations (interpolants) */ + if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { + for (i = 0; i < mach->NumDeclarations; i++) { + uint8_t buffer[sizeof(struct tgsi_full_declaration) + 32] ALIGN16_ATTRIB; + struct tgsi_full_declaration decl; + unsigned long decl_addr = (unsigned long) (mach->Declarations+i); + unsigned size = ((sizeof(decl) + (decl_addr & 0x0f) + 0x0f) & ~0x0f); + + mfc_get(buffer, decl_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0); + wait_on_mask(1 << TAG_INSTRUCTION_FETCH); + + memcpy(& decl, buffer + (decl_addr & 0x0f), sizeof(decl)); + exec_declaration( mach, &decl ); + } + } + + /* execute instructions, until pc is set to -1 */ + while (pc != -1) { + uint8_t buffer[sizeof(struct tgsi_full_instruction) + 32] ALIGN16_ATTRIB; + struct tgsi_full_instruction inst; + unsigned long inst_addr = (unsigned long) (mach->Instructions + pc); + unsigned size = ((sizeof(inst) + (inst_addr & 0x0f) + 0x0f) & ~0x0f); + + assert(pc < mach->NumInstructions); + mfc_get(buffer, inst_addr & ~0x0f, size, TAG_INSTRUCTION_FETCH, 0, 0); + wait_on_mask(1 << TAG_INSTRUCTION_FETCH); + + memcpy(& inst, buffer + (inst_addr & 0x0f), sizeof(inst)); + exec_instruction( mach, & inst, &pc ); + } + +#if 0 + /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + /* + * Scale back depth component. + */ + for (i = 0; i < 4; i++) + mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; + } +#endif + + return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; +} + + diff --git a/src/mesa/pipe/cell/spu/spu_exec.h b/src/mesa/pipe/cell/spu/spu_exec.h new file mode 100644 index 0000000000..b4c7661ef6 --- /dev/null +++ b/src/mesa/pipe/cell/spu/spu_exec.h @@ -0,0 +1,172 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#if !defined SPU_EXEC_H +#define SPU_EXEC_H + +#include "pipe/p_compiler.h" +#include "pipe/tgsi/exec/tgsi_exec.h" + +#if defined __cplusplus +extern "C" { +#endif + +/** + * Registers may be treated as float, signed int or unsigned int. + */ +union spu_exec_channel +{ + float f[QUAD_SIZE]; + int i[QUAD_SIZE]; + unsigned u[QUAD_SIZE]; + qword q; +}; + +/** + * A vector[RGBA] of channels[4 pixels] + */ +struct spu_exec_vector +{ + union spu_exec_channel xyzw[NUM_CHANNELS]; +}; + +/** + * For fragment programs, information for computing fragment input + * values from plane equation of the triangle/line. + */ +struct spu_interp_coef +{ + float a0[NUM_CHANNELS]; /* in an xyzw layout */ + float dadx[NUM_CHANNELS]; + float dady[NUM_CHANNELS]; +}; + + +struct softpipe_tile_cache; /**< Opaque to TGSI */ + +/** + * Information for sampling textures, which must be implemented + * by code outside the TGSI executor. + */ +struct spu_sampler +{ + const struct pipe_sampler_state *state; + struct pipe_texture *texture; + /** Get samples for four fragments in a quad */ + void (*get_samples)(struct spu_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); + void *pipe; /*XXX temporary*/ + struct softpipe_tile_cache *cache; +}; + + +/** + * Run-time virtual machine state for executing TGSI shader. + */ +struct spu_exec_machine +{ + /* + * 32 program temporaries + * 4 internal temporaries + * 1 address + */ + struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS + + TGSI_EXEC_NUM_ADDRS + 1] + ALIGN16_ATTRIB; + + struct spu_exec_vector *Addrs; + + struct spu_sampler *Samplers; + + float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; + unsigned ImmLimit; + float (*Consts)[4]; + struct spu_exec_vector *Inputs; + struct spu_exec_vector *Outputs; + unsigned Processor; + + /* GEOMETRY processor only. */ + unsigned *Primitives; + + /* FRAGMENT processor only. */ + const struct spu_interp_coef *InterpCoefs; + struct spu_exec_vector QuadPos; + + /* Conditional execution masks */ + uint CondMask; /**< For IF/ELSE/ENDIF */ + uint LoopMask; /**< For BGNLOOP/ENDLOOP */ + uint ContMask; /**< For loop CONT statements */ + uint FuncMask; /**< For function calls */ + uint ExecMask; /**< = CondMask & LoopMask */ + + /** Condition mask stack (for nested conditionals) */ + uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; + int CondStackTop; + + /** Loop mask stack (for nested loops) */ + uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopStackTop; + + /** Loop continue mask stack (see comments in tgsi_exec.c) */ + uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int ContStackTop; + + /** Function execution mask stack (for executing subroutine code) */ + uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; + int FuncStackTop; + + /** Function call stack for saving/restoring the program counter */ + uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; + int CallStackTop; + + struct tgsi_full_instruction *Instructions; + uint NumInstructions; + + struct tgsi_full_declaration *Declarations; + uint NumDeclarations; +}; + + +extern void +spu_exec_machine_init(struct spu_exec_machine *mach, + uint numSamplers, + struct spu_sampler *samplers, + unsigned processor); + +extern uint +spu_exec_machine_run( struct spu_exec_machine *mach ); + + +#if defined __cplusplus +} /* extern "C" */ +#endif + +#endif /* SPU_EXEC_H */ diff --git a/src/mesa/pipe/cell/spu/spu_main.c b/src/mesa/pipe/cell/spu/spu_main.c index 0c83900a18..e375197fe6 100644 --- a/src/mesa/pipe/cell/spu/spu_main.c +++ b/src/mesa/pipe/cell/spu/spu_main.c @@ -31,11 +31,13 @@ #include <stdio.h> #include <libmisc.h> -#include <spu_mfcio.h> #include "spu_main.h" -#include "spu_tri.h" +#include "spu_render.h" +#include "spu_texture.h" #include "spu_tile.h" +//#include "spu_test.h" +#include "spu_vertex_shader.h" #include "pipe/cell/common.h" #include "pipe/p_defines.h" @@ -46,28 +48,37 @@ helpful headers: /opt/ibm/cell-sdk/prototype/sysroot/usr/include/libmisc.h */ -static boolean Debug = FALSE; +boolean Debug = FALSE; struct spu_global spu; +struct spu_vs_context draw; -void -wait_on_mask(unsigned tagMask) +/** + * Tell the PPU that this SPU has finished copying a buffer to + * local store and that it may be reused by the PPU. + * This is done by writting a 16-byte batch-buffer-status block back into + * main memory (in cell_context->buffer_status[]). + */ +static void +release_buffer(uint buffer) { - mfc_write_tag_mask( tagMask ); - /* wait for completion of _any_ DMAs specified by tagMask */ - mfc_read_tag_status_any(); -} + /* Evidently, using less than a 16-byte status doesn't work reliably */ + static const uint status[4] ALIGN16_ATTRIB + = {CELL_BUFFER_STATUS_FREE, 0, 0, 0}; + const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); + uint *dst = spu.init.buffer_status + index; -static void -wait_on_mask_all(unsigned tagMask) -{ - mfc_write_tag_mask( tagMask ); - /* wait for completion of _any_ DMAs specified by tagMask */ - mfc_read_tag_status_all(); -} + ASSERT(buffer < CELL_NUM_BUFFERS); + mfc_put((void *) &status, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + sizeof(status), /* size */ + TAG_MISC, /* tag is unimportant */ + 0, /* tid */ + 0 /* rid */); +} /** @@ -81,24 +92,24 @@ really_clear_tiles(uint surfaceIndex) uint i; if (surfaceIndex == 0) { - clear_c_tile(&ctile); + clear_c_tile(&spu.ctile); for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { uint tx = i % spu.fb.width_tiles; uint ty = i / spu.fb.width_tiles; - if (tile_status[ty][tx] == TILE_STATUS_CLEAR) { - put_tile(tx, ty, &ctile, TAG_SURFACE_CLEAR, 0); + if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) { + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); } } } else { - clear_z_tile(&ztile); + clear_z_tile(&spu.ztile); for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { uint tx = i % spu.fb.width_tiles; uint ty = i / spu.fb.width_tiles; - if (tile_status_z[ty][tx] == TILE_STATUS_CLEAR) - put_tile(tx, ty, &ctile, TAG_SURFACE_CLEAR, 1); + if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR) + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1); } } @@ -122,11 +133,11 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear) #if CLEAR_OPT /* set all tile's status to CLEAR */ if (clear->surface == 0) { - memset(tile_status, TILE_STATUS_CLEAR, sizeof(tile_status)); + memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); spu.fb.color_clear_value = clear->value; } else { - memset(tile_status_z, TILE_STATUS_CLEAR, sizeof(tile_status_z)); + memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); spu.fb.depth_clear_value = clear->value; } return; @@ -134,11 +145,11 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear) if (clear->surface == 0) { spu.fb.color_clear_value = clear->value; - clear_c_tile(&ctile); + clear_c_tile(&spu.ctile); } else { spu.fb.depth_clear_value = clear->value; - clear_z_tile(&ztile); + clear_z_tile(&spu.ztile); } /* @@ -150,9 +161,9 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear) uint tx = i % spu.fb.width_tiles; uint ty = i / spu.fb.width_tiles; if (clear->surface == 0) - put_tile(tx, ty, &ctile, TAG_SURFACE_CLEAR, 0); + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); else - put_tile(tx, ty, &ztile, TAG_SURFACE_CLEAR, 1); + put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); /* XXX we don't want this here, but it fixes bad tile results */ } @@ -165,229 +176,14 @@ cmd_clear_surface(const struct cell_command_clear_surface *clear) } -/** - * Given a rendering command's bounding box (in pixels) compute the - * location of the corresponding screen tile bounding box. - */ -static INLINE void -tile_bounding_box(const struct cell_command_render *render, - uint *txmin, uint *tymin, - uint *box_num_tiles, uint *box_width_tiles) -{ -#if 1 - /* Debug: full-window bounding box */ - uint txmax = spu.fb.width_tiles - 1; - uint tymax = spu.fb.height_tiles - 1; - *txmin = 0; - *tymin = 0; - *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - *box_width_tiles = spu.fb.width_tiles; - (void) render; - (void) txmax; - (void) tymax; -#else - uint txmax, tymax, box_height_tiles; - - *txmin = (uint) render->xmin / TILE_SIZE; - *tymin = (uint) render->ymin / TILE_SIZE; - txmax = (uint) render->xmax / TILE_SIZE; - tymax = (uint) render->ymax / TILE_SIZE; - *box_width_tiles = txmax - *txmin + 1; - box_height_tiles = tymax - *tymin + 1; - *box_num_tiles = *box_width_tiles * box_height_tiles; -#endif -#if 0 - printf("Render bounds: %g, %g ... %g, %g\n", - render->xmin, render->ymin, render->xmax, render->ymax); - printf("Render tiles: %u, %u .. %u, %u\n", *txmin, *tymin, txmax, tymax); -#endif -} - - -/** - * Render primitives - * \param pos_incr returns value indicating how may words to skip after - * this command in the batch buffer - */ static void -cmd_render(const struct cell_command_render *render, uint *pos_incr) +cmd_release_verts(const struct cell_command_release_verts *release) { - /* we'll DMA into these buffers */ - ubyte vertex_data[CELL_MAX_VBUF_SIZE] ALIGN16_ATTRIB; - ushort index_data[CELL_MAX_VBUF_INDEXES] ALIGN16_ATTRIB; - const uint vertex_size = render->vertex_size; /* in bytes */ - const uint total_vertex_bytes = render->num_verts * vertex_size; - const ubyte *vertices; - const ushort *indexes; - uint mask; - uint i, j; - - - if (Debug) { - printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u " - "inline_vert=%u inline_ind=%u\n", - spu.init.id, - render->prim_type, - render->num_verts, - render->num_indexes, - render->inline_verts, - render->inline_indexes); - - /* - printf(" bound: %g, %g .. %g, %g\n", - render->xmin, render->ymin, render->xmax, render->ymax); - */ - printf("SPU %u: indices at %p vertices at %p\n", - spu.init.id, - render->index_data, render->vertex_data); - } - - ASSERT(sizeof(*render) % 4 == 0); - ASSERT_ALIGN16(render->vertex_data); - ASSERT_ALIGN16(render->index_data); - - - /** - ** Get vertex, index buffers if not inlined - **/ - if (!render->inline_verts) { - ASSERT(total_vertex_bytes % 16 == 0); - - mfc_get(vertex_data, /* dest */ - (unsigned int) render->vertex_data, /* src */ - total_vertex_bytes, /* size */ - TAG_VERTEX_BUFFER, - 0, /* tid */ - 0 /* rid */); - - vertices = vertex_data; - } - - if (!render->inline_indexes) { - uint total_index_bytes; - - *pos_incr = 0; - - total_index_bytes = render->num_indexes * sizeof(ushort); - if (total_index_bytes < 16) - total_index_bytes = 16; - else - total_index_bytes = ROUNDUP16(total_index_bytes); - - indexes = index_data; - - /* get index data from main memory */ - mfc_get(index_data, /* dest */ - (unsigned int) render->index_data, /* src */ - total_index_bytes, - TAG_INDEX_BUFFER, - 0, /* tid */ - 0 /* rid */); - } - - - /** - ** Get pointers to inlined indexes, verts, if present - **/ - if (render->inline_indexes) { - /* indexes are right after the render command in the batch buffer */ - indexes = (ushort *) (render + 1); - *pos_incr = (render->num_indexes * 2 + 3) / 4; - - if (render->inline_verts) { - /* vertices are after indexes, if inlined */ - vertices = (const ubyte *) (render + 1) + *pos_incr * 4; - *pos_incr = *pos_incr + total_vertex_bytes / 4; - } - } - - - /* wait for vertex and/or index buffers if not inlined */ - mask = 0x0; - if (!render->inline_verts) - mask |= (1 << TAG_VERTEX_BUFFER); - if (!render->inline_indexes) - mask |= (1 << TAG_INDEX_BUFFER); - wait_on_mask_all(mask); - - - /** - ** find tiles which intersect the prim bounding box - **/ - uint txmin, tymin, box_width_tiles, box_num_tiles; -#if 0 - tile_bounding_box(render, &txmin, &tymin, - &box_num_tiles, &box_width_tiles); -#else - txmin = 0; - tymin = 0; - box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - box_width_tiles = spu.fb.width_tiles; -#endif - - /* make sure any pending clears have completed */ - wait_on_mask(1 << TAG_SURFACE_CLEAR); - - - /** - ** loop over tiles, rendering tris - **/ - for (i = spu.init.id; i < box_num_tiles; i += spu.init.num_spus) { - const uint tx = txmin + i % box_width_tiles; - const uint ty = tymin + i / box_width_tiles; - - ASSERT(tx < spu.fb.width_tiles); - ASSERT(ty < spu.fb.height_tiles); - - /* Start fetching color/z tiles. We'll wait for completion when - * we need read/write to them later in triangle rasterization. - */ - if (spu.depth_stencil.depth.enabled) { - if (tile_status_z[ty][tx] != TILE_STATUS_CLEAR) { - get_tile(tx, ty, &ztile, TAG_READ_TILE_Z, 1); - } - } - - if (tile_status[ty][tx] != TILE_STATUS_CLEAR) { - get_tile(tx, ty, &ctile, TAG_READ_TILE_COLOR, 0); - } - - ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES); - ASSERT(render->num_indexes % 3 == 0); - - /* loop over tris */ - for (j = 0; j < render->num_indexes; j += 3) { - const float *v0, *v1, *v2; - - v0 = (const float *) (vertices + indexes[j+0] * vertex_size); - v1 = (const float *) (vertices + indexes[j+1] * vertex_size); - v2 = (const float *) (vertices + indexes[j+2] * vertex_size); - - tri_draw(v0, v1, v2, tx, ty); - } - - /* write color/z tiles back to main framebuffer, if dirtied */ - if (tile_status[ty][tx] == TILE_STATUS_DIRTY) { - put_tile(tx, ty, &ctile, TAG_WRITE_TILE_COLOR, 0); - tile_status[ty][tx] = TILE_STATUS_DEFINED; - } - if (spu.depth_stencil.depth.enabled) { - if (tile_status_z[ty][tx] == TILE_STATUS_DIRTY) { - put_tile(tx, ty, &ztile, TAG_WRITE_TILE_Z, 1); - tile_status_z[ty][tx] = TILE_STATUS_DEFINED; - } - } - - /* XXX move these... */ - wait_on_mask(1 << TAG_WRITE_TILE_COLOR); - if (spu.depth_stencil.depth.enabled) { - wait_on_mask(1 << TAG_WRITE_TILE_Z); - } - } - if (Debug) - printf("SPU %u: RENDER done\n", - spu.init.id); + printf("SPU %u: RELEASE VERTS %u\n", + spu.init.id, release->vertex_buf); + ASSERT(release->vertex_buf != ~0U); + release_buffer(release->vertex_buf); } @@ -421,6 +217,29 @@ cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) spu.fb.zsize = 2; else spu.fb.zsize = 0; + + if (spu.fb.color_format == PIPE_FORMAT_A8R8G8B8_UNORM) + spu.color_shuffle = ((vector unsigned char) { + 12, 0, 4, 8, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}); + else if (spu.fb.color_format == PIPE_FORMAT_B8G8R8A8_UNORM) + spu.color_shuffle = ((vector unsigned char) { + 8, 4, 0, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}); + else + ASSERT(0); +} + + +static void +cmd_state_blend(const struct pipe_blend_state *state) +{ + if (Debug) + printf("SPU %u: BLEND: enabled %d\n", + spu.init.id, + state->blend_enable); + + memcpy(&spu.blend, state, sizeof(*state)); } @@ -444,19 +263,53 @@ cmd_state_sampler(const struct pipe_sampler_state *state) spu.init.id); memcpy(&spu.sampler[0], state, sizeof(*state)); + if (spu.sampler[0].min_img_filter == PIPE_TEX_FILTER_LINEAR) + spu.sample_texture = sample_texture_bilinear; + else + spu.sample_texture = sample_texture_nearest; } static void -cmd_state_vertex_info(const struct vertex_info *vinfo) +cmd_state_texture(const struct cell_command_texture *texture) { if (Debug) + printf("SPU %u: TEXTURE at %p size %u x %u\n", + spu.init.id, texture->start, texture->width, texture->height); + + memcpy(&spu.texture, texture, sizeof(*texture)); + spu.tex_size = (vector float) + { spu.texture.width, spu.texture.height, 0.0, 0.0}; + spu.tex_size_mask = (vector unsigned int) + { spu.texture.width - 1, spu.texture.height - 1, 0, 0 }; +} + + +static void +cmd_state_vertex_info(const struct vertex_info *vinfo) +{ + if (Debug) { printf("SPU %u: VERTEX_INFO num_attribs=%u\n", spu.init.id, vinfo->num_attribs); + } + ASSERT(vinfo->num_attribs >= 1); + ASSERT(vinfo->num_attribs <= 8); memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); } +static void +cmd_state_vs_array_info(const struct cell_array_info *vs_info) +{ + const unsigned attr = vs_info->attr; + + ASSERT(attr < PIPE_ATTRIB_MAX); + draw.vertex_fetch.src_ptr[attr] = vs_info->base; + draw.vertex_fetch.pitch[attr] = vs_info->pitch; + draw.vertex_fetch.format[attr] = vs_info->format; + draw.vertex_fetch.dirty = 1; +} + static void cmd_finish(void) @@ -473,38 +326,6 @@ cmd_finish(void) /** - * Tell the PPU that this SPU has finished copying a batch buffer to - * local store and that it may be reused by the PPU. - * This is done by writting a 16-byte batch-buffer-status block back into - * main memory (in cell_contex->buffer_status[]). - */ -static void -release_batch_buffer(uint buffer) -{ - /* Evidently, using less than a 16-byte status doesn't work reliably */ - static const uint status[4] ALIGN16_ATTRIB - = {CELL_BUFFER_STATUS_FREE, 0, 0, 0}; - - const uint index = 4 * (spu.init.id * CELL_NUM_BATCH_BUFFERS + buffer); - uint *dst = spu.init.buffer_status + index; - - ASSERT(buffer < CELL_NUM_BATCH_BUFFERS); - - /* - printf("SPU %u: Set batch status buf=%u, index %u, at %p to FREE\n", - spu.init.id, buffer, index, dst); - */ - - mfc_put((void *) &status, /* src in local memory */ - (unsigned int) dst, /* dst in main memory */ - sizeof(status), /* size */ - TAG_MISC, /* tag is unimportant */ - 0, /* tid */ - 0 /* rid */); -} - - -/** * Execute a batch of commands * The opcode param encodes the location of the buffer and its size. */ @@ -513,24 +334,24 @@ cmd_batch(uint opcode) { const uint buf = (opcode >> 8) & 0xff; uint size = (opcode >> 16); - uint buffer[CELL_BATCH_BUFFER_SIZE / 4] ALIGN16_ATTRIB; - const uint usize = size / sizeof(uint); + uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB; + const unsigned usize = size / sizeof(buffer[0]); uint pos; if (Debug) printf("SPU %u: BATCH buffer %u, len %u, from %p\n", - spu.init.id, buf, size, spu.init.batch_buffers[buf]); + spu.init.id, buf, size, spu.init.buffers[buf]); ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); - ASSERT_ALIGN16(spu.init.batch_buffers[buf]); + ASSERT_ALIGN16(spu.init.buffers[buf]); size = ROUNDUP16(size); - ASSERT_ALIGN16(spu.init.batch_buffers[buf]); + ASSERT_ALIGN16(spu.init.buffers[buf]); mfc_get(buffer, /* dest */ - (unsigned int) spu.init.batch_buffers[buf], /* src */ + (unsigned int) spu.init.buffers[buf], /* src */ size, TAG_BATCH_BUFFER, 0, /* tid */ @@ -538,7 +359,9 @@ cmd_batch(uint opcode) wait_on_mask(1 << TAG_BATCH_BUFFER); /* Tell PPU we're done copying the buffer to local store */ - release_batch_buffer(buf); + if (Debug) + printf("SPU %u: release batch buf %u\n", spu.init.id, buf); + release_buffer(buf); for (pos = 0; pos < usize; /* no incr */) { switch (buffer[pos]) { @@ -547,7 +370,7 @@ cmd_batch(uint opcode) struct cell_command_framebuffer *fb = (struct cell_command_framebuffer *) &buffer[pos]; cmd_state_framebuffer(fb); - pos += sizeof(*fb) / 4; + pos += sizeof(*fb) / 8; } break; case CELL_CMD_CLEAR_SURFACE: @@ -555,7 +378,7 @@ cmd_batch(uint opcode) struct cell_command_clear_surface *clr = (struct cell_command_clear_surface *) &buffer[pos]; cmd_clear_surface(clr); - pos += sizeof(*clr) / 4; + pos += sizeof(*clr) / 8; } break; case CELL_CMD_RENDER: @@ -564,28 +387,54 @@ cmd_batch(uint opcode) = (struct cell_command_render *) &buffer[pos]; uint pos_incr; cmd_render(render, &pos_incr); - pos += sizeof(*render) / 4 + pos_incr; + pos += pos_incr; + } + break; + case CELL_CMD_RELEASE_VERTS: + { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) &buffer[pos]; + cmd_release_verts(release); + pos += sizeof(*release) / 8; } break; case CELL_CMD_FINISH: cmd_finish(); pos += 1; break; + case CELL_CMD_STATE_BLEND: + cmd_state_blend((struct pipe_blend_state *) + &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct pipe_blend_state)) / 8); + break; case CELL_CMD_STATE_DEPTH_STENCIL: cmd_state_depth_stencil((struct pipe_depth_stencil_alpha_state *) &buffer[pos+1]); - pos += (1 + sizeof(struct pipe_depth_stencil_alpha_state) / 4); + pos += (1 + ROUNDUP8(sizeof(struct pipe_depth_stencil_alpha_state)) / 8); break; case CELL_CMD_STATE_SAMPLER: cmd_state_sampler((struct pipe_sampler_state *) &buffer[pos+1]); - pos += (1 + sizeof(struct pipe_sampler_state) / 4); + pos += (1 + ROUNDUP8(sizeof(struct pipe_sampler_state)) / 8); + break; + case CELL_CMD_STATE_TEXTURE: + cmd_state_texture((struct cell_command_texture *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_command_texture)) / 8); break; case CELL_CMD_STATE_VERTEX_INFO: cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); - pos += (1 + sizeof(struct vertex_info) / 4); + pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8); + break; + case CELL_CMD_STATE_VIEWPORT: + (void) memcpy(& draw.viewport, &buffer[pos+1], + sizeof(struct pipe_viewport_state)); + pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8); + break; + case CELL_CMD_STATE_VS_ARRAY_INFO: + cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); break; default: - printf("SPU %u: bad opcode: 0x%x\n", spu.init.id, buffer[pos]); + printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]); ASSERT(0); break; } @@ -633,31 +482,22 @@ main_loop(void) 0 /* rid */); wait_on_mask( 1 << tag ); + /* + * NOTE: most commands should be contained in a batch buffer + */ + switch (opcode & CELL_CMD_OPCODE_MASK) { case CELL_CMD_EXIT: if (Debug) printf("SPU %u: EXIT\n", spu.init.id); exitFlag = 1; break; - case CELL_CMD_STATE_FRAMEBUFFER: - cmd_state_framebuffer(&cmd.fb); - break; - case CELL_CMD_CLEAR_SURFACE: - cmd_clear_surface(&cmd.clear); - break; - case CELL_CMD_RENDER: - { - uint pos_incr; - cmd_render(&cmd.render, &pos_incr); - assert(pos_incr == 0); - } + case CELL_CMD_VS_EXECUTE: + spu_execute_vertex_shader(&draw, &cmd.vs); break; case CELL_CMD_BATCH: cmd_batch(opcode); break; - case CELL_CMD_FINISH: - cmd_finish(); - break; default: printf("Bad opcode!\n"); } @@ -673,11 +513,13 @@ main_loop(void) static void one_time_init(void) { - memset(tile_status, TILE_STATUS_DEFINED, sizeof(tile_status)); - memset(tile_status_z, TILE_STATUS_DEFINED, sizeof(tile_status_z)); + memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); + memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); + invalidate_tex_cache(); } + /* In some versions of the SDK the SPE main takes 'unsigned long' as a * parameter. In others it takes 'unsigned long long'. Use a define to * select between the two. @@ -698,6 +540,9 @@ main(main_param_t speid, main_param_t argp) (void) speid; + ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); + ASSERT(sizeof(struct cell_command_render) % 8 == 0); + one_time_init(); if (Debug) @@ -711,6 +556,10 @@ main(main_param_t speid, main_param_t argp) 0 /* rid */); wait_on_mask( 1 << tag ); +#if 0 + if (spu.init.id==0) + spu_test_misc(); +#endif main_loop(); diff --git a/src/mesa/pipe/cell/spu/spu_main.h b/src/mesa/pipe/cell/spu/spu_main.h index 5bc5d9fa99..1710a17512 100644 --- a/src/mesa/pipe/cell/spu/spu_main.h +++ b/src/mesa/pipe/cell/spu/spu_main.h @@ -29,11 +29,33 @@ #define SPU_MAIN_H +#include <spu_mfcio.h> + #include "pipe/cell/common.h" #include "pipe/draw/draw_vertex.h" #include "pipe/p_state.h" + +#define MAX_WIDTH 1024 +#define MAX_HEIGHT 1024 + + +typedef union { + ushort us[TILE_SIZE][TILE_SIZE]; + uint ui[TILE_SIZE][TILE_SIZE]; + vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4]; + vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2]; +} tile_t; + + +#define TILE_STATUS_CLEAR 1 +#define TILE_STATUS_DEFINED 2 /**< defined in FB, but not in local store */ +#define TILE_STATUS_CLEAN 3 /**< in local store, but not changed */ +#define TILE_STATUS_DIRTY 4 /**< modified locally, but not put back yet */ +#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ + + struct spu_framebuffer { void *color_start; /**< addr of color surface in main memory */ void *depth_start; /**< addr of depth surface in main memory */ @@ -57,18 +79,42 @@ struct spu_global struct cell_init_info init; struct spu_framebuffer fb; + struct pipe_blend_state blend_stencil; struct pipe_depth_stencil_alpha_state depth_stencil; struct pipe_blend_state blend; struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; + struct cell_command_texture texture; struct vertex_info vertex_info; /* XXX more state to come */ + + /** current color and Z tiles */ + tile_t ctile ALIGN16_ATTRIB; + tile_t ztile ALIGN16_ATTRIB; + + /** Current tiles' status */ + ubyte cur_ctile_status, cur_ztile_status; + + /** Status of all tiles in framebuffer */ + ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + + + /** for converting RGBA to PIPE_FORMAT_x colors */ + vector unsigned char color_shuffle; + + vector float tex_size; + vector unsigned int tex_size_mask; /**< == int(size - 1) */ + + vector float (*sample_texture)(vector float texcoord); + } ALIGN16_ATTRIB; extern struct spu_global spu; +extern boolean Debug; @@ -84,10 +130,30 @@ extern struct spu_global spu; #define TAG_INDEX_BUFFER 16 #define TAG_BATCH_BUFFER 17 #define TAG_MISC 18 +#define TAG_TEXTURE_TILE 19 +#define TAG_INSTRUCTION_FETCH 20 + + + +static INLINE void +wait_on_mask(unsigned tagMask) +{ + mfc_write_tag_mask( tagMask ); + /* wait for completion of _any_ DMAs specified by tagMask */ + mfc_read_tag_status_any(); +} + + +static INLINE void +wait_on_mask_all(unsigned tagMask) +{ + mfc_write_tag_mask( tagMask ); + /* wait for completion of _any_ DMAs specified by tagMask */ + mfc_read_tag_status_all(); +} + -extern void -wait_on_mask(unsigned tag); static INLINE void diff --git a/src/mesa/pipe/cell/spu/spu_render.c b/src/mesa/pipe/cell/spu/spu_render.c new file mode 100644 index 0000000000..932fb500b3 --- /dev/null +++ b/src/mesa/pipe/cell/spu/spu_render.c @@ -0,0 +1,301 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <stdio.h> +#include <libmisc.h> +#include <spu_mfcio.h> + +#include "spu_main.h" +#include "spu_render.h" +#include "spu_tri.h" +#include "spu_tile.h" +#include "pipe/cell/common.h" + + + +/** + * Given a rendering command's bounding box (in pixels) compute the + * location of the corresponding screen tile bounding box. + */ +static INLINE void +tile_bounding_box(const struct cell_command_render *render, + uint *txmin, uint *tymin, + uint *box_num_tiles, uint *box_width_tiles) +{ +#if 0 + /* Debug: full-window bounding box */ + uint txmax = spu.fb.width_tiles - 1; + uint tymax = spu.fb.height_tiles - 1; + *txmin = 0; + *tymin = 0; + *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + *box_width_tiles = spu.fb.width_tiles; + (void) render; + (void) txmax; + (void) tymax; +#else + uint txmax, tymax, box_height_tiles; + + *txmin = (uint) render->xmin / TILE_SIZE; + *tymin = (uint) render->ymin / TILE_SIZE; + txmax = (uint) render->xmax / TILE_SIZE; + tymax = (uint) render->ymax / TILE_SIZE; + if (txmax >= spu.fb.width_tiles) + txmax = spu.fb.width_tiles-1; + if (tymax >= spu.fb.height_tiles) + tymax = spu.fb.height_tiles-1; + *box_width_tiles = txmax - *txmin + 1; + box_height_tiles = tymax - *tymin + 1; + *box_num_tiles = *box_width_tiles * box_height_tiles; +#endif +#if 0 + printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu.init.id, + render->xmin, render->ymin, render->xmax, render->ymax); + printf("SPU %u: tiles: %u, %u .. %u, %u\n", + spu.init.id, *txmin, *tymin, txmax, tymax); + ASSERT(render->xmin <= render->xmax); + ASSERT(render->ymin <= render->ymax); +#endif +} + + +/** Check if the tile at (tx,ty) belongs to this SPU */ +static INLINE boolean +my_tile(uint tx, uint ty) +{ + return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id; +} + + +/** + * Start fetching non-clear color/Z tiles from main memory + */ +static INLINE void +get_cz_tiles(uint tx, uint ty) +{ + if (spu.depth_stencil.depth.enabled) { + if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { + //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty); + get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); + spu.cur_ztile_status = TILE_STATUS_GETTING; + } + } + + if (spu.cur_ctile_status != TILE_STATUS_CLEAR) { + //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty); + get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0); + spu.cur_ctile_status = TILE_STATUS_GETTING; + } +} + + +/** + * Start putting dirty color/Z tiles back to main memory + */ +static INLINE void +put_cz_tiles(uint tx, uint ty) +{ + if (spu.cur_ztile_status == TILE_STATUS_DIRTY) { + /* tile was modified and needs to be written back */ + //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty); + put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1); + spu.cur_ztile_status = TILE_STATUS_DEFINED; + } + else if (spu.cur_ztile_status == TILE_STATUS_GETTING) { + /* tile was never used */ + spu.cur_ztile_status = TILE_STATUS_DEFINED; + //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty); + } + + if (spu.cur_ctile_status == TILE_STATUS_DIRTY) { + /* tile was modified and needs to be written back */ + //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty); + put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0); + spu.cur_ctile_status = TILE_STATUS_DEFINED; + } + else if (spu.cur_ctile_status == TILE_STATUS_GETTING) { + /* tile was never used */ + spu.cur_ctile_status = TILE_STATUS_DEFINED; + //printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty); + } +} + + +/** + * Wait for 'put' of color/z tiles to complete. + */ +static INLINE void +wait_put_cz_tiles(void) +{ + wait_on_mask(1 << TAG_WRITE_TILE_COLOR); + if (spu.depth_stencil.depth.enabled) { + wait_on_mask(1 << TAG_WRITE_TILE_Z); + } +} + + +/** + * Render primitives + * \param pos_incr returns value indicating how may words to skip after + * this command in the batch buffer + */ +void +cmd_render(const struct cell_command_render *render, uint *pos_incr) +{ + /* we'll DMA into these buffers */ + ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + const uint vertex_size = render->vertex_size; /* in bytes */ + /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; + uint index_bytes; + const ubyte *vertices; + const ushort *indexes; + uint i, j; + + + if (Debug) { + printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u " + "inline_vert=%u\n", + spu.init.id, + render->prim_type, + render->num_verts, + render->num_indexes, + render->inline_verts); + + /* + printf(" bound: %g, %g .. %g, %g\n", + render->xmin, render->ymin, render->xmax, render->ymax); + */ + } + + ASSERT(sizeof(*render) % 4 == 0); + ASSERT(total_vertex_bytes % 16 == 0); + ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES); + ASSERT(render->num_indexes % 3 == 0); + + + /* indexes are right after the render command in the batch buffer */ + indexes = (const ushort *) (render + 1); + index_bytes = ROUNDUP8(render->num_indexes * 2); + *pos_incr = index_bytes / 8 + sizeof(*render) / 8; + + + if (render->inline_verts) { + /* Vertices are after indexes in batch buffer at next 16-byte addr */ + vertices = (const ubyte *) render + (*pos_incr * 8); + vertices = (const ubyte *) align_pointer((void *) vertices, 16); + ASSERT_ALIGN16(vertices); + *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8; + } + else { + /* Begin DMA fetch of vertex buffer */ + ubyte *src = spu.init.buffers[render->vertex_buf]; + ubyte *dest = vertex_data; + + /* skip vertex data we won't use */ +#if 01 + src += render->min_index * vertex_size; + dest += render->min_index * vertex_size; + total_vertex_bytes -= render->min_index * vertex_size; +#endif + ASSERT(total_vertex_bytes % 16 == 0); + ASSERT_ALIGN16(dest); + ASSERT_ALIGN16(src); + + mfc_get(dest, /* in vertex_data[] array */ + (unsigned int) src, /* src in main memory */ + total_vertex_bytes, /* size */ + TAG_VERTEX_BUFFER, + 0, /* tid */ + 0 /* rid */); + + vertices = vertex_data; + + wait_on_mask(1 << TAG_VERTEX_BUFFER); + } + + + /** + ** find tiles which intersect the prim bounding box + **/ + uint txmin, tymin, box_width_tiles, box_num_tiles; + tile_bounding_box(render, &txmin, &tymin, + &box_num_tiles, &box_width_tiles); + + + /* make sure any pending clears have completed */ + wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */ + + + /** + ** loop over tiles, rendering tris + **/ + for (i = 0; i < box_num_tiles; i++) { + const uint tx = txmin + i % box_width_tiles; + const uint ty = tymin + i / box_width_tiles; + + ASSERT(tx < spu.fb.width_tiles); + ASSERT(ty < spu.fb.height_tiles); + + if (!my_tile(tx, ty)) + continue; + + spu.cur_ctile_status = spu.ctile_status[ty][tx]; + spu.cur_ztile_status = spu.ztile_status[ty][tx]; + + get_cz_tiles(tx, ty); + + uint drawn = 0; + + /* loop over tris */ + for (j = 0; j < render->num_indexes; j += 3) { + const float *v0, *v1, *v2; + + v0 = (const float *) (vertices + indexes[j+0] * vertex_size); + v1 = (const float *) (vertices + indexes[j+1] * vertex_size); + v2 = (const float *) (vertices + indexes[j+2] * vertex_size); + + drawn += tri_draw(v0, v1, v2, tx, ty); + } + + //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); + + /* write color/z tiles back to main framebuffer, if dirtied */ + put_cz_tiles(tx, ty); + + wait_put_cz_tiles(); /* XXX seems unnecessary... */ + + spu.ctile_status[ty][tx] = spu.cur_ctile_status; + spu.ztile_status[ty][tx] = spu.cur_ztile_status; + } + + if (Debug) + printf("SPU %u: RENDER done\n", + spu.init.id); +} + + diff --git a/src/mesa/pipe/cell/spu/spu_render.h b/src/mesa/pipe/cell/spu/spu_render.h new file mode 100644 index 0000000000..fbcdc5ec31 --- /dev/null +++ b/src/mesa/pipe/cell/spu/spu_render.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SPU_RENDER_H +#define SPU_RENDER_H + +#include "pipe/cell/common.h" + +extern void +cmd_render(const struct cell_command_render *render, uint *pos_incr); + +#endif /* SPU_RENDER_H */ + diff --git a/src/mesa/pipe/cell/spu/spu_texture.c b/src/mesa/pipe/cell/spu/spu_texture.c new file mode 100644 index 0000000000..3962aaa4a9 --- /dev/null +++ b/src/mesa/pipe/cell/spu/spu_texture.c @@ -0,0 +1,217 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_compiler.h" +#include "spu_main.h" +#include "spu_texture.h" +#include "spu_tile.h" +#include "spu_colorpack.h" + + +/** + * Number of texture tiles to cache. + * Note that this will probably be the largest consumer of SPU local store/ + * memory for this driver! + */ +#define CACHE_SIZE 16 + +static tile_t tex_tiles[CACHE_SIZE] ALIGN16_ATTRIB; + +static vector unsigned int tex_tile_xy[CACHE_SIZE]; + + + +/** + * Mark all tex cache entries as invalid. + */ +void +invalidate_tex_cache(void) +{ + /* XXX memset? */ + uint i; + for (i = 0; i < CACHE_SIZE; i++) { + tex_tile_xy[i] = ((vector unsigned int) { ~0U, ~0U, ~0U, ~0U }); + } +} + + +/** + * Return the cache pos/index which corresponds to tile (tx,ty) + */ +static INLINE uint +cache_pos(vector unsigned int txty) +{ + uint pos = (spu_extract(txty,0) + spu_extract(txty,1) * 4) % CACHE_SIZE; + return pos; +} + + +/** + * Make sure the tile for texel (i,j) is present, return its position/index + * in the cache. + */ +static uint +get_tex_tile(vector unsigned int ij) +{ + /* tile address: tx,ty */ + const vector unsigned int txty = spu_rlmask(ij, -5); /* divide by 32 */ + const uint pos = cache_pos(txty); + + if ((spu_extract(tex_tile_xy[pos], 0) != spu_extract(txty, 0)) || + (spu_extract(tex_tile_xy[pos], 1) != spu_extract(txty, 1))) { + + /* texture cache miss, fetch tile from main memory */ + const uint tiles_per_row = spu.texture.width / TILE_SIZE; + const uint bytes_per_tile = sizeof(tile_t); + const void *src = (const ubyte *) spu.texture.start + + (spu_extract(txty,1) * tiles_per_row + spu_extract(txty,0)) * bytes_per_tile; + + printf("SPU %u: tex cache miss at %d, %d pos=%u old=%d,%d\n", + spu.init.id, + spu_extract(txty,0), + spu_extract(txty,1), + pos, + spu_extract(tex_tile_xy[pos],0), + spu_extract(tex_tile_xy[pos],1)); + + ASSERT_ALIGN16(tex_tiles[pos].ui); + ASSERT_ALIGN16(src); + + mfc_get(tex_tiles[pos].ui, /* dest */ + (unsigned int) src, + bytes_per_tile, /* size */ + TAG_TEXTURE_TILE, + 0, /* tid */ + 0 /* rid */); + + wait_on_mask(1 << TAG_TEXTURE_TILE); + + tex_tile_xy[pos] = txty; + } + else { +#if 0 + printf("SPU %u: tex cache HIT at %d, %d\n", + spu.init.id, tx, ty); +#endif + } + + return pos; +} + + +/** + * Get texture sample at texcoord. + * XXX this is extremely primitive for now. + */ +vector float +sample_texture_nearest(vector float texcoord) +{ + vector float tc = spu_mul(texcoord, spu.tex_size); + vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */ + itc = spu_and(itc, spu.tex_size_mask); /* mask (GL_REPEAT) */ + vector unsigned int ij = spu_and(itc, TILE_SIZE-1); /* intra tile addr */ + uint pos = get_tex_tile(itc); + uint texel = tex_tiles[pos].ui[spu_extract(ij, 1)][spu_extract(ij, 0)]; + return spu_unpack_A8R8G8B8(texel); +} + + +vector float +sample_texture_bilinear(vector float texcoord) +{ + static const vector unsigned int offset10 = {1, 0, 0, 0}; + static const vector unsigned int offset01 = {0, 1, 0, 0}; + + vector float tc = spu_mul(texcoord, spu.tex_size); + tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */ + + /* integer texcoords S,T: */ + vector unsigned int itc00 = spu_convtu(tc, 0); /* convert to int */ + vector unsigned int itc01 = spu_add(itc00, offset01); + vector unsigned int itc10 = spu_add(itc00, offset10); + vector unsigned int itc11 = spu_add(itc10, offset01); + + /* mask (GL_REPEAT) */ + itc00 = spu_and(itc00, spu.tex_size_mask); + itc01 = spu_and(itc01, spu.tex_size_mask); + itc10 = spu_and(itc10, spu.tex_size_mask); + itc11 = spu_and(itc11, spu.tex_size_mask); + + /* intra tile addr */ + vector unsigned int ij00 = spu_and(itc00, TILE_SIZE-1); + vector unsigned int ij01 = spu_and(itc01, TILE_SIZE-1); + vector unsigned int ij10 = spu_and(itc10, TILE_SIZE-1); + vector unsigned int ij11 = spu_and(itc11, TILE_SIZE-1); + + /* get tile cache positions */ + uint pos00 = get_tex_tile(itc00); + uint pos01, pos10, pos11; + if ((spu_extract(ij00, 0) < TILE_SIZE-1) && + (spu_extract(ij00, 1) < TILE_SIZE-1)) { + /* all texels are in the same tile */ + pos01 = pos10 = pos11 = pos00; + } + else { + pos01 = get_tex_tile(itc01); + pos10 = get_tex_tile(itc10); + pos11 = get_tex_tile(itc11); + } + + /* get texels from tiles and convert to float[4] */ + vector float texel00 = spu_unpack_A8R8G8B8(tex_tiles[pos00].ui[spu_extract(ij00, 1)][spu_extract(ij00, 0)]); + vector float texel01 = spu_unpack_A8R8G8B8(tex_tiles[pos01].ui[spu_extract(ij01, 1)][spu_extract(ij01, 0)]); + vector float texel10 = spu_unpack_A8R8G8B8(tex_tiles[pos10].ui[spu_extract(ij10, 1)][spu_extract(ij10, 0)]); + vector float texel11 = spu_unpack_A8R8G8B8(tex_tiles[pos11].ui[spu_extract(ij11, 1)][spu_extract(ij11, 0)]); + + /* Compute weighting factors in [0,1] + * Multiply texcoord by 1024, AND with 1023, convert back to float. + */ + vector float tc1024 = spu_mul(tc, spu_splats(1024.0f)); + vector signed int itc1024 = spu_convts(tc1024, 0); + itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1)); + vector float weight = spu_convtf(itc1024, 10); + + /* smeared frac and 1-frac */ + vector float sfrac = spu_splats(spu_extract(weight, 0)); + vector float tfrac = spu_splats(spu_extract(weight, 1)); + vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac); + vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac); + + /* multiply the samples (colors) by the S/T weights */ + texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1); + texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1); + texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac ); + texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac ); + + /* compute sum of weighted samples */ + vector float texel_sum = spu_add(texel00, texel01); + texel_sum = spu_add(texel_sum, texel10); + texel_sum = spu_add(texel_sum, texel11); + + return texel_sum; +} diff --git a/src/mesa/pipe/cell/spu/spu_texture.h b/src/mesa/pipe/cell/spu/spu_texture.h new file mode 100644 index 0000000000..95eb87080f --- /dev/null +++ b/src/mesa/pipe/cell/spu/spu_texture.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SPU_TEXTURE_H +#define SPU_TEXTURE_H + + +#include "pipe/p_compiler.h" + + +extern void +invalidate_tex_cache(void); + + +extern vector float +sample_texture_nearest(vector float texcoord); + + +extern vector float +sample_texture_bilinear(vector float texcoord); + + +#endif /* SPU_TEXTURE_H */ diff --git a/src/mesa/pipe/cell/spu/spu_tile.c b/src/mesa/pipe/cell/spu/spu_tile.c index ca1352f9f8..12dc246328 100644 --- a/src/mesa/pipe/cell/spu/spu_tile.c +++ b/src/mesa/pipe/cell/spu/spu_tile.c @@ -28,15 +28,7 @@ #include "spu_tile.h" - - - -tile_t ctile ALIGN16_ATTRIB; -tile_t ztile ALIGN16_ATTRIB; - -ubyte tile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; -ubyte tile_status_z[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - +#include "spu_main.h" void @@ -55,7 +47,7 @@ get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf) printf("get_tile: dest: %p src: 0x%x size: %d\n", tile, (unsigned int) src, bytesPerTile); */ - mfc_get(tile->t32, /* dest in local memory */ + mfc_get(tile->ui, /* dest in local memory */ (unsigned int) src, /* src in main memory */ bytesPerTile, tag, @@ -81,7 +73,7 @@ put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf) spu.init.id, tile, (unsigned int) dst, bytesPerTile); */ - mfc_put((void *) tile->t32, /* src in local memory */ + mfc_put((void *) tile->ui, /* src in local memory */ (unsigned int) dst, /* dst in main memory */ bytesPerTile, tag, diff --git a/src/mesa/pipe/cell/spu/spu_tile.h b/src/mesa/pipe/cell/spu/spu_tile.h index f83dc009c2..e53340a55a 100644 --- a/src/mesa/pipe/cell/spu/spu_tile.h +++ b/src/mesa/pipe/cell/spu/spu_tile.h @@ -35,27 +35,6 @@ #include "pipe/cell/common.h" -#define MAX_WIDTH 1024 -#define MAX_HEIGHT 1024 - - -typedef union { - ushort t16[TILE_SIZE][TILE_SIZE]; - uint t32[TILE_SIZE][TILE_SIZE]; -} tile_t; - - -extern tile_t ctile ALIGN16_ATTRIB; -extern tile_t ztile ALIGN16_ATTRIB; - - -#define TILE_STATUS_CLEAR 1 -#define TILE_STATUS_DEFINED 2 /**< defined pixel data */ -#define TILE_STATUS_DIRTY 3 /**< modified, but not put back yet */ - -extern ubyte tile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; -extern ubyte tile_status_z[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - void get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf); @@ -68,7 +47,7 @@ put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf); static INLINE void clear_c_tile(tile_t *ctile) { - memset32((uint*) ctile->t32, + memset32((uint*) ctile->ui, spu.fb.color_clear_value, TILE_SIZE * TILE_SIZE); } @@ -78,12 +57,13 @@ static INLINE void clear_z_tile(tile_t *ztile) { if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { - memset16((ushort*) ztile->t16, + memset16((ushort*) ztile->us, spu.fb.depth_clear_value, TILE_SIZE * TILE_SIZE); } else { - memset32((uint*) ztile->t32, + ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM); + memset32((uint*) ztile->ui, spu.fb.depth_clear_value, TILE_SIZE * TILE_SIZE); } diff --git a/src/mesa/pipe/cell/spu/spu_tri.c b/src/mesa/pipe/cell/spu/spu_tri.c index 3d0d106c10..be9624cf7d 100644 --- a/src/mesa/pipe/cell/spu/spu_tri.c +++ b/src/mesa/pipe/cell/spu/spu_tri.c @@ -32,22 +32,33 @@ #include "pipe/p_compiler.h" #include "pipe/p_format.h" #include "pipe/p_util.h" +#include "spu_blend.h" +#include "spu_colorpack.h" #include "spu_main.h" +#include "spu_texture.h" #include "spu_tile.h" #include "spu_tri.h" +#include "spu_ztest.h" + + +/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ +typedef vector unsigned int mask_t; + +typedef union +{ + vector float v; + float f[4]; +} float4; /** * Simplified types taken from other parts of Gallium */ struct vertex_header { - float data[0][4]; + vector float data[1]; }; -struct prim_header { - struct vertex_header *v[3]; -}; /* XXX fix this */ @@ -82,9 +93,9 @@ struct edge { struct interp_coef { - float a0[4]; - float dadx[4]; - float dady[4]; + float4 a0; + float4 dadx; + float4 dady; }; @@ -133,6 +144,12 @@ struct setup_stage { }; + +static struct setup_stage setup; + + + + #if 0 /** * Basically a cast wrapper. @@ -145,33 +162,33 @@ static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) #if 0 /** - * Clip setup->quad against the scissor/surface bounds. + * Clip setup.quad against the scissor/surface bounds. */ static INLINE void quad_clip(struct setup_stage *setup) { - const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; + const struct pipe_scissor_state *cliprect = &setup.softpipe->cliprect; const int minx = (int) cliprect->minx; const int maxx = (int) cliprect->maxx; const int miny = (int) cliprect->miny; const int maxy = (int) cliprect->maxy; - if (setup->quad.x0 >= maxx || - setup->quad.y0 >= maxy || - setup->quad.x0 + 1 < minx || - setup->quad.y0 + 1 < miny) { + if (setup.quad.x0 >= maxx || + setup.quad.y0 >= maxy || + setup.quad.x0 + 1 < minx || + setup.quad.y0 + 1 < miny) { /* totally clipped */ - setup->quad.mask = 0x0; + setup.quad.mask = 0x0; return; } - if (setup->quad.x0 < minx) - setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - if (setup->quad.y0 < miny) - setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - if (setup->quad.x0 == maxx - 1) - setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - if (setup->quad.y0 == maxy - 1) - setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); + if (setup.quad.x0 < minx) + setup.quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + if (setup.quad.y0 < miny) + setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + if (setup.quad.x0 == maxx - 1) + setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + if (setup.quad.y0 == maxy - 1) + setup.quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); } #endif @@ -183,9 +200,9 @@ static INLINE void clip_emit_quad(struct setup_stage *setup) { quad_clip(setup); - if (setup->quad.mask) { - struct softpipe_context *sp = setup->softpipe; - sp->quad.first->run(sp->quad.first, &setup->quad); + if (setup.quad.mask) { + struct softpipe_context *sp = setup.softpipe; + sp->quad.first->run(sp->quad.first, &setup.quad); } } #endif @@ -196,200 +213,145 @@ clip_emit_quad(struct setup_stage *setup) * Eg: four colors will be compute. */ static INLINE void -eval_coeff( struct setup_stage *setup, uint slot, - float x, float y, float result[4][4]) +eval_coeff(uint slot, float x, float y, vector float result[4]) { - uint i; - const float *dadx = setup->coef[slot].dadx; - const float *dady = setup->coef[slot].dady; - - /* loop over XYZW comps */ - for (i = 0; i < 4; i++) { - result[QUAD_TOP_LEFT][i] = setup->coef[slot].a0[i] + x * dadx[i] + y * dady[i]; - result[QUAD_TOP_RIGHT][i] = result[0][i] + dadx[i]; - result[QUAD_BOTTOM_LEFT][i] = result[0][i] + dady[i]; - result[QUAD_BOTTOM_RIGHT][i] = result[0][i] + dadx[i] + dady[i]; + switch (spu.vertex_info.interp_mode[slot]) { + case INTERP_CONSTANT: + result[QUAD_TOP_LEFT] = + result[QUAD_TOP_RIGHT] = + result[QUAD_BOTTOM_LEFT] = + result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0.v; + break; + + case INTERP_LINEAR: + /* fall-through, for now */ + default: + { + register vector float dadx = setup.coef[slot].dadx.v; + register vector float dady = setup.coef[slot].dady.v; + register vector float topLeft + = spu_add(setup.coef[slot].a0.v, + spu_add(spu_mul(spu_splats(x), dadx), + spu_mul(spu_splats(y), dady))); + + result[QUAD_TOP_LEFT] = topLeft; + result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx); + result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady); + result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady); + } } } -static INLINE void -eval_z( struct setup_stage *setup, - float x, float y, float result[4]) +static INLINE vector float +eval_z(float x, float y) { const uint slot = 0; - const uint i = 2; - const float *dadx = setup->coef[slot].dadx; - const float *dady = setup->coef[slot].dady; - - result[QUAD_TOP_LEFT] = setup->coef[slot].a0[i] + x * dadx[i] + y * dady[i]; - result[QUAD_TOP_RIGHT] = result[0] + dadx[i]; - result[QUAD_BOTTOM_LEFT] = result[0] + dady[i]; - result[QUAD_BOTTOM_RIGHT] = result[0] + dadx[i] + dady[i]; + const float dzdx = setup.coef[slot].dadx.f[2]; + const float dzdy = setup.coef[slot].dady.f[2]; + const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy; + const vector float topLeftv = spu_splats(topLeft); + const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy }; + return spu_add(topLeftv, derivs); } -static INLINE uint -pack_color(const float color[4]) +static INLINE mask_t +do_depth_test(int x, int y, mask_t quadmask) { - uint r = (uint) (color[0] * 255.0); - uint g = (uint) (color[1] * 255.0); - uint b = (uint) (color[2] * 255.0); - uint a = (uint) (color[3] * 255.0); - r = MIN2(r, 255); - g = MIN2(g, 255); - b = MIN2(b, 255); - a = MIN2(a, 255); - switch (spu.fb.color_format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - return (a << 24) | (r << 16) | (g << 8) | b; - case PIPE_FORMAT_B8G8R8A8_UNORM: - return (b << 24) | (g << 16) | (r << 8) | a; - default: - ASSERT(0); - return 0; - } -} - - -static uint -do_depth_test(struct setup_stage *setup, int x, int y, unsigned mask) -{ - int ix = x - setup->cliprect_minx; - int iy = y - setup->cliprect_miny; - float zvals[4]; - - eval_z(setup, (float) x, (float) y, zvals); - - if (tile_status_z[setup->ty][setup->tx] == TILE_STATUS_CLEAR) { - /* now, _really_ clear the tile */ - clear_z_tile(&ztile); - } - else { - /* make sure we've got the tile from main mem */ - wait_on_mask(1 << TAG_READ_TILE_Z); - } - tile_status_z[setup->ty][setup->tx] = TILE_STATUS_DIRTY; + float4 zvals; + mask_t mask; + zvals.v = eval_z((float) x, (float) y); if (spu.fb.depth_format == PIPE_FORMAT_Z16_UNORM) { - const float zscale = 65535.0; - if (mask & MASK_TOP_LEFT) { - uint z = (uint) (zvals[0] * zscale); - if (z < ztile.t16[iy][ix]) - ztile.t16[iy][ix] = z; - else - mask &= ~MASK_TOP_LEFT; - } - - if (mask & MASK_TOP_RIGHT) { - uint z = (uint) (zvals[1] * zscale); - if (z < ztile.t16[iy][ix+1]) - ztile.t16[iy][ix+1] = z; - else - mask &= ~MASK_TOP_RIGHT; - } - - if (mask & MASK_BOTTOM_LEFT) { - uint z = (uint) (zvals[2] * zscale); - if (z < ztile.t16[iy+1][ix]) - ztile.t16[iy+1][ix] = z; - else - mask &= ~MASK_BOTTOM_LEFT; - } - - if (mask & MASK_BOTTOM_RIGHT) { - uint z = (uint) (zvals[3] * zscale); - if (z < ztile.t16[iy+1][ix+1]) - ztile.t16[iy+1][ix+1] = z; - else - mask &= ~MASK_BOTTOM_RIGHT; - } + int ix = (x - setup.cliprect_minx) / 4; + int iy = (y - setup.cliprect_miny) / 2; + mask = spu_z16_test_less(zvals.v, &spu.ztile.us8[iy][ix], x>>1, quadmask); } else { - const float zscale = (float) 0xffffffff; - ASSERT(spu.fb.depth_format == PIPE_FORMAT_Z32_UNORM); - if (mask & MASK_TOP_LEFT) { - uint z = (uint) (zvals[0] * zscale); - if (z < ztile.t32[iy][ix]) - ztile.t32[iy][ix] = z; - else - mask &= ~MASK_TOP_LEFT; - } - - if (mask & MASK_TOP_RIGHT) { - uint z = (uint) (zvals[1] * zscale); - if (z < ztile.t32[iy][ix+1]) - ztile.t32[iy][ix+1] = z; - else - mask &= ~MASK_TOP_RIGHT; - } - - if (mask & MASK_BOTTOM_LEFT) { - uint z = (uint) (zvals[2] * zscale); - if (z < ztile.t32[iy+1][ix]) - ztile.t32[iy+1][ix] = z; - else - mask &= ~MASK_BOTTOM_LEFT; - } - - if (mask & MASK_BOTTOM_RIGHT) { - uint z = (uint) (zvals[3] * zscale); - if (z < ztile.t32[iy+1][ix+1]) - ztile.t32[iy+1][ix+1] = z; - else - mask &= ~MASK_BOTTOM_RIGHT; - } + int ix = (x - setup.cliprect_minx) / 2; + int iy = (y - setup.cliprect_miny) / 2; + mask = spu_z32_test_less(zvals.v, &spu.ztile.ui4[iy][ix], quadmask); } + if (spu_extract(spu_orx(mask), 0)) + spu.cur_ztile_status = TILE_STATUS_DIRTY; + return mask; } /** * Emit a quad (pass to next stage). No clipping is done. + * Note: about 1/5 to 1/7 of the time, mask is zero and this function + * should be skipped. But adding the test for that slows things down + * overall. */ static INLINE void -emit_quad( struct setup_stage *setup, int x, int y, unsigned mask ) +emit_quad( int x, int y, mask_t mask ) { #if 0 - struct softpipe_context *sp = setup->softpipe; - setup->quad.x0 = x; - setup->quad.y0 = y; - setup->quad.mask = mask; - sp->quad.first->run(sp->quad.first, &setup->quad); + struct softpipe_context *sp = setup.softpipe; + setup.quad.x0 = x; + setup.quad.y0 = y; + setup.quad.mask = mask; + sp->quad.first->run(sp->quad.first, &setup.quad); #else - /* Cell: "write" quad fragments to the tile by setting prim color */ - const int ix = x - setup->cliprect_minx; - const int iy = y - setup->cliprect_miny; - float colors[4][4]; - - eval_coeff(setup, 1, (float) x, (float) y, colors); if (spu.depth_stencil.depth.enabled) { - mask &= do_depth_test(setup, x, y, mask); + mask = do_depth_test(x, y, mask); } - if (mask) { - if (tile_status[setup->ty][setup->tx] == TILE_STATUS_CLEAR) { - /* now, _really_ clear the tile */ - clear_c_tile(&ctile); + /* If any bits in mask are set... */ + if (spu_extract(spu_orx(mask), 0)) { + const int ix = x - setup.cliprect_minx; + const int iy = y - setup.cliprect_miny; + const vector unsigned char shuffle = spu.color_shuffle; + vector float colors[4]; + + spu.cur_ctile_status = TILE_STATUS_DIRTY; + + if (spu.texture.start) { + /* texture mapping */ + vector float texcoords[4]; + eval_coeff(2, (float) x, (float) y, texcoords); + + if (spu_extract(mask, 0)) + colors[0] = spu.sample_texture(texcoords[0]); + if (spu_extract(mask, 1)) + colors[1] = spu.sample_texture(texcoords[1]); + if (spu_extract(mask, 2)) + colors[2] = spu.sample_texture(texcoords[2]); + if (spu_extract(mask, 3)) + colors[3] = spu.sample_texture(texcoords[3]); } else { - /* make sure we've got the tile from main mem */ - wait_on_mask(1 << TAG_READ_TILE_COLOR); + /* simple shading */ + eval_coeff(1, (float) x, (float) y, colors); } - tile_status[setup->ty][setup->tx] = TILE_STATUS_DIRTY; - - if (mask & MASK_TOP_LEFT) - ctile.t32[iy][ix] = pack_color(colors[QUAD_TOP_LEFT]); - if (mask & MASK_TOP_RIGHT) - ctile.t32[iy][ix+1] = pack_color(colors[QUAD_TOP_RIGHT]); - if (mask & MASK_BOTTOM_LEFT) - ctile.t32[iy+1][ix] = pack_color(colors[QUAD_BOTTOM_LEFT]); - if (mask & MASK_BOTTOM_RIGHT) - ctile.t32[iy+1][ix+1] = pack_color(colors[QUAD_BOTTOM_RIGHT]); + +#if 1 + if (spu.blend.blend_enable) + blend_quad(ix % TILE_SIZE, iy % TILE_SIZE, colors); +#endif + + if (spu_extract(mask, 0)) + spu.ctile.ui[iy][ix] = spu_pack_color_shuffle(colors[0], shuffle); + if (spu_extract(mask, 1)) + spu.ctile.ui[iy][ix+1] = spu_pack_color_shuffle(colors[1], shuffle); + if (spu_extract(mask, 2)) + spu.ctile.ui[iy+1][ix] = spu_pack_color_shuffle(colors[2], shuffle); + if (spu_extract(mask, 3)) + spu.ctile.ui[iy+1][ix+1] = spu_pack_color_shuffle(colors[3], shuffle); + +#if 0 + /* SIMD_Z with swizzled color buffer (someday) */ + vector unsigned int uicolors = *((vector unsigned int *) &colors); + spu.ctile.ui4[iy/2][ix/2] = spu_sel(spu.ctile.ui4[iy/2][ix/2], uicolors, mask); +#endif } + #endif } @@ -407,26 +369,19 @@ static INLINE int block( int x ) /** * Compute mask which indicates which pixels in the 2x2 quad are actually inside * the triangle's bounds. - * - * this is pretty nasty... may need to rework flush_spans again to - * fix it, if possible. + * The mask is a uint4 vector and each element will be 0 or 0xffffffff. */ -static unsigned calculate_mask( struct setup_stage *setup, int x ) +static INLINE mask_t calculate_mask( int x ) { - unsigned mask = 0x0; - - if (x >= setup->span.left[0] && x < setup->span.right[0]) - mask |= MASK_TOP_LEFT; - - if (x >= setup->span.left[1] && x < setup->span.right[1]) - mask |= MASK_BOTTOM_LEFT; - - if (x+1 >= setup->span.left[0] && x+1 < setup->span.right[0]) - mask |= MASK_TOP_RIGHT; - - if (x+1 >= setup->span.left[1] && x+1 < setup->span.right[1]) - mask |= MASK_BOTTOM_RIGHT; - + /* This is a little tricky. + * Use & instead of && to avoid branches. + * Use negation to convert true/false to ~0/0 values. + */ + mask_t mask; + mask = spu_insert(-((x >= setup.span.left[0]) & (x < setup.span.right[0])), mask, 0); + mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1); + mask = spu_insert(-((x >= setup.span.left[1]) & (x < setup.span.right[1])), mask, 2); + mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3); return mask; } @@ -434,144 +389,175 @@ static unsigned calculate_mask( struct setup_stage *setup, int x ) /** * Render a horizontal span of quads */ -static void flush_spans( struct setup_stage *setup ) +static void flush_spans( void ) { int minleft, maxright; int x; - switch (setup->span.y_flags) { + switch (setup.span.y_flags) { case 0x3: /* both odd and even lines written (both quad rows) */ - minleft = MIN2(setup->span.left[0], setup->span.left[1]); - maxright = MAX2(setup->span.right[0], setup->span.right[1]); + minleft = MIN2(setup.span.left[0], setup.span.left[1]); + maxright = MAX2(setup.span.right[0], setup.span.right[1]); break; case 0x1: /* only even line written (quad top row) */ - minleft = setup->span.left[0]; - maxright = setup->span.right[0]; + minleft = setup.span.left[0]; + maxright = setup.span.right[0]; break; case 0x2: /* only odd line written (quad bottom row) */ - minleft = setup->span.left[1]; - maxright = setup->span.right[1]; + minleft = setup.span.left[1]; + maxright = setup.span.right[1]; break; default: return; } + + /* OK, we're very likely to need the tile data now. + * clear or finish waiting if needed. + */ + if (spu.cur_ctile_status == TILE_STATUS_GETTING) { + /* wait for mfc_get() to complete */ + //printf("SPU: %u: waiting for ctile\n", spu.init.id); + wait_on_mask(1 << TAG_READ_TILE_COLOR); + spu.cur_ctile_status = TILE_STATUS_CLEAN; + } + else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) { + //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty); + clear_c_tile(&spu.ctile); + spu.cur_ctile_status = TILE_STATUS_DIRTY; + } + ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED); + + if (spu.depth_stencil.depth.enabled) { + if (spu.cur_ztile_status == TILE_STATUS_GETTING) { + /* wait for mfc_get() to complete */ + //printf("SPU: %u: waiting for ztile\n", spu.init.id); + wait_on_mask(1 << TAG_READ_TILE_Z); + spu.cur_ztile_status = TILE_STATUS_CLEAN; + } + else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) { + //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty); + clear_z_tile(&spu.ztile); + spu.cur_ztile_status = TILE_STATUS_DIRTY; + } + ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED); + } + /* XXX this loop could be moved into the above switch cases and * calculate_mask() could be simplified a bit... */ for (x = block(minleft); x <= block(maxright); x += 2) { - emit_quad( setup, x, setup->span.y, - calculate_mask( setup, x ) ); +#if 1 + emit_quad( x, setup.span.y, calculate_mask( x ) ); +#endif } - setup->span.y = 0; - setup->span.y_flags = 0; - setup->span.right[0] = 0; - setup->span.right[1] = 0; + setup.span.y = 0; + setup.span.y_flags = 0; + setup.span.right[0] = 0; + setup.span.right[1] = 0; } #if DEBUG_VERTS -static void print_vertex(const struct setup_stage *setup, - const struct vertex_header *v) +static void print_vertex(const struct vertex_header *v) { int i; fprintf(stderr, "Vertex: (%p)\n", v); - for (i = 0; i < setup->quad.nr_attrs; i++) { + for (i = 0; i < setup.quad.nr_attrs; i++) { fprintf(stderr, " %d: %f %f %f %f\n", i, v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]); } } #endif -static boolean setup_sort_vertices( struct setup_stage *setup, - const struct prim_header *prim ) + +static boolean setup_sort_vertices(const struct vertex_header *v0, + const struct vertex_header *v1, + const struct vertex_header *v2) { - const struct vertex_header *v0 = prim->v[0]; - const struct vertex_header *v1 = prim->v[1]; - const struct vertex_header *v2 = prim->v[2]; #if DEBUG_VERTS fprintf(stderr, "Triangle:\n"); - print_vertex(setup, v0); - print_vertex(setup, v1); - print_vertex(setup, v2); + print_vertex(v0); + print_vertex(v1); + print_vertex(v2); #endif - setup->vprovoke = v2; + setup.vprovoke = v2; /* determine bottom to top order of vertices */ { - float y0 = v0->data[0][1]; - float y1 = v1->data[0][1]; - float y2 = v2->data[0][1]; + float y0 = spu_extract(v0->data[0], 1); + float y1 = spu_extract(v1->data[0], 1); + float y2 = spu_extract(v2->data[0], 1); if (y0 <= y1) { if (y1 <= y2) { /* y0<=y1<=y2 */ - setup->vmin = v0; - setup->vmid = v1; - setup->vmax = v2; + setup.vmin = v0; + setup.vmid = v1; + setup.vmax = v2; } else if (y2 <= y0) { /* y2<=y0<=y1 */ - setup->vmin = v2; - setup->vmid = v0; - setup->vmax = v1; + setup.vmin = v2; + setup.vmid = v0; + setup.vmax = v1; } else { /* y0<=y2<=y1 */ - setup->vmin = v0; - setup->vmid = v2; - setup->vmax = v1; + setup.vmin = v0; + setup.vmid = v2; + setup.vmax = v1; } } else { if (y0 <= y2) { /* y1<=y0<=y2 */ - setup->vmin = v1; - setup->vmid = v0; - setup->vmax = v2; + setup.vmin = v1; + setup.vmid = v0; + setup.vmax = v2; } else if (y2 <= y1) { /* y2<=y1<=y0 */ - setup->vmin = v2; - setup->vmid = v1; - setup->vmax = v0; + setup.vmin = v2; + setup.vmid = v1; + setup.vmax = v0; } else { /* y1<=y2<=y0 */ - setup->vmin = v1; - setup->vmid = v2; - setup->vmax = v0; + setup.vmin = v1; + setup.vmid = v2; + setup.vmax = v0; } } } /* Check if triangle is completely outside the tile bounds */ - if (setup->vmin->data[0][1] > setup->cliprect_maxy) + if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy) return FALSE; - if (setup->vmax->data[0][1] < setup->cliprect_miny) + if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny) return FALSE; - if (setup->vmin->data[0][0] < setup->cliprect_minx && - setup->vmid->data[0][0] < setup->cliprect_minx && - setup->vmax->data[0][0] < setup->cliprect_minx) + if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx && + spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx && + spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx) return FALSE; - if (setup->vmin->data[0][0] > setup->cliprect_maxx && - setup->vmid->data[0][0] > setup->cliprect_maxx && - setup->vmax->data[0][0] > setup->cliprect_maxx) + if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx && + spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx && + spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx) return FALSE; - setup->ebot.dx = setup->vmid->data[0][0] - setup->vmin->data[0][0]; - setup->ebot.dy = setup->vmid->data[0][1] - setup->vmin->data[0][1]; - setup->emaj.dx = setup->vmax->data[0][0] - setup->vmin->data[0][0]; - setup->emaj.dy = setup->vmax->data[0][1] - setup->vmin->data[0][1]; - setup->etop.dx = setup->vmax->data[0][0] - setup->vmid->data[0][0]; - setup->etop.dy = setup->vmax->data[0][1] - setup->vmid->data[0][1]; + setup.ebot.dx = spu_extract(setup.vmid->data[0], 0) - spu_extract(setup.vmin->data[0], 0); + setup.ebot.dy = spu_extract(setup.vmid->data[0], 1) - spu_extract(setup.vmin->data[0], 1); + setup.emaj.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmin->data[0], 0); + setup.emaj.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmin->data[0], 1); + setup.etop.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmid->data[0], 0); + setup.etop.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmid->data[0], 1); /* * Compute triangle's area. Use 1/area to compute partial @@ -584,13 +570,13 @@ static boolean setup_sort_vertices( struct setup_stage *setup, * use the prim->det value because its sign is correct. */ { - const float area = (setup->emaj.dx * setup->ebot.dy - - setup->ebot.dx * setup->emaj.dy); + const float area = (setup.emaj.dx * setup.ebot.dy - + setup.ebot.dx * setup.emaj.dy); - setup->oneoverarea = 1.0f / area; + setup.oneoverarea = 1.0f / area; /* _mesa_printf("%s one-over-area %f area %f det %f\n", - __FUNCTION__, setup->oneoverarea, area, prim->det ); + __FUNCTION__, setup.oneoverarea, area, prim->det ); */ } @@ -599,56 +585,52 @@ static boolean setup_sort_vertices( struct setup_stage *setup, * - the GLSL gl_FrontFacing fragment attribute (bool) * - two-sided stencil test */ - setup->quad.facing = (prim->det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW); + setup.quad.facing = (prim->det > 0.0) ^ (setup.softpipe->rasterizer->front_winding == PIPE_WINDING_CW); #endif return TRUE; } -#if 0 /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - * The value value comes from vertex->data[slot][i]. - * The result will be put into setup->coef[slot].a0[i]. + * The value value comes from vertex->data[slot]. + * The result will be put into setup.coef[slot].a0. * \param slot which attribute slot - * \param i which component of the slot (0..3) */ -static void const_coeff( struct setup_stage *setup, - unsigned slot, - unsigned i ) +static INLINE void +const_coeff(uint slot) { - assert(slot < PIPE_MAX_SHADER_INPUTS); - assert(i <= 3); - - setup->coef[slot].dadx[i] = 0; - setup->coef[slot].dady[i] = 0; - - /* need provoking vertex info! - */ - setup->coef[slot].a0[i] = setup->vprovoke->data[slot][i]; + setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0}; + setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0}; + setup.coef[slot].a0.v = setup.vprovoke->data[slot]; } -#endif /** * Compute a0, dadx and dady for a linearly interpolated coefficient, * for a triangle. */ -static void tri_linear_coeff( struct setup_stage *setup, - uint slot, uint firstComp, uint lastComp ) +static INLINE void +tri_linear_coeff(uint slot, uint firstComp, uint lastComp) { uint i; + const float *vmin_d = (float *) &setup.vmin->data[slot]; + const float *vmid_d = (float *) &setup.vmid->data[slot]; + const float *vmax_d = (float *) &setup.vmax->data[slot]; + const float x = spu_extract(setup.vmin->data[0], 0) - 0.5f; + const float y = spu_extract(setup.vmin->data[0], 1) - 0.5f; + for (i = firstComp; i < lastComp; i++) { - float botda = setup->vmid->data[slot][i] - setup->vmin->data[slot][i]; - float majda = setup->vmax->data[slot][i] - setup->vmin->data[slot][i]; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; + float botda = vmid_d[i] - vmin_d[i]; + float majda = vmax_d[i] - vmin_d[i]; + float a = setup.ebot.dy * majda - botda * setup.emaj.dy; + float b = setup.emaj.dx * botda - majda * setup.ebot.dx; ASSERT(slot < PIPE_MAX_SHADER_INPUTS); - setup->coef[slot].dadx[i] = a * setup->oneoverarea; - setup->coef[slot].dady[i] = b * setup->oneoverarea; + setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; + setup.coef[slot].dady.f[i] = b * setup.oneoverarea; /* calculate a0 as the value which would be sampled for the * fragment at (0,0), taking into account that we want to sample at @@ -662,21 +644,52 @@ static void tri_linear_coeff( struct setup_stage *setup, * to define a0 as the sample at a pixel center somewhere near vmin * instead - i'll switch to this later. */ - setup->coef[slot].a0[i] = (setup->vmin->data[slot][i] - - (setup->coef[slot].dadx[i] * (setup->vmin->data[0][0] - 0.5f) + - setup->coef[slot].dady[i] * (setup->vmin->data[0][1] - 0.5f))); + setup.coef[slot].a0.f[i] = (vmin_d[i] - + (setup.coef[slot].dadx.f[i] * x + + setup.coef[slot].dady.f[i] * y)); } /* _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n", slot, "xyzw"[i], - setup->coef[slot].a0[i], - setup->coef[slot].dadx[i], - setup->coef[slot].dady[i]); + setup.coef[slot].a0[i], + setup.coef[slot].dadx.f[i], + setup.coef[slot].dady.f[i]); */ } +/** + * As above, but interp setup all four vector components. + */ +static INLINE void +tri_linear_coeff4(uint slot) +{ + const vector float vmin_d = setup.vmin->data[slot]; + const vector float vmid_d = setup.vmid->data[slot]; + const vector float vmax_d = setup.vmax->data[slot]; + const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); + const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); + + vector float botda = vmid_d - vmin_d; + vector float majda = vmax_d - vmin_d; + + vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), + spu_mul(botda, spu_splats(setup.emaj.dy))); + vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), + spu_mul(majda, spu_splats(setup.ebot.dx))); + + setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneoverarea)); + setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneoverarea)); + + vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx); + vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy); + + setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy)); +} + + + #if 0 /** * Compute a0, dadx and dady for a perspective-corrected interpolant, @@ -686,46 +699,45 @@ static void tri_linear_coeff( struct setup_stage *setup, * Later, when we compute the value at a particular fragment position we'll * divide the interpolated value by the interpolated W at that fragment. */ -static void tri_persp_coeff( struct setup_stage *setup, - unsigned slot, +static void tri_persp_coeff( unsigned slot, unsigned i ) { /* premultiply by 1/w: */ - float mina = setup->vmin->data[slot][i] * setup->vmin->data[0][3]; - float mida = setup->vmid->data[slot][i] * setup->vmid->data[0][3]; - float maxa = setup->vmax->data[slot][i] * setup->vmax->data[0][3]; + float mina = setup.vmin->data[slot][i] * setup.vmin->data[0][3]; + float mida = setup.vmid->data[slot][i] * setup.vmid->data[0][3]; + float maxa = setup.vmax->data[slot][i] * setup.vmax->data[0][3]; float botda = mida - mina; float majda = maxa - mina; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; + float a = setup.ebot.dy * majda - botda * setup.emaj.dy; + float b = setup.emaj.dx * botda - majda * setup.ebot.dx; /* printf("tri persp %d,%d: %f %f %f\n", slot, i, - setup->vmin->data[slot][i], - setup->vmid->data[slot][i], - setup->vmax->data[slot][i] + setup.vmin->data[slot][i], + setup.vmid->data[slot][i], + setup.vmax->data[slot][i] ); */ assert(slot < PIPE_MAX_SHADER_INPUTS); assert(i <= 3); - setup->coef[slot].dadx[i] = a * setup->oneoverarea; - setup->coef[slot].dady[i] = b * setup->oneoverarea; - setup->coef[slot].a0[i] = (mina - - (setup->coef[slot].dadx[i] * (setup->vmin->data[0][0] - 0.5f) + - setup->coef[slot].dady[i] * (setup->vmin->data[0][1] - 0.5f))); + setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; + setup.coef[slot].dady.f[i] = b * setup.oneoverarea; + setup.coef[slot].a0.f[i] = (mina - + (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) + + setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f))); } #endif /** - * Compute the setup->coef[] array dadx, dady, a0 values. - * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. + * Compute the setup.coef[] array dadx, dady, a0 values. + * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized. */ -static void setup_tri_coefficients( struct setup_stage *setup ) +static void setup_tri_coefficients(void) { #if 1 uint i; @@ -735,15 +747,18 @@ static void setup_tri_coefficients( struct setup_stage *setup ) case INTERP_NONE: break; case INTERP_POS: - tri_linear_coeff(setup, i, 2, 3); /* slot 0, z */ + /*tri_linear_coeff(i, 2, 3);*/ /* XXX interp W if PERSPECTIVE... */ + tri_linear_coeff4(i); break; case INTERP_CONSTANT: - /* fall-through */ + const_coeff(i); + break; case INTERP_LINEAR: - tri_linear_coeff(setup, i, 0, 4); /* slot 1, color */ + tri_linear_coeff4(i); break; case INTERP_PERSPECTIVE: + tri_linear_coeff4(i); /* temporary */ break; default: ASSERT(0); @@ -753,35 +768,35 @@ static void setup_tri_coefficients( struct setup_stage *setup ) ASSERT(spu.vertex_info.interp_mode[0] == INTERP_POS); ASSERT(spu.vertex_info.interp_mode[1] == INTERP_LINEAR || spu.vertex_info.interp_mode[1] == INTERP_CONSTANT); - tri_linear_coeff(setup, 0, 2, 3); /* slot 0, z */ - tri_linear_coeff(setup, 1, 0, 4); /* slot 1, color */ + tri_linear_coeff(0, 2, 3); /* slot 0, z */ + tri_linear_coeff(1, 0, 4); /* slot 1, color */ #endif } -static void setup_tri_edges( struct setup_stage *setup ) +static void setup_tri_edges(void) { - float vmin_x = setup->vmin->data[0][0] + 0.5f; - float vmid_x = setup->vmid->data[0][0] + 0.5f; - - float vmin_y = setup->vmin->data[0][1] - 0.5f; - float vmid_y = setup->vmid->data[0][1] - 0.5f; - float vmax_y = setup->vmax->data[0][1] - 0.5f; - - setup->emaj.sy = CEILF(vmin_y); - setup->emaj.lines = (int) CEILF(vmax_y - setup->emaj.sy); - setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; - setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; - - setup->etop.sy = CEILF(vmid_y); - setup->etop.lines = (int) CEILF(vmax_y - setup->etop.sy); - setup->etop.dxdy = setup->etop.dx / setup->etop.dy; - setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; - - setup->ebot.sy = CEILF(vmin_y); - setup->ebot.lines = (int) CEILF(vmid_y - setup->ebot.sy); - setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; - setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; + float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f; + float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f; + + float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f; + float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f; + float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f; + + setup.emaj.sy = CEILF(vmin_y); + setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy); + setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy; + setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy; + + setup.etop.sy = CEILF(vmid_y); + setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy); + setup.etop.dxdy = setup.etop.dx / setup.etop.dy; + setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy; + + setup.ebot.sy = CEILF(vmin_y); + setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy); + setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy; + setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy; } @@ -789,15 +804,14 @@ static void setup_tri_edges( struct setup_stage *setup ) * Render the upper or lower half of a triangle. * Scissoring/cliprect is applied here too. */ -static void subtriangle( struct setup_stage *setup, - struct edge *eleft, +static void subtriangle( struct edge *eleft, struct edge *eright, unsigned lines ) { - const int minx = setup->cliprect_minx; - const int maxx = setup->cliprect_maxx; - const int miny = setup->cliprect_miny; - const int maxy = setup->cliprect_maxy; + const int minx = setup.cliprect_minx; + const int maxx = setup.cliprect_maxx; + const int miny = setup.cliprect_miny; + const int maxy = setup.cliprect_maxy; int y, start_y, finish_y; int sy = (int)eleft->sy; @@ -839,14 +853,14 @@ static void subtriangle( struct setup_stage *setup, if (left < right) { int _y = sy + y; - if (block(_y) != setup->span.y) { - flush_spans(setup); - setup->span.y = block(_y); + if (block(_y) != setup.span.y) { + flush_spans(); + setup.span.y = block(_y); } - setup->span.left[_y&1] = left; - setup->span.right[_y&1] = right; - setup->span.y_flags |= 1<<(_y&1); + setup.span.left[_y&1] = left; + setup.span.right[_y&1] = right; + setup.span.y_flags |= 1<<(_y&1); } } @@ -861,70 +875,52 @@ static void subtriangle( struct setup_stage *setup, /** - * Do setup for triangle rasterization, then render the triangle. + * Draw triangle into tile at (tx, ty) (tile coords) + * The tile data should have already been fetched. */ -static void -setup_tri(struct setup_stage *setup, struct prim_header *prim) +boolean +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) { - if (!setup_sort_vertices( setup, prim )) { - return; /* totally clipped */ - } + setup.tx = tx; + setup.ty = ty; - setup_tri_coefficients( setup ); - setup_tri_edges( setup ); + /* set clipping bounds to tile bounds */ + setup.cliprect_minx = tx * TILE_SIZE; + setup.cliprect_miny = ty * TILE_SIZE; + setup.cliprect_maxx = (tx + 1) * TILE_SIZE; + setup.cliprect_maxy = (ty + 1) * TILE_SIZE; -#if 0 - setup->quad.prim = PRIM_TRI; -#endif + if (!setup_sort_vertices((struct vertex_header *) v0, + (struct vertex_header *) v1, + (struct vertex_header *) v2)) { + return FALSE; /* totally clipped */ + } + + setup_tri_coefficients(); + setup_tri_edges(); - setup->span.y = 0; - setup->span.y_flags = 0; - setup->span.right[0] = 0; - setup->span.right[1] = 0; - /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ + setup.span.y = 0; + setup.span.y_flags = 0; + setup.span.right[0] = 0; + setup.span.right[1] = 0; + /* setup.span.z_mode = tri_z_mode( setup.ctx ); */ /* init_constant_attribs( setup ); */ - if (setup->oneoverarea < 0.0) { + if (setup.oneoverarea < 0.0) { /* emaj on left: */ - subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); - subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); + subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines ); + subtriangle( &setup.emaj, &setup.etop, setup.etop.lines ); } else { /* emaj on right: */ - subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); - subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); + subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines ); + subtriangle( &setup.etop, &setup.emaj, setup.etop.lines ); } - flush_spans( setup ); -} - + flush_spans(); - -/** - * Draw triangle into tile at (tx, ty) (tile coords) - * The tile data should have already been fetched. - */ -void -tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) -{ - struct prim_header tri; - struct setup_stage setup; - - tri.v[0] = (struct vertex_header *) v0; - tri.v[1] = (struct vertex_header *) v1; - tri.v[2] = (struct vertex_header *) v2; - - setup.tx = tx; - setup.ty = ty; - - /* set clipping bounds to tile bounds */ - setup.cliprect_minx = tx * TILE_SIZE; - setup.cliprect_miny = ty * TILE_SIZE; - setup.cliprect_maxx = (tx + 1) * TILE_SIZE; - setup.cliprect_maxy = (ty + 1) * TILE_SIZE; - - setup_tri(&setup, &tri); + return TRUE; } diff --git a/src/mesa/pipe/cell/spu/spu_tri.h b/src/mesa/pipe/cell/spu/spu_tri.h index 86c42b6339..aa694dd7c9 100644 --- a/src/mesa/pipe/cell/spu/spu_tri.h +++ b/src/mesa/pipe/cell/spu/spu_tri.h @@ -30,7 +30,7 @@ #define SPU_TRI_H -extern void +extern boolean tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty); diff --git a/src/mesa/pipe/cell/spu/spu_util.c b/src/mesa/pipe/cell/spu/spu_util.c new file mode 100644 index 0000000000..ac373240c1 --- /dev/null +++ b/src/mesa/pipe/cell/spu/spu_util.c @@ -0,0 +1,165 @@ +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/tgsi/util/tgsi_parse.h" +//#include "tgsi_build.h" +#include "pipe/tgsi/util/tgsi_util.h" + +unsigned +tgsi_util_get_src_register_swizzle( + const struct tgsi_src_register *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->SwizzleX; + case 1: + return reg->SwizzleY; + case 2: + return reg->SwizzleZ; + case 3: + return reg->SwizzleW; + default: + assert( 0 ); + } + return 0; +} + +unsigned +tgsi_util_get_src_register_extswizzle( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->ExtSwizzleX; + case 1: + return reg->ExtSwizzleY; + case 2: + return reg->ExtSwizzleZ; + case 3: + return reg->ExtSwizzleW; + default: + assert( 0 ); + } + return 0; +} + +unsigned +tgsi_util_get_full_src_register_extswizzle( + const struct tgsi_full_src_register *reg, + unsigned component ) +{ + unsigned swizzle; + + /* + * First, calculate the extended swizzle for a given channel. This will give + * us either a channel index into the simple swizzle or a constant 1 or 0. + */ + swizzle = tgsi_util_get_src_register_extswizzle( + ®->SrcRegisterExtSwz, + component ); + + assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); + assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); + assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); + assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); + assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); + assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); + + /* + * Second, calculate the simple swizzle for the unswizzled channel index. + * Leave the constants intact, they are not affected by the simple swizzle. + */ + if( swizzle <= TGSI_SWIZZLE_W ) { + swizzle = tgsi_util_get_src_register_swizzle( + ®->SrcRegister, + component ); + } + + return swizzle; +} + +unsigned +tgsi_util_get_src_register_extnegate( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->NegateX; + case 1: + return reg->NegateY; + case 2: + return reg->NegateZ; + case 3: + return reg->NegateW; + default: + assert( 0 ); + } + return 0; +} + +void +tgsi_util_set_src_register_extnegate( + struct tgsi_src_register_ext_swz *reg, + unsigned negate, + unsigned component ) +{ + switch( component ) { + case 0: + reg->NegateX = negate; + break; + case 1: + reg->NegateY = negate; + break; + case 2: + reg->NegateZ = negate; + break; + case 3: + reg->NegateW = negate; + break; + default: + assert( 0 ); + } +} + +unsigned +tgsi_util_get_full_src_register_sign_mode( + const struct tgsi_full_src_register *reg, + unsigned component ) +{ + unsigned sign_mode; + + if( reg->SrcRegisterExtMod.Absolute ) { + /* Consider only the post-abs negation. */ + + if( reg->SrcRegisterExtMod.Negate ) { + sign_mode = TGSI_UTIL_SIGN_SET; + } + else { + sign_mode = TGSI_UTIL_SIGN_CLEAR; + } + } + else { + /* Accumulate the three negations. */ + + unsigned negate; + + negate = reg->SrcRegister.Negate; + if( tgsi_util_get_src_register_extnegate( ®->SrcRegisterExtSwz, component ) ) { + negate = !negate; + } + if( reg->SrcRegisterExtMod.Negate ) { + negate = !negate; + } + + if( negate ) { + sign_mode = TGSI_UTIL_SIGN_TOGGLE; + } + else { + sign_mode = TGSI_UTIL_SIGN_KEEP; + } + } + + return sign_mode; +} diff --git a/src/mesa/pipe/cell/spu/spu_vertex_fetch.c b/src/mesa/pipe/cell/spu/spu_vertex_fetch.c new file mode 100644 index 0000000000..6e86a919ce --- /dev/null +++ b/src/mesa/pipe/cell/spu/spu_vertex_fetch.c @@ -0,0 +1,393 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include <spu_mfcio.h> +#include <transpose_matrix4x4.h> + +#include "pipe/p_util.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "spu_exec.h" +#include "spu_vertex_shader.h" +#include "spu_main.h" + + +#define DRAW_DBG 0 + + +static const vec_float4 defaults = { 0.0, 0.0, 0.0, 1.0 }; + +/** + * Fetch a float[4] vertex attribute from memory, doing format/type + * conversion as needed. + * + * This is probably needed/dupliocated elsewhere, eg format + * conversion, texture sampling etc. + */ +#define FETCH_ATTRIB( NAME, SZ, CVT ) \ +static qword \ +fetch_##NAME(const void *ptr) \ +{ \ + vec_float4 attrib = defaults; \ + int i; \ + \ + for (i = 0; i < SZ; i++) { \ + attrib = spu_insert(CVT, attrib, i); \ + } \ + return (qword) attrib; \ +} + +#define CVT_64_FLOAT (float) ((double *) ptr)[i] +#define CVT_32_FLOAT ((float *) ptr)[i] + +#define CVT_8_USCALED (float) ((unsigned char *) ptr)[i] +#define CVT_16_USCALED (float) ((unsigned short *) ptr)[i] +#define CVT_32_USCALED (float) ((unsigned int *) ptr)[i] + +#define CVT_8_SSCALED (float) ((char *) ptr)[i] +#define CVT_16_SSCALED (float) ((short *) ptr)[i] +#define CVT_32_SSCALED (float) ((int *) ptr)[i] + +#define CVT_8_UNORM (float) ((unsigned char *) ptr)[i] / 255.0f +#define CVT_16_UNORM (float) ((unsigned short *) ptr)[i] / 65535.0f +#define CVT_32_UNORM (float) ((unsigned int *) ptr)[i] / 4294967295.0f + +#define CVT_8_SNORM (float) ((char *) ptr)[i] / 127.0f +#define CVT_16_SNORM (float) ((short *) ptr)[i] / 32767.0f +#define CVT_32_SNORM (float) ((int *) ptr)[i] / 2147483647.0f + +FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT ) +FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT ) +FETCH_ATTRIB( R64G64_FLOAT, 2, CVT_64_FLOAT ) +FETCH_ATTRIB( R64_FLOAT, 1, CVT_64_FLOAT ) + +FETCH_ATTRIB( R32G32B32A32_FLOAT, 4, CVT_32_FLOAT ) +FETCH_ATTRIB( R32G32B32_FLOAT, 3, CVT_32_FLOAT ) +FETCH_ATTRIB( R32G32_FLOAT, 2, CVT_32_FLOAT ) +FETCH_ATTRIB( R32_FLOAT, 1, CVT_32_FLOAT ) + +FETCH_ATTRIB( R32G32B32A32_USCALED, 4, CVT_32_USCALED ) +FETCH_ATTRIB( R32G32B32_USCALED, 3, CVT_32_USCALED ) +FETCH_ATTRIB( R32G32_USCALED, 2, CVT_32_USCALED ) +FETCH_ATTRIB( R32_USCALED, 1, CVT_32_USCALED ) + +FETCH_ATTRIB( R32G32B32A32_SSCALED, 4, CVT_32_SSCALED ) +FETCH_ATTRIB( R32G32B32_SSCALED, 3, CVT_32_SSCALED ) +FETCH_ATTRIB( R32G32_SSCALED, 2, CVT_32_SSCALED ) +FETCH_ATTRIB( R32_SSCALED, 1, CVT_32_SSCALED ) + +FETCH_ATTRIB( R32G32B32A32_UNORM, 4, CVT_32_UNORM ) +FETCH_ATTRIB( R32G32B32_UNORM, 3, CVT_32_UNORM ) +FETCH_ATTRIB( R32G32_UNORM, 2, CVT_32_UNORM ) +FETCH_ATTRIB( R32_UNORM, 1, CVT_32_UNORM ) + +FETCH_ATTRIB( R32G32B32A32_SNORM, 4, CVT_32_SNORM ) +FETCH_ATTRIB( R32G32B32_SNORM, 3, CVT_32_SNORM ) +FETCH_ATTRIB( R32G32_SNORM, 2, CVT_32_SNORM ) +FETCH_ATTRIB( R32_SNORM, 1, CVT_32_SNORM ) + +FETCH_ATTRIB( R16G16B16A16_USCALED, 4, CVT_16_USCALED ) +FETCH_ATTRIB( R16G16B16_USCALED, 3, CVT_16_USCALED ) +FETCH_ATTRIB( R16G16_USCALED, 2, CVT_16_USCALED ) +FETCH_ATTRIB( R16_USCALED, 1, CVT_16_USCALED ) + +FETCH_ATTRIB( R16G16B16A16_SSCALED, 4, CVT_16_SSCALED ) +FETCH_ATTRIB( R16G16B16_SSCALED, 3, CVT_16_SSCALED ) +FETCH_ATTRIB( R16G16_SSCALED, 2, CVT_16_SSCALED ) +FETCH_ATTRIB( R16_SSCALED, 1, CVT_16_SSCALED ) + +FETCH_ATTRIB( R16G16B16A16_UNORM, 4, CVT_16_UNORM ) +FETCH_ATTRIB( R16G16B16_UNORM, 3, CVT_16_UNORM ) +FETCH_ATTRIB( R16G16_UNORM, 2, CVT_16_UNORM ) +FETCH_ATTRIB( R16_UNORM, 1, CVT_16_UNORM ) + +FETCH_ATTRIB( R16G16B16A16_SNORM, 4, CVT_16_SNORM ) +FETCH_ATTRIB( R16G16B16_SNORM, 3, CVT_16_SNORM ) +FETCH_ATTRIB( R16G16_SNORM, 2, CVT_16_SNORM ) +FETCH_ATTRIB( R16_SNORM, 1, CVT_16_SNORM ) + +FETCH_ATTRIB( R8G8B8A8_USCALED, 4, CVT_8_USCALED ) +FETCH_ATTRIB( R8G8B8_USCALED, 3, CVT_8_USCALED ) +FETCH_ATTRIB( R8G8_USCALED, 2, CVT_8_USCALED ) +FETCH_ATTRIB( R8_USCALED, 1, CVT_8_USCALED ) + +FETCH_ATTRIB( R8G8B8A8_SSCALED, 4, CVT_8_SSCALED ) +FETCH_ATTRIB( R8G8B8_SSCALED, 3, CVT_8_SSCALED ) +FETCH_ATTRIB( R8G8_SSCALED, 2, CVT_8_SSCALED ) +FETCH_ATTRIB( R8_SSCALED, 1, CVT_8_SSCALED ) + +FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM ) +FETCH_ATTRIB( R8G8B8_UNORM, 3, CVT_8_UNORM ) +FETCH_ATTRIB( R8G8_UNORM, 2, CVT_8_UNORM ) +FETCH_ATTRIB( R8_UNORM, 1, CVT_8_UNORM ) + +FETCH_ATTRIB( R8G8B8A8_SNORM, 4, CVT_8_SNORM ) +FETCH_ATTRIB( R8G8B8_SNORM, 3, CVT_8_SNORM ) +FETCH_ATTRIB( R8G8_SNORM, 2, CVT_8_SNORM ) +FETCH_ATTRIB( R8_SNORM, 1, CVT_8_SNORM ) + +FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM ) +//FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM ) + + + +static spu_fetch_func get_fetch_func( enum pipe_format format ) +{ +#if 0 + { + char tmp[80]; + pf_sprint_name(tmp, format); + _mesa_printf("%s: %s\n", __FUNCTION__, tmp); + } +#endif + + switch (format) { + case PIPE_FORMAT_R64_FLOAT: + return fetch_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return fetch_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return fetch_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return fetch_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return fetch_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return fetch_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return fetch_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return fetch_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return fetch_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return fetch_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return fetch_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return fetch_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return fetch_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return fetch_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return fetch_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return fetch_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return fetch_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return fetch_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return fetch_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return fetch_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return fetch_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return fetch_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return fetch_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return fetch_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return fetch_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return fetch_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return fetch_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return fetch_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return fetch_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return fetch_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return fetch_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return fetch_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return fetch_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return fetch_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return fetch_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return fetch_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return fetch_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return fetch_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return fetch_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return fetch_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return fetch_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return fetch_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return fetch_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return fetch_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return fetch_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return fetch_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return fetch_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return fetch_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return fetch_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return fetch_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return fetch_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return fetch_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return fetch_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return fetch_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return fetch_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return fetch_R8G8B8A8_SSCALED; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return fetch_A8R8G8B8_UNORM; + + case 0: + return NULL; /* not sure why this is needed */ + + default: + assert(0); + return NULL; + } +} + + +/** + * Fetch vertex attributes for 'count' vertices. + */ +static void generic_vertex_fetch(struct spu_vs_context *draw, + struct spu_exec_machine *machine, + const unsigned *elts, + unsigned count) +{ + unsigned nr_attrs = draw->vertex_fetch.nr_attrs; + unsigned attr; + + assert(count <= 4); + + wait_on_mask(1 << TAG_VERTEX_BUFFER); + +#if DRAW_DBG + printf("SPU: %s count = %u, nr_attrs = %u\n", + __FUNCTION__, count, nr_attrs); +#endif + + /* loop over vertex attributes (vertex shader inputs) + */ + for (attr = 0; attr < nr_attrs; attr++) { + const unsigned pitch = draw->vertex_fetch.pitch[attr]; + const uint64_t src = draw->vertex_fetch.src_ptr[attr]; + const spu_fetch_func fetch = draw->vertex_fetch.fetch[attr]; + unsigned i; + qword p[4]; + + + /* Fetch four attributes for four vertices. + * + * Could fetch directly into AOS format, but this is meant to be + * a prototype for an sse implementation, which would have + * difficulties doing that. + */ + for (i = 0; i < count; i++) { + uint8_t buffer[32] ALIGN16_ATTRIB; + const uint64_t addr = src + (elts[i] * pitch); + const unsigned size = ((addr & 0x0f) == 0) ? 16 : 32; + +#if DRAW_DBG + printf("SPU: fetching = 0x%llx\n", addr); +#endif + mfc_get(buffer, addr & ~0x0f, size, TAG_VERTEX_BUFFER, 0, 0); + wait_on_mask(1 << TAG_VERTEX_BUFFER); + + p[i] = (*fetch)(buffer + (addr & 0x0f)); + } + + /* Be nice and zero out any missing vertices: + */ + for (/* empty */; i < 4; i++) + p[i] = si_xor(p[i], p[i]); + + /* Transpose/swizzle into vector-friendly format. Currently + * assuming that all vertex shader inputs are float[4], but this + * isn't true -- if the vertex shader only wants tex0.xy, we + * could optimize for that. + * + * To do so fully without codegen would probably require an + * excessive number of fetch functions, but we could at least + * minimize the transpose step: + */ + _transpose_matrix4x4(&machine->Inputs[attr].xyzw[0].q, p); + } +} + + +void spu_update_vertex_fetch( struct spu_vs_context *draw ) +{ + unsigned i; + + + for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { + draw->vertex_fetch.fetch[i] = + get_fetch_func(draw->vertex_fetch.format[i]); + } + + draw->vertex_fetch.fetch_func = generic_vertex_fetch; +} diff --git a/src/mesa/pipe/cell/spu/spu_vertex_shader.c b/src/mesa/pipe/cell/spu/spu_vertex_shader.c new file mode 100644 index 0000000000..c1cbbb6d1e --- /dev/null +++ b/src/mesa/pipe/cell/spu/spu_vertex_shader.c @@ -0,0 +1,231 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + * Ian Romanick <idr@us.ibm.com> + */ + +#include <spu_mfcio.h> + +#include "pipe/p_util.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "spu_vertex_shader.h" +#include "spu_exec.h" +#include "pipe/draw/draw_private.h" +#include "pipe/draw/draw_context.h" +#include "pipe/cell/common.h" +#include "spu_main.h" + +static INLINE unsigned +compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) +{ + unsigned mask = 0; + unsigned i; + + /* Do the hardwired planes first: + */ + if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; + if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; + if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; + if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; + if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; + if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; + + /* Followed by any remaining ones: + */ + for (i = 6; i < nr; i++) { + if (dot4(clip, plane[i]) < 0) + mask |= (1<<i); + } + + return mask; +} + + +/** + * Transform vertices with the current vertex program/shader + * Up to four vertices can be shaded at a time. + * \param vbuffer the input vertex data + * \param elts indexes of four input vertices + * \param count number of vertices to shade [1..4] + * \param vOut array of pointers to four output vertices + */ +static void +run_vertex_program(struct spu_vs_context *draw, + unsigned elts[4], unsigned count, + const uint64_t *vOut) +{ + struct spu_exec_machine *machine = &draw->machine; + unsigned int j; + + ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_ATTRIB_MAX); + const float *scale = draw->viewport.scale; + const float *trans = draw->viewport.translate; + + assert(count <= 4); + + machine->Processor = TGSI_PROCESSOR_VERTEX; + + ASSERT_ALIGN16(draw->constants); + machine->Consts = (float (*)[4]) draw->constants; + + machine->Inputs = ALIGN16_ASSIGN(inputs); + machine->Outputs = ALIGN16_ASSIGN(outputs); + + spu_vertex_fetch( draw, machine, elts, count ); + + /* run shader */ + spu_exec_machine_run( machine ); + + + /* store machine results */ + for (j = 0; j < count; j++) { + unsigned slot; + float x, y, z, w; + unsigned char buffer[sizeof(struct vertex_header) + + MAX_VERTEX_SIZE] ALIGN16_ATTRIB; + struct vertex_header *const tmpOut = + (struct vertex_header *) buffer; + const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) + + (sizeof(float) * 4 + * draw->num_vs_outputs)); + + mfc_get(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); + wait_on_mask(1 << TAG_VERTEX_BUFFER); + + + /* Handle attr[0] (position) specially: + * + * XXX: Computing the clipmask should be done in the vertex + * program as a set of DP4 instructions appended to the + * user-provided code. + */ + x = tmpOut->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = tmpOut->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = tmpOut->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = tmpOut->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + + tmpOut->clipmask = compute_clipmask(tmpOut->clip, draw->plane, + draw->nr_planes); + tmpOut->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + tmpOut->data[0][0] = x * scale[0] + trans[0]; + tmpOut->data[0][1] = y * scale[1] + trans[1]; + tmpOut->data[0][2] = z * scale[2] + trans[2]; + tmpOut->data[0][3] = w; + + /* Remaining attributes are packed into sequential post-transform + * vertex attrib slots. + */ + for (slot = 1; slot < draw->num_vs_outputs; slot++) { + tmpOut->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + tmpOut->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + tmpOut->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + tmpOut->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + + mfc_put(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); + } /* loop over vertices */ +} + + +static void +spu_bind_vertex_shader(struct spu_vs_context *draw, + void *uniforms, + void *planes, + unsigned nr_planes, + unsigned num_outputs + ) +{ + draw->constants = (float (*)[4]) uniforms; + + (void) memcpy(draw->plane, planes, sizeof(float) * 4 * nr_planes); + draw->nr_planes = nr_planes; + draw->num_vs_outputs = num_outputs; + + /* specify the shader to interpret/execute */ + spu_exec_machine_init(&draw->machine, + PIPE_MAX_SAMPLERS, + NULL /*samplers*/, + PIPE_SHADER_VERTEX); +} + + +unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] + ALIGN16_ATTRIB; + +void +spu_execute_vertex_shader(struct spu_vs_context *draw, + const struct cell_command_vs *vs) +{ + unsigned i; + + const uint64_t immediate_addr = vs->shader.immediates; + const unsigned immediate_size = + ROUNDUP16((sizeof(float) * 4 * vs->shader.num_immediates) + + (immediate_addr & 0x0f)); + + mfc_get(immediates, immediate_addr & ~0x0f, immediate_size, + TAG_VERTEX_BUFFER, 0, 0); + + draw->machine.Instructions = (struct tgsi_full_instruction *) + vs->shader.instructions; + draw->machine.NumInstructions = vs->shader.num_instructions; + + draw->machine.Declarations = (struct tgsi_full_declaration *) + vs->shader.declarations; + draw->machine.NumDeclarations = vs->shader.num_declarations; + + draw->vertex_fetch.nr_attrs = vs->nr_attrs; + + wait_on_mask(1 << TAG_VERTEX_BUFFER); + + (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f], + sizeof(float) * 4 * vs->shader.num_immediates); + + spu_bind_vertex_shader(draw, vs->shader.uniforms, + vs->plane, vs->nr_planes, + vs->shader.num_outputs); + + for (i = 0; i < vs->num_elts; i += 4) { + const unsigned batch_size = MIN2(vs->num_elts - i, 4); + + run_vertex_program(draw, & vs->elts[i], batch_size, &vs->vOut[i]); + } +} diff --git a/src/mesa/pipe/cell/spu/spu_vertex_shader.h b/src/mesa/pipe/cell/spu/spu_vertex_shader.h new file mode 100644 index 0000000000..c96b93ff0a --- /dev/null +++ b/src/mesa/pipe/cell/spu/spu_vertex_shader.h @@ -0,0 +1,61 @@ +#ifndef SPU_VERTEX_SHADER_H +#define SPU_VERTEX_SHADER_H + +#include "pipe/p_format.h" +#include "spu_exec.h" + +struct spu_vs_context; + +typedef qword (*spu_fetch_func)(const void *ptr); +typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw, + struct spu_exec_machine *machine, + const unsigned *elts, + unsigned count ); + +struct spu_vs_context { + struct pipe_viewport_state viewport; + + struct { + uint64_t src_ptr[PIPE_ATTRIB_MAX]; + unsigned pitch[PIPE_ATTRIB_MAX]; + enum pipe_format format[PIPE_ATTRIB_MAX]; + unsigned nr_attrs; + boolean dirty; + + spu_fetch_func fetch[PIPE_ATTRIB_MAX]; + spu_full_fetch_func fetch_func; + } vertex_fetch; + + /* Clip derived state: + */ + float plane[12][4]; + unsigned nr_planes; + + struct spu_exec_machine machine; + const float (*constants)[4]; + + unsigned num_vs_outputs; +}; + +extern void spu_update_vertex_fetch(struct spu_vs_context *draw); + +static INLINE void spu_vertex_fetch(struct spu_vs_context *draw, + struct spu_exec_machine *machine, + const unsigned *elts, + unsigned count) +{ + if (draw->vertex_fetch.dirty) { + spu_update_vertex_fetch(draw); + draw->vertex_fetch.dirty = 0; + } + + (*draw->vertex_fetch.fetch_func)(draw, machine, elts, count); +} + +struct cell_command_vs; + +extern void +spu_execute_vertex_shader(struct spu_vs_context *draw, + const struct cell_command_vs *vs); + +#endif /* SPU_VERTEX_SHADER_H */ diff --git a/src/mesa/pipe/cell/spu/spu_ztest.h b/src/mesa/pipe/cell/spu/spu_ztest.h new file mode 100644 index 0000000000..ce8ad00339 --- /dev/null +++ b/src/mesa/pipe/cell/spu/spu_ztest.h @@ -0,0 +1,135 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Zbuffer/depth test code. + */ + + +#ifndef SPU_ZTEST_H +#define SPU_ZTEST_H + + +#ifdef __SPU__ +#include <spu_intrinsics.h> +#endif + + + +/** + * Perform Z testing for a 16-bit/value Z buffer. + * + * \param zvals vector of four fragment zvalues as floats + * \param zbuf ptr to vector of ushort[8] zbuffer values. Note that this + * contains the Z values for 2 quads, 8 pixels. + * \param x x coordinate of quad (only lsbit is significant) + * \param inMask indicates which fragments in the quad are alive + * \return new mask indicating which fragments are alive after ztest + */ +static INLINE vector unsigned int +spu_z16_test_less(vector float zvals, vector unsigned short *zbuf, + uint x, vector unsigned int inMask) +{ +#define ZERO 0x80 + vector unsigned int zvals_ui4, zbuf_ui4, mask; + + /* convert floats to uints in [0, 65535] */ + zvals_ui4 = spu_convtu(zvals, 32); /* convert to [0, 2^32] */ + zvals_ui4 = spu_rlmask(zvals_ui4, -16); /* right shift 16 */ + + /* XXX this conditional could be removed with a bit of work */ + if (x & 1) { + /* convert zbuffer values from ushorts to uints */ + /* gather lower four ushorts */ + zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf, + (vector unsigned int) *zbuf, + ((vector unsigned char) { + ZERO, ZERO, 8, 9, ZERO, ZERO, 10, 11, + ZERO, ZERO, 12, 13, ZERO, ZERO, 14, 15})); + /* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */ + mask = spu_cmpgt(zbuf_ui4, zvals_ui4); + /* mask &= inMask */ + mask = spu_and(mask, inMask); + /* zbuf = mask ? zval : zbuf */ + zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask); + /* convert zbuffer values from uints back to ushorts, preserve lower 4 */ + *zbuf = (vector unsigned short) + spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf, + ((vector unsigned char) { + 16, 17, 18, 19, 20, 21, 22, 23, + 2, 3, 6, 7, 10, 11, 14, 15})); + } + else { + /* convert zbuffer values from ushorts to uints */ + /* gather upper four ushorts */ + zbuf_ui4 = spu_shuffle((vector unsigned int) *zbuf, + (vector unsigned int) *zbuf, + ((vector unsigned char) { + ZERO, ZERO, 0, 1, ZERO, ZERO, 2, 3, + ZERO, ZERO, 4, 5, ZERO, ZERO, 6, 7})); + /* mask = (zbuf_ui4 < zvals_ui4) ? ~0 : 0 */ + mask = spu_cmpgt(zbuf_ui4, zvals_ui4); + /* mask &= inMask */ + mask = spu_and(mask, inMask); + /* zbuf = mask ? zval : zbuf */ + zbuf_ui4 = spu_sel(zbuf_ui4, zvals_ui4, mask); + /* convert zbuffer values from uints back to ushorts, preserve upper 4 */ + *zbuf = (vector unsigned short) + spu_shuffle(zbuf_ui4, (vector unsigned int) *zbuf, + ((vector unsigned char) { + 2, 3, 6, 7, 10, 11, 14, 15, + 24, 25, 26, 27, 28, 29, 30, 31})); + } + return mask; +#undef ZERO +} + + +/** + * As above, but Zbuffer values as 32-bit uints + */ +static INLINE vector unsigned int +spu_z32_test_less(vector float zvals, vector unsigned int *zbuf_ptr, + vector unsigned int inMask) +{ + vector unsigned int zvals_ui4, mask, zbuf = *zbuf_ptr; + + /* convert floats to uints in [0, 0xffffffff] */ + zvals_ui4 = spu_convtu(zvals, 32); + /* mask = (zbuf < zvals_ui4) ? ~0 : 0 */ + mask = spu_cmpgt(zbuf, zvals_ui4); + /* mask &= inMask */ + mask = spu_and(mask, inMask); + /* zbuf = mask ? zval : zbuf */ + *zbuf_ptr = spu_sel(zbuf, zvals_ui4, mask); + + return mask; +} + + +#endif /* SPU_ZTEST_H */ diff --git a/src/mesa/pipe/draw/Makefile b/src/mesa/pipe/draw/Makefile new file mode 100644 index 0000000000..451911a354 --- /dev/null +++ b/src/mesa/pipe/draw/Makefile @@ -0,0 +1,2 @@ +default: + cd .. ; make diff --git a/src/mesa/pipe/draw/draw_clip.c b/src/mesa/pipe/draw/draw_clip.c index 2d410e3244..61130c5600 100644 --- a/src/mesa/pipe/draw/draw_clip.c +++ b/src/mesa/pipe/draw/draw_clip.c @@ -33,6 +33,8 @@ #include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" + #include "draw_context.h" #include "draw_private.h" @@ -54,6 +56,12 @@ struct clipper { struct draw_stage stage; /**< base class */ + /* Basically duplicate some of the flatshading logic here: + */ + boolean flat; + uint num_color_attribs; + uint color_attribs[4]; /* front/back primary/secondary colors */ + float (*plane)[4]; }; @@ -82,6 +90,17 @@ static void interp_attr( float *fdst, fdst[3] = LINTERP( t, fout[3], fin[3] ); } +static void copy_colors( struct draw_stage *stage, + struct vertex_header *dst, + const struct vertex_header *src ) +{ + const struct clipper *clipper = clipper_stage(stage); + uint i; + for (i = 0; i < clipper->num_color_attribs; i++) { + const uint attr = clipper->color_attribs[i]; + COPY_4FV(dst->data[attr], src->data[attr]); + } +} @@ -134,27 +153,11 @@ static void interp( const struct clipper *clip, } } -#if 0 -static INLINE void do_tri( struct draw_stage *next, - struct prim_header *header ) -{ - unsigned i; - for (i = 0; i < 3; i++) { - float *ndc = header->v[i]->data[0]; - _mesa_printf("ndc %f %f %f\n", ndc[0], ndc[1], ndc[2]); - assert(ndc[0] >= -1 && ndc[0] <= 641); - assert(ndc[1] >= 30 && ndc[1] <= 481); - } - _mesa_printf("\n"); - next->tri(next, header); -} -#endif - static void emit_poly( struct draw_stage *stage, struct vertex_header **inlist, unsigned n, - const struct prim_header *origPrim) + const struct prim_header *origPrim) { struct prim_header header; unsigned i; @@ -163,16 +166,16 @@ static void emit_poly( struct draw_stage *stage, header.det = origPrim->det; for (i = 2; i < n; i++) { - header.v[0] = inlist[0]; - header.v[1] = inlist[i-1]; - header.v[2] = inlist[i]; + header.v[0] = inlist[i-1]; + header.v[1] = inlist[i]; + header.v[2] = inlist[0]; /* keep in v[2] for flatshading */ { - unsigned tmp0 = header.v[0]->edgeflag; + unsigned tmp1 = header.v[1]->edgeflag; unsigned tmp2 = header.v[2]->edgeflag; - if (i != 2) header.v[0]->edgeflag = 0; - if (i != n-1) header.v[2]->edgeflag = 0; + if (i != n-1) header.v[1]->edgeflag = 0; + if (i != 2) header.v[2]->edgeflag = 0; header.edgeflags = ((header.v[0]->edgeflag << 0) | (header.v[1]->edgeflag << 1) | @@ -180,27 +183,13 @@ static void emit_poly( struct draw_stage *stage, stage->next->tri( stage->next, &header ); - header.v[0]->edgeflag = tmp0; + header.v[1]->edgeflag = tmp1; header.v[2]->edgeflag = tmp2; } } } -#if 0 -static void emit_poly( struct draw_stage *stage ) -{ - unsigned i; - - for (i = 2; i < n; i++) { - header->v[0] = inlist[0]; - header->v[1] = inlist[i-1]; - header->v[2] = inlist[i]; - - stage->next->tri( stage->next, header ); - } -} -#endif /* Clip a triangle against the viewport and user clip planes. @@ -281,6 +270,18 @@ do_clip_tri( struct draw_stage *stage, } } + /* If flat-shading, copy color to new provoking vertex. + */ + if (clipper->flat && inlist[0] != header->v[2]) { + if (1) { + inlist[0] = dup_vert(stage, inlist[0], tmpnr++); + } + + copy_colors(stage, inlist[0], header->v[2]); + } + + + /* Emit the polygon as triangles to the setup stage: */ if (n >= 3) @@ -328,6 +329,10 @@ do_clip_line( struct draw_stage *stage, if (v0->clipmask) { interp( clipper, stage->tmp[0], t0, v0, v1 ); + + if (clipper->flat) + copy_colors(stage, stage->tmp[0], v0); + newprim.v[0] = stage->tmp[0]; } else { @@ -393,8 +398,55 @@ clip_tri( struct draw_stage *stage, } } -static void clip_flush( struct draw_stage *stage, unsigned flags ) +/* Update state. Could further delay this until we hit the first + * primitive that really requires clipping. + */ +static void +clip_init_state( struct draw_stage *stage ) +{ + struct clipper *clipper = clipper_stage( stage ); + + clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE; + + if (clipper->flat) { + const struct pipe_shader_state *vs = stage->draw->vertex_shader->state; + uint i; + + clipper->num_color_attribs = 0; + for (i = 0; i < vs->num_outputs; i++) { + if (vs->output_semantic_name[i] == TGSI_SEMANTIC_COLOR || + vs->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { + clipper->color_attribs[clipper->num_color_attribs++] = i; + } + } + } + + stage->tri = clip_tri; + stage->line = clip_line; +} + + + +static void clip_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + clip_init_state( stage ); + stage->tri( stage, header ); +} + +static void clip_first_line( struct draw_stage *stage, + struct prim_header *header ) +{ + clip_init_state( stage ); + stage->line( stage, header ); +} + + +static void clip_flush( struct draw_stage *stage, + unsigned flags ) { + stage->tri = clip_first_tri; + stage->line = clip_first_line; stage->next->flush( stage->next, flags ); } @@ -420,12 +472,12 @@ struct draw_stage *draw_clip_stage( struct draw_context *draw ) { struct clipper *clipper = CALLOC_STRUCT(clipper); - draw_alloc_tmps( &clipper->stage, MAX_CLIPPED_VERTICES ); + draw_alloc_tmps( &clipper->stage, MAX_CLIPPED_VERTICES+1 ); clipper->stage.draw = draw; clipper->stage.point = clip_point; - clipper->stage.line = clip_line; - clipper->stage.tri = clip_tri; + clipper->stage.line = clip_first_line; + clipper->stage.tri = clip_first_tri; clipper->stage.flush = clip_flush; clipper->stage.reset_stipple_counter = clip_reset_stipple_counter; clipper->stage.destroy = clip_destroy; diff --git a/src/mesa/pipe/draw/draw_context.c b/src/mesa/pipe/draw/draw_context.c index e8ca1f035b..b15f57c824 100644 --- a/src/mesa/pipe/draw/draw_context.c +++ b/src/mesa/pipe/draw/draw_context.c @@ -71,12 +71,15 @@ struct draw_context *draw_create( void ) */ { uint i; - char *tmp = (char*) MALLOC( Elements(draw->vcache.vertex) * MAX_VERTEX_SIZE ); + const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; + char *tmp = align_malloc(Elements(draw->vcache.vertex) * size, 16); for (i = 0; i < Elements(draw->vcache.vertex); i++) - draw->vcache.vertex[i] = (struct vertex_header *)(tmp + i * MAX_VERTEX_SIZE); + draw->vcache.vertex[i] = (struct vertex_header *)(tmp + i * size); } + draw->shader_queue_flush = draw_vertex_shader_queue_flush; + draw->convert_wide_points = TRUE; draw->convert_wide_lines = TRUE; @@ -103,7 +106,7 @@ void draw_destroy( struct draw_context *draw ) if (draw->pipeline.rasterize) draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); tgsi_exec_machine_free_data(&draw->machine); - FREE( draw->vcache.vertex[0] ); /* Frees all the vertices. */ + align_free( draw->vcache.vertex[0] ); /* Frees all the vertices. */ FREE( draw ); } diff --git a/src/mesa/pipe/draw/draw_prim.c b/src/mesa/pipe/draw/draw_prim.c index 243381aec0..51e2242719 100644 --- a/src/mesa/pipe/draw/draw_prim.c +++ b/src/mesa/pipe/draw/draw_prim.c @@ -30,6 +30,8 @@ * Keith Whitwell <keith@tungstengraphics.com> */ +#include "pipe/p_debug.h" + #include "draw_private.h" #include "draw_context.h" @@ -60,38 +62,55 @@ static void draw_prim_queue_flush( struct draw_context *draw ) unsigned i; if (0) - fprintf(stdout,"Flushing with %d prims, %d verts\n", - draw->pq.queue_nr, draw->vs.queue_nr); + debug_printf("Flushing with %d prims, %d verts\n", + draw->pq.queue_nr, draw->vs.queue_nr); - if (draw->pq.queue_nr == 0) - return; + assert (draw->pq.queue_nr != 0); /* NOTE: we cannot save draw->pipeline->first in a local var because * draw->pipeline->first is often changed by the first call to tri(), * line(), etc. */ - switch (draw->reduced_prim) { - case RP_TRI: - for (i = 0; i < draw->pq.queue_nr; i++) { - if (draw->pq.queue[i].reset_line_stipple) - draw->pipeline.first->reset_stipple_counter( draw->pipeline.first ); - - draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] ); + if (draw->rasterizer->line_stipple_enable) { + switch (draw->reduced_prim) { + case RP_TRI: + for (i = 0; i < draw->pq.queue_nr; i++) { + if (draw->pq.queue[i].reset_line_stipple) + draw->pipeline.first->reset_stipple_counter( draw->pipeline.first ); + + draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] ); + } + break; + case RP_LINE: + for (i = 0; i < draw->pq.queue_nr; i++) { + if (draw->pq.queue[i].reset_line_stipple) + draw->pipeline.first->reset_stipple_counter( draw->pipeline.first ); + + draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] ); + } + break; + case RP_POINT: + draw->pipeline.first->reset_stipple_counter( draw->pipeline.first ); + for (i = 0; i < draw->pq.queue_nr; i++) + draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] ); + break; } - break; - case RP_LINE: - for (i = 0; i < draw->pq.queue_nr; i++) { - if (draw->pq.queue[i].reset_line_stipple) - draw->pipeline.first->reset_stipple_counter( draw->pipeline.first ); - - draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] ); + } + else { + switch (draw->reduced_prim) { + case RP_TRI: + for (i = 0; i < draw->pq.queue_nr; i++) + draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] ); + break; + case RP_LINE: + for (i = 0; i < draw->pq.queue_nr; i++) + draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] ); + break; + case RP_POINT: + for (i = 0; i < draw->pq.queue_nr; i++) + draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] ); + break; } - break; - case RP_POINT: - draw->pipeline.first->reset_stipple_counter( draw->pipeline.first ); - for (i = 0; i < draw->pq.queue_nr; i++) - draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] ); - break; } draw->pq.queue_nr = 0; @@ -103,16 +122,18 @@ static void draw_prim_queue_flush( struct draw_context *draw ) void draw_do_flush( struct draw_context *draw, unsigned flags ) { if (0) - fprintf(stdout,"Flushing with %d verts, %d prims\n", - draw->vs.queue_nr, - draw->pq.queue_nr ); + debug_printf("Flushing with %d verts, %d prims\n", + draw->vs.queue_nr, + draw->pq.queue_nr ); if (flags >= DRAW_FLUSH_SHADER_QUEUE) { - draw_vertex_shader_queue_flush(draw); + if (draw->vs.queue_nr) + (*draw->shader_queue_flush)(draw); if (flags >= DRAW_FLUSH_PRIM_QUEUE) { - draw_prim_queue_flush(draw); + if (draw->pq.queue_nr) + draw_prim_queue_flush(draw); if (flags >= DRAW_FLUSH_VERTEX_CACHE) { draw_vertex_cache_invalidate(draw); @@ -138,11 +159,11 @@ static struct prim_header *get_queued_prim( struct draw_context *draw, unsigned nr_verts ) { if (!draw_vertex_cache_check_space( draw, nr_verts )) { -// fprintf(stderr, "v"); +// debug_printf("v"); draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE ); } else if (draw->pq.queue_nr == PRIM_QUEUE_LENGTH) { -// fprintf(stderr, "p"); +// debug_printf("p"); draw_do_flush( draw, DRAW_FLUSH_PRIM_QUEUE ); } @@ -230,7 +251,7 @@ static void do_ef_triangle( struct draw_context *draw, } -static void do_quad( struct draw_context *draw, +static void do_ef_quad( struct draw_context *draw, unsigned v0, unsigned v1, unsigned v2, @@ -242,6 +263,16 @@ static void do_quad( struct draw_context *draw, do_ef_triangle( draw, 0, omitEdge3, v1, v2, v3 ); } +static void do_quad( struct draw_context *draw, + unsigned v0, + unsigned v1, + unsigned v2, + unsigned v3 ) +{ + do_triangle( draw, v0, v1, v3 ); + do_triangle( draw, v1, v2, v3 ); +} + /** * Main entrypoint to draw some number of points/lines/triangles @@ -251,8 +282,10 @@ draw_prim( struct draw_context *draw, unsigned prim, unsigned start, unsigned count ) { unsigned i; + boolean unfilled = (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || + draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL); -// _mesa_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count ); +// debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count ); switch (prim) { case PIPE_PRIM_POINTS: @@ -288,24 +321,32 @@ draw_prim( struct draw_context *draw, break; case PIPE_PRIM_LINE_STRIP: - if (count >= 2) { - for (i = 1; i < count; i++) { - do_line( draw, - i == 1, - start + i - 1, - start + i ); - } + for (i = 1; i < count; i++) { + do_line( draw, + i == 1, + start + i - 1, + start + i ); } break; case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) { - do_ef_triangle( draw, - 1, - ~0, + if (unfilled) { + for (i = 0; i+2 < count; i += 3) { + do_ef_triangle( draw, + 1, + ~0, + start + i + 0, + start + i + 1, + start + i + 2 ); + } + } + else { + for (i = 0; i+2 < count; i += 3) { + do_triangle( draw, start + i + 0, start + i + 1, start + i + 2 ); + } } break; @@ -339,27 +380,49 @@ draw_prim( struct draw_context *draw, case PIPE_PRIM_QUADS: - for (i = 0; i+3 < count; i += 4) { - do_quad( draw, - start + i + 0, - start + i + 1, - start + i + 2, - start + i + 3); + if (unfilled) { + for (i = 0; i+3 < count; i += 4) { + do_ef_quad( draw, + start + i + 0, + start + i + 1, + start + i + 2, + start + i + 3); + } + } + else { + for (i = 0; i+3 < count; i += 4) { + do_quad( draw, + start + i + 0, + start + i + 1, + start + i + 2, + start + i + 3); + } } break; case PIPE_PRIM_QUAD_STRIP: - for (i = 0; i+3 < count; i += 2) { - do_quad( draw, - start + i + 2, - start + i + 0, - start + i + 1, - start + i + 3); + if (unfilled) { + for (i = 0; i+3 < count; i += 2) { + do_ef_quad( draw, + start + i + 2, + start + i + 0, + start + i + 1, + start + i + 3); + } + } + else { + for (i = 0; i+3 < count; i += 2) { + do_quad( draw, + start + i + 2, + start + i + 0, + start + i + 1, + start + i + 3); + } } break; case PIPE_PRIM_POLYGON: - if (count >= 3) { + if (unfilled) { unsigned ef_mask = (1<<2) | (1<<0); for (i = 0; i+2 < count; i++) { @@ -377,6 +440,14 @@ draw_prim( struct draw_context *draw, ef_mask &= ~(1<<2); } } + else { + for (i = 0; i+2 < count; i++) { + do_triangle( draw, + start + i + 1, + start + i + 2, + start + 0); + } + } break; default: diff --git a/src/mesa/pipe/draw/draw_private.h b/src/mesa/pipe/draw/draw_private.h index 1e59f5bd8d..7782db0477 100644 --- a/src/mesa/pipe/draw/draw_private.h +++ b/src/mesa/pipe/draw/draw_private.h @@ -141,6 +141,10 @@ struct draw_vertex_shader { /* Internal function for vertex fetch. */ typedef void (*fetch_func)(const void *ptr, float *attrib); +typedef void (*full_fetch_func)( struct draw_context *draw, + struct tgsi_exec_machine *machine, + const unsigned *elts, + unsigned count ); @@ -210,6 +214,7 @@ struct draw_context unsigned pitch[PIPE_ATTRIB_MAX]; fetch_func fetch[PIPE_ATTRIB_MAX]; unsigned nr_attrs; + full_fetch_func fetch_func; } vertex_fetch; /* Post-tnl vertex cache: @@ -235,6 +240,11 @@ struct draw_context unsigned queue_nr; } vs; + /** + * Run the vertex shader on all vertices in the vertex queue. + */ + void (*shader_queue_flush)(struct draw_context *draw); + /* Prim pipeline queue: */ struct { @@ -249,6 +259,8 @@ struct draw_context #ifdef MESA_LLVM struct gallivm_cpu_engine *engine; #endif + + void *driver_private; }; @@ -287,10 +299,6 @@ extern void draw_vertex_shader_queue_flush_llvm( struct draw_context *draw ); struct tgsi_exec_machine; extern void draw_update_vertex_fetch( struct draw_context *draw ); -extern void draw_vertex_fetch( struct draw_context *draw, - struct tgsi_exec_machine *machine, - const unsigned *elts, - unsigned count ); #define DRAW_FLUSH_SHADER_QUEUE 0x1 /* sized not to overflow, never raised */ diff --git a/src/mesa/pipe/draw/draw_validate.c b/src/mesa/pipe/draw/draw_validate.c index 86d5a5f814..4375ebabbc 100644 --- a/src/mesa/pipe/draw/draw_validate.c +++ b/src/mesa/pipe/draw/draw_validate.c @@ -78,6 +78,11 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) precalc_flat = 1; /* only needed for triangles really */ need_det = 1; } + + if (draw->rasterizer->flatshade && precalc_flat) { + draw->pipeline.flatshade->next = next; + next = draw->pipeline.flatshade; + } if (draw->rasterizer->offset_cw || draw->rasterizer->offset_ccw) { @@ -110,13 +115,8 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) { draw->pipeline.clip->next = next; next = draw->pipeline.clip; - precalc_flat = 1; /* XXX: FIX ME! Only needed for clipped prims */ } - if (draw->rasterizer->flatshade && precalc_flat) { - draw->pipeline.flatshade->next = next; - next = draw->pipeline.flatshade; - } draw->pipeline.first = next; return next; diff --git a/src/mesa/pipe/draw/draw_vbuf.c b/src/mesa/pipe/draw/draw_vbuf.c index 1e260c6156..be96c8fdeb 100644 --- a/src/mesa/pipe/draw/draw_vbuf.c +++ b/src/mesa/pipe/draw/draw_vbuf.c @@ -34,13 +34,14 @@ */ -#include <assert.h> - -#include "pipe/draw/draw_vbuf.h" -#include "pipe/draw/draw_private.h" -#include "pipe/draw/draw_vertex.h" +#include "pipe/p_debug.h" #include "pipe/p_util.h" +#include "draw_vbuf.h" +#include "draw_private.h" +#include "draw_vertex.h" +#include "draw_vf.h" + /** * Vertex buffer emit stage. @@ -55,6 +56,8 @@ struct vbuf_stage { /** Vertex size in bytes */ unsigned vertex_size; + struct draw_vertex_fetch *vf; + /* FIXME: we have no guarantee that 'unsigned' is 32bit */ /** Vertices in hardware format */ @@ -110,88 +113,175 @@ check_space( struct vbuf_stage *vbuf, unsigned nr ) } -/** - * Extract the needed fields from post-transformed vertex and emit - * a hardware(driver) vertex. - * Recall that the vertices are constructed by the 'draw' module and - * have a couple of slots at the beginning (1-dword header, 4-dword - * clip pos) that we ignore here. We only use the vertex->data[] fields. - */ -static INLINE void -emit_vertex( struct vbuf_stage *vbuf, - struct vertex_header *vertex ) +#if 0 +static INLINE void +dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) { - const struct vertex_info *vinfo = vbuf->vinfo; - - uint i; - uint count = 0; /* for debug/sanity */ - assert(vinfo == vbuf->render->get_vertex_info(vbuf->render)); - -// fprintf(stderr, "emit vertex %d to %p\n", -// vbuf->nr_vertices, vbuf->vertex_ptr); - - if(vertex->vertex_id != UNDEFINED_VERTEX_ID) { - if(vertex->vertex_id < vbuf->nr_vertices) - return; - else - fprintf(stderr, "Bad vertex id 0x%04x (>= 0x%04x)\n", - vertex->vertex_id, vbuf->nr_vertices); - return; - } - - vertex->vertex_id = vbuf->nr_vertices++; + unsigned i, j, k; for (i = 0; i < vinfo->num_attribs; i++) { - uint j = vinfo->src_index[i]; + j = vinfo->src_index[i]; switch (vinfo->emit[i]) { case EMIT_OMIT: - /* no-op */ + debug_printf("EMIT_OMIT:"); break; case EMIT_ALL: - /* just copy the whole vertex as-is to the vbuf */ assert(i == 0); - memcpy(vbuf->vertex_ptr, vertex, vinfo->size * 4); - vbuf->vertex_ptr += vinfo->size; - return; + assert(j == 0); + debug_printf("EMIT_ALL:\t"); + for(k = 0; k < vinfo->size*4; ++k) + debug_printf("%02x ", *data++); + break; case EMIT_1F: - *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); - count++; + debug_printf("EMIT_1F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); break; case EMIT_1F_PSIZE: - *vbuf->vertex_ptr++ = fui(vbuf->stage.draw->rasterizer->point_size); - count++; + debug_printf("EMIT_1F_PSIZE:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); break; case EMIT_2F: - *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][1]); - count += 2; + debug_printf("EMIT_2F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); break; case EMIT_3F: - *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][1]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][2]); - count += 3; + debug_printf("EMIT_3F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + data += sizeof(float); break; case EMIT_4F: - *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][1]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][2]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][3]); - count += 4; + debug_printf("EMIT_4F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); break; case EMIT_4UB: - *vbuf->vertex_ptr++ = pack_ub4(float_to_ubyte( vertex->data[j][2] ), - float_to_ubyte( vertex->data[j][1] ), - float_to_ubyte( vertex->data[j][0] ), - float_to_ubyte( vertex->data[j][3] )); - count += 1; + debug_printf("EMIT_4UB:\t"); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); break; default: assert(0); } + debug_printf("\n"); + } + debug_printf("\n"); +} +#endif + + +/** + * Extract the needed fields from post-transformed vertex and emit + * a hardware(driver) vertex. + * Recall that the vertices are constructed by the 'draw' module and + * have a couple of slots at the beginning (1-dword header, 4-dword + * clip pos) that we ignore here. We only use the vertex->data[] fields. + */ +static INLINE void +emit_vertex( struct vbuf_stage *vbuf, + struct vertex_header *vertex ) +{ +#if 0 + debug_printf("emit vertex %d to %p\n", + vbuf->nr_vertices, vbuf->vertex_ptr); +#endif + + if(vertex->vertex_id != UNDEFINED_VERTEX_ID) { + if(vertex->vertex_id < vbuf->nr_vertices) + return; + else + debug_printf("Bad vertex id 0x%04x (>= 0x%04x)\n", + vertex->vertex_id, vbuf->nr_vertices); + return; + } + + vertex->vertex_id = vbuf->nr_vertices++; + + if(!vbuf->vf) { + const struct vertex_info *vinfo = vbuf->vinfo; + uint i; + uint count = 0; /* for debug/sanity */ + + assert(vinfo == vbuf->render->get_vertex_info(vbuf->render)); + + for (i = 0; i < vinfo->num_attribs; i++) { + uint j = vinfo->src_index[i]; + switch (vinfo->emit[i]) { + case EMIT_OMIT: + /* no-op */ + break; + case EMIT_ALL: + /* just copy the whole vertex as-is to the vbuf */ + assert(i == 0); + assert(j == 0); + memcpy(vbuf->vertex_ptr, vertex, vinfo->size * 4); + vbuf->vertex_ptr += vinfo->size; + count += vinfo->size; + break; + case EMIT_1F: + *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); + count++; + break; + case EMIT_1F_PSIZE: + *vbuf->vertex_ptr++ = fui(vbuf->stage.draw->rasterizer->point_size); + count++; + break; + case EMIT_2F: + *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); + *vbuf->vertex_ptr++ = fui(vertex->data[j][1]); + count += 2; + break; + case EMIT_3F: + *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); + *vbuf->vertex_ptr++ = fui(vertex->data[j][1]); + *vbuf->vertex_ptr++ = fui(vertex->data[j][2]); + count += 3; + break; + case EMIT_4F: + *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); + *vbuf->vertex_ptr++ = fui(vertex->data[j][1]); + *vbuf->vertex_ptr++ = fui(vertex->data[j][2]); + *vbuf->vertex_ptr++ = fui(vertex->data[j][3]); + count += 4; + break; + case EMIT_4UB: + *vbuf->vertex_ptr++ = pack_ub4(float_to_ubyte( vertex->data[j][2] ), + float_to_ubyte( vertex->data[j][1] ), + float_to_ubyte( vertex->data[j][0] ), + float_to_ubyte( vertex->data[j][3] )); + count += 1; + break; + default: + assert(0); + } + } + assert(count == vinfo->size); +#if 0 + { + static float data[256]; + draw_vf_emit_vertex(vbuf->vf, vertex, data); + if(memcmp((uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size, data, vbuf->vertex_size)) { + debug_printf("With VF:\n"); + dump_emitted_vertex(vbuf->vinfo, (uint8_t *)data); + debug_printf("Without VF:\n"); + dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size); + assert(0); + } + } +#endif + } + else { + draw_vf_emit_vertex(vbuf->vf, vertex, vbuf->vertex_ptr); + + vbuf->vertex_ptr += vbuf->vertex_size/4; } - assert(count == vinfo->size); } @@ -269,6 +359,10 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint newprim ) vbuf->vinfo = vinfo; vbuf->vertex_size = vertex_size; + if(vbuf->vf) + draw_vf_set_vertex_info(vbuf->vf, + vbuf->vinfo, + vbuf->stage.draw->rasterizer->point_size); if (!vbuf->vertices) vbuf_alloc_vertices(vbuf); @@ -423,7 +517,12 @@ static void vbuf_destroy( struct draw_stage *stage ) { struct vbuf_stage *vbuf = vbuf_stage( stage ); - align_free( vbuf->indices ); + if(vbuf->indices) + align_free( vbuf->indices ); + + if(vbuf->vf) + draw_vf_destroy( vbuf->vf ); + FREE( stage ); } @@ -436,6 +535,9 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw, { struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage); + if(!vbuf) + return NULL; + vbuf->stage.draw = draw; vbuf->stage.point = vbuf_first_point; vbuf->stage.line = vbuf_first_line; @@ -450,11 +552,16 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw, vbuf->max_indices = render->max_indices; vbuf->indices = (ushort *) align_malloc( vbuf->max_indices * sizeof(vbuf->indices[0]), 16 ); + if(!vbuf->indices) + vbuf_destroy(&vbuf->stage); vbuf->vertices = NULL; vbuf->vertex_ptr = vbuf->vertices; vbuf->prim = ~0; + if(!GETENV("GALLIUM_NOVF")) + vbuf->vf = draw_vf_create(); + return &vbuf->stage; } diff --git a/src/mesa/pipe/draw/draw_vertex_fetch.c b/src/mesa/pipe/draw/draw_vertex_fetch.c index fb64723a19..e13df04605 100644 --- a/src/mesa/pipe/draw/draw_vertex_fetch.c +++ b/src/mesa/pipe/draw/draw_vertex_fetch.c @@ -62,50 +62,244 @@ fetch_##NAME(const void *ptr, float *attrib) \ } \ } +#define CVT_64_FLOAT (float) ((double *) ptr)[i] #define CVT_32_FLOAT ((float *) ptr)[i] + +#define CVT_8_USCALED (float) ((unsigned char *) ptr)[i] +#define CVT_16_USCALED (float) ((unsigned short *) ptr)[i] +#define CVT_32_USCALED (float) ((unsigned int *) ptr)[i] + +#define CVT_8_SSCALED (float) ((char *) ptr)[i] +#define CVT_16_SSCALED (float) ((short *) ptr)[i] #define CVT_32_SSCALED (float) ((int *) ptr)[i] + #define CVT_8_UNORM (float) ((unsigned char *) ptr)[i] / 255.0f +#define CVT_16_UNORM (float) ((unsigned short *) ptr)[i] / 65535.0f +#define CVT_32_UNORM (float) ((unsigned int *) ptr)[i] / 4294967295.0f + +#define CVT_8_SNORM (float) ((char *) ptr)[i] / 127.0f +#define CVT_16_SNORM (float) ((short *) ptr)[i] / 32767.0f +#define CVT_32_SNORM (float) ((int *) ptr)[i] / 2147483647.0f + +FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT ) +FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT ) +FETCH_ATTRIB( R64G64_FLOAT, 2, CVT_64_FLOAT ) +FETCH_ATTRIB( R64_FLOAT, 1, CVT_64_FLOAT ) FETCH_ATTRIB( R32G32B32A32_FLOAT, 4, CVT_32_FLOAT ) FETCH_ATTRIB( R32G32B32_FLOAT, 3, CVT_32_FLOAT ) FETCH_ATTRIB( R32G32_FLOAT, 2, CVT_32_FLOAT ) FETCH_ATTRIB( R32_FLOAT, 1, CVT_32_FLOAT ) + +FETCH_ATTRIB( R32G32B32A32_USCALED, 4, CVT_32_USCALED ) +FETCH_ATTRIB( R32G32B32_USCALED, 3, CVT_32_USCALED ) +FETCH_ATTRIB( R32G32_USCALED, 2, CVT_32_USCALED ) +FETCH_ATTRIB( R32_USCALED, 1, CVT_32_USCALED ) + FETCH_ATTRIB( R32G32B32A32_SSCALED, 4, CVT_32_SSCALED ) FETCH_ATTRIB( R32G32B32_SSCALED, 3, CVT_32_SSCALED ) FETCH_ATTRIB( R32G32_SSCALED, 2, CVT_32_SSCALED ) FETCH_ATTRIB( R32_SSCALED, 1, CVT_32_SSCALED ) + +FETCH_ATTRIB( R32G32B32A32_UNORM, 4, CVT_32_UNORM ) +FETCH_ATTRIB( R32G32B32_UNORM, 3, CVT_32_UNORM ) +FETCH_ATTRIB( R32G32_UNORM, 2, CVT_32_UNORM ) +FETCH_ATTRIB( R32_UNORM, 1, CVT_32_UNORM ) + +FETCH_ATTRIB( R32G32B32A32_SNORM, 4, CVT_32_SNORM ) +FETCH_ATTRIB( R32G32B32_SNORM, 3, CVT_32_SNORM ) +FETCH_ATTRIB( R32G32_SNORM, 2, CVT_32_SNORM ) +FETCH_ATTRIB( R32_SNORM, 1, CVT_32_SNORM ) + +FETCH_ATTRIB( R16G16B16A16_USCALED, 4, CVT_16_USCALED ) +FETCH_ATTRIB( R16G16B16_USCALED, 3, CVT_16_USCALED ) +FETCH_ATTRIB( R16G16_USCALED, 2, CVT_16_USCALED ) +FETCH_ATTRIB( R16_USCALED, 1, CVT_16_USCALED ) + +FETCH_ATTRIB( R16G16B16A16_SSCALED, 4, CVT_16_SSCALED ) +FETCH_ATTRIB( R16G16B16_SSCALED, 3, CVT_16_SSCALED ) +FETCH_ATTRIB( R16G16_SSCALED, 2, CVT_16_SSCALED ) +FETCH_ATTRIB( R16_SSCALED, 1, CVT_16_SSCALED ) + +FETCH_ATTRIB( R16G16B16A16_UNORM, 4, CVT_16_UNORM ) +FETCH_ATTRIB( R16G16B16_UNORM, 3, CVT_16_UNORM ) +FETCH_ATTRIB( R16G16_UNORM, 2, CVT_16_UNORM ) +FETCH_ATTRIB( R16_UNORM, 1, CVT_16_UNORM ) + +FETCH_ATTRIB( R16G16B16A16_SNORM, 4, CVT_16_SNORM ) +FETCH_ATTRIB( R16G16B16_SNORM, 3, CVT_16_SNORM ) +FETCH_ATTRIB( R16G16_SNORM, 2, CVT_16_SNORM ) +FETCH_ATTRIB( R16_SNORM, 1, CVT_16_SNORM ) + +FETCH_ATTRIB( R8G8B8A8_USCALED, 4, CVT_8_USCALED ) +FETCH_ATTRIB( R8G8B8_USCALED, 3, CVT_8_USCALED ) +FETCH_ATTRIB( R8G8_USCALED, 2, CVT_8_USCALED ) +FETCH_ATTRIB( R8_USCALED, 1, CVT_8_USCALED ) + +FETCH_ATTRIB( R8G8B8A8_SSCALED, 4, CVT_8_SSCALED ) +FETCH_ATTRIB( R8G8B8_SSCALED, 3, CVT_8_SSCALED ) +FETCH_ATTRIB( R8G8_SSCALED, 2, CVT_8_SSCALED ) +FETCH_ATTRIB( R8_SSCALED, 1, CVT_8_SSCALED ) + +FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM ) +FETCH_ATTRIB( R8G8B8_UNORM, 3, CVT_8_UNORM ) +FETCH_ATTRIB( R8G8_UNORM, 2, CVT_8_UNORM ) +FETCH_ATTRIB( R8_UNORM, 1, CVT_8_UNORM ) + +FETCH_ATTRIB( R8G8B8A8_SNORM, 4, CVT_8_SNORM ) +FETCH_ATTRIB( R8G8B8_SNORM, 3, CVT_8_SNORM ) +FETCH_ATTRIB( R8G8_SNORM, 2, CVT_8_SNORM ) +FETCH_ATTRIB( R8_SNORM, 1, CVT_8_SNORM ) + FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM ) -FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM ) +//FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM ) static fetch_func get_fetch_func( enum pipe_format format ) { +#if 0 + { + char tmp[80]; + pf_sprint_name(tmp, format); + debug_printf("%s: %s\n", __FUNCTION__, tmp); + } +#endif + switch (format) { - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return fetch_R32G32B32A32_FLOAT; - case PIPE_FORMAT_R32G32B32_FLOAT: - return fetch_R32G32B32_FLOAT; - case PIPE_FORMAT_R32G32_FLOAT: - return fetch_R32G32_FLOAT; + case PIPE_FORMAT_R64_FLOAT: + return fetch_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return fetch_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return fetch_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return fetch_R64G64B64A64_FLOAT; + case PIPE_FORMAT_R32_FLOAT: return fetch_R32_FLOAT; - case PIPE_FORMAT_R32G32B32A32_SSCALED: - return fetch_R32G32B32A32_SSCALED; - case PIPE_FORMAT_R32G32B32_SSCALED: - return fetch_R32G32B32_SSCALED; - case PIPE_FORMAT_R32G32_SSCALED: - return fetch_R32G32_SSCALED; + case PIPE_FORMAT_R32G32_FLOAT: + return fetch_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return fetch_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return fetch_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return fetch_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return fetch_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return fetch_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return fetch_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return fetch_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return fetch_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return fetch_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return fetch_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return fetch_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return fetch_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return fetch_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return fetch_R32G32B32A32_SNORM; + case PIPE_FORMAT_R32_SSCALED: return fetch_R32_SSCALED; - case PIPE_FORMAT_A8R8G8B8_UNORM: - return fetch_A8R8G8B8_UNORM; + case PIPE_FORMAT_R32G32_SSCALED: + return fetch_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return fetch_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return fetch_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return fetch_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return fetch_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return fetch_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return fetch_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return fetch_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return fetch_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return fetch_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return fetch_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return fetch_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return fetch_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return fetch_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return fetch_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return fetch_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return fetch_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return fetch_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return fetch_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return fetch_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return fetch_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return fetch_R8G8B8_UNORM; case PIPE_FORMAT_R8G8B8A8_UNORM: return fetch_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return fetch_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return fetch_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return fetch_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return fetch_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return fetch_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return fetch_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return fetch_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return fetch_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return fetch_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return fetch_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return fetch_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return fetch_R8G8B8A8_SSCALED; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return fetch_A8R8G8B8_UNORM; + case 0: - return NULL; + return NULL; /* not sure why this is needed */ + default: - /* Lots of missing cases! */ assert(0); return NULL; } @@ -126,47 +320,108 @@ transpose_4x4( float *out, const float *in ) } - -void draw_update_vertex_fetch( struct draw_context *draw ) + +static void fetch_xyz_rgb( struct draw_context *draw, + struct tgsi_exec_machine *machine, + const unsigned *elts, + unsigned count ) { - unsigned nr_attrs, i; + const unsigned *pitch = draw->vertex_fetch.pitch; + const ubyte **src = draw->vertex_fetch.src_ptr; + int i; - /* this may happend during context init */ - if (!draw->vertex_shader) - return; + assert(count <= 4); - nr_attrs = draw->vertex_shader->state->num_inputs; +// debug_printf("%s\n", __FUNCTION__); - for (i = 0; i < nr_attrs; i++) { - unsigned buf = draw->vertex_element[i].vertex_buffer_index; - enum pipe_format format = draw->vertex_element[i].src_format; + /* loop over vertex attributes (vertex shader inputs) + */ - draw->vertex_fetch.src_ptr[i] = (const ubyte *) draw->user.vbuffer[buf] + - draw->vertex_buffer[buf].buffer_offset + - draw->vertex_element[i].src_offset; + for (i = 0; i < 4; i++) { + { + const float *in = (const float *)(src[0] + elts[i] * pitch[0]); + float *out = &machine->Inputs[0].xyzw[0].f[i]; + out[0] = in[0]; + out[4] = in[1]; + out[8] = in[2]; + out[12] = 1.0f; + } - draw->vertex_fetch.pitch[i] = draw->vertex_buffer[buf].pitch; - draw->vertex_fetch.fetch[i] = get_fetch_func( format ); + { + const float *in = (const float *)(src[1] + elts[i] * pitch[1]); + float *out = &machine->Inputs[1].xyzw[0].f[i]; + out[0] = in[0]; + out[4] = in[1]; + out[8] = in[2]; + out[12] = 1.0f; + } } +} - draw->vertex_fetch.nr_attrs = nr_attrs; + + + +static void fetch_xyz_rgb_st( struct draw_context *draw, + struct tgsi_exec_machine *machine, + const unsigned *elts, + unsigned count ) +{ + const unsigned *pitch = draw->vertex_fetch.pitch; + const ubyte **src = draw->vertex_fetch.src_ptr; + int i; + + assert(count <= 4); + + /* loop over vertex attributes (vertex shader inputs) + */ + + for (i = 0; i < 4; i++) { + { + const float *in = (const float *)(src[0] + elts[i] * pitch[0]); + float *out = &machine->Inputs[0].xyzw[0].f[i]; + out[0] = in[0]; + out[4] = in[1]; + out[8] = in[2]; + out[12] = 1.0f; + } + + { + const float *in = (const float *)(src[1] + elts[i] * pitch[1]); + float *out = &machine->Inputs[1].xyzw[0].f[i]; + out[0] = in[0]; + out[4] = in[1]; + out[8] = in[2]; + out[12] = 1.0f; + } + + { + const float *in = (const float *)(src[2] + elts[i] * pitch[2]); + float *out = &machine->Inputs[2].xyzw[0].f[i]; + out[0] = in[0]; + out[4] = in[1]; + out[8] = 0.0f; + out[12] = 1.0f; + } + } } + + /** * Fetch vertex attributes for 'count' vertices. */ -void draw_vertex_fetch( struct draw_context *draw, - struct tgsi_exec_machine *machine, - const unsigned *elts, - unsigned count ) +static void generic_vertex_fetch( struct draw_context *draw, + struct tgsi_exec_machine *machine, + const unsigned *elts, + unsigned count ) { unsigned nr_attrs = draw->vertex_fetch.nr_attrs; unsigned attr; assert(count <= 4); -// _mesa_printf("%s %d\n", __FUNCTION__, count); +// debug_printf("%s %d\n", __FUNCTION__, count); /* loop over vertex attributes (vertex shader inputs) */ @@ -206,3 +461,50 @@ void draw_vertex_fetch( struct draw_context *draw, } } + + +void draw_update_vertex_fetch( struct draw_context *draw ) +{ + unsigned nr_attrs, i; + +// debug_printf("%s\n", __FUNCTION__); + + /* this may happend during context init */ + if (!draw->vertex_shader) + return; + + nr_attrs = draw->vertex_shader->state->num_inputs; + + for (i = 0; i < nr_attrs; i++) { + unsigned buf = draw->vertex_element[i].vertex_buffer_index; + enum pipe_format format = draw->vertex_element[i].src_format; + + draw->vertex_fetch.src_ptr[i] = (const ubyte *) draw->user.vbuffer[buf] + + draw->vertex_buffer[buf].buffer_offset + + draw->vertex_element[i].src_offset; + + draw->vertex_fetch.pitch[i] = draw->vertex_buffer[buf].pitch; + draw->vertex_fetch.fetch[i] = get_fetch_func( format ); + } + + draw->vertex_fetch.nr_attrs = nr_attrs; + + draw->vertex_fetch.fetch_func = generic_vertex_fetch; + + switch (nr_attrs) { + case 2: + if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT && + draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT) + draw->vertex_fetch.fetch_func = fetch_xyz_rgb; + break; + case 3: + if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT && + draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT && + draw->vertex_element[2].src_format == PIPE_FORMAT_R32G32_FLOAT) + draw->vertex_fetch.fetch_func = fetch_xyz_rgb_st; + break; + default: + break; + } + +} diff --git a/src/mesa/pipe/draw/draw_vertex_shader.c b/src/mesa/pipe/draw/draw_vertex_shader.c index 3041974b9a..5ca93aa615 100644 --- a/src/mesa/pipe/draw/draw_vertex_shader.c +++ b/src/mesa/pipe/draw/draw_vertex_shader.c @@ -110,7 +110,7 @@ run_vertex_program(struct draw_context *draw, machine->Inputs = ALIGN16_ASSIGN(inputs); machine->Outputs = ALIGN16_ASSIGN(outputs); - draw_vertex_fetch( draw, machine, elts, count ); + draw->vertex_fetch.fetch_func( draw, machine, elts, count ); /* run shader */ #if defined(__i386__) || defined(__386__) @@ -121,11 +121,16 @@ run_vertex_program(struct draw_context *draw, = (struct draw_vertex_shader *)draw->vertex_shader; codegen_function func = (codegen_function) x86_get_func( &shader->sse2_program ); - func( - machine->Inputs, - machine->Outputs, - machine->Consts, - machine->Temps ); + + if (func) + func( + machine->Inputs, + machine->Outputs, + machine->Consts, + machine->Temps ); + else + /* interpreter */ + tgsi_exec_machine_run( machine ); } else #endif @@ -166,7 +171,7 @@ run_vertex_program(struct draw_context *draw, vOut[j]->data[0][3] = w; #if DBG_VS - printf("output[%d]win: %f %f %f %f\n", j, + debug_printf("output[%d]win: %f %f %f %f\n", j, vOut[j]->data[0][0], vOut[j]->data[0][1], vOut[j]->data[0][2], @@ -181,7 +186,7 @@ run_vertex_program(struct draw_context *draw, vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; #if DBG_VS - printf("output[%d][%d]: %f %f %f %f\n", j, slot, + debug_printf("output[%d][%d]: %f %f %f %f\n", j, slot, vOut[j]->data[slot][0], vOut[j]->data[slot][1], vOut[j]->data[slot][2], @@ -199,13 +204,15 @@ run_vertex_program(struct draw_context *draw, void draw_vertex_shader_queue_flush(struct draw_context *draw) { - unsigned i, j; + unsigned i; + + assert(draw->vs.queue_nr != 0); /* XXX: do this on statechange: */ draw_update_vertex_fetch( draw ); -// fprintf(stderr, " q(%d) ", draw->vs.queue_nr ); +// debug_printf( " q(%d) ", draw->vs.queue_nr ); #ifdef MESA_LLVM if (draw->vertex_shader->llvm_prog) { draw_vertex_shader_queue_flush_llvm(draw); @@ -217,14 +224,18 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) for (i = 0; i < draw->vs.queue_nr; i += 4) { struct vertex_header *dests[4]; unsigned elts[4]; - int n; + int j, n = MIN2(4, draw->vs.queue_nr - i); - for (j = 0; j < 4; j++) { + for (j = 0; j < n; j++) { elts[j] = draw->vs.queue[i + j].elt; dests[j] = draw->vs.queue[i + j].dest; } - n = MIN2(4, draw->vs.queue_nr - i); + for ( ; j < 4; j++) { + elts[j] = elts[0]; + dests[j] = dests[0]; + } + assert(n > 0); assert(n <= 4); @@ -263,7 +274,12 @@ draw_create_vertex_shader(struct draw_context *draw, struct pipe_shader_state *sh = (struct pipe_shader_state *) shader; x86_init_func( &vs->sse2_program ); - tgsi_emit_sse2( (struct tgsi_token *) sh->tokens, &vs->sse2_program ); + if (!tgsi_emit_sse2( (struct tgsi_token *) sh->tokens, + &vs->sse2_program )) { + x86_release_func( (struct x86_function *) &vs->sse2_program ); + fprintf(stdout /*err*/, + "tgsi_emit_sse2() failed, falling back to interpreter\n"); + } } #endif diff --git a/src/mesa/pipe/draw/draw_vertex_shader_llvm.c b/src/mesa/pipe/draw/draw_vertex_shader_llvm.c index 4228c4f388..63551c993e 100644 --- a/src/mesa/pipe/draw/draw_vertex_shader_llvm.c +++ b/src/mesa/pipe/draw/draw_vertex_shader_llvm.c @@ -152,7 +152,7 @@ void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw) z = vOut->clip[2] = dests[0][2]; w = vOut->clip[3] = dests[0][3]; #if DBG - printf("output %d: %f %f %f %f\n", 0, x, y, z, w); + debug_printf("output %d: %f %f %f %f\n", 0, x, y, z, w); #endif vOut->clipmask = compute_clipmask(vOut->clip, draw->plane, draw->nr_planes); @@ -179,7 +179,7 @@ void draw_vertex_shader_queue_flush_llvm(struct draw_context *draw) vOut->data[slot][3] = dests[slot][3]; #if DBG - printf("output %d: %f %f %f %f\n", slot, + debug_printf("output %d: %f %f %f %f\n", slot, vOut->data[slot][0], vOut->data[slot][1], vOut->data[slot][2], diff --git a/src/mesa/pipe/draw/draw_vf.c b/src/mesa/pipe/draw/draw_vf.c new file mode 100644 index 0000000000..f23d7fcec5 --- /dev/null +++ b/src/mesa/pipe/draw/draw_vf.c @@ -0,0 +1,428 @@ +/* + * Copyright 2003 Tungsten Graphics, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell <keithw@tungstengraphics.com> + */ + + +#include <stddef.h> + +#include "pipe/p_compiler.h" +#include "pipe/p_util.h" + +#include "draw_vf.h" + + +#define DRAW_VF_DBG 0 + + +/* TODO: remove this */ +extern void +_mesa_exec_free( void *addr ); + + +static boolean match_fastpath( struct draw_vertex_fetch *vf, + const struct draw_vf_fastpath *fp) +{ + unsigned j; + + if (vf->attr_count != fp->attr_count) + return FALSE; + + for (j = 0; j < vf->attr_count; j++) + if (vf->attr[j].format != fp->attr[j].format || + vf->attr[j].inputsize != fp->attr[j].size || + vf->attr[j].vertoffset != fp->attr[j].offset) + return FALSE; + + if (fp->match_strides) { + if (vf->vertex_stride != fp->vertex_stride) + return FALSE; + + for (j = 0; j < vf->attr_count; j++) + if (vf->attr[j].inputstride != fp->attr[j].stride) + return FALSE; + } + + return TRUE; +} + +static boolean search_fastpath_emit( struct draw_vertex_fetch *vf ) +{ + struct draw_vf_fastpath *fp = vf->fastpath; + + for ( ; fp ; fp = fp->next) { + if (match_fastpath(vf, fp)) { + vf->emit = fp->func; + return TRUE; + } + } + + return FALSE; +} + +void draw_vf_register_fastpath( struct draw_vertex_fetch *vf, + boolean match_strides ) +{ + struct draw_vf_fastpath *fastpath = CALLOC_STRUCT(draw_vf_fastpath); + unsigned i; + + fastpath->vertex_stride = vf->vertex_stride; + fastpath->attr_count = vf->attr_count; + fastpath->match_strides = match_strides; + fastpath->func = vf->emit; + fastpath->attr = (struct draw_vf_attr_type *) + MALLOC(vf->attr_count * sizeof(fastpath->attr[0])); + + for (i = 0; i < vf->attr_count; i++) { + fastpath->attr[i].format = vf->attr[i].format; + fastpath->attr[i].stride = vf->attr[i].inputstride; + fastpath->attr[i].size = vf->attr[i].inputsize; + fastpath->attr[i].offset = vf->attr[i].vertoffset; + } + + fastpath->next = vf->fastpath; + vf->fastpath = fastpath; +} + + + + +/*********************************************************************** + * Build codegen functions or return generic ones: + */ +static void choose_emit_func( struct draw_vertex_fetch *vf, + unsigned count, + uint8_t *dest) +{ + vf->emit = NULL; + + /* Does this match an existing (hardwired, codegen or known-bad) + * fastpath? + */ + if (search_fastpath_emit(vf)) { + /* Use this result. If it is null, then it is already known + * that the current state will fail for codegen and there is no + * point trying again. + */ + } + else if (vf->codegen_emit) { + vf->codegen_emit( vf ); + } + + if (!vf->emit) { + draw_vf_generate_hardwired_emit(vf); + } + + /* Otherwise use the generic version: + */ + if (!vf->emit) + vf->emit = draw_vf_generic_emit; + + vf->emit( vf, count, dest ); +} + + + + + +/*********************************************************************** + * Public entrypoints, mostly dispatch to the above: + */ + + + +static unsigned +draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf, + const struct draw_vf_attr_map *map, + unsigned nr, + unsigned vertex_stride ) +{ + unsigned offset = 0; + unsigned i, j; + + assert(nr < PIPE_ATTRIB_MAX); + + for (j = 0, i = 0; i < nr; i++) { + const unsigned format = map[i].format; + if (format == DRAW_EMIT_PAD) { +#if (DRAW_VF_DBG) + debug_printf("%d: pad %d, offset %d\n", i, + map[i].offset, offset); +#endif + + offset += map[i].offset; + + } + else { + vf->attr[j].attrib = map[i].attrib; + vf->attr[j].format = format; + vf->attr[j].insert = draw_vf_format_info[format].insert; + vf->attr[j].vertattrsize = draw_vf_format_info[format].attrsize; + vf->attr[j].vertoffset = offset; + vf->attr[j].isconst = draw_vf_format_info[format].isconst; + if(vf->attr[j].isconst) + memcpy(vf->attr[j].data, &map[i].data, vf->attr[j].vertattrsize); + +#if (DRAW_VF_DBG) + debug_printf("%d: %s, offset %d\n", i, + draw_vf_format_info[format].name, + vf->attr[j].vertoffset); +#endif + + offset += draw_vf_format_info[format].attrsize; + j++; + } + } + + vf->attr_count = j; + vf->vertex_stride = vertex_stride ? vertex_stride : offset; + vf->emit = choose_emit_func; + + assert(vf->vertex_stride >= offset); + return vf->vertex_stride; +} + + +void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf, + const struct vertex_info *vinfo, + float point_size ) +{ + unsigned i, j, k; + struct draw_vf_attr *a = vf->attr; + struct draw_vf_attr_map attrs[PIPE_MAX_SHADER_INPUTS]; + unsigned count = 0; /* for debug/sanity */ + unsigned nr_attrs = 0; + + for (i = 0; i < vinfo->num_attribs; i++) { + j = vinfo->src_index[i]; + switch (vinfo->emit[i]) { + case EMIT_OMIT: + /* no-op */ + break; + case EMIT_ALL: { + /* just copy the whole vertex as-is to the vbuf */ + unsigned s = vinfo->size; + assert(i == 0); + assert(j == 0); + /* copy the vertex header */ + /* XXX: we actually don't copy the header, just pad it */ + attrs[nr_attrs].attrib = 0; + attrs[nr_attrs].format = DRAW_EMIT_PAD; + attrs[nr_attrs].offset = offsetof(struct vertex_header, data); + s -= offsetof(struct vertex_header, data)/4; + count += offsetof(struct vertex_header, data)/4; + nr_attrs++; + /* copy the vertex data */ + for(k = 0; k < (s & ~0x3); k += 4) { + attrs[nr_attrs].attrib = k/4; + attrs[nr_attrs].format = DRAW_EMIT_4F; + attrs[nr_attrs].offset = 0; + nr_attrs++; + count += 4; + } + /* tail */ + /* XXX: actually, this shouldn't be needed */ + attrs[nr_attrs].attrib = k/4; + attrs[nr_attrs].offset = 0; + switch(s & 0x3) { + case 0: + break; + case 1: + attrs[nr_attrs].format = DRAW_EMIT_1F; + nr_attrs++; + count += 1; + break; + case 2: + attrs[nr_attrs].format = DRAW_EMIT_2F; + nr_attrs++; + count += 2; + break; + case 3: + attrs[nr_attrs].format = DRAW_EMIT_3F; + nr_attrs++; + count += 3; + break; + } + break; + } + case EMIT_1F: + attrs[nr_attrs].attrib = j; + attrs[nr_attrs].format = DRAW_EMIT_1F; + attrs[nr_attrs].offset = 0; + nr_attrs++; + count++; + break; + case EMIT_1F_PSIZE: + attrs[nr_attrs].attrib = j; + attrs[nr_attrs].format = DRAW_EMIT_1F_CONST; + attrs[nr_attrs].offset = 0; + attrs[nr_attrs].data.f[0] = point_size; + nr_attrs++; + count++; + break; + case EMIT_2F: + attrs[nr_attrs].attrib = j; + attrs[nr_attrs].format = DRAW_EMIT_2F; + attrs[nr_attrs].offset = 0; + nr_attrs++; + count += 2; + break; + case EMIT_3F: + attrs[nr_attrs].attrib = j; + attrs[nr_attrs].format = DRAW_EMIT_3F; + attrs[nr_attrs].offset = 0; + nr_attrs++; + count += 3; + break; + case EMIT_4F: + attrs[nr_attrs].attrib = j; + attrs[nr_attrs].format = DRAW_EMIT_4F; + attrs[nr_attrs].offset = 0; + nr_attrs++; + count += 4; + break; + case EMIT_4UB: + attrs[nr_attrs].attrib = j; + attrs[nr_attrs].format = DRAW_EMIT_4UB_4F_BGRA; + attrs[nr_attrs].offset = 0; + nr_attrs++; + count += 1; + break; + default: + assert(0); + } + } + + assert(count == vinfo->size); + + draw_vf_set_vertex_attributes(vf, + attrs, + nr_attrs, + vinfo->size * sizeof(float) ); + + for (j = 0; j < vf->attr_count; j++) { + a[j].inputsize = 4; + a[j].do_insert = a[j].insert[4 - 1]; + if(a[j].isconst) { + a[j].inputptr = a[j].data; + a[j].inputstride = 0; + } + } +} + + +#if 0 +/* Set attribute pointers, adjusted for start position: + */ +void draw_vf_set_sources( struct draw_vertex_fetch *vf, + GLvector4f * const sources[], + unsigned start ) +{ + struct draw_vf_attr *a = vf->attr; + unsigned j; + + for (j = 0; j < vf->attr_count; j++) { + const GLvector4f *vptr = sources[a[j].attrib]; + + if ((a[j].inputstride != vptr->stride) || + (a[j].inputsize != vptr->size)) + vf->emit = choose_emit_func; + + a[j].inputstride = vptr->stride; + a[j].inputsize = vptr->size; + a[j].do_insert = a[j].insert[vptr->size - 1]; + a[j].inputptr = ((uint8_t *)vptr->data) + start * vptr->stride; + } +} +#endif + + +/** + * Emit a vertex to dest. + */ +void draw_vf_emit_vertex( struct draw_vertex_fetch *vf, + struct vertex_header *vertex, + void *dest ) +{ + struct draw_vf_attr *a = vf->attr; + unsigned j; + + for (j = 0; j < vf->attr_count; j++) { + if(!a[j].isconst) { + a[j].inputptr = (uint8_t *)&vertex->data[a[j].attrib][0]; + a[j].inputstride = 0; /* XXX: one-vertex-max ATM */ + } + } + + vf->emit( vf, 1, (uint8_t*) dest ); +} + + + +struct draw_vertex_fetch *draw_vf_create( void ) +{ + struct draw_vertex_fetch *vf = CALLOC_STRUCT(draw_vertex_fetch); + unsigned i; + + for (i = 0; i < PIPE_ATTRIB_MAX; i++) + vf->attr[i].vf = vf; + + vf->identity[0] = 0.0; + vf->identity[1] = 0.0; + vf->identity[2] = 0.0; + vf->identity[3] = 1.0; + + vf->codegen_emit = NULL; + +#ifdef USE_SSE_ASM + if (!GETENV("GALLIUM_NO_CODEGEN")) + vf->codegen_emit = draw_vf_generate_sse_emit; +#endif + + return vf; +} + + +void draw_vf_destroy( struct draw_vertex_fetch *vf ) +{ + struct draw_vf_fastpath *fp, *tmp; + + for (fp = vf->fastpath ; fp ; fp = tmp) { + tmp = fp->next; + FREE(fp->attr); + + /* KW: At the moment, fp->func is constrained to be allocated by + * _mesa_exec_alloc(), as the hardwired fastpaths in + * t_vertex_generic.c are handled specially. It would be nice + * to unify them, but this probably won't change until this + * module gets another overhaul. + */ + //_mesa_exec_free((void *) fp->func); + FREE(fp); + } + + vf->fastpath = NULL; + FREE(vf); +} diff --git a/src/mesa/pipe/draw/draw_vf.h b/src/mesa/pipe/draw/draw_vf.h new file mode 100644 index 0000000000..e694b98675 --- /dev/null +++ b/src/mesa/pipe/draw/draw_vf.h @@ -0,0 +1,223 @@ +/* + * Copyright 2003 Tungsten Graphics, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell <keithw@tungstengraphics.com> + */ + +#ifndef DRAW_VF_H +#define DRAW_VF_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + +#include "draw_vertex.h" +#include "draw_private.h" // for vertex_header + + +enum draw_vf_attr_format { + DRAW_EMIT_1F, + DRAW_EMIT_2F, + DRAW_EMIT_3F, + DRAW_EMIT_4F, + DRAW_EMIT_3F_XYW, /**< for projective texture */ + DRAW_EMIT_1UB_1F, /**< for fog coordinate */ + DRAW_EMIT_3UB_3F_RGB, /**< for specular color */ + DRAW_EMIT_3UB_3F_BGR, /**< for specular color */ + DRAW_EMIT_4UB_4F_RGBA, /**< for color */ + DRAW_EMIT_4UB_4F_BGRA, /**< for color */ + DRAW_EMIT_4UB_4F_ARGB, /**< for color */ + DRAW_EMIT_4UB_4F_ABGR, /**< for color */ + DRAW_EMIT_1F_CONST, + DRAW_EMIT_2F_CONST, + DRAW_EMIT_3F_CONST, + DRAW_EMIT_4F_CONST, + DRAW_EMIT_PAD, /**< leave a hole of 'offset' bytes */ + DRAW_EMIT_MAX +}; + +struct draw_vf_attr_map +{ + /** Input attribute number */ + unsigned attrib; + + enum draw_vf_attr_format format; + + unsigned offset; + + /** + * Constant data for DRAW_EMIT_*_CONST + */ + union { + uint8_t ub[4]; + float f[4]; + } data; +}; + +struct draw_vertex_fetch; + + + +#if 0 +unsigned +draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf, + const struct draw_vf_attr_map *map, + unsigned nr, + unsigned vertex_stride ); +#endif + +void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf, + const struct vertex_info *vinfo, + float point_size ); + +#if 0 +void +draw_vf_set_sources( struct draw_vertex_fetch *vf, + GLvector4f * const attrib[], + unsigned start ); +#endif + +void +draw_vf_emit_vertex( struct draw_vertex_fetch *vf, + struct vertex_header *vertex, + void *dest ); + +struct draw_vertex_fetch * +draw_vf_create( void ); + +void +draw_vf_destroy( struct draw_vertex_fetch *vf ); + + + +/*********************************************************************** + * Internal functions and structs: + */ + +struct draw_vf_attr; + +typedef void (*draw_vf_extract_func)( const struct draw_vf_attr *a, + float *out, + const uint8_t *v ); + +typedef void (*draw_vf_insert_func)( const struct draw_vf_attr *a, + uint8_t *v, + const float *in ); + +typedef void (*draw_vf_emit_func)( struct draw_vertex_fetch *vf, + unsigned count, + uint8_t *dest ); + + + +/** + * Describes how to convert/move a vertex attribute from a vertex + * array to a vertex structure. + */ +struct draw_vf_attr +{ + struct draw_vertex_fetch *vf; + + unsigned format; + unsigned inputsize; + unsigned inputstride; + unsigned vertoffset; /**< position of the attrib in the vertex struct */ + + boolean isconst; /**< read from const data below */ + uint8_t data[16]; + + unsigned attrib; /**< which vertex attrib (0=position, etc) */ + unsigned vertattrsize; /**< size of the attribute in bytes */ + + uint8_t *inputptr; + const draw_vf_insert_func *insert; + draw_vf_insert_func do_insert; + draw_vf_extract_func extract; +}; + +struct draw_vertex_fetch +{ + struct draw_vf_attr attr[PIPE_ATTRIB_MAX]; + unsigned attr_count; + unsigned vertex_stride; + + draw_vf_emit_func emit; + + /* Parameters and constants for codegen: + */ + float identity[4]; + + struct draw_vf_fastpath *fastpath; + + void (*codegen_emit)( struct draw_vertex_fetch *vf ); +}; + + +struct draw_vf_attr_type { + unsigned format; + unsigned size; + unsigned stride; + unsigned offset; +}; + +struct draw_vf_fastpath { + unsigned vertex_stride; + unsigned attr_count; + boolean match_strides; + + struct draw_vf_attr_type *attr; + + draw_vf_emit_func func; + struct draw_vf_fastpath *next; +}; + + +void +draw_vf_register_fastpath( struct draw_vertex_fetch *vtx, + boolean match_strides ); + +void +draw_vf_generic_emit( struct draw_vertex_fetch *vf, + unsigned count, + uint8_t *v ); + +void +draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf ); + +void +draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ); + + +struct draw_vf_format_info { + const char *name; + draw_vf_insert_func insert[4]; + const unsigned attrsize; + const boolean isconst; +}; + +extern const struct draw_vf_format_info +draw_vf_format_info[DRAW_EMIT_MAX]; + + +#endif diff --git a/src/mesa/pipe/draw/draw_vf_generic.c b/src/mesa/pipe/draw/draw_vf_generic.c new file mode 100644 index 0000000000..7a60a9db9c --- /dev/null +++ b/src/mesa/pipe/draw/draw_vf_generic.c @@ -0,0 +1,585 @@ + +/* + * Copyright 2003 Tungsten Graphics, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell <keithw@tungstengraphics.com> + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_util.h" + +#include "draw_vf.h" + + + +static INLINE void insert_4f_4( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +static INLINE void insert_4f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = 1; +} + +static INLINE void insert_4f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = in[1]; + out[2] = 0; + out[3] = 1; +} + +static INLINE void insert_4f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = 0; + out[2] = 0; + out[3] = 1; +} + +static INLINE void insert_3f_xyw_4( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[3]; +} + +static INLINE void insert_3f_xyw_err( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + (void) a; (void) v; (void) in; + assert(0); +} + +static INLINE void insert_3f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; +} + +static INLINE void insert_3f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = in[1]; + out[2] = 0; +} + +static INLINE void insert_3f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = 0; + out[2] = 0; +} + + +static INLINE void insert_2f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = in[1]; +} + +static INLINE void insert_2f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = 0; +} + +static INLINE void insert_1f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; +} + +static INLINE void insert_null( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + (void) a; (void) v; (void) in; +} + +static INLINE void insert_4ub_4f_rgba_4( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]); + UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]); +} + +static INLINE void insert_4ub_4f_rgba_3( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]); + v[3] = 0xff; +} + +static INLINE void insert_4ub_4f_rgba_2( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + v[2] = 0; + v[3] = 0xff; +} + +static INLINE void insert_4ub_4f_rgba_1( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); + v[1] = 0; + v[2] = 0; + v[3] = 0xff; +} + +static INLINE void insert_4ub_4f_bgra_4( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]); + UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]); +} + +static INLINE void insert_4ub_4f_bgra_3( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]); + v[3] = 0xff; +} + +static INLINE void insert_4ub_4f_bgra_2( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + v[0] = 0; + v[3] = 0xff; +} + +static INLINE void insert_4ub_4f_bgra_1( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); + v[1] = 0; + v[0] = 0; + v[3] = 0xff; +} + +static INLINE void insert_4ub_4f_argb_4( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]); + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]); +} + +static INLINE void insert_4ub_4f_argb_3( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]); + v[0] = 0xff; +} + +static INLINE void insert_4ub_4f_argb_2( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); + v[3] = 0x00; + v[0] = 0xff; +} + +static INLINE void insert_4ub_4f_argb_1( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); + v[2] = 0x00; + v[3] = 0x00; + v[0] = 0xff; +} + +static INLINE void insert_4ub_4f_abgr_4( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]); + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]); +} + +static INLINE void insert_4ub_4f_abgr_3( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]); + v[0] = 0xff; +} + +static INLINE void insert_4ub_4f_abgr_2( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); + v[1] = 0x00; + v[0] = 0xff; +} + +static INLINE void insert_4ub_4f_abgr_1( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); + v[2] = 0x00; + v[1] = 0x00; + v[0] = 0xff; +} + +static INLINE void insert_3ub_3f_rgb_3( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]); +} + +static INLINE void insert_3ub_3f_rgb_2( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + v[2] = 0; +} + +static INLINE void insert_3ub_3f_rgb_1( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); + v[1] = 0; + v[2] = 0; +} + +static INLINE void insert_3ub_3f_bgr_3( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]); +} + +static INLINE void insert_3ub_3f_bgr_2( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + v[0] = 0; +} + +static INLINE void insert_3ub_3f_bgr_1( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); + v[1] = 0; + v[0] = 0; +} + + +static INLINE void insert_1ub_1f_1( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); +} + + +const struct draw_vf_format_info draw_vf_format_info[DRAW_EMIT_MAX] = +{ + { "1f", + { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 }, + sizeof(float), FALSE }, + + { "2f", + { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 }, + 2 * sizeof(float), FALSE }, + + { "3f", + { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 }, + 3 * sizeof(float), FALSE }, + + { "4f", + { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 }, + 4 * sizeof(float), FALSE }, + + { "3f_xyw", + { insert_3f_xyw_err, insert_3f_xyw_err, insert_3f_xyw_err, + insert_3f_xyw_4 }, + 3 * sizeof(float), FALSE }, + + { "1ub_1f", + { insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1 }, + sizeof(uint8_t), FALSE }, + + { "3ub_3f_rgb", + { insert_3ub_3f_rgb_1, insert_3ub_3f_rgb_2, insert_3ub_3f_rgb_3, + insert_3ub_3f_rgb_3 }, + 3 * sizeof(uint8_t), FALSE }, + + { "3ub_3f_bgr", + { insert_3ub_3f_bgr_1, insert_3ub_3f_bgr_2, insert_3ub_3f_bgr_3, + insert_3ub_3f_bgr_3 }, + 3 * sizeof(uint8_t), FALSE }, + + { "4ub_4f_rgba", + { insert_4ub_4f_rgba_1, insert_4ub_4f_rgba_2, insert_4ub_4f_rgba_3, + insert_4ub_4f_rgba_4 }, + 4 * sizeof(uint8_t), FALSE }, + + { "4ub_4f_bgra", + { insert_4ub_4f_bgra_1, insert_4ub_4f_bgra_2, insert_4ub_4f_bgra_3, + insert_4ub_4f_bgra_4 }, + 4 * sizeof(uint8_t), FALSE }, + + { "4ub_4f_argb", + { insert_4ub_4f_argb_1, insert_4ub_4f_argb_2, insert_4ub_4f_argb_3, + insert_4ub_4f_argb_4 }, + 4 * sizeof(uint8_t), FALSE }, + + { "4ub_4f_abgr", + { insert_4ub_4f_abgr_1, insert_4ub_4f_abgr_2, insert_4ub_4f_abgr_3, + insert_4ub_4f_abgr_4 }, + 4 * sizeof(uint8_t), FALSE }, + + { "1f_const", + { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 }, + sizeof(float), TRUE }, + + { "2f_const", + { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 }, + 2 * sizeof(float), TRUE }, + + { "3f_const", + { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 }, + 3 * sizeof(float), TRUE }, + + { "4f_const", + { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 }, + 4 * sizeof(float), TRUE }, + + { "pad", + { NULL, NULL, NULL, NULL }, + 0, FALSE }, + +}; + + + + +/*********************************************************************** + * Hardwired fastpaths for emitting whole vertices or groups of + * vertices + */ +#define EMIT5(NR, F0, F1, F2, F3, F4, NAME) \ +static void NAME( struct draw_vertex_fetch *vf, \ + unsigned count, \ + uint8_t *v ) \ +{ \ + struct draw_vf_attr *a = vf->attr; \ + unsigned i; \ + \ + for (i = 0 ; i < count ; i++, v += vf->vertex_stride) { \ + if (NR > 0) { \ + F0( &a[0], v + a[0].vertoffset, (float *)a[0].inputptr ); \ + a[0].inputptr += a[0].inputstride; \ + } \ + \ + if (NR > 1) { \ + F1( &a[1], v + a[1].vertoffset, (float *)a[1].inputptr ); \ + a[1].inputptr += a[1].inputstride; \ + } \ + \ + if (NR > 2) { \ + F2( &a[2], v + a[2].vertoffset, (float *)a[2].inputptr ); \ + a[2].inputptr += a[2].inputstride; \ + } \ + \ + if (NR > 3) { \ + F3( &a[3], v + a[3].vertoffset, (float *)a[3].inputptr ); \ + a[3].inputptr += a[3].inputstride; \ + } \ + \ + if (NR > 4) { \ + F4( &a[4], v + a[4].vertoffset, (float *)a[4].inputptr ); \ + a[4].inputptr += a[4].inputstride; \ + } \ + } \ +} + + +#define EMIT2(F0, F1, NAME) EMIT5(2, F0, F1, insert_null, \ + insert_null, insert_null, NAME) + +#define EMIT3(F0, F1, F2, NAME) EMIT5(3, F0, F1, F2, insert_null, \ + insert_null, NAME) + +#define EMIT4(F0, F1, F2, F3, NAME) EMIT5(4, F0, F1, F2, F3, \ + insert_null, NAME) + + +EMIT2(insert_3f_3, insert_4ub_4f_rgba_4, emit_xyz3_rgba4) + +EMIT3(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, emit_xyzw4_rgba4_st2) + +EMIT4(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, insert_2f_2, emit_xyzw4_rgba4_st2_st2) + + +/* Use the codegen paths to select one of a number of hardwired + * fastpaths. + */ +void draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf ) +{ + draw_vf_emit_func func = NULL; + + /* Does it fit a hardwired fastpath? Help! this is growing out of + * control! + */ + switch (vf->attr_count) { + case 2: + if (vf->attr[0].do_insert == insert_3f_3 && + vf->attr[1].do_insert == insert_4ub_4f_rgba_4) { + func = emit_xyz3_rgba4; + } + break; + case 3: + if (vf->attr[2].do_insert == insert_2f_2) { + if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) { + if (vf->attr[0].do_insert == insert_4f_4) + func = emit_xyzw4_rgba4_st2; + } + } + break; + case 4: + if (vf->attr[2].do_insert == insert_2f_2 && + vf->attr[3].do_insert == insert_2f_2) { + if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) { + if (vf->attr[0].do_insert == insert_4f_4) + func = emit_xyzw4_rgba4_st2_st2; + } + } + break; + } + + vf->emit = func; +} + +/*********************************************************************** + * Generic (non-codegen) functions for whole vertices or groups of + * vertices + */ + +void draw_vf_generic_emit( struct draw_vertex_fetch *vf, + unsigned count, + uint8_t *v ) +{ + struct draw_vf_attr *a = vf->attr; + const unsigned attr_count = vf->attr_count; + const unsigned stride = vf->vertex_stride; + unsigned i, j; + + for (i = 0 ; i < count ; i++, v += stride) { + for (j = 0; j < attr_count; j++) { + float *in = (float *)a[j].inputptr; + a[j].inputptr += a[j].inputstride; + a[j].do_insert( &a[j], v + a[j].vertoffset, in ); + } + } +} + + diff --git a/src/mesa/pipe/draw/draw_vf_sse.c b/src/mesa/pipe/draw/draw_vf_sse.c new file mode 100644 index 0000000000..1ad2ae756d --- /dev/null +++ b/src/mesa/pipe/draw/draw_vf_sse.c @@ -0,0 +1,614 @@ +/* + * Copyright 2003 Tungsten Graphics, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell <keithw@tungstengraphics.com> + */ + + +#include "simple_list.h" + +#include "pipe/p_compiler.h" + +#include "draw_vf.h" + + +#if defined(USE_SSE_ASM) + +#include "x86/rtasm/x86sse.h" +#include "x86/common_x86_asm.h" + + +#define X 0 +#define Y 1 +#define Z 2 +#define W 3 + + +struct x86_program { + struct x86_function func; + + struct draw_vertex_fetch *vf; + boolean inputs_safe; + boolean outputs_safe; + boolean have_sse2; + + struct x86_reg identity; + struct x86_reg chan0; +}; + + +static struct x86_reg get_identity( struct x86_program *p ) +{ + return p->identity; +} + +static void emit_load4f_4( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + sse_movups(&p->func, dest, arg0); +} + +static void emit_load4f_3( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + /* Have to jump through some hoops: + * + * c 0 0 0 + * c 0 0 1 + * 0 0 c 1 + * a b c 1 + */ + sse_movss(&p->func, dest, x86_make_disp(arg0, 8)); + sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) ); + sse_shufps(&p->func, dest, dest, SHUF(Y,Z,X,W) ); + sse_movlps(&p->func, dest, arg0); +} + +static void emit_load4f_2( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + /* Initialize from identity, then pull in low two words: + */ + sse_movups(&p->func, dest, get_identity(p)); + sse_movlps(&p->func, dest, arg0); +} + +static void emit_load4f_1( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + /* Pull in low word, then swizzle in identity */ + sse_movss(&p->func, dest, arg0); + sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) ); +} + + + +static void emit_load3f_3( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + /* Over-reads by 1 dword - potential SEGV if input is a vertex + * array. + */ + if (p->inputs_safe) { + sse_movups(&p->func, dest, arg0); + } + else { + /* c 0 0 0 + * c c c c + * a b c c + */ + sse_movss(&p->func, dest, x86_make_disp(arg0, 8)); + sse_shufps(&p->func, dest, dest, SHUF(X,X,X,X)); + sse_movlps(&p->func, dest, arg0); + } +} + +static void emit_load3f_2( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + emit_load4f_2(p, dest, arg0); +} + +static void emit_load3f_1( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + emit_load4f_1(p, dest, arg0); +} + +static void emit_load2f_2( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + sse_movlps(&p->func, dest, arg0); +} + +static void emit_load2f_1( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + emit_load4f_1(p, dest, arg0); +} + +static void emit_load1f_1( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + sse_movss(&p->func, dest, arg0); +} + +static void (*load[4][4])( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) = { + { emit_load1f_1, + emit_load1f_1, + emit_load1f_1, + emit_load1f_1 }, + + { emit_load2f_1, + emit_load2f_2, + emit_load2f_2, + emit_load2f_2 }, + + { emit_load3f_1, + emit_load3f_2, + emit_load3f_3, + emit_load3f_3 }, + + { emit_load4f_1, + emit_load4f_2, + emit_load4f_3, + emit_load4f_4 } +}; + +static void emit_load( struct x86_program *p, + struct x86_reg dest, + unsigned sz, + struct x86_reg src, + unsigned src_sz) +{ + load[sz-1][src_sz-1](p, dest, src); +} + +static void emit_store4f( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + sse_movups(&p->func, dest, arg0); +} + +static void emit_store3f( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + if (p->outputs_safe) { + /* Emit the extra dword anyway. This may hurt writecombining, + * may cause other problems. + */ + sse_movups(&p->func, dest, arg0); + } + else { + /* Alternate strategy - emit two, shuffle, emit one. + */ + sse_movlps(&p->func, dest, arg0); + sse_shufps(&p->func, arg0, arg0, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */ + sse_movss(&p->func, x86_make_disp(dest,8), arg0); + } +} + +static void emit_store2f( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + sse_movlps(&p->func, dest, arg0); +} + +static void emit_store1f( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + sse_movss(&p->func, dest, arg0); +} + + +static void (*store[4])( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) = +{ + emit_store1f, + emit_store2f, + emit_store3f, + emit_store4f +}; + +static void emit_store( struct x86_program *p, + struct x86_reg dest, + unsigned sz, + struct x86_reg temp ) + +{ + store[sz-1](p, dest, temp); +} + +static void emit_pack_store_4ub( struct x86_program *p, + struct x86_reg dest, + struct x86_reg temp ) +{ + /* Scale by 255.0 + */ + sse_mulps(&p->func, temp, p->chan0); + + if (p->have_sse2) { + sse2_cvtps2dq(&p->func, temp, temp); + sse2_packssdw(&p->func, temp, temp); + sse2_packuswb(&p->func, temp, temp); + sse_movss(&p->func, dest, temp); + } + else { + struct x86_reg mmx0 = x86_make_reg(file_MMX, 0); + struct x86_reg mmx1 = x86_make_reg(file_MMX, 1); + sse_cvtps2pi(&p->func, mmx0, temp); + sse_movhlps(&p->func, temp, temp); + sse_cvtps2pi(&p->func, mmx1, temp); + mmx_packssdw(&p->func, mmx0, mmx1); + mmx_packuswb(&p->func, mmx0, mmx0); + mmx_movd(&p->func, dest, mmx0); + } +} + +static int get_offset( const void *a, const void *b ) +{ + return (const char *)b - (const char *)a; +} + +/* Not much happens here. Eventually use this function to try and + * avoid saving/reloading the source pointers each vertex (if some of + * them can fit in registers). + */ +static void get_src_ptr( struct x86_program *p, + struct x86_reg srcREG, + struct x86_reg vfREG, + struct draw_vf_attr *a ) +{ + struct draw_vertex_fetch *vf = p->vf; + struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr)); + + /* Load current a[j].inputptr + */ + x86_mov(&p->func, srcREG, ptr_to_src); +} + +static void update_src_ptr( struct x86_program *p, + struct x86_reg srcREG, + struct x86_reg vfREG, + struct draw_vf_attr *a ) +{ + if (a->inputstride) { + struct draw_vertex_fetch *vf = p->vf; + struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr)); + + /* add a[j].inputstride (hardcoded value - could just as easily + * pull the stride value from memory each time). + */ + x86_lea(&p->func, srcREG, x86_make_disp(srcREG, a->inputstride)); + + /* save new value of a[j].inputptr + */ + x86_mov(&p->func, ptr_to_src, srcREG); + } +} + + +/* Lots of hardcoding + * + * EAX -- pointer to current output vertex + * ECX -- pointer to current attribute + * + */ +static boolean build_vertex_emit( struct x86_program *p ) +{ + struct draw_vertex_fetch *vf = p->vf; + unsigned j = 0; + + struct x86_reg vertexEAX = x86_make_reg(file_REG32, reg_AX); + struct x86_reg srcECX = x86_make_reg(file_REG32, reg_CX); + struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP); + struct x86_reg vfESI = x86_make_reg(file_REG32, reg_SI); + struct x86_reg temp = x86_make_reg(file_XMM, 0); + uint8_t *fixup, *label; + + /* Push a few regs? + */ + x86_push(&p->func, countEBP); + x86_push(&p->func, vfESI); + + + /* Get vertex count, compare to zero + */ + x86_xor(&p->func, srcECX, srcECX); + x86_mov(&p->func, countEBP, x86_fn_arg(&p->func, 2)); + x86_cmp(&p->func, countEBP, srcECX); + fixup = x86_jcc_forward(&p->func, cc_E); + + /* Initialize destination register. + */ + x86_mov(&p->func, vertexEAX, x86_fn_arg(&p->func, 3)); + + /* Move argument 1 (vf) into a reg: + */ + x86_mov(&p->func, vfESI, x86_fn_arg(&p->func, 1)); + + + /* always load, needed or not: + */ + sse_movups(&p->func, p->identity, x86_make_disp(vfESI, get_offset(vf, &vf->identity[0]))); + + /* Note address for loop jump */ + label = x86_get_label(&p->func); + + /* Emit code for each of the attributes. Currently routes + * everything through SSE registers, even when it might be more + * efficient to stick with regular old x86. No optimization or + * other tricks - enough new ground to cover here just getting + * things working. + */ + while (j < vf->attr_count) { + struct draw_vf_attr *a = &vf->attr[j]; + struct x86_reg dest = x86_make_disp(vertexEAX, a->vertoffset); + + /* Now, load an XMM reg from src, perhaps transform, then save. + * Could be shortcircuited in specific cases: + */ + switch (a->format) { + case DRAW_EMIT_1F: + case DRAW_EMIT_1F_CONST: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize); + emit_store(p, dest, 1, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case DRAW_EMIT_2F: + case DRAW_EMIT_2F_CONST: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize); + emit_store(p, dest, 2, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case DRAW_EMIT_3F: + case DRAW_EMIT_3F_CONST: + /* Potentially the worst case - hardcode 2+1 copying: + */ + if (0) { + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); + emit_store(p, dest, 3, temp); + update_src_ptr(p, srcECX, vfESI, a); + } + else { + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize); + emit_store(p, dest, 2, temp); + if (a->inputsize > 2) { + emit_load(p, temp, 1, x86_make_disp(srcECX, 8), 1); + emit_store(p, x86_make_disp(dest,8), 1, temp); + } + else { + sse_movss(&p->func, x86_make_disp(dest,8), get_identity(p)); + } + update_src_ptr(p, srcECX, vfESI, a); + } + break; + case DRAW_EMIT_4F: + case DRAW_EMIT_4F_CONST: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + emit_store(p, dest, 4, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case DRAW_EMIT_3F_XYW: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + sse_shufps(&p->func, temp, temp, SHUF(X,Y,W,Z)); + emit_store(p, dest, 3, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + + case DRAW_EMIT_1UB_1F: + /* Test for PAD3 + 1UB: + */ + if (j > 0 && + a[-1].vertoffset + a[-1].vertattrsize <= a->vertoffset - 3) + { + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize); + sse_shufps(&p->func, temp, temp, SHUF(X,X,X,X)); + emit_pack_store_4ub(p, x86_make_disp(dest, -3), temp); /* overkill! */ + update_src_ptr(p, srcECX, vfESI, a); + } + else { + debug_printf("Can't emit 1ub %x %x %d\n", + a->vertoffset, a[-1].vertoffset, a[-1].vertattrsize ); + return FALSE; + } + break; + case DRAW_EMIT_3UB_3F_RGB: + case DRAW_EMIT_3UB_3F_BGR: + /* Test for 3UB + PAD1: + */ + if (j == vf->attr_count - 1 || + a[1].vertoffset >= a->vertoffset + 4) { + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); + if (a->format == DRAW_EMIT_3UB_3F_BGR) + sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W)); + emit_pack_store_4ub(p, dest, temp); + update_src_ptr(p, srcECX, vfESI, a); + } + /* Test for 3UB + 1UB: + */ + else if (j < vf->attr_count - 1 && + a[1].format == DRAW_EMIT_1UB_1F && + a[1].vertoffset == a->vertoffset + 3) { + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); + update_src_ptr(p, srcECX, vfESI, a); + + /* Make room for incoming value: + */ + sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z)); + + get_src_ptr(p, srcECX, vfESI, &a[1]); + emit_load(p, temp, 1, x86_deref(srcECX), a[1].inputsize); + update_src_ptr(p, srcECX, vfESI, &a[1]); + + /* Rearrange and possibly do BGR conversion: + */ + if (a->format == DRAW_EMIT_3UB_3F_BGR) + sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X)); + else + sse_shufps(&p->func, temp, temp, SHUF(Y,Z,W,X)); + + emit_pack_store_4ub(p, dest, temp); + j++; /* NOTE: two attrs consumed */ + } + else { + debug_printf("Can't emit 3ub\n"); + } + return FALSE; /* add this later */ + break; + + case DRAW_EMIT_4UB_4F_RGBA: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + emit_pack_store_4ub(p, dest, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case DRAW_EMIT_4UB_4F_BGRA: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W)); + emit_pack_store_4ub(p, dest, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case DRAW_EMIT_4UB_4F_ARGB: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z)); + emit_pack_store_4ub(p, dest, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case DRAW_EMIT_4UB_4F_ABGR: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X)); + emit_pack_store_4ub(p, dest, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + default: + debug_printf("unknown a[%d].format %d\n", j, a->format); + return FALSE; /* catch any new opcodes */ + } + + /* Increment j by at least 1 - may have been incremented above also: + */ + j++; + } + + /* Next vertex: + */ + x86_lea(&p->func, vertexEAX, x86_make_disp(vertexEAX, vf->vertex_stride)); + + /* decr count, loop if not zero + */ + x86_dec(&p->func, countEBP); + x86_test(&p->func, countEBP, countEBP); + x86_jcc(&p->func, cc_NZ, label); + + /* Exit mmx state? + */ + if (p->func.need_emms) + mmx_emms(&p->func); + + /* Land forward jump here: + */ + x86_fixup_fwd_jump(&p->func, fixup); + + /* Pop regs and return + */ + x86_pop(&p->func, x86_get_base_reg(vfESI)); + x86_pop(&p->func, countEBP); + x86_ret(&p->func); + + vf->emit = (draw_vf_emit_func)x86_get_func(&p->func); + return TRUE; +} + + + +void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ) +{ + struct x86_program p; + + if (!cpu_has_xmm) { + vf->codegen_emit = NULL; + return; + } + + memset(&p, 0, sizeof(p)); + + p.vf = vf; + p.inputs_safe = 0; /* for now */ + p.outputs_safe = 1; /* for now */ + p.have_sse2 = cpu_has_xmm2; + p.identity = x86_make_reg(file_XMM, 6); + p.chan0 = x86_make_reg(file_XMM, 7); + + x86_init_func(&p.func); + + if (build_vertex_emit(&p)) { + draw_vf_register_fastpath( vf, TRUE ); + } + else { + /* Note the failure so that we don't keep trying to codegen an + * impossible state: + */ + draw_vf_register_fastpath( vf, FALSE ); + x86_release_func(&p.func); + } +} + +#else + +void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ) +{ + /* Dummy version for when USE_SSE_ASM not defined */ +} + +#endif diff --git a/src/mesa/pipe/failover/fo_context.c b/src/mesa/pipe/failover/fo_context.c index cf6c9fed50..7ce4a7df17 100644 --- a/src/mesa/pipe/failover/fo_context.c +++ b/src/mesa/pipe/failover/fo_context.c @@ -114,6 +114,8 @@ struct pipe_context *failover_create( struct pipe_context *hw, if (failover == NULL) return NULL; + failover->hw = hw; + failover->sw = sw; failover->pipe.winsys = hw->winsys; failover->pipe.destroy = failover_destroy; failover->pipe.is_format_supported = hw->is_format_supported; diff --git a/src/mesa/pipe/failover/fo_state.c b/src/mesa/pipe/failover/fo_state.c index fa700b9674..0fc5568da1 100644 --- a/src/mesa/pipe/failover/fo_state.c +++ b/src/mesa/pipe/failover/fo_state.c @@ -54,8 +54,8 @@ failover_create_blend_state( struct pipe_context *pipe, struct fo_state *state = malloc(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); - state->sw_state = failover->sw->create_blend_state(pipe, blend); - state->hw_state = failover->hw->create_blend_state(pipe, blend); + state->sw_state = failover->sw->create_blend_state(failover->sw, blend); + state->hw_state = failover->hw->create_blend_state(failover->hw, blend); return state; } @@ -68,6 +68,7 @@ failover_bind_blend_state( struct pipe_context *pipe, struct fo_state *state = (struct fo_state *)blend; failover->blend = state; failover->dirty |= FO_NEW_BLEND; + failover->sw->bind_blend_state( failover->sw, state->sw_state ); failover->hw->bind_blend_state( failover->hw, state->hw_state ); } @@ -78,8 +79,8 @@ failover_delete_blend_state( struct pipe_context *pipe, struct fo_state *state = (struct fo_state*)blend; struct failover_context *failover = failover_context(pipe); - failover->sw->delete_blend_state(pipe, state->sw_state); - failover->hw->delete_blend_state(pipe, state->hw_state); + failover->sw->delete_blend_state(failover->sw, state->sw_state); + failover->hw->delete_blend_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; free(state); @@ -93,6 +94,7 @@ failover_set_blend_color( struct pipe_context *pipe, failover->blend_color = *blend_color; failover->dirty |= FO_NEW_BLEND_COLOR; + failover->sw->set_blend_color( failover->sw, blend_color ); failover->hw->set_blend_color( failover->hw, blend_color ); } @@ -104,6 +106,7 @@ failover_set_clip_state( struct pipe_context *pipe, failover->clip = *clip; failover->dirty |= FO_NEW_CLIP; + failover->sw->set_clip_state( failover->sw, clip ); failover->hw->set_clip_state( failover->hw, clip ); } @@ -115,8 +118,8 @@ failover_create_depth_stencil_state(struct pipe_context *pipe, struct fo_state *state = malloc(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); - state->sw_state = failover->sw->create_depth_stencil_alpha_state(pipe, templ); - state->hw_state = failover->hw->create_depth_stencil_alpha_state(pipe, templ); + state->sw_state = failover->sw->create_depth_stencil_alpha_state(failover->sw, templ); + state->hw_state = failover->hw->create_depth_stencil_alpha_state(failover->hw, templ); return state; } @@ -129,6 +132,7 @@ failover_bind_depth_stencil_state(struct pipe_context *pipe, struct fo_state *state = (struct fo_state *)depth_stencil; failover->depth_stencil = state; failover->dirty |= FO_NEW_DEPTH_STENCIL; + failover->sw->bind_depth_stencil_alpha_state(failover->sw, state->sw_state); failover->hw->bind_depth_stencil_alpha_state(failover->hw, state->hw_state); } @@ -139,8 +143,8 @@ failover_delete_depth_stencil_state(struct pipe_context *pipe, struct fo_state *state = (struct fo_state*)ds; struct failover_context *failover = failover_context(pipe); - failover->sw->delete_depth_stencil_alpha_state(pipe, state->sw_state); - failover->hw->delete_depth_stencil_alpha_state(pipe, state->hw_state); + failover->sw->delete_depth_stencil_alpha_state(failover->sw, state->sw_state); + failover->hw->delete_depth_stencil_alpha_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; free(state); @@ -154,6 +158,7 @@ failover_set_framebuffer_state(struct pipe_context *pipe, failover->framebuffer = *framebuffer; failover->dirty |= FO_NEW_FRAMEBUFFER; + failover->sw->set_framebuffer_state( failover->sw, framebuffer ); failover->hw->set_framebuffer_state( failover->hw, framebuffer ); } @@ -165,8 +170,8 @@ failover_create_fs_state(struct pipe_context *pipe, struct fo_state *state = malloc(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); - state->sw_state = failover->sw->create_fs_state(pipe, templ); - state->hw_state = failover->hw->create_fs_state(pipe, templ); + state->sw_state = failover->sw->create_fs_state(failover->sw, templ); + state->hw_state = failover->hw->create_fs_state(failover->hw, templ); return state; } @@ -178,6 +183,7 @@ failover_bind_fs_state(struct pipe_context *pipe, void *fs) struct fo_state *state = (struct fo_state*)fs; failover->fragment_shader = state; failover->dirty |= FO_NEW_FRAGMENT_SHADER; + failover->sw->bind_fs_state(failover->sw, state->sw_state); failover->hw->bind_fs_state(failover->hw, state->hw_state); } @@ -188,8 +194,8 @@ failover_delete_fs_state(struct pipe_context *pipe, struct fo_state *state = (struct fo_state*)fs; struct failover_context *failover = failover_context(pipe); - failover->sw->delete_fs_state(pipe, state->sw_state); - failover->hw->delete_fs_state(pipe, state->hw_state); + failover->sw->delete_fs_state(failover->sw, state->sw_state); + failover->hw->delete_fs_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; free(state); @@ -202,8 +208,8 @@ failover_create_vs_state(struct pipe_context *pipe, struct fo_state *state = malloc(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); - state->sw_state = failover->sw->create_vs_state(pipe, templ); - state->hw_state = failover->hw->create_vs_state(pipe, templ); + state->sw_state = failover->sw->create_vs_state(failover->sw, templ); + state->hw_state = failover->hw->create_vs_state(failover->hw, templ); return state; } @@ -217,6 +223,7 @@ failover_bind_vs_state(struct pipe_context *pipe, struct fo_state *state = (struct fo_state*)vs; failover->vertex_shader = state; failover->dirty |= FO_NEW_VERTEX_SHADER; + failover->sw->bind_vs_state(failover->sw, state->sw_state); failover->hw->bind_vs_state(failover->hw, state->hw_state); } @@ -227,8 +234,8 @@ failover_delete_vs_state(struct pipe_context *pipe, struct fo_state *state = (struct fo_state*)vs; struct failover_context *failover = failover_context(pipe); - failover->sw->delete_vs_state(pipe, state->sw_state); - failover->hw->delete_vs_state(pipe, state->hw_state); + failover->sw->delete_vs_state(failover->sw, state->sw_state); + failover->hw->delete_vs_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; free(state); @@ -242,6 +249,7 @@ failover_set_polygon_stipple( struct pipe_context *pipe, failover->poly_stipple = *stipple; failover->dirty |= FO_NEW_STIPPLE; + failover->sw->set_polygon_stipple( failover->sw, stipple ); failover->hw->set_polygon_stipple( failover->hw, stipple ); } @@ -253,8 +261,8 @@ failover_create_rasterizer_state(struct pipe_context *pipe, struct fo_state *state = malloc(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); - state->sw_state = failover->sw->create_rasterizer_state(pipe, templ); - state->hw_state = failover->hw->create_rasterizer_state(pipe, templ); + state->sw_state = failover->sw->create_rasterizer_state(failover->sw, templ); + state->hw_state = failover->hw->create_rasterizer_state(failover->hw, templ); return state; } @@ -268,6 +276,7 @@ failover_bind_rasterizer_state(struct pipe_context *pipe, struct fo_state *state = (struct fo_state*)raster; failover->rasterizer = state; failover->dirty |= FO_NEW_RASTERIZER; + failover->sw->bind_rasterizer_state(failover->sw, state->sw_state); failover->hw->bind_rasterizer_state(failover->hw, state->hw_state); } @@ -278,8 +287,8 @@ failover_delete_rasterizer_state(struct pipe_context *pipe, struct fo_state *state = (struct fo_state*)raster; struct failover_context *failover = failover_context(pipe); - failover->sw->delete_rasterizer_state(pipe, state->sw_state); - failover->hw->delete_rasterizer_state(pipe, state->hw_state); + failover->sw->delete_rasterizer_state(failover->sw, state->sw_state); + failover->hw->delete_rasterizer_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; free(state); @@ -294,6 +303,7 @@ failover_set_scissor_state( struct pipe_context *pipe, failover->scissor = *scissor; failover->dirty |= FO_NEW_SCISSOR; + failover->sw->set_scissor_state( failover->sw, scissor ); failover->hw->set_scissor_state( failover->hw, scissor ); } @@ -305,8 +315,8 @@ failover_create_sampler_state(struct pipe_context *pipe, struct fo_state *state = malloc(sizeof(struct fo_state)); struct failover_context *failover = failover_context(pipe); - state->sw_state = failover->sw->create_sampler_state(pipe, templ); - state->hw_state = failover->hw->create_sampler_state(pipe, templ); + state->sw_state = failover->sw->create_sampler_state(failover->sw, templ); + state->hw_state = failover->hw->create_sampler_state(failover->hw, templ); return state; } @@ -320,6 +330,8 @@ failover_bind_sampler_state(struct pipe_context *pipe, failover->sampler[unit] = state; failover->dirty |= FO_NEW_SAMPLER; failover->dirty_sampler |= (1<<unit); + failover->sw->bind_sampler_state(failover->sw, unit, + state->sw_state); failover->hw->bind_sampler_state(failover->hw, unit, state->hw_state); } @@ -330,8 +342,8 @@ failover_delete_sampler_state(struct pipe_context *pipe, void *sampler) struct fo_state *state = (struct fo_state*)sampler; struct failover_context *failover = failover_context(pipe); - failover->sw->delete_sampler_state(pipe, state->sw_state); - failover->hw->delete_sampler_state(pipe, state->hw_state); + failover->sw->delete_sampler_state(failover->sw, state->sw_state); + failover->hw->delete_sampler_state(failover->hw, state->hw_state); state->sw_state = 0; state->hw_state = 0; free(state); @@ -348,6 +360,7 @@ failover_set_sampler_texture(struct pipe_context *pipe, failover->texture[unit] = texture; failover->dirty |= FO_NEW_TEXTURE; failover->dirty_texture |= (1<<unit); + failover->sw->set_sampler_texture( failover->sw, unit, texture ); failover->hw->set_sampler_texture( failover->hw, unit, texture ); } @@ -360,6 +373,7 @@ failover_set_viewport_state( struct pipe_context *pipe, failover->viewport = *viewport; failover->dirty |= FO_NEW_VIEWPORT; + failover->sw->set_viewport_state( failover->sw, viewport ); failover->hw->set_viewport_state( failover->hw, viewport ); } @@ -374,6 +388,7 @@ failover_set_vertex_buffer(struct pipe_context *pipe, failover->vertex_buffer[unit] = *vertex_buffer; failover->dirty |= FO_NEW_VERTEX_BUFFER; failover->dirty_vertex_buffer |= (1<<unit); + failover->sw->set_vertex_buffer( failover->sw, unit, vertex_buffer ); failover->hw->set_vertex_buffer( failover->hw, unit, vertex_buffer ); } @@ -388,9 +403,24 @@ failover_set_vertex_element(struct pipe_context *pipe, failover->vertex_element[unit] = *vertex_element; failover->dirty |= FO_NEW_VERTEX_ELEMENT; failover->dirty_vertex_element |= (1<<unit); + failover->sw->set_vertex_element( failover->sw, unit, vertex_element ); failover->hw->set_vertex_element( failover->hw, unit, vertex_element ); } +void +failover_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct failover_context *failover = failover_context(pipe); + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + failover->sw->set_constant_buffer(failover->sw, shader, index, buf); + failover->hw->set_constant_buffer(failover->hw, shader, index, buf); +} + void failover_init_state_functions( struct failover_context *failover ) @@ -423,4 +453,5 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.set_viewport_state = failover_set_viewport_state; failover->pipe.set_vertex_buffer = failover_set_vertex_buffer; failover->pipe.set_vertex_element = failover_set_vertex_element; + failover->pipe.set_constant_buffer = failover_set_constant_buffer; } diff --git a/src/mesa/pipe/i915simple/SConscript b/src/mesa/pipe/i915simple/SConscript new file mode 100644 index 0000000000..f5fb96b995 --- /dev/null +++ b/src/mesa/pipe/i915simple/SConscript @@ -0,0 +1,29 @@ +Import('*') + +env = env.Clone() + +i915simple = env.ConvenienceLibrary( + target = 'i915simple', + source = [ + 'i915_blit.c', + 'i915_clear.c', + 'i915_context.c', + 'i915_debug.c', + 'i915_debug_fp.c', + 'i915_flush.c', + 'i915_fpc_emit.c', + 'i915_fpc_translate.c', + 'i915_prim_emit.c', + 'i915_prim_vbuf.c', + 'i915_state.c', + 'i915_state_derived.c', + 'i915_state_dynamic.c', + 'i915_state_emit.c', + 'i915_state_immediate.c', + 'i915_state_sampler.c', + 'i915_strings.c', + 'i915_surface.c', + 'i915_texture.c', + ]) + +Export('i915simple') diff --git a/src/mesa/pipe/i915simple/i915_fpc_translate.c b/src/mesa/pipe/i915simple/i915_fpc_translate.c index 0185512aeb..868f0c7e04 100644 --- a/src/mesa/pipe/i915simple/i915_fpc_translate.c +++ b/src/mesa/pipe/i915simple/i915_fpc_translate.c @@ -100,7 +100,7 @@ negate(int reg, int x, int y, int z, int w) static void i915_use_passthrough_shader(struct i915_context *i915) { - fprintf(stderr, "**** Using i915 pass-through fragment shader\n"); + debug_printf("**** Using i915 pass-through fragment shader\n"); i915->current.program = (uint *) MALLOC(sizeof(passthrough)); if (i915->current.program) { @@ -119,12 +119,12 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...) va_list args; char buffer[1024]; - fprintf(stderr, "i915_program_error: "); + debug_printf("i915_program_error: "); va_start( args, msg ); vsprintf( buffer, msg, args ); va_end( args ); - fprintf(stderr, buffer); - fprintf(stderr, "\n"); + debug_printf(buffer); + debug_printf("\n"); p->error = 1; } @@ -169,7 +169,7 @@ src_vector(struct i915_fp_compile *p, switch (sem_name) { case TGSI_SEMANTIC_POSITION: - fprintf(stderr, "SKIP SEM POS\n"); + debug_printf("SKIP SEM POS\n"); /* assert(p->wpos_tex != -1); src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL); @@ -913,7 +913,7 @@ i915_translate_instructions(struct i915_fp_compile *p, ind = parse.FullToken.FullDeclaration.u.DeclarationRange.First; sem = parse.FullToken.FullDeclaration.Semantic.SemanticName; semi = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; - /*printf("FS Input DECL [%u] sem %u\n", ind, sem);*/ + /*debug_printf("FS Input DECL [%u] sem %u\n", ind, sem);*/ p->input_semantic_name[ind] = sem; p->input_semantic_index[ind] = semi; } @@ -924,7 +924,7 @@ i915_translate_instructions(struct i915_fp_compile *p, ind = parse.FullToken.FullDeclaration.u.DeclarationRange.First; sem = parse.FullToken.FullDeclaration.Semantic.SemanticName; semi = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; - /*printf("FS Output DECL [%u] sem %u\n", ind, sem);*/ + /*debug_printf("FS Output DECL [%u] sem %u\n", ind, sem);*/ p->output_semantic_name[ind] = sem; p->output_semantic_index[ind] = semi; } diff --git a/src/mesa/pipe/i915simple/i915_prim_vbuf.c b/src/mesa/pipe/i915simple/i915_prim_vbuf.c index 39154b2488..e069773fd4 100644 --- a/src/mesa/pipe/i915simple/i915_prim_vbuf.c +++ b/src/mesa/pipe/i915simple/i915_prim_vbuf.c @@ -38,9 +38,8 @@ */ -#include <assert.h> - #include "pipe/draw/draw_vbuf.h" +#include "pipe/p_debug.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" diff --git a/src/mesa/pipe/i915simple/i915_state_derived.c b/src/mesa/pipe/i915simple/i915_state_derived.c index 62741e30f8..653983e4a9 100644 --- a/src/mesa/pipe/i915simple/i915_state_derived.c +++ b/src/mesa/pipe/i915simple/i915_state_derived.c @@ -87,7 +87,7 @@ static void calculate_vertex_layout( struct i915_context *i915 ) } break; case TGSI_SEMANTIC_FOG: - fprintf(stderr, "i915 fogcoord not implemented yet\n"); + debug_printf("i915 fogcoord not implemented yet\n"); draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src++); break; default: diff --git a/src/mesa/pipe/i915simple/i915_state_emit.c b/src/mesa/pipe/i915simple/i915_state_emit.c index 657f523893..3339287f49 100644 --- a/src/mesa/pipe/i915simple/i915_state_emit.c +++ b/src/mesa/pipe/i915simple/i915_state_emit.c @@ -107,7 +107,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) ) * 3/2; /* plus 50% margin */ #if 0 - fprintf (stderr, "i915_emit_hardware_state: %d dwords, %d relocs\n", dwords, relocs); + debug_printf("i915_emit_hardware_state: %d dwords, %d relocs\n", dwords, relocs); #endif if(!BEGIN_BATCH(dwords, relocs)) { diff --git a/src/mesa/pipe/i915simple/i915_state_immediate.c b/src/mesa/pipe/i915simple/i915_state_immediate.c index 752d25f233..07031fc6c5 100644 --- a/src/mesa/pipe/i915simple/i915_state_immediate.c +++ b/src/mesa/pipe/i915simple/i915_state_immediate.c @@ -97,7 +97,7 @@ static void upload_S2S4(struct i915_context *i915) LIS2 = i915->current.vertex_info.hwfmt[1]; LIS4 = i915->current.vertex_info.hwfmt[0]; /* - printf("LIS2: 0x%x LIS4: 0x%x\n", LIS2, LIS4); + debug_printf("LIS2: 0x%x LIS4: 0x%x\n", LIS2, LIS4); */ assert(LIS4); /* should never be zero? */ } diff --git a/src/mesa/pipe/i915simple/i915_state_sampler.c b/src/mesa/pipe/i915simple/i915_state_sampler.c index 59408b6ba0..0dbbc5241d 100644 --- a/src/mesa/pipe/i915simple/i915_state_sampler.c +++ b/src/mesa/pipe/i915simple/i915_state_sampler.c @@ -169,7 +169,7 @@ translate_texture_format(enum pipe_format pipeFormat) case PIPE_FORMAT_S8Z24_UNORM: return (MAPSURF_32BIT | MT_32BIT_xL824); default: - fprintf(stderr, "i915: translate_texture_format() bad image format %x\n", + debug_printf("i915: translate_texture_format() bad image format %x\n", pipeFormat); assert(0); return 0; diff --git a/src/mesa/pipe/i915simple/i915_texture.c b/src/mesa/pipe/i915simple/i915_texture.c index 61944fe7d9..6faeab134a 100644 --- a/src/mesa/pipe/i915simple/i915_texture.c +++ b/src/mesa/pipe/i915simple/i915_texture.c @@ -477,17 +477,17 @@ i945_miptree_layout(struct pipe_context *pipe, struct i915_texture * tex) return TRUE; } -void -i915_texture_create(struct pipe_context *pipe, struct pipe_texture **pt) + +struct pipe_texture * +i915_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat) { - struct i915_texture *tex = REALLOC(*pt, sizeof(struct pipe_texture), - sizeof(struct i915_texture)); + struct i915_texture *tex = CALLOC_STRUCT(i915_texture); if (tex) { struct i915_context *i915 = i915_context(pipe); - memset(&tex->base + 1, 0, - sizeof(struct i915_texture) - sizeof(struct pipe_texture)); + tex->base = *templat; if (i915->flags.is_i945 ? i945_miptree_layout(pipe, tex) : i915_miptree_layout(pipe, tex)) @@ -498,13 +498,14 @@ i915_texture_create(struct pipe_context *pipe, struct pipe_texture **pt) if (!tex->buffer) { FREE(tex); - tex = NULL; + return NULL; } } - *pt = &tex->base; + return &tex->base; } + void i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) { diff --git a/src/mesa/pipe/i915simple/i915_texture.h b/src/mesa/pipe/i915simple/i915_texture.h index 84a0502e81..330d111dc7 100644 --- a/src/mesa/pipe/i915simple/i915_texture.h +++ b/src/mesa/pipe/i915simple/i915_texture.h @@ -6,8 +6,9 @@ struct pipe_context; struct pipe_texture; -extern void -i915_texture_create(struct pipe_context *pipe, struct pipe_texture **pt); +struct pipe_texture * +i915_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat); extern void i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); diff --git a/src/mesa/pipe/i965simple/SConscript b/src/mesa/pipe/i965simple/SConscript new file mode 100644 index 0000000000..74621de84c --- /dev/null +++ b/src/mesa/pipe/i965simple/SConscript @@ -0,0 +1,55 @@ +Import('*') + +env = env.Clone() + +i965simple = env.ConvenienceLibrary( + target = 'i965simple', + source = [ + 'brw_blit.c', + 'brw_cc.c', + 'brw_clip.c', + 'brw_clip_line.c', + 'brw_clip_point.c', + 'brw_clip_state.c', + 'brw_clip_tri.c', + 'brw_clip_util.c', + 'brw_context.c', + 'brw_curbe.c', + 'brw_draw.c', + 'brw_draw_upload.c', + 'brw_eu.c', + 'brw_eu_debug.c', + 'brw_eu_emit.c', + 'brw_eu_util.c', + 'brw_flush.c', + 'brw_gs.c', + 'brw_gs_emit.c', + 'brw_gs_state.c', + 'brw_misc_state.c', + 'brw_sf.c', + 'brw_sf_emit.c', + 'brw_sf_state.c', + 'brw_shader_info.c', + 'brw_state.c', + 'brw_state_batch.c', + 'brw_state_cache.c', + 'brw_state_pool.c', + 'brw_state_upload.c', + 'brw_strings.c', + 'brw_surface.c', + 'brw_tex_layout.c', + 'brw_urb.c', + 'brw_util.c', + 'brw_vs.c', + 'brw_vs_emit.c', + 'brw_vs_state.c', + 'brw_wm.c', + 'brw_wm_decl.c', + 'brw_wm_glsl.c', + 'brw_wm_iz.c', + 'brw_wm_sampler_state.c', + 'brw_wm_state.c', + 'brw_wm_surface_state.c', + ]) + +Export('i965simple') diff --git a/src/mesa/pipe/i965simple/brw_cc.c b/src/mesa/pipe/i965simple/brw_cc.c index dcee731895..337e4f95f6 100644 --- a/src/mesa/pipe/i965simple/brw_cc.c +++ b/src/mesa/pipe/i965simple/brw_cc.c @@ -58,7 +58,7 @@ static int brw_translate_compare_func(int func) return BRW_COMPAREFUNCTION_ALWAYS; } - fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); + debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func); return BRW_COMPAREFUNCTION_ALWAYS; } diff --git a/src/mesa/pipe/i965simple/brw_curbe.c b/src/mesa/pipe/i965simple/brw_curbe.c index 2733eb4e75..52bbd525c1 100644 --- a/src/mesa/pipe/i965simple/brw_curbe.c +++ b/src/mesa/pipe/i965simple/brw_curbe.c @@ -273,10 +273,10 @@ static void upload_constant_buffer(struct brw_context *brw) if (1) { for (i = 0; i < sz; i+=4) - _mesa_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, + debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, buf[i+0], buf[i+1], buf[i+2], buf[i+3]); - _mesa_printf("last_buf %p buf %p sz %d/%d cmp %d\n", + debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n", brw->curbe.last_buf, buf, bufsz, brw->curbe.last_bufsz, brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); @@ -299,7 +299,7 @@ static void upload_constant_buffer(struct brw_context *brw) bufsz, 1 << 6, &brw->curbe.gs_offset)) { - _mesa_printf("out of GS memory for curbe\n"); + debug_printf("out of GS memory for curbe\n"); assert(0); return; } diff --git a/src/mesa/pipe/i965simple/brw_eu_debug.c b/src/mesa/pipe/i965simple/brw_eu_debug.c index be692f6502..4a94ddefa6 100644 --- a/src/mesa/pipe/i965simple/brw_eu_debug.c +++ b/src/mesa/pipe/i965simple/brw_eu_debug.c @@ -30,6 +30,8 @@ */ +#include "pipe/p_debug.h" + #include "brw_eu.h" void brw_print_reg( struct brw_reg hwreg ) @@ -52,7 +54,7 @@ void brw_print_reg( struct brw_reg hwreg ) "f" }; - _mesa_printf("%s%s", + debug_printf("%s%s", hwreg.abs ? "abs/" : "", hwreg.negate ? "-" : ""); @@ -63,17 +65,17 @@ void brw_print_reg( struct brw_reg hwreg ) hwreg.width == BRW_WIDTH_8 && hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && hwreg.type == BRW_REGISTER_TYPE_F) { - _mesa_printf("vec%d", hwreg.nr); + debug_printf("vec%d", hwreg.nr); } else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && hwreg.vstride == BRW_VERTICAL_STRIDE_0 && hwreg.width == BRW_WIDTH_1 && hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && hwreg.type == BRW_REGISTER_TYPE_F) { - _mesa_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); + debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); } else { - _mesa_printf("%s%d.%d<%d;%d,%d>:%s", + debug_printf("%s%d.%d<%d;%d,%d>:%s", file[hwreg.file], hwreg.nr, hwreg.subnr / type_sz(hwreg.type), diff --git a/src/mesa/pipe/i965simple/brw_eu_emit.c b/src/mesa/pipe/i965simple/brw_eu_emit.c index 2423536dd1..400a80b6fb 100644 --- a/src/mesa/pipe/i965simple/brw_eu_emit.c +++ b/src/mesa/pipe/i965simple/brw_eu_emit.c @@ -953,7 +953,7 @@ void brw_SAMPLE(struct brw_compile *p, boolean need_stall = 0; if(writemask == 0) { -/* _mesa_printf("%s: zero writemask??\n", __FUNCTION__); */ +/* debug_printf("%s: zero writemask??\n", __FUNCTION__); */ return; } @@ -985,7 +985,7 @@ void brw_SAMPLE(struct brw_compile *p, if (newmask != writemask) { need_stall = 1; -/* _mesa_printf("need stall %x %x\n", newmask , writemask); */ +/* debug_printf("need stall %x %x\n", newmask , writemask); */ } else { struct brw_reg m1 = brw_message_reg(msg_reg_nr); diff --git a/src/mesa/pipe/i965simple/brw_sf.c b/src/mesa/pipe/i965simple/brw_sf.c index b89b2e4087..7c83b81c85 100644 --- a/src/mesa/pipe/i965simple/brw_sf.c +++ b/src/mesa/pipe/i965simple/brw_sf.c @@ -175,7 +175,7 @@ static void upload_sf_prog( struct brw_context *brw ) //int semantic = parse.FullToken.FullDeclaration.Semantic.SemanticName; //int semantic_index = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; - fprintf(stderr, "fs input %d..%d interp mode %d\n", first, last, interp_mode); + debug_printf("fs input %d..%d interp mode %d\n", first, last, interp_mode); switch (interp_mode) { case TGSI_INTERPOLATE_CONSTANT: @@ -213,9 +213,9 @@ static void upload_sf_prog( struct brw_context *brw ) key.linear_mask |= 1; key.const_mask <<= 1; - fprintf(stderr, "key.persp_mask: %x\n", key.persp_mask); - fprintf(stderr, "key.linear_mask: %x\n", key.linear_mask); - fprintf(stderr, "key.const_mask: %x\n", key.const_mask); + debug_printf("key.persp_mask: %x\n", key.persp_mask); + debug_printf("key.linear_mask: %x\n", key.linear_mask); + debug_printf("key.const_mask: %x\n", key.const_mask); // key.do_point_sprite = brw->attribs.Point->PointSprite; diff --git a/src/mesa/pipe/i965simple/brw_sf_emit.c b/src/mesa/pipe/i965simple/brw_sf_emit.c index 6ff5254ff7..78d6fa5e9e 100644 --- a/src/mesa/pipe/i965simple/brw_sf_emit.c +++ b/src/mesa/pipe/i965simple/brw_sf_emit.c @@ -137,8 +137,8 @@ static boolean calculate_masks( struct brw_sf_compile *c, unsigned persp_mask = c->key.persp_mask; unsigned linear_mask = c->key.linear_mask; - fprintf(stderr, "persp_mask: %x\n", persp_mask); - fprintf(stderr, "linear_mask: %x\n", linear_mask); + debug_printf("persp_mask: %x\n", persp_mask); + debug_printf("linear_mask: %x\n", linear_mask); *pc_persp = 0; *pc_linear = 0; @@ -162,9 +162,9 @@ static boolean calculate_masks( struct brw_sf_compile *c, *pc_linear |= 0xf0; } - fprintf(stderr, "pc: %x\n", *pc); - fprintf(stderr, "pc_persp: %x\n", *pc_persp); - fprintf(stderr, "pc_linear: %x\n", *pc_linear); + debug_printf("pc: %x\n", *pc); + debug_printf("pc_persp: %x\n", *pc_persp); + debug_printf("pc_linear: %x\n", *pc_linear); return is_last_attr; @@ -177,7 +177,7 @@ void brw_emit_tri_setup( struct brw_sf_compile *c ) struct brw_compile *p = &c->func; unsigned i; - fprintf(stderr, "%s START ==============\n", __FUNCTION__); + debug_printf("%s START ==============\n", __FUNCTION__); c->nr_verts = 3; alloc_regs(c); @@ -250,7 +250,7 @@ void brw_emit_tri_setup( struct brw_sf_compile *c ) } } - fprintf(stderr, "%s DONE ==============\n", __FUNCTION__); + debug_printf("%s DONE ==============\n", __FUNCTION__); } diff --git a/src/mesa/pipe/i965simple/brw_state.c b/src/mesa/pipe/i965simple/brw_state.c index daf14ff4ff..95dfce88e4 100644 --- a/src/mesa/pipe/i965simple/brw_state.c +++ b/src/mesa/pipe/i965simple/brw_state.c @@ -225,7 +225,7 @@ static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) brw->attribs.VertexProgram = (struct brw_vertex_program *)vs; brw->state.dirty.brw |= BRW_NEW_VS; - printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n"); + debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n"); } static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) diff --git a/src/mesa/pipe/i965simple/brw_state_cache.c b/src/mesa/pipe/i965simple/brw_state_cache.c index c5738733f4..b3a5124461 100644 --- a/src/mesa/pipe/i965simple/brw_state_cache.c +++ b/src/mesa/pipe/i965simple/brw_state_cache.c @@ -149,7 +149,7 @@ unsigned brw_upload_cache( struct brw_cache *cache, if (!brw_pool_alloc(cache->pool, data_size, 1 << 6, &offset)) { /* Should not be possible: */ - printf("brw_pool_alloc failed\n"); + debug_printf("brw_pool_alloc failed\n"); exit(1); } @@ -177,7 +177,7 @@ unsigned brw_upload_cache( struct brw_cache *cache, } if (BRW_DEBUG & DEBUG_STATE) - printf("upload %s: %d bytes to pool buffer %p offset %x\n", + debug_printf("upload %s: %d bytes to pool buffer %p offset %x\n", cache->name, data_size, (void*)cache->pool->buffer, @@ -416,7 +416,7 @@ void brw_clear_all_caches( struct brw_context *brw ) int i; if (BRW_DEBUG & DEBUG_STATE) - fprintf(stderr, "%s\n", __FUNCTION__); + debug_printf("%s\n", __FUNCTION__); for (i = 0; i < BRW_MAX_CACHE; i++) clear_cache(&brw->cache[i]); diff --git a/src/mesa/pipe/i965simple/brw_state_pool.c b/src/mesa/pipe/i965simple/brw_state_pool.c index 7c67f0ee25..f3174bfe0a 100644 --- a/src/mesa/pipe/i965simple/brw_state_pool.c +++ b/src/mesa/pipe/i965simple/brw_state_pool.c @@ -58,7 +58,7 @@ boolean brw_pool_alloc( struct brw_mem_pool *pool, size = align(size, 4); if (pool->offset + fixup + size >= pool->size) { - printf("%s failed\n", __FUNCTION__); + debug_printf("%s failed\n", __FUNCTION__); assert(0); exit(0); } @@ -74,7 +74,7 @@ static void brw_invalidate_pool( struct brw_mem_pool *pool ) { if (BRW_DEBUG & DEBUG_STATE) - printf("\n\n\n %s \n\n\n", __FUNCTION__); + debug_printf("\n\n\n %s \n\n\n", __FUNCTION__); pool->offset = 0; diff --git a/src/mesa/pipe/i965simple/brw_tex_layout.c b/src/mesa/pipe/i965simple/brw_tex_layout.c index b8b6b579e2..405fd1f794 100644 --- a/src/mesa/pipe/i965simple/brw_tex_layout.c +++ b/src/mesa/pipe/i965simple/brw_tex_layout.c @@ -299,15 +299,14 @@ static boolean brw_miptree_layout(struct pipe_context *pipe, struct brw_texture return TRUE; } -void -brw_texture_create(struct pipe_context *pipe, struct pipe_texture **pt) + +struct pipe_texture * +brw_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat) { - struct brw_texture *tex = REALLOC(*pt, sizeof(struct pipe_texture), - sizeof(struct brw_texture)); + struct brw_texture *tex = CALLOC_STRUCT(brw_texture); if (tex) { - memset(&tex->base + 1, 0, - sizeof(struct brw_texture) - sizeof(struct pipe_texture)); + tex->base = *templat; if (brw_miptree_layout(pipe, tex)) tex->buffer = pipe->winsys->buffer_create(pipe->winsys, 64, @@ -317,11 +316,11 @@ brw_texture_create(struct pipe_context *pipe, struct pipe_texture **pt) if (!tex->buffer) { FREE(tex); - tex = NULL; + return NULL; } } - *pt = &tex->base; + return &tex->base; } void diff --git a/src/mesa/pipe/i965simple/brw_tex_layout.h b/src/mesa/pipe/i965simple/brw_tex_layout.h index 15e275058a..cfd6b1ef3a 100644 --- a/src/mesa/pipe/i965simple/brw_tex_layout.h +++ b/src/mesa/pipe/i965simple/brw_tex_layout.h @@ -6,8 +6,8 @@ struct pipe_context; struct pipe_texture; -extern void -brw_texture_create(struct pipe_context *pipe, struct pipe_texture **pt); +extern struct pipe_texture * +brw_texture_create(struct pipe_context *pipe, const struct pipe_texture *templat); extern void brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); diff --git a/src/mesa/pipe/i965simple/brw_urb.c b/src/mesa/pipe/i965simple/brw_urb.c index b284526aa6..101a4367b9 100644 --- a/src/mesa/pipe/i965simple/brw_urb.c +++ b/src/mesa/pipe/i965simple/brw_urb.c @@ -120,18 +120,18 @@ static void recalculate_urb_fence( struct brw_context *brw ) * entries and the values for minimum nr of entries * provided above. */ - fprintf(stderr, "couldn't calculate URB layout!\n"); + debug_printf("couldn't calculate URB layout!\n"); exit(1); } if (BRW_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) - printf("URB CONSTRAINED\n"); + debug_printf("URB CONSTRAINED\n"); } else brw->urb.constrained = 0; if (BRW_DEBUG & DEBUG_URB) - printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", + debug_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n", brw->urb.vs_start, brw->urb.gs_start, brw->urb.clip_start, diff --git a/src/mesa/pipe/i965simple/brw_vs_emit.c b/src/mesa/pipe/i965simple/brw_vs_emit.c index b32c233dd2..98915ba101 100644 --- a/src/mesa/pipe/i965simple/brw_vs_emit.c +++ b/src/mesa/pipe/i965simple/brw_vs_emit.c @@ -1228,7 +1228,7 @@ static void process_instruction(struct brw_vs_compile *c, case TGSI_OPCODE_ENDSUB: break; default: - printf("Unsupport opcode %d in vertex shader\n", inst->Instruction.Opcode); + debug_printf("Unsupport opcode %d in vertex shader\n", inst->Instruction.Opcode); break; } diff --git a/src/mesa/pipe/i965simple/brw_wm.c b/src/mesa/pipe/i965simple/brw_wm.c index 0ee0fbed51..539b170744 100644 --- a/src/mesa/pipe/i965simple/brw_wm.c +++ b/src/mesa/pipe/i965simple/brw_wm.c @@ -57,7 +57,7 @@ static void do_wm_prog( struct brw_context *brw, c->pixel_w = brw_null_reg(); - fprintf(stderr, "XXXXXXXX FP\n"); + debug_printf("XXXXXXXX FP\n"); brw_wm_glsl_emit(c); diff --git a/src/mesa/pipe/i965simple/brw_wm_glsl.c b/src/mesa/pipe/i965simple/brw_wm_glsl.c index f4b5c13c06..d95645d108 100644 --- a/src/mesa/pipe/i965simple/brw_wm_glsl.c +++ b/src/mesa/pipe/i965simple/brw_wm_glsl.c @@ -982,7 +982,7 @@ static void brw_wm_emit_instruction( struct brw_wm_compile *c, break; default: - _mesa_printf("unsupported IR in fragment shader %d\n", + debug_printf("unsupported IR in fragment shader %d\n", inst->Instruction.Opcode); } #if 0 diff --git a/src/mesa/pipe/i965simple/brw_wm_sampler_state.c b/src/mesa/pipe/i965simple/brw_wm_sampler_state.c index cfb430eb09..de42ffc5b1 100644 --- a/src/mesa/pipe/i965simple/brw_wm_sampler_state.c +++ b/src/mesa/pipe/i965simple/brw_wm_sampler_state.c @@ -71,7 +71,7 @@ static int intel_translate_shadow_compare_func(unsigned func) return COMPAREFUNC_NEVER; } - fprintf(stderr, "Unknown value in %s: %x\n", __FUNCTION__, func); + debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func); return COMPAREFUNC_NEVER; } diff --git a/src/mesa/pipe/p_compiler.h b/src/mesa/pipe/p_compiler.h index e939d9cd9b..30cd729c56 100644 --- a/src/mesa/pipe/p_compiler.h +++ b/src/mesa/pipe/p_compiler.h @@ -28,10 +28,9 @@ #ifndef P_COMPILER_H #define P_COMPILER_H -#include <assert.h> + #include <stdlib.h> #include <string.h> -#include <stdio.h> #if defined(_WIN32) && !defined(__WIN32__) diff --git a/src/mesa/pipe/p_context.h b/src/mesa/pipe/p_context.h index 0dda06c53b..92a1cd70c4 100644 --- a/src/mesa/pipe/p_context.h +++ b/src/mesa/pipe/p_context.h @@ -199,8 +199,8 @@ struct pipe_context { /* * Texture functions */ - void (*texture_create)(struct pipe_context *pipe, - struct pipe_texture **pt); + struct pipe_texture * (*texture_create)(struct pipe_context *pipe, + const struct pipe_texture *templat); void (*texture_release)(struct pipe_context *pipe, struct pipe_texture **pt); diff --git a/src/mesa/pipe/p_debug.h b/src/mesa/pipe/p_debug.h new file mode 100644 index 0000000000..2a11627b36 --- /dev/null +++ b/src/mesa/pipe/p_debug.h @@ -0,0 +1,86 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Cross-platform debugging helpers. + * + * For now it just has assert and printf replacements, but it might be extended + * with stack trace reports and more advanced logging in the near future. + * + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + +#ifndef P_DEBUG_H_ +#define P_DEBUG_H_ + + +#include <stdarg.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifdef DBG +#ifndef DEBUG +#define DEBUG 1 +#endif +#else +#ifndef NDEBUG +#define NDEBUG 1 +#endif +#endif + + +void debug_printf(const char *format, ...); + +void debug_vprintf(const char *format, va_list ap); + +void debug_assert_fail(const char *expr, const char *file, unsigned line); + + +/** Assert macro */ +#ifdef DEBUG +#define debug_assert(expr) ((expr) ? (void)0 : debug_assert_fail(#expr, __FILE__, __LINE__)) +#else +#define debug_assert(expr) ((void)0) +#endif + + +#ifdef assert +#undef assert +#endif +#define assert(expr) debug_assert(expr) + + +#ifdef __cplusplus +} +#endif + +#endif /* P_DEBUG_H_ */ diff --git a/src/mesa/pipe/p_defines.h b/src/mesa/pipe/p_defines.h index 85adf2d61d..0bf53ecb79 100644 --- a/src/mesa/pipe/p_defines.h +++ b/src/mesa/pipe/p_defines.h @@ -265,6 +265,6 @@ enum pipe_texture_target { #define PIPE_CAP_MAX_POINT_WIDTH_AA 17 #define PIPE_CAP_MAX_TEXTURE_ANISOTROPY 18 #define PIPE_CAP_MAX_TEXTURE_LOD_BIAS 19 - +#define PIPE_CAP_BITMAP_TEXCOORD_BIAS 20 #endif diff --git a/src/mesa/pipe/p_format.h b/src/mesa/pipe/p_format.h index 9f60cdbb04..c9ad324315 100644 --- a/src/mesa/pipe/p_format.h +++ b/src/mesa/pipe/p_format.h @@ -28,7 +28,10 @@ #ifndef PIPE_FORMAT_H #define PIPE_FORMAT_H +#include <stdio.h> // for sprintf + #include "p_compiler.h" +#include "p_debug.h" /** * The PIPE_FORMAT is a 32-bit wide bitfield that encodes all the information diff --git a/src/mesa/pipe/p_shader_tokens.h b/src/mesa/pipe/p_shader_tokens.h index e9d1d66bda..3ce35310f6 100644 --- a/src/mesa/pipe/p_shader_tokens.h +++ b/src/mesa/pipe/p_shader_tokens.h @@ -626,7 +626,7 @@ struct tgsi_src_register_ext /* * If tgsi_src_register_ext::Type is TGSI_SRC_REGISTER_EXT_TYPE_SWZ, - * it should be cast to tgsi_src_register_ext_extswz. + * it should be cast to tgsi_src_register_ext_swz. * * If tgsi_src_register_ext::Type is TGSI_SRC_REGISTER_EXT_TYPE_MOD, * it should be cast to tgsi_src_register_ext_mod. diff --git a/src/mesa/pipe/p_util.h b/src/mesa/pipe/p_util.h index 059528787d..469920efee 100644 --- a/src/mesa/pipe/p_util.h +++ b/src/mesa/pipe/p_util.h @@ -29,6 +29,7 @@ #define P_UTIL_H #include "p_compiler.h" +#include "p_debug.h" #include <math.h> @@ -183,6 +184,20 @@ align_free(void *ptr) +/** + * Duplicate of a block of memory + */ +static INLINE void * +mem_dup(const void *src, uint size) +{ + void *dup = malloc(size); + if (dup) + memcpy(dup, src, size); + return dup; +} + + + #define CLAMP( X, MIN, MAX ) ( (X)<(MIN) ? (MIN) : ((X)>(MAX) ? (MAX) : (X)) ) #define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) #define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) @@ -381,10 +396,6 @@ static INLINE int align(int value, int alignment) return (value + alignment - 1) & ~(alignment - 1); } -/* Convenient... - */ -extern void _mesa_printf(const char *str, ...); - /* util/p_util.c */ diff --git a/src/mesa/pipe/pipebuffer/pb_buffer.h b/src/mesa/pipe/pipebuffer/pb_buffer.h index 17551b3b50..97beb5f72a 100644 --- a/src/mesa/pipe/pipebuffer/pb_buffer.h +++ b/src/mesa/pipe/pipebuffer/pb_buffer.h @@ -44,10 +44,8 @@ #define PB_BUFFER_H_ -#include <assert.h> -#include <stdlib.h> - #include "pipe/p_compiler.h" +#include "pipe/p_debug.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" diff --git a/src/mesa/pipe/pipebuffer/pb_buffer_fenced.c b/src/mesa/pipe/pipebuffer/pb_buffer_fenced.c index 349647fe6e..f4fc3f6d71 100644 --- a/src/mesa/pipe/pipebuffer/pb_buffer_fenced.c +++ b/src/mesa/pipe/pipebuffer/pb_buffer_fenced.c @@ -34,12 +34,10 @@ */ -#include <assert.h> -#include <stdlib.h> - #include "linked_list.h" #include "p_compiler.h" +#include "p_debug.h" #include "p_winsys.h" #include "p_thread.h" #include "p_util.h" @@ -145,7 +143,7 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, /* Do the delayed destroy: */ pb_reference(&fenced_buf->buffer, NULL); - free(fenced_buf); + FREE(fenced_buf); } } @@ -162,7 +160,7 @@ fenced_buffer_destroy(struct pb_buffer *buf) } else { pb_reference(&fenced_buf->buffer, NULL); - free(fenced_buf); + FREE(fenced_buf); } if ((fenced_list->numDelayed % fenced_list->checkDelayed) == 0) diff --git a/src/mesa/pipe/pipebuffer/pb_buffer_fenced.h b/src/mesa/pipe/pipebuffer/pb_buffer_fenced.h index 09082a5390..c40b9c75e1 100644 --- a/src/mesa/pipe/pipebuffer/pb_buffer_fenced.h +++ b/src/mesa/pipe/pipebuffer/pb_buffer_fenced.h @@ -51,7 +51,7 @@ #define PB_BUFFER_FENCED_H_ -#include <assert.h> +#include "pipe/p_debug.h" struct pipe_winsys; diff --git a/src/mesa/pipe/pipebuffer/pb_buffer_malloc.c b/src/mesa/pipe/pipebuffer/pb_buffer_malloc.c index fc83a00f36..c1b7759874 100644 --- a/src/mesa/pipe/pipebuffer/pb_buffer_malloc.c +++ b/src/mesa/pipe/pipebuffer/pb_buffer_malloc.c @@ -34,9 +34,7 @@ */ -#include <assert.h> -#include <stdlib.h> - +#include "pipe/p_debug.h" #include "pipe/p_util.h" #include "pb_buffer.h" @@ -107,10 +105,9 @@ pb_malloc_buffer_create(size_t size, { struct malloc_buffer *buf; - /* TODO: accept an alignment parameter */ /* TODO: do a single allocation */ - buf = (struct malloc_buffer *)MALLOC(sizeof(struct malloc_buffer)); + buf = CALLOC_STRUCT(malloc_buffer); if(!buf) return NULL; diff --git a/src/mesa/pipe/pipebuffer/pb_bufmgr_fenced.c b/src/mesa/pipe/pipebuffer/pb_bufmgr_fenced.c index 3b341c64c2..c535d3276c 100644 --- a/src/mesa/pipe/pipebuffer/pb_bufmgr_fenced.c +++ b/src/mesa/pipe/pipebuffer/pb_bufmgr_fenced.c @@ -34,9 +34,7 @@ */ -#include <assert.h> -#include <stdlib.h> - +#include "p_debug.h" #include "p_util.h" #include "pb_buffer.h" diff --git a/src/mesa/pipe/pipebuffer/pb_bufmgr_mm.c b/src/mesa/pipe/pipebuffer/pb_bufmgr_mm.c index 2694f57bca..8b1b51c0e2 100644 --- a/src/mesa/pipe/pipebuffer/pb_bufmgr_mm.c +++ b/src/mesa/pipe/pipebuffer/pb_bufmgr_mm.c @@ -34,11 +34,10 @@ */ -#include <assert.h> - #include "linked_list.h" #include "p_defines.h" +#include "p_debug.h" #include "p_thread.h" #include "p_util.h" #include "pb_buffer.h" @@ -69,28 +68,28 @@ struct mem_block static void mmDumpMemInfo(const struct mem_block *heap) { - fprintf(stderr, "Memory heap %p:\n", (void *)heap); + debug_printf("Memory heap %p:\n", (void *)heap); if (heap == 0) { - fprintf(stderr, " heap == 0\n"); + debug_printf(" heap == 0\n"); } else { const struct mem_block *p; for(p = heap->next; p != heap; p = p->next) { - fprintf(stderr, " Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, + debug_printf(" Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, p->free ? 'F':'.', p->reserved ? 'R':'.'); } - fprintf(stderr, "\nFree list:\n"); + debug_printf("\nFree list:\n"); for(p = heap->next_free; p != heap; p = p->next_free) { - fprintf(stderr, " FREE Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, + debug_printf(" FREE Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, p->free ? 'F':'.', p->reserved ? 'R':'.'); } } - fprintf(stderr, "End of memory blocks\n"); + debug_printf("End of memory blocks\n"); } #endif @@ -308,11 +307,11 @@ mmFreeMem(struct mem_block *b) return 0; if (b->free) { - fprintf(stderr, "block already free\n"); + debug_printf("block already free\n"); return -1; } if (b->reserved) { - fprintf(stderr, "block is reserved\n"); + debug_printf("block is reserved\n"); return -1; } @@ -367,7 +366,7 @@ struct mm_pb_manager }; -static inline struct mm_pb_manager * +static INLINE struct mm_pb_manager * mm_pb_manager(struct pb_manager *mgr) { assert(mgr); @@ -385,7 +384,7 @@ struct mm_buffer }; -static inline struct mm_buffer * +static INLINE struct mm_buffer * mm_buffer(struct pb_buffer *buf) { assert(buf); @@ -399,6 +398,8 @@ mm_buffer_destroy(struct pb_buffer *buf) struct mm_buffer *mm_buf = mm_buffer(buf); struct mm_pb_manager *mm = mm_buf->mgr; + assert(buf->base.refcount == 0); + _glthread_LOCK_MUTEX(mm->mutex); mmFreeMem(mm_buf->block); FREE(buf); @@ -477,7 +478,7 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr, mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0); if(!mm_buf->block) { - fprintf(stderr, "warning: heap full\n"); + debug_printf("warning: heap full\n"); #if 0 mmDumpMemInfo(mm->heap); #endif diff --git a/src/mesa/pipe/pipebuffer/pb_bufmgr_pool.c b/src/mesa/pipe/pipebuffer/pb_bufmgr_pool.c index 7c29954112..04477a865a 100644 --- a/src/mesa/pipe/pipebuffer/pb_bufmgr_pool.c +++ b/src/mesa/pipe/pipebuffer/pb_bufmgr_pool.c @@ -35,12 +35,10 @@ */ -#include <assert.h> -#include <stdlib.h> - #include "linked_list.h" #include "p_compiler.h" +#include "p_debug.h" #include "p_thread.h" #include "p_defines.h" #include "p_util.h" @@ -172,13 +170,13 @@ pool_bufmgr_create_buffer(struct pb_manager *mgr, struct list_head *item; assert(size == pool->bufSize); - assert(desc->alignment % pool->bufAlign == 0); + assert(pool->bufAlign % desc->alignment == 0); _glthread_LOCK_MUTEX(pool->mutex); if (pool->numFree == 0) { _glthread_UNLOCK_MUTEX(pool->mutex); - fprintf(stderr, "warning: out of fixed size buffer objects\n"); + debug_printf("warning: out of fixed size buffer objects\n"); return NULL; } @@ -186,7 +184,7 @@ pool_bufmgr_create_buffer(struct pb_manager *mgr, if (item == &pool->free) { _glthread_UNLOCK_MUTEX(pool->mutex); - fprintf(stderr, "error: fixed size buffer pool corruption\n"); + debug_printf("error: fixed size buffer pool corruption\n"); return NULL; } @@ -258,7 +256,7 @@ pool_bufmgr_create(struct pb_manager *provider, if(!pool->map) goto failure; - pool->bufs = (struct pool_buffer *) MALLOC(numBufs * sizeof(*pool->bufs)); + pool->bufs = (struct pool_buffer *)CALLOC(numBufs, sizeof(*pool->bufs)); if (!pool->bufs) goto failure; diff --git a/src/mesa/pipe/softpipe/SConscript b/src/mesa/pipe/softpipe/SConscript new file mode 100644 index 0000000000..d581ee8d3c --- /dev/null +++ b/src/mesa/pipe/softpipe/SConscript @@ -0,0 +1,42 @@ +Import('*') + +env = env.Clone() + +softpipe = env.ConvenienceLibrary( + target = 'softpipe', + source = [ + 'sp_clear.c', + 'sp_context.c', + 'sp_draw_arrays.c', + 'sp_flush.c', + 'sp_prim_setup.c', + 'sp_prim_vbuf.c', + 'sp_quad_alpha_test.c', + 'sp_quad_blend.c', + 'sp_quad_bufloop.c', + 'sp_quad.c', + 'sp_quad_colormask.c', + 'sp_quad_coverage.c', + 'sp_quad_depth_test.c', + 'sp_quad_earlyz.c', + 'sp_quad_fs.c', + 'sp_quad_occlusion.c', + 'sp_quad_output.c', + 'sp_quad_stencil.c', + 'sp_quad_stipple.c', + 'sp_query.c', + 'sp_state_blend.c', + 'sp_state_clip.c', + 'sp_state_derived.c', + 'sp_state_fs.c', + 'sp_state_rasterizer.c', + 'sp_state_sampler.c', + 'sp_state_surface.c', + 'sp_state_vertex.c', + 'sp_surface.c', + 'sp_tex_sample.c', + 'sp_texture.c', + 'sp_tile_cache.c', + ]) + +Export('softpipe')
\ No newline at end of file diff --git a/src/mesa/pipe/softpipe/sp_clear.c b/src/mesa/pipe/softpipe/sp_clear.c index 571f64b38d..8d295a30ca 100644 --- a/src/mesa/pipe/softpipe/sp_clear.c +++ b/src/mesa/pipe/softpipe/sp_clear.c @@ -55,7 +55,9 @@ softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, if (ps == sp_tile_cache_get_surface(softpipe->zsbuf_cache)) { sp_tile_cache_clear(softpipe->zsbuf_cache, clearValue); +#if TILE_CLEAR_OPTIMIZATION return; +#endif } for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) { diff --git a/src/mesa/pipe/softpipe/sp_prim_setup.c b/src/mesa/pipe/softpipe/sp_prim_setup.c index b17801d13d..7478b2336b 100644 --- a/src/mesa/pipe/softpipe/sp_prim_setup.c +++ b/src/mesa/pipe/softpipe/sp_prim_setup.c @@ -251,9 +251,9 @@ static void print_vertex(const struct setup_stage *setup, const struct vertex_header *v) { int i; - fprintf(stderr, "Vertex: (%p)\n", v); + debug_printf("Vertex: (%p)\n", v); for (i = 0; i < setup->quad.nr_attrs; i++) { - fprintf(stderr, " %d: %f %f %f %f\n", i, + debug_printf(" %d: %f %f %f %f\n", i, v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]); } } @@ -267,7 +267,7 @@ static boolean setup_sort_vertices( struct setup_stage *setup, const struct vertex_header *v2 = prim->v[2]; #if DEBUG_VERTS - fprintf(stderr, "Triangle:\n"); + debug_printf("Triangle:\n"); print_vertex(setup, v0); print_vertex(setup, v1); print_vertex(setup, v2); @@ -345,7 +345,7 @@ static boolean setup_sort_vertices( struct setup_stage *setup, setup->oneoverarea = 1.0f / area; /* - _mesa_printf("%s one-over-area %f area %f det %f\n", + debug_printf("%s one-over-area %f area %f det %f\n", __FUNCTION__, setup->oneoverarea, area, prim->det ); */ } @@ -419,7 +419,7 @@ static void tri_linear_coeff( struct setup_stage *setup, dady * (setup->vmin->data[0][1] - 0.5f))); /* - _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n", + debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", slot, "xyzw"[i], setup->coef[slot].a0[i], setup->coef[slot].dadx[i], @@ -453,10 +453,10 @@ static void tri_persp_coeff( struct setup_stage *setup, float dady = b * setup->oneoverarea; /* - printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, - setup->vmin->data[vertSlot][i], - setup->vmid->data[vertSlot][i], - setup->vmax->data[vertSlot][i] + debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, + setup->vmin->data[vertSlot][i], + setup->vmid->data[vertSlot][i], + setup->vmax->data[vertSlot][i] ); */ assert(i <= 3); @@ -619,7 +619,7 @@ static void subtriangle( struct setup_stage *setup, finish_y -= sy; /* - _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); + debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); */ for (y = start_y; y < finish_y; y++) { @@ -671,7 +671,7 @@ static void setup_tri( struct draw_stage *stage, struct setup_stage *setup = setup_stage( stage ); /* - _mesa_printf("%s\n", __FUNCTION__ ); + debug_printf("%s\n", __FUNCTION__ ); */ setup_sort_vertices( setup, prim ); @@ -1124,7 +1124,7 @@ setup_point(struct draw_stage *stage, struct prim_header *prim) int ix, iy; /* - printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); + debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); */ for (iy = iymin; iy <= iymax; iy += 2) { uint rowMask = 0xf; diff --git a/src/mesa/pipe/softpipe/sp_quad_fs.c b/src/mesa/pipe/softpipe/sp_quad_fs.c index c9cc8afa0c..b5d7dfca1c 100644 --- a/src/mesa/pipe/softpipe/sp_quad_fs.c +++ b/src/mesa/pipe/softpipe/sp_quad_fs.c @@ -168,6 +168,11 @@ shade_quad( sizeof( quad->outputs.color ) ); } + /* + * XXX the following code for updating quad->outputs.depth + * isn't really needed if we did early z testing. + */ + /* store result Z */ if (qss->depthOutSlot >= 0) { /* output[slot] is new Z */ @@ -181,6 +186,10 @@ shade_quad( uint i; for (i = 0; i < 4; i++) { quad->outputs.depth[i] = machine->Inputs[0].xyzw[2].f[i]; + /* XXX not sure the above line is always correct. The following + * might be better: + quad->outputs.depth[i] = machine->QuadPos.xyzw[2].f[i]; + */ } } @@ -214,13 +223,13 @@ shade_quad_llvm(struct quad_stage *qs, inputs[2][0][1] = fy + 1.0f; inputs[3][0][1] = fy + 1.0f; #if DLLVM - printf("MASK = %d\n", quad->mask); + debug_printf("MASK = %d\n", quad->mask); #endif gallivm_prog_inputs_interpolate(llvm, inputs, quad->coef); #if DLLVM for (int i = 0; i < 4; ++i) { for (int j = 0; j < 2; ++j) { - printf("IN(%d,%d) [%f %f %f %f]\n", i, j, + debug_printf("IN(%d,%d) [%f %f %f %f]\n", i, j, inputs[i][j][0], inputs[i][j][1], inputs[i][j][2], inputs[i][j][3]); } } @@ -231,7 +240,7 @@ shade_quad_llvm(struct quad_stage *qs, softpipe->mapped_constants[PIPE_SHADER_FRAGMENT], qss->samplers); #if DLLVM - printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n", + debug_printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n", dests[0][0][0], dests[0][0][1], dests[0][0][2], dests[0][0][3], dests[0][1][0], dests[0][1][1], dests[0][1][2], dests[0][1][3]); #endif @@ -251,7 +260,7 @@ shade_quad_llvm(struct quad_stage *qs, } #if DLLVM for (int i = 0; i < QUAD_SIZE; ++i) { - printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot, + debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot, quad->outputs.color[0][i], quad->outputs.color[1][i], quad->outputs.color[2][i], @@ -275,7 +284,7 @@ shade_quad_llvm(struct quad_stage *qs, } } #if DLLVM - printf("D [%f, %f, %f, %f] mask = %d\n", + debug_printf("D [%f, %f, %f, %f] mask = %d\n", quad->outputs.depth[0], quad->outputs.depth[1], quad->outputs.depth[2], diff --git a/src/mesa/pipe/softpipe/sp_state_blend.c b/src/mesa/pipe/softpipe/sp_state_blend.c index 160ca5cbc0..2d40d6bd8f 100644 --- a/src/mesa/pipe/softpipe/sp_state_blend.c +++ b/src/mesa/pipe/softpipe/sp_state_blend.c @@ -32,13 +32,12 @@ #include "sp_context.h" #include "sp_state.h" + void * softpipe_create_blend_state(struct pipe_context *pipe, const struct pipe_blend_state *blend) { - struct pipe_blend_state *state = MALLOC( sizeof(struct pipe_blend_state) ); - memcpy(state, blend, sizeof(struct pipe_blend_state)); - return state; + return mem_dup(blend, sizeof(*blend)); } void softpipe_bind_blend_state( struct pipe_context *pipe, @@ -78,10 +77,7 @@ void * softpipe_create_depth_stencil_state(struct pipe_context *pipe, const struct pipe_depth_stencil_alpha_state *depth_stencil) { - struct pipe_depth_stencil_alpha_state *state = - MALLOC( sizeof(struct pipe_depth_stencil_alpha_state) ); - memcpy(state, depth_stencil, sizeof(struct pipe_depth_stencil_alpha_state)); - return state; + return mem_dup(depth_stencil, sizeof(*depth_stencil)); } void diff --git a/src/mesa/pipe/softpipe/sp_state_rasterizer.c b/src/mesa/pipe/softpipe/sp_state_rasterizer.c index ce8fa4f2b8..53755099dd 100644 --- a/src/mesa/pipe/softpipe/sp_state_rasterizer.c +++ b/src/mesa/pipe/softpipe/sp_state_rasterizer.c @@ -35,12 +35,9 @@ void * softpipe_create_rasterizer_state(struct pipe_context *pipe, - const struct pipe_rasterizer_state *setup) + const struct pipe_rasterizer_state *rast) { - struct pipe_rasterizer_state *state = - MALLOC( sizeof(struct pipe_rasterizer_state) ); - memcpy(state, setup, sizeof(struct pipe_rasterizer_state)); - return state; + return mem_dup(rast, sizeof(*rast)); } void softpipe_bind_rasterizer_state(struct pipe_context *pipe, diff --git a/src/mesa/pipe/softpipe/sp_state_sampler.c b/src/mesa/pipe/softpipe/sp_state_sampler.c index 3842e71503..291bbc40ad 100644 --- a/src/mesa/pipe/softpipe/sp_state_sampler.c +++ b/src/mesa/pipe/softpipe/sp_state_sampler.c @@ -40,9 +40,7 @@ void * softpipe_create_sampler_state(struct pipe_context *pipe, const struct pipe_sampler_state *sampler) { - struct pipe_sampler_state *state = MALLOC( sizeof(struct pipe_sampler_state) ); - memcpy(state, sampler, sizeof(struct pipe_sampler_state)); - return state; + return mem_dup(sampler, sizeof(*sampler)); } void @@ -51,6 +49,8 @@ softpipe_bind_sampler_state(struct pipe_context *pipe, { struct softpipe_context *softpipe = softpipe_context(pipe); + draw_flush(softpipe->draw); + assert(unit < PIPE_MAX_SAMPLERS); softpipe->sampler[unit] = (struct pipe_sampler_state *)sampler; diff --git a/src/mesa/pipe/softpipe/sp_texture.c b/src/mesa/pipe/softpipe/sp_texture.c index 172234843d..fd2cc3dbbb 100644 --- a/src/mesa/pipe/softpipe/sp_texture.c +++ b/src/mesa/pipe/softpipe/sp_texture.c @@ -79,31 +79,30 @@ softpipe_texture_layout(struct softpipe_texture * spt) } -void -softpipe_texture_create(struct pipe_context *pipe, struct pipe_texture **pt) +struct pipe_texture * +softpipe_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat) { - struct softpipe_texture *spt = REALLOC(*pt, sizeof(struct pipe_texture), - sizeof(struct softpipe_texture)); - - if (spt) { - memset(&spt->base + 1, 0, - sizeof(struct softpipe_texture) - sizeof(struct pipe_texture)); + struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); + if (!spt) + return NULL; - softpipe_texture_layout(spt); + spt->base = *templat; - spt->buffer = pipe->winsys->buffer_create(pipe->winsys, 32, - PIPE_BUFFER_USAGE_PIXEL, - spt->buffer_size); + softpipe_texture_layout(spt); - if (!spt->buffer) { - FREE(spt); - spt = NULL; - } + spt->buffer = pipe->winsys->buffer_create(pipe->winsys, 32, + PIPE_BUFFER_USAGE_PIXEL, + spt->buffer_size); + if (!spt->buffer) { + FREE(spt); + return NULL; } - *pt = &spt->base; + return &spt->base; } + void softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) { diff --git a/src/mesa/pipe/softpipe/sp_texture.h b/src/mesa/pipe/softpipe/sp_texture.h index c6cf370351..fa646c0de9 100644 --- a/src/mesa/pipe/softpipe/sp_texture.h +++ b/src/mesa/pipe/softpipe/sp_texture.h @@ -55,8 +55,9 @@ softpipe_texture(struct pipe_texture *pt) -extern void -softpipe_texture_create(struct pipe_context *pipe, struct pipe_texture **pt); +extern struct pipe_texture * +softpipe_texture_create(struct pipe_context *pipe, + const struct pipe_texture *templat); extern void softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt); diff --git a/src/mesa/pipe/softpipe/sp_tile_cache.c b/src/mesa/pipe/softpipe/sp_tile_cache.c index 451e157abf..1597361b82 100644 --- a/src/mesa/pipe/softpipe/sp_tile_cache.c +++ b/src/mesa/pipe/softpipe/sp_tile_cache.c @@ -341,7 +341,7 @@ sp_tile_cache_flush_clear(struct pipe_context *pipe, } } #if 0 - printf("num cleared: %u\n", numCleared); + debug_printf("num cleared: %u\n", numCleared); #endif } @@ -384,7 +384,7 @@ sp_flush_tile_cache(struct softpipe_context *softpipe, #endif #if 0 - printf("flushed tiles in use: %d\n", inuse); + debug_printf("flushed tiles in use: %d\n", inuse); #endif } @@ -415,8 +415,8 @@ sp_get_cached_tile(struct softpipe_context *softpipe, /* put dirty tile back in framebuffer */ if (tc->depth_stencil) { pipe_put_tile_raw(pipe, ps, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, - tile->data.depth32, 0/*STRIDE*/); + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->data.depth32, 0/*STRIDE*/); } else { pipe_put_tile_rgba(pipe, ps, @@ -441,9 +441,9 @@ sp_get_cached_tile(struct softpipe_context *softpipe, else { /* get new tile data from surface */ if (tc->depth_stencil) { - pipe_put_tile_raw(pipe, ps, - tile->x, tile->y, TILE_SIZE, TILE_SIZE, - tile->data.depth32, 0/*STRIDE*/); + pipe_get_tile_raw(pipe, ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->data.depth32, 0/*STRIDE*/); } else { pipe_get_tile_rgba(pipe, ps, diff --git a/src/mesa/pipe/tgsi/exec/tgsi_exec.c b/src/mesa/pipe/tgsi/exec/tgsi_exec.c index dcc39362a9..336ae1c8b6 100644 --- a/src/mesa/pipe/tgsi/exec/tgsi_exec.c +++ b/src/mesa/pipe/tgsi/exec/tgsi_exec.c @@ -143,7 +143,7 @@ tgsi_exec_prepare( struct tgsi_exec_machine *mach ) k = tgsi_parse_init( &parse, mach->Tokens ); if (k != TGSI_PARSE_OK) { - fprintf(stderr, "Problem parsing!\n"); + debug_printf("Problem parsing!\n"); return; } @@ -249,7 +249,7 @@ tgsi_exec_machine_init( k = tgsi_parse_init (&parse, mach->Tokens); if (k != TGSI_PARSE_OK) { - fprintf( stderr, "Problem parsing!\n" ); + debug_printf( "Problem parsing!\n" ); return; } @@ -1236,7 +1236,7 @@ exec_tex(struct tgsi_exec_machine *mach, uint chan_index; float lodBias; - /* printf("Sampler %u unit %u\n", sampler, unit); */ + /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ switch (inst->InstructionExtTexture.Texture) { case TGSI_TEXTURE_1D: @@ -2010,7 +2010,7 @@ exec_instruction( case TGSI_OPCODE_TXB: /* Texture lookup with lod bias */ - /* src[0] = texcoord (src[0].w = load bias) */ + /* src[0] = texcoord (src[0].w = LOD bias) */ /* src[1] = sampler unit */ exec_tex(mach, inst, TRUE); break; @@ -2026,7 +2026,7 @@ exec_instruction( case TGSI_OPCODE_TXL: /* Texture lookup with explit LOD */ - /* src[0] = texcoord (src[0].w = load bias) */ + /* src[0] = texcoord (src[0].w = LOD) */ /* src[1] = sampler unit */ exec_tex(mach, inst, TRUE); break; diff --git a/src/mesa/pipe/tgsi/exec/tgsi_sse2.c b/src/mesa/pipe/tgsi/exec/tgsi_sse2.c index f8660e7ad1..40bacf8552 100755 --- a/src/mesa/pipe/tgsi/exec/tgsi_sse2.c +++ b/src/mesa/pipe/tgsi/exec/tgsi_sse2.c @@ -48,28 +48,28 @@ _print_reg( case file_REG32: switch( reg.idx ) { case reg_AX: - printf( "EAX" ); + debug_printf( "EAX" ); break; case reg_CX: - printf( "ECX" ); + debug_printf( "ECX" ); break; case reg_DX: - printf( "EDX" ); + debug_printf( "EDX" ); break; case reg_BX: - printf( "EBX" ); + debug_printf( "EBX" ); break; case reg_SP: - printf( "ESP" ); + debug_printf( "ESP" ); break; case reg_BP: - printf( "EBP" ); + debug_printf( "EBP" ); break; case reg_SI: - printf( "ESI" ); + debug_printf( "ESI" ); break; case reg_DI: - printf( "EDI" ); + debug_printf( "EDI" ); break; } break; @@ -77,7 +77,7 @@ _print_reg( assert( 0 ); break; case file_XMM: - printf( "XMM%u", reg.idx ); + debug_printf( "XMM%u", reg.idx ); break; case file_x87: assert( 0 ); @@ -92,35 +92,35 @@ _fill( unsigned count = 10 - strlen( op ); while( count-- ) { - printf( " " ); + debug_printf( " " ); } } -#define DUMP_START() printf( "\nsse-dump start ----------------" ) -#define DUMP_END() printf( "\nsse-dump end ----------------\n" ) -#define DUMP( OP ) printf( "\n%s", OP ) +#define DUMP_START() debug_printf( "\nsse-dump start ----------------" ) +#define DUMP_END() debug_printf( "\nsse-dump end ----------------\n" ) +#define DUMP( OP ) debug_printf( "\n%s", OP ) #define DUMP_I( OP, I ) do {\ - printf( "\n%s", OP );\ + debug_printf( "\n%s", OP );\ _fill( OP );\ - printf( "%u", I ); } while( 0 ) + debug_printf( "%u", I ); } while( 0 ) #define DUMP_R( OP, R0 ) do {\ - printf( "\n%s", OP );\ + debug_printf( "\n%s", OP );\ _fill( OP );\ _print_reg( R0 ); } while( 0 ) #define DUMP_RR( OP, R0, R1 ) do {\ - printf( "\n%s", OP );\ + debug_printf( "\n%s", OP );\ _fill( OP );\ _print_reg( R0 );\ - printf( ", " );\ + debug_printf( ", " );\ _print_reg( R1 ); } while( 0 ) #define DUMP_RRI( OP, R0, R1, I ) do {\ - printf( "\n%s", OP );\ + debug_printf( "\n%s", OP );\ _fill( OP );\ _print_reg( R0 );\ - printf( ", " );\ + debug_printf( ", " );\ _print_reg( R1 );\ - printf( ", " );\ - printf( "%u", I ); } while( 0 ) + debug_printf( ", " );\ + debug_printf( "%u", I ); } while( 0 ) #else @@ -198,9 +198,15 @@ get_output_base( void ) static struct x86_reg get_temp_base( void ) { +#ifdef WIN32 return x86_make_reg( file_REG32, reg_BX ); +#else + return x86_make_reg( + file_REG32, + reg_SI ); +#endif } static struct x86_reg @@ -2248,8 +2254,7 @@ tgsi_emit_sse2( case TGSI_TOKEN_TYPE_IMMEDIATE: /* XXX implement this */ - assert(0); - break; + return 0; default: assert( 0 ); diff --git a/src/mesa/pipe/tgsi/util/tgsi_build.c b/src/mesa/pipe/tgsi/util/tgsi_build.c index 67f7d2c2c2..a00ff1c2a5 100644 --- a/src/mesa/pipe/tgsi/util/tgsi_build.c +++ b/src/mesa/pipe/tgsi/util/tgsi_build.c @@ -1,3 +1,4 @@ +#include "pipe/p_debug.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi_build.h" diff --git a/src/mesa/pipe/tgsi/util/tgsi_dump.c b/src/mesa/pipe/tgsi/util/tgsi_dump.c index cdbc0dbc9c..b5c54847e0 100644 --- a/src/mesa/pipe/tgsi/util/tgsi_dump.c +++ b/src/mesa/pipe/tgsi/util/tgsi_dump.c @@ -25,6 +25,9 @@ * **************************************************************************/ +#include <stdio.h> + +#include "pipe/p_debug.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi_dump.h" diff --git a/src/mesa/pipe/tgsi/util/tgsi_parse.c b/src/mesa/pipe/tgsi/util/tgsi_parse.c index f0f8d44ac2..bf6b89ce56 100644 --- a/src/mesa/pipe/tgsi/util/tgsi_parse.c +++ b/src/mesa/pipe/tgsi/util/tgsi_parse.c @@ -25,6 +25,7 @@ * **************************************************************************/ +#include "pipe/p_debug.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" diff --git a/src/mesa/pipe/tgsi/util/tgsi_util.c b/src/mesa/pipe/tgsi/util/tgsi_util.c index 1e76b0f133..4cdd89182a 100644 --- a/src/mesa/pipe/tgsi/util/tgsi_util.c +++ b/src/mesa/pipe/tgsi/util/tgsi_util.c @@ -1,3 +1,4 @@ +#include "pipe/p_debug.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" diff --git a/src/mesa/pipe/util/p_debug.c b/src/mesa/pipe/util/p_debug.c new file mode 100644 index 0000000000..b9607a6ba7 --- /dev/null +++ b/src/mesa/pipe/util/p_debug.c @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <stdarg.h> + +#ifdef WIN32 +#include <windows.h> +#include <winddi.h> +#else +#include <stdio.h> +#include <stdlib.h> +#endif + +#include "pipe/p_debug.h" +#include "pipe/p_compiler.h" + + +void debug_vprintf(const char *format, va_list ap) +{ +#ifdef WIN32 + EngDebugPrint("Gallium3D: ", (PCHAR)format, ap); +#else + vfprintf(stderr, format, ap); +#endif +} + + +void debug_printf(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + debug_vprintf(format, ap); + va_end(ap); +} + + +static INLINE void debug_abort(void) +{ +#ifdef WIN32 + EngDebugBreak(); +#else + abort(); +#endif +} + + +void debug_assert_fail(const char *expr, const char *file, unsigned line) +{ + debug_printf("%s:%i: Assertion `%s' failed.\n", file, line, expr); + debug_abort(); +} diff --git a/src/mesa/sources b/src/mesa/sources index 97ef7e1936..84492c91ac 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -175,6 +175,9 @@ DRAW_SOURCES = \ pipe/draw/draw_vertex_fetch.c \ pipe/draw/draw_vertex_shader.c \ pipe/draw/draw_vertex_shader_llvm.c \ + pipe/draw/draw_vf.c \ + pipe/draw/draw_vf_generic.c \ + pipe/draw/draw_vf_sse.c \ pipe/draw/draw_wide_prims.c TGSIEXEC_SOURCES = \ @@ -192,6 +195,7 @@ STATECACHE_SOURCES = \ pipe/cso_cache/cso_cache.c PIPEUTIL_SOURCES = \ + pipe/util/p_debug.c \ pipe/util/p_tile.c \ pipe/util/p_util.c @@ -230,6 +234,7 @@ STATETRACKER_SOURCES = \ state_tracker/st_extensions.c \ state_tracker/st_format.c \ state_tracker/st_framebuffer.c \ + state_tracker/st_gen_mipmap.c \ state_tracker/st_mesa_to_tgsi.c \ state_tracker/st_program.c \ state_tracker/st_texture.c diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 1ed9333556..9196918509 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -226,9 +226,11 @@ find_translated_vp(struct st_context *st, GLint fpInAttrib = vp_out_to_fp_in(outAttr); if (fpInAttrib >= 0) { GLuint fpInSlot = stfp->input_to_slot[fpInAttrib]; - GLuint vpOutSlot = stfp->fs->state.input_map[fpInSlot]; - xvp->output_to_slot[outAttr] = vpOutSlot; - numVpOuts++; + if (fpInSlot != ~0) { + GLuint vpOutSlot = stfp->fs->state.input_map[fpInSlot]; + xvp->output_to_slot[outAttr] = vpOutSlot; + numVpOuts++; + } } else if (outAttr == VERT_RESULT_PSIZ || outAttr == VERT_RESULT_BFC0 || @@ -247,7 +249,7 @@ find_translated_vp(struct st_context *st, * We could use this info to do dead code elimination in the * vertex program. */ - dummySlot = stfp->num_input_slots; + dummySlot = numVpOuts; /* Map vert program outputs that aren't used to the dummy slot */ for (outAttr = 0; outAttr < VERT_RESULT_MAX; outAttr++) { diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index fb21d29c40..2a836d630b 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -36,7 +36,6 @@ #include "st_atom.h" #include "st_cb_texture.h" #include "pipe/p_context.h" -#include "pipe/p_defines.h" /** @@ -46,24 +45,21 @@ static void update_textures(struct st_context *st) { - GLuint s; - /* ST_NEW_FRAGMENT_PROGRAM */ struct gl_fragment_program *fprog = st->ctx->FragmentProgram._Current; + GLuint unit; - for (s = 0; s < st->ctx->Const.MaxTextureCoordUnits; s++) { - GLuint su = fprog->Base.SamplerUnits[s]; - - struct gl_texture_object *texObj - = st->ctx->Texture.Unit[su]._Current; - + for (unit = 0; unit < st->ctx->Const.MaxTextureCoordUnits; unit++) { + const GLuint su = fprog->Base.SamplerUnits[unit]; + struct gl_texture_object *texObj = st->ctx->Texture.Unit[su]._Current; struct pipe_texture *pt; if (texObj) { GLboolean flush, retval; retval = st_finalize_texture(st->ctx, st->pipe, texObj, &flush); + /* XXX retval indicates whether there's a texture border */ pt = st_get_texobj_texture(texObj); } @@ -75,9 +71,9 @@ update_textures(struct st_context *st) * this table before being deleted, otherwise the pointer * comparison below could fail. */ - if (st->state.sampler_texture[s] != pt) { - st->state.sampler_texture[s] = pt; - st->pipe->set_sampler_texture(st->pipe, s, pt); + if (st->state.sampler_texture[unit] != pt) { + st->state.sampler_texture[unit] = pt; + st->pipe->set_sampler_texture(st->pipe, unit, pt); } } } diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index 758d4a4086..ab98b54bab 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -251,7 +251,7 @@ draw_quad(GLcontext *ctx, verts[i][1][3] = color[3]; } - st_draw_vertices(ctx, PIPE_PRIM_QUADS, 4, (float *) verts, 2); + st_draw_vertices(ctx, PIPE_PRIM_QUADS, 4, (float *) verts, 2, GL_FALSE); } @@ -408,7 +408,9 @@ check_clear_depth_with_quad(GLcontext *ctx, struct gl_renderbuffer *rb) const struct st_renderbuffer *strb = st_renderbuffer(rb); const GLboolean isDS = is_depth_stencil_format(strb->surface->format); return ctx->Scissor.Enabled - || (isDS && ctx->DrawBuffer->Visual.stencilBits > 0); + || (isDS && + strb->surface->status == PIPE_SURFACE_STATUS_DEFINED && + ctx->DrawBuffer->Visual.stencilBits > 0); } diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 6b44cba2e4..07886e7982 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -355,8 +355,8 @@ make_fragment_shader_z(struct st_context *st) * Create a simple vertex shader that just passes through the * vertex position and texcoord (and optionally, color). */ -static struct st_vertex_program * -make_vertex_shader(struct st_context *st, GLboolean passColor) +struct st_vertex_program * +st_make_passthrough_vertex_shader(struct st_context *st, GLboolean passColor) { /* only make programs once and re-use */ static struct st_vertex_program *progs[2] = { NULL, NULL }; @@ -572,7 +572,7 @@ draw_quad(GLcontext *ctx, GLfloat x0, GLfloat y0, GLfloat z, verts[i][1][3] = 1.0; /*Q*/ } - st_draw_vertices(ctx, PIPE_PRIM_QUADS, 4, (float *) verts, 2); + st_draw_vertices(ctx, PIPE_PRIM_QUADS, 4, (float *) verts, 2, GL_FALSE); } @@ -581,10 +581,13 @@ draw_quad_colored(GLcontext *ctx, GLfloat x0, GLfloat y0, GLfloat z, GLfloat x1, GLfloat y1, const GLfloat *color, GLboolean invertTex) { + GLfloat bias = ctx->st->bitmap_texcoord_bias; GLfloat verts[4][3][4]; /* four verts, three attribs, XYZW */ GLuint i; - GLfloat sLeft = 0.0, sRight = 1.0; - GLfloat tTop = invertTex, tBot = 1.0 - tTop; + GLfloat xBias = bias / (x1-x0); + GLfloat yBias = bias / (y1-y0); + GLfloat sLeft = 0.0 + xBias, sRight = 1.0 + xBias; + GLfloat tTop = invertTex - yBias, tBot = 1.0 - tTop - yBias; /* upper-left */ verts[0][0][0] = x0; /* attr[0].x */ @@ -622,7 +625,7 @@ draw_quad_colored(GLcontext *ctx, GLfloat x0, GLfloat y0, GLfloat z, verts[i][2][3] = 1.0; /*Q*/ } - st_draw_vertices(ctx, PIPE_PRIM_QUADS, 4, (float *) verts, 3); + st_draw_vertices(ctx, PIPE_PRIM_QUADS, 4, (float *) verts, 3, GL_FALSE); } @@ -942,7 +945,7 @@ st_DrawPixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, if (format == GL_DEPTH_COMPONENT) { ps = st->state.framebuffer.zsbuf; stfp = make_fragment_shader_z(ctx->st); - stvp = make_vertex_shader(ctx->st, GL_TRUE); + stvp = st_make_passthrough_vertex_shader(ctx->st, GL_TRUE); color = ctx->Current.RasterColor; } else if (format == GL_STENCIL_INDEX) { @@ -953,7 +956,7 @@ st_DrawPixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, else { ps = st->state.framebuffer.cbufs[0]; stfp = combined_drawpix_fragment_program(ctx); - stvp = make_vertex_shader(ctx->st, GL_FALSE); + stvp = st_make_passthrough_vertex_shader(ctx->st, GL_FALSE); color = NULL; } @@ -1108,7 +1111,7 @@ st_Bitmap(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, struct st_context *st = ctx->st; struct pipe_texture *pt; - stvp = make_vertex_shader(ctx->st, GL_TRUE); + stvp = st_make_passthrough_vertex_shader(ctx->st, GL_TRUE); stfp = combined_bitmap_fragment_program(ctx); st_validate_state(st); @@ -1226,13 +1229,13 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, rbRead = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); color = NULL; stfp = combined_drawpix_fragment_program(ctx); - stvp = make_vertex_shader(ctx->st, GL_FALSE); + stvp = st_make_passthrough_vertex_shader(ctx->st, GL_FALSE); } else { rbRead = st_renderbuffer(ctx->ReadBuffer->_DepthBuffer); color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0]; stfp = make_fragment_shader_z(ctx->st); - stvp = make_vertex_shader(ctx->st, GL_TRUE); + stvp = st_make_passthrough_vertex_shader(ctx->st, GL_TRUE); } psRead = rbRead->surface; diff --git a/src/mesa/state_tracker/st_cb_drawpixels.h b/src/mesa/state_tracker/st_cb_drawpixels.h index 71ba487020..b8b906f06b 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.h +++ b/src/mesa/state_tracker/st_cb_drawpixels.h @@ -30,6 +30,10 @@ #define ST_CB_DRAWPIXELS_H +extern struct st_vertex_program * +st_make_passthrough_vertex_shader(struct st_context *st, GLboolean passColor); + + extern void st_init_drawpixels_functions(struct dd_function_table *functions); diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index ba0950e295..3350254654 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -30,6 +30,7 @@ #include "main/enums.h" #include "main/image.h" #include "main/macros.h" +#include "main/mipmap.h" #include "main/texcompress.h" #include "main/texformat.h" #include "main/teximage.h" @@ -41,6 +42,7 @@ #include "state_tracker/st_cb_texture.h" #include "state_tracker/st_format.h" #include "state_tracker/st_texture.h" +#include "state_tracker/st_gen_mipmap.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" @@ -76,13 +78,13 @@ struct st_texture_object - static INLINE struct st_texture_object * st_texture_object(struct gl_texture_object *obj) { return (struct st_texture_object *) obj; } + static INLINE struct st_texture_image * st_texture_image(struct gl_texture_image *img) { @@ -122,32 +124,28 @@ gl_target_to_pipe(GLenum target) } +/** + * Return nominal bytes per texel for a compressed format, 0 for non-compressed + * format. + */ static int compressed_num_bytes(GLuint mesaFormat) { - int bytes = 0; switch(mesaFormat) { - case MESA_FORMAT_RGB_FXT1: case MESA_FORMAT_RGBA_FXT1: case MESA_FORMAT_RGB_DXT1: case MESA_FORMAT_RGBA_DXT1: - bytes = 2; - break; - + return 2; case MESA_FORMAT_RGBA_DXT3: case MESA_FORMAT_RGBA_DXT5: - bytes = 4; + return 4; default: - break; + return 0; } - - return bytes; } - - static GLboolean st_IsTextureResident(GLcontext * ctx, struct gl_texture_object *texObj) { @@ -164,7 +162,6 @@ st_IsTextureResident(GLcontext * ctx, struct gl_texture_object *texObj) } - static struct gl_texture_image * st_NewTextureImage(GLcontext * ctx) { @@ -216,8 +213,6 @@ st_FreeTextureImageData(GLcontext * ctx, struct gl_texture_image *texImage) } - - /* ================================================================ * From linux kernel i386 header files, copes with odd sizes better * than COPY_DWORDS would: @@ -290,7 +285,12 @@ logbase2(int n) } -/* Otherwise, store it in memory if (Border != 0) or (any dimension == +/** + * Allocate a pipe_texture object for the given st_texture_object using + * the given st_texture_image to guess the mipmap size/levels. + * + * [comments...] + * Otherwise, store it in memory if (Border != 0) or (any dimension == * 1). * * Otherwise, if max_level >= level >= min_level, create texture with @@ -302,18 +302,19 @@ logbase2(int n) static void guess_and_alloc_texture(struct st_context *st, struct st_texture_object *stObj, - struct st_texture_image *stImage) + const struct st_texture_image *stImage) { GLuint firstLevel; GLuint lastLevel; GLuint width = stImage->base.Width; GLuint height = stImage->base.Height; GLuint depth = stImage->base.Depth; - GLuint l2width, l2height, l2depth; GLuint i, comp_byte = 0; DBG("%s\n", __FUNCTION__); + assert(!stObj->pt); + if (stImage->base.Border) return; @@ -355,15 +356,15 @@ guess_and_alloc_texture(struct st_context *st, lastLevel = firstLevel; } else { - l2width = logbase2(width); - l2height = logbase2(height); - l2depth = logbase2(depth); + GLuint l2width = logbase2(width); + GLuint l2height = logbase2(height); + GLuint l2depth = logbase2(depth); lastLevel = firstLevel + MAX2(MAX2(l2width, l2height), l2depth); } - assert(!stObj->pt); if (stImage->base.IsCompressed) comp_byte = compressed_num_bytes(stImage->base.TexFormat->MesaFormat); + stObj->pt = st_texture_create(st, gl_target_to_pipe(stObj->base.Target), st_mesa_format_to_pipe_format(stImage->base.TexFormat->MesaFormat), @@ -487,21 +488,18 @@ try_pbo_upload(GLcontext *ctx, - - - - static void st_TexImage(GLcontext * ctx, - GLint dims, - GLenum target, GLint level, - GLint internalFormat, - GLint width, GLint height, GLint depth, - GLint border, - GLenum format, GLenum type, const void *pixels, - const struct gl_pixelstore_attrib *unpack, - struct gl_texture_object *texObj, - struct gl_texture_image *texImage, GLsizei imageSize, int compressed) + GLint dims, + GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint depth, + GLint border, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *unpack, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage, + GLsizei imageSize, int compressed) { struct st_texture_object *stObj = st_texture_object(texObj); struct st_texture_image *stImage = st_texture_image(texImage); @@ -524,7 +522,7 @@ st_TexImage(GLcontext * ctx, /* choose the texture format */ texImage->TexFormat = st_ChooseTextureFormat(ctx, internalFormat, - format, type); + format, type); _mesa_set_fetch_functions(texImage, dims); @@ -536,7 +534,8 @@ st_TexImage(GLcontext * ctx, ctx->Driver.CompressedTextureSize(ctx, texImage->Width, texImage->Height, texImage->Depth, texImage->TexFormat->MesaFormat); - } else { + } + else { texelBytes = texImage->TexFormat->TexelBytes; /* Minimum pitch of 32 bytes */ @@ -669,7 +668,7 @@ st_TexImage(GLcontext * ctx, * conversion and copy: */ if (compressed) { - memcpy(texImage->Data, pixels, imageSize); + memcpy(texImage->Data, pixels, imageSize); } else { GLuint srcImageStride = _mesa_image_image_stride(unpack, width, height, @@ -705,13 +704,9 @@ st_TexImage(GLcontext * ctx, texImage->Data = NULL; } -#if 0 - /* GL_SGIS_generate_mipmap -- this can be accelerated now. - */ +#if 01 if (level == texObj->BaseLevel && texObj->GenerateMipmap) { - intel_generate_mipmap(ctx, target, - &ctx->Texture.Unit[ctx->Texture.CurrentUnit], - texObj); + ctx->Driver.GenerateMipmap(ctx, target, texObj); } #endif } @@ -1401,7 +1396,10 @@ copy_image_data_to_texture(struct st_context *st, } -/* +/** + * Called during state validation. When this function is finished, + * the texture object should be ready for rendering. + * \return GL_FALSE if a texture border is present, GL_TRUE otherwise */ GLboolean st_finalize_texture(GLcontext *ctx, @@ -1410,11 +1408,10 @@ st_finalize_texture(GLcontext *ctx, GLboolean *needFlush) { struct st_texture_object *stObj = st_texture_object(tObj); + const GLuint nr_faces = (stObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; int comp_byte = 0; int cpp; - GLuint face, i; - GLuint nr_faces = 0; struct st_texture_image *firstImage; *needFlush = GL_FALSE; @@ -1426,8 +1423,7 @@ st_finalize_texture(GLcontext *ctx, /* What levels must the texture include at a minimum? */ calculate_first_last_level(stObj); - firstImage = - st_texture_image(stObj->base.Image[0][stObj->firstLevel]); + firstImage = st_texture_image(stObj->base.Image[0][stObj->firstLevel]); /* Fallback case: */ @@ -1503,7 +1499,6 @@ st_finalize_texture(GLcontext *ctx, /* Pull in any images not in the object's texture: */ - nr_faces = (stObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; for (face = 0; face < nr_faces; face++) { for (i = stObj->firstLevel; i <= stObj->lastLevel; i++) { struct st_texture_image *stImage = @@ -1540,6 +1535,7 @@ st_init_texture_functions(struct dd_function_table *functions) functions->CopyTexSubImage1D = st_CopyTexSubImage1D; functions->CopyTexSubImage2D = st_CopyTexSubImage2D; functions->CopyTexSubImage3D = st_CopyTexSubImage3D; + functions->GenerateMipmap = st_generate_mipmap; functions->GetTexImage = st_GetTexImage; diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 9c206c057a..bf4618bed8 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -49,6 +49,7 @@ #include "st_atom.h" #include "st_draw.h" #include "st_extensions.h" +#include "st_gen_mipmap.h" #include "st_program.h" #include "pipe/p_context.h" #include "pipe/p_winsys.h" @@ -96,6 +97,7 @@ st_create_context_priv( GLcontext *ctx, struct pipe_context *pipe ) st_init_atoms( st ); st_init_draw( st ); + st_init_generate_mipmap(st); /* we want all vertex data to be placed in buffer objects */ vbo_use_buffer_objects(ctx); diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 2b6f8743f3..a756055898 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -143,6 +143,8 @@ struct st_context GLfloat polygon_offset_scale; /* ?? */ + GLfloat bitmap_texcoord_bias; + /** Mapping from VERT_RESULT_x to post-transformed vertex slot */ const GLuint *vertex_result_to_slot; diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 8ef50ee768..ae9f5c8b11 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -298,6 +298,7 @@ st_draw_vbo(GLcontext *ctx, break; default: assert(0); + return; } /* get/create the index buffer object */ @@ -353,7 +354,8 @@ st_draw_vbo(GLcontext *ctx, void st_draw_vertices(GLcontext *ctx, unsigned prim, unsigned numVertex, float *verts, - unsigned numAttribs) + unsigned numAttribs, + GLboolean inClipCoords) { const float width = ctx->DrawBuffer->Width; const float height = ctx->DrawBuffer->Height; @@ -366,14 +368,16 @@ st_draw_vertices(GLcontext *ctx, unsigned prim, assert(numAttribs > 0); - /* convert to clip coords */ - for (i = 0; i < numVertex; i++) { - float x = verts[i * numAttribs * 4 + 0]; - float y = verts[i * numAttribs * 4 + 1]; - x = x / width * 2.0 - 1.0; - y = y / height * 2.0 - 1.0; - verts[i * numAttribs * 4 + 0] = x; - verts[i * numAttribs * 4 + 1] = y; + if (!inClipCoords) { + /* convert to clip coords */ + for (i = 0; i < numVertex; i++) { + float x = verts[i * numAttribs * 4 + 0]; + float y = verts[i * numAttribs * 4 + 1]; + x = x / width * 2.0 - 1.0; + y = y / height * 2.0 - 1.0; + verts[i * numAttribs * 4 + 0] = x; + verts[i * numAttribs * 4 + 1] = y; + } } /* XXX create one-time */ @@ -570,6 +574,7 @@ st_feedback_draw_vbo(GLcontext *ctx, break; default: assert(0); + return; } map = pipe->winsys->buffer_map(pipe->winsys, diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h index 89ee790c57..171bde57e5 100644 --- a/src/mesa/state_tracker/st_draw.h +++ b/src/mesa/state_tracker/st_draw.h @@ -62,7 +62,8 @@ st_feedback_draw_vbo(GLcontext *ctx, void st_draw_vertices(GLcontext *ctx, unsigned prim, unsigned numVertex, float *verts, - unsigned numAttribs); + unsigned numAttribs, + GLboolean inClipCoords); #endif diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 0157bdd6b3..97d28d77c4 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -106,6 +106,9 @@ void st_init_limits(struct st_context *st) c->MaxTextureLodBias = pipe->get_paramf(pipe, PIPE_CAP_MAX_TEXTURE_LOD_BIAS); + + st->bitmap_texcoord_bias + = pipe->get_paramf(pipe, PIPE_CAP_BITMAP_TEXCOORD_BIAS); } diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c new file mode 100644 index 0000000000..a6ac9a55fb --- /dev/null +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -0,0 +1,363 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "main/imports.h" +#include "main/mipmap.h" +#include "main/teximage.h" + +#include "shader/prog_instruction.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/cso_cache/cso_cache.h" + +#include "st_context.h" +#include "st_draw.h" +#include "st_gen_mipmap.h" +#include "st_program.h" +#include "st_cb_drawpixels.h" +#include "st_cb_texture.h" + + + +static void *blend_cso = NULL; +static void *depthstencil_cso = NULL; +static void *rasterizer_cso = NULL; +static void *sampler_cso = NULL; + +static struct st_fragment_program *stfp = NULL; +static struct st_vertex_program *stvp = NULL; + + + +static struct st_fragment_program * +make_tex_fragment_program(GLcontext *ctx) +{ + struct st_fragment_program *stfp; + struct gl_program *p; + GLuint ic = 0; + + p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + if (!p) + return NULL; + + p->NumInstructions = 2; + + p->Instructions = _mesa_alloc_instructions(p->NumInstructions); + if (!p->Instructions) { + ctx->Driver.DeleteProgram(ctx, p); + return NULL; + } + _mesa_init_instructions(p->Instructions, p->NumInstructions); + + /* TEX result.color, fragment.texcoord[0], texture[0], 2D; */ + p->Instructions[ic].Opcode = OPCODE_TEX; + p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT; + p->Instructions[ic].DstReg.Index = FRAG_RESULT_COLR; + p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; + p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0; + p->Instructions[ic].TexSrcUnit = 0; + p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX; + ic++; + + /* END; */ + p->Instructions[ic++].Opcode = OPCODE_END; + + assert(ic == p->NumInstructions); + + p->InputsRead = FRAG_BIT_TEX0; + p->OutputsWritten = (1 << FRAG_RESULT_COLR); + + stfp = (struct st_fragment_program *) p; + + st_translate_fragment_program(ctx->st, stfp, NULL, + stfp->tokens, ST_MAX_SHADER_TOKENS); + + return stfp; +} + + + + +/** + * one-time init for generate mipmap + * XXX Note: there may be other times we need no-op/simple state like this. + * In that case, some code refactoring would be good. + */ +void +st_init_generate_mipmap(struct st_context *st) +{ + struct pipe_context *pipe = st->pipe; + struct pipe_blend_state blend; + struct pipe_rasterizer_state rasterizer; + struct pipe_sampler_state sampler; + struct pipe_depth_stencil_alpha_state depthstencil; + + assert(!blend_cso); + + memset(&blend, 0, sizeof(blend)); + blend.colormask = PIPE_MASK_RGBA; + blend_cso = pipe->create_blend_state(pipe, &blend); + + memset(&depthstencil, 0, sizeof(depthstencil)); + depthstencil_cso = pipe->create_depth_stencil_alpha_state(pipe, &depthstencil); + + memset(&rasterizer, 0, sizeof(rasterizer)); + rasterizer_cso = pipe->create_rasterizer_state(pipe, &rasterizer); + + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.normalized_coords = 1; + sampler_cso = pipe->create_sampler_state(pipe, &sampler); + + stfp = make_tex_fragment_program(st->ctx); + stvp = st_make_passthrough_vertex_shader(st, GL_FALSE); +} + + +void +st_destroy_generate_mipmpap(struct st_context *st) +{ + struct pipe_context *pipe = st->pipe; + + pipe->delete_blend_state(pipe, blend_cso); + pipe->delete_depth_stencil_alpha_state(pipe, depthstencil_cso); + pipe->delete_rasterizer_state(pipe, rasterizer_cso); + pipe->delete_sampler_state(pipe, sampler_cso); + + /* XXX free stfp, stvp */ + + blend_cso = NULL; + depthstencil_cso = NULL; + rasterizer_cso = NULL; + sampler_cso = NULL; +} + + +static void +simple_viewport(struct pipe_context *pipe, uint width, uint height) +{ + struct pipe_viewport_state vp; + + vp.scale[0] = 0.5 * width; + vp.scale[1] = -0.5 * height; + vp.scale[2] = 1.0; + vp.scale[3] = 1.0; + vp.translate[0] = 0.5 * width; + vp.translate[1] = 0.5 * height; + vp.translate[2] = 0.0; + vp.translate[3] = 0.0; + + pipe->set_viewport_state(pipe, &vp); +} + + + +/* + * Draw simple [-1,1]x[-1,1] quad + */ +static void +draw_quad(GLcontext *ctx) +{ + GLfloat verts[4][2][4]; /* four verts, two attribs, XYZW */ + GLuint i; + GLfloat sLeft = 0.0, sRight = 1.0; + GLfloat tTop = 1.0, tBot = 0.0; + GLfloat x0 = -1.0, x1 = 1.0; + GLfloat y0 = -1.0, y1 = 1.0; + + /* upper-left */ + verts[0][0][0] = x0; /* attr[0].x */ + verts[0][0][1] = y0; /* attr[0].y */ + verts[0][1][0] = sLeft; /* attr[1].s */ + verts[0][1][1] = tTop; /* attr[1].t */ + + /* upper-right */ + verts[1][0][0] = x1; + verts[1][0][1] = y0; + verts[1][1][0] = sRight; + verts[1][1][1] = tTop; + + /* lower-right */ + verts[2][0][0] = x1; + verts[2][0][1] = y1; + verts[2][1][0] = sRight; + verts[2][1][1] = tBot; + + /* lower-left */ + verts[3][0][0] = x0; + verts[3][0][1] = y1; + verts[3][1][0] = sLeft; + verts[3][1][1] = tBot; + + /* same for all verts: */ + for (i = 0; i < 4; i++) { + verts[i][0][2] = 0.0; /*Z*/ + verts[i][0][3] = 1.0; /*W*/ + verts[i][1][2] = 0.0; /*R*/ + verts[i][1][3] = 1.0; /*Q*/ + } + + st_draw_vertices(ctx, PIPE_PRIM_QUADS, 4, (float *) verts, 2, GL_TRUE); +} + + + +/** + * Generate mipmap levels using hardware rendering. + * \return TRUE if successful, FALSE if not possible + */ +static boolean +st_render_mipmap(struct st_context *st, + struct pipe_texture *pt, + uint baseLevel, uint lastLevel) +{ + struct pipe_context *pipe = st->pipe; + struct pipe_framebuffer_state fb; + const uint face = 0, zslice = 0; + const uint first_level_save = pt->first_level; + uint dstLevel; + + /* check if we can render in the texture's format */ + if (!pipe->is_format_supported(pipe, pt->format, PIPE_SURFACE)) { + return FALSE; + } + + /* init framebuffer state */ + memset(&fb, 0, sizeof(fb)); + fb.num_cbufs = 1; + + /* bind CSOs */ + pipe->bind_blend_state(pipe, blend_cso); + pipe->bind_depth_stencil_alpha_state(pipe, depthstencil_cso); + pipe->bind_rasterizer_state(pipe, rasterizer_cso); + pipe->bind_sampler_state(pipe, 0, sampler_cso); + + /* bind shaders */ + pipe->bind_fs_state(pipe, stfp->fs->data); + pipe->bind_vs_state(pipe, stvp->cso->data); + + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { + const uint srcLevel = dstLevel - 1; + + /* + * Setup framebuffer / dest surface + */ + fb.cbufs[0] = pipe->get_tex_surface(pipe, pt, face, dstLevel, zslice); + pipe->set_framebuffer_state(pipe, &fb); + + simple_viewport(pipe, pt->width[dstLevel], pt->height[dstLevel]); + + /* + * Setup src texture, override pt->first_level so we sample from + * the right mipmap level. + */ + pt->first_level = srcLevel; + pipe->set_sampler_texture(pipe, 0, pt); + + draw_quad(st->ctx); + } + + /* restore first_level */ + pt->first_level = first_level_save; + + /* restore pipe state */ + if (st->state.rasterizer) + pipe->bind_rasterizer_state(pipe, st->state.rasterizer->data); + if (st->state.fs) + pipe->bind_fs_state(pipe, st->state.fs->data); + if (st->state.vs) + pipe->bind_vs_state(pipe, st->state.vs->cso->data); + if (st->state.sampler[0]) + pipe->bind_sampler_state(pipe, 0, st->state.sampler[0]->data); + pipe->set_sampler_texture(pipe, 0, st->state.sampler_texture[0]); + pipe->set_viewport_state(pipe, &st->state.viewport); + + return TRUE; +} + + + +void +st_generate_mipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + struct st_context *st = ctx->st; + struct pipe_texture *pt = st_get_texobj_texture(texObj); + const uint baseLevel = texObj->BaseLevel; + const uint lastLevel = pt->last_level; + uint dstLevel; + + if (!st_render_mipmap(st, pt, baseLevel, lastLevel)) { + abort(); + /* XXX the following won't really work at this time */ + _mesa_generate_mipmap(ctx, target, texObj); + return; + } + + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { + const uint srcLevel = dstLevel - 1; + const struct gl_texture_image *srcImage + = _mesa_get_tex_image(ctx, texObj, target, srcLevel); + struct gl_texture_image *dstImage; + struct st_texture_image *stImage; + uint dstWidth = pt->width[dstLevel]; + uint dstHeight = pt->height[dstLevel]; + uint dstDepth = pt->depth[dstLevel]; + uint border = srcImage->Border; + + + dstImage = _mesa_get_tex_image(ctx, texObj, target, dstLevel); + if (!dstImage) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps"); + return; + } + + if (dstImage->ImageOffsets) + _mesa_free(dstImage->ImageOffsets); + + /* Free old image data */ + if (dstImage->Data) + ctx->Driver.FreeTexImageData(ctx, dstImage); + + /* initialize new image */ + _mesa_init_teximage_fields(ctx, target, dstImage, dstWidth, dstHeight, + dstDepth, border, srcImage->InternalFormat); + + dstImage->TexFormat = srcImage->TexFormat; + + stImage = (struct st_texture_image *) dstImage; + stImage->pt = pt; + } + +} diff --git a/src/mesa/state_tracker/st_gen_mipmap.h b/src/mesa/state_tracker/st_gen_mipmap.h new file mode 100644 index 0000000000..7668c1e44e --- /dev/null +++ b/src/mesa/state_tracker/st_gen_mipmap.h @@ -0,0 +1,46 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef ST_GEN_MIPMAP_H +#define ST_GEN_MIPMAP_H + + +extern void +st_init_generate_mipmap(struct st_context *st); + + +extern void +st_destroy_generate_mipmpap(struct st_context *st); + + +extern void +st_generate_mipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj); + + +#endif /* ST_GEN_MIPMAP_H */ diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 1f1e6500e0..84a9094001 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -33,6 +33,7 @@ #include "main/imports.h" #include "main/mtypes.h" +#include "shader/prog_print.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" @@ -68,6 +69,7 @@ st_translate_vertex_program(struct st_context *st, struct pipe_shader_state vs; const struct cso_vertex_shader *cso; GLuint attr, i; + GLuint num_generic = 0; memset(&vs, 0, sizeof(vs)); @@ -117,7 +119,7 @@ st_translate_vertex_program(struct st_context *st, case VERT_ATTRIB_TEX6: case VERT_ATTRIB_TEX7: vs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - vs.input_semantic_index[slot] = attr - VERT_ATTRIB_TEX0; + vs.input_semantic_index[slot] = num_generic++; break; case VERT_ATTRIB_GENERIC0: case VERT_ATTRIB_GENERIC1: @@ -129,7 +131,7 @@ st_translate_vertex_program(struct st_context *st, case VERT_ATTRIB_GENERIC7: assert(attr < VERT_ATTRIB_MAX); vs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - vs.input_semantic_index[slot] = attr - VERT_ATTRIB_GENERIC0; + vs.input_semantic_index[slot] = num_generic++; break; default: assert(0); @@ -143,6 +145,7 @@ st_translate_vertex_program(struct st_context *st, vs.output_semantic_index[i] = 0; } + num_generic = 0; /* * Determine number of outputs, the (default) output register * mapping and the semantic information for each output. @@ -207,14 +210,14 @@ st_translate_vertex_program(struct st_context *st, case VERT_RESULT_TEX6: case VERT_RESULT_TEX7: vs.output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - vs.output_semantic_index[slot] = attr - VERT_RESULT_TEX0; + vs.output_semantic_index[slot] = num_generic++; break; case VERT_RESULT_VAR0: /* fall-through */ default: assert(attr - VERT_RESULT_VAR0 < MAX_VARYING); vs.output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - vs.output_semantic_index[slot] = attr - VERT_RESULT_VAR0; + vs.output_semantic_index[slot] = num_generic++; } } } @@ -258,6 +261,9 @@ st_translate_vertex_program(struct st_context *st, cso = st_cached_vs_state(st, &vs); stvp->cso = cso; + if (0) + _mesa_print_program(&stvp->Base.Base); + if (TGSI_DEBUG) tgsi_dump( tokensOut, 0 ); } @@ -286,6 +292,7 @@ st_translate_fragment_program(struct st_context *st, GLuint attr; const GLbitfield inputsRead = stfp->Base.Base.InputsRead; GLuint vslot = 0; + GLuint num_generic = 0; memset(&fs, 0, sizeof(fs)); @@ -338,14 +345,14 @@ st_translate_fragment_program(struct st_context *st, case FRAG_ATTRIB_TEX6: case FRAG_ATTRIB_TEX7: fs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - fs.input_semantic_index[slot] = attr - FRAG_ATTRIB_TEX0; + fs.input_semantic_index[slot] = num_generic++; interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; break; case FRAG_ATTRIB_VAR0: /* fall-through */ default: fs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - fs.input_semantic_index[slot] = attr - FRAG_ATTRIB_VAR0; + fs.input_semantic_index[slot] = num_generic++; interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; } } @@ -415,6 +422,9 @@ st_translate_fragment_program(struct st_context *st, cso = st_cached_fs_state(st, &fs); stfp->fs = cso; + if (0) + _mesa_print_program(&stfp->Base.Base); + if (TGSI_DEBUG) tgsi_dump( tokensOut, 0/*TGSI_DUMP_VERBOSE*/ ); diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index 15cc458be8..844a9f80d8 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -59,6 +59,10 @@ target_to_target(GLenum target) } #endif + +/** + * Allocate a new pipe_texture object + */ struct pipe_texture * st_texture_create(struct st_context *st, enum pipe_texture_target target, @@ -70,7 +74,7 @@ st_texture_create(struct st_context *st, GLuint depth0, GLuint compress_byte) { - struct pipe_texture *pt = CALLOC_STRUCT(pipe_texture); + struct pipe_texture pt; assert(target <= PIPE_TEXTURE_CUBE); @@ -78,39 +82,33 @@ st_texture_create(struct st_context *st, _mesa_lookup_enum_by_nr(target), _mesa_lookup_enum_by_nr(format), first_level, last_level); - if (!pt) - return NULL; - assert(format); - pt->target = target; - pt->format = format; - pt->first_level = first_level; - pt->last_level = last_level; - pt->width[0] = width0; - pt->height[0] = height0; - pt->depth[0] = depth0; - pt->compressed = compress_byte ? 1 : 0; - pt->cpp = pt->compressed ? compress_byte : st_sizeof_format(format); - pt->refcount = 1; - - st->pipe->texture_create(st->pipe, &pt); - - return pt; + pt.target = target; + pt.format = format; + pt.first_level = first_level; + pt.last_level = last_level; + pt.width[0] = width0; + pt.height[0] = height0; + pt.depth[0] = depth0; + pt.compressed = compress_byte ? 1 : 0; + pt.cpp = pt.compressed ? compress_byte : st_sizeof_format(format); + pt.refcount = 1; + + return st->pipe->texture_create(st->pipe, &pt); } - - -/* Can the image be pulled into a unified mipmap texture. This mirrors - * the completeness test in a lot of ways. +/** + * Check if a texture image be pulled into a unified mipmap texture. + * This mirrors the completeness test in a lot of ways. * * Not sure whether I want to pass gl_texture_image here. */ GLboolean -st_texture_match_image(struct pipe_texture *pt, - struct gl_texture_image *image, - GLuint face, GLuint level) +st_texture_match_image(const struct pipe_texture *pt, + const struct gl_texture_image *image, + GLuint face, GLuint level) { /* Images with borders are never pulled into mipmap textures. */ @@ -189,6 +187,7 @@ st_texture_image_map(struct st_context *st, struct st_texture_image *stImage, return pipe_surface_map(stImage->surface); } + void st_texture_image_unmap(struct st_texture_image *stImage) { @@ -201,7 +200,8 @@ st_texture_image_unmap(struct st_texture_image *stImage) -/* Upload data to a rectangular sub-region. Lots of choices how to do this: +/** + * Upload data to a rectangular sub-region. Lots of choices how to do this: * * - memcpy by span to current destination * - upload data as new buffer and blit @@ -261,13 +261,14 @@ st_texture_image_data(struct pipe_context *pipe, } } + /* Copy mipmap image between textures */ void st_texture_image_copy(struct pipe_context *pipe, - struct pipe_texture *dst, - GLuint face, GLuint level, - struct pipe_texture *src) + struct pipe_texture *dst, + GLuint face, GLuint level, + struct pipe_texture *src) { GLuint width = src->width[level]; GLuint height = src->height[level]; @@ -278,6 +279,7 @@ st_texture_image_copy(struct pipe_context *pipe, if (dst->compressed) height /= 4; + for (i = 0; i < depth; i++) { dst_surface = pipe->get_tex_surface(pipe, dst, face, level, i); src_surface = pipe->get_tex_surface(pipe, src, face, level, i); @@ -292,5 +294,4 @@ st_texture_image_copy(struct pipe_context *pipe, pipe_surface_reference(&dst_surface, NULL); pipe_surface_reference(&src_surface, NULL); } - } diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h index d8b1bcad9d..0b87a494c3 100644 --- a/src/mesa/state_tracker/st_texture.h +++ b/src/mesa/state_tracker/st_texture.h @@ -47,11 +47,11 @@ st_texture_create(struct st_context *st, GLuint compress_byte); -/* Check if an image fits an existing texture +/* Check if an image fits into an existing texture object. */ extern GLboolean -st_texture_match_image(struct pipe_texture *pt, - struct gl_texture_image *image, +st_texture_match_image(const struct pipe_texture *pt, + const struct gl_texture_image *image, GLuint face, GLuint level); /* Return a pointer to an image within a texture. Return image stride as @@ -73,7 +73,7 @@ extern const GLuint * st_texture_depth_offsets(struct pipe_texture *pt, GLuint level); -/* Return the linear offset of an image relative to the start of its region: +/* Return the linear offset of an image relative to the start of its region. */ extern GLuint st_texture_image_offset(const struct pipe_texture *pt, diff --git a/src/mesa/x86/rtasm/x86sse.c b/src/mesa/x86/rtasm/x86sse.c index 56c211eee0..f8da6e405f 100644 --- a/src/mesa/x86/rtasm/x86sse.c +++ b/src/mesa/x86/rtasm/x86sse.c @@ -1137,6 +1137,7 @@ void x86_init_func_size( struct x86_function *p, unsigned code_size ) void x86_release_func( struct x86_function *p ) { _mesa_exec_free(p->store); + p->store = NULL; } |