From 9e95fad02e1d8690deba91a8f30d722f039366ff Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 4 Jun 2008 15:44:38 +0800 Subject: i915: Fix GL_DEPTH_TEXTURE_MODE issue. (bug #16221) --- src/mesa/drivers/dri/i915/i915_texstate.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index 987c6c0454..af7166e681 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -65,7 +65,12 @@ translate_texture_format(GLuint mesa_format, GLenum DepthMode) case MESA_FORMAT_RGBA_FXT1: return (MAPSURF_COMPRESSED | MT_COMPRESS_FXT1); case MESA_FORMAT_Z16: - return (MAPSURF_16BIT | (DepthMode==GL_ALPHA?MT_16BIT_A16:MT_16BIT_L16)); + if (DepthMode == GL_ALPHA) + return (MAPSURF_16BIT | MT_16BIT_A16); + else if (DepthMode == GL_INTENSITY) + return (MAPSURF_16BIT | MT_16BIT_I16); + else + return (MAPSURF_16BIT | MT_16BIT_L16); case MESA_FORMAT_RGBA_DXT1: case MESA_FORMAT_RGB_DXT1: return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1); -- cgit v1.2.3 From 028fd5594cd6becfdfc0820b1083729c6033e363 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 4 Jun 2008 14:41:33 -0600 Subject: Set the attribute as used. 
cherry-picked from gallium-0.1 --- src/mesa/shader/slang/slang_link.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/shader/slang/slang_link.c b/src/mesa/shader/slang/slang_link.c index 80cd4b6df6..dd4990ba02 100644 --- a/src/mesa/shader/slang/slang_link.c +++ b/src/mesa/shader/slang/slang_link.c @@ -262,6 +262,9 @@ _slang_resolve_attributes(struct gl_shader_program *shProg, return GL_FALSE; } _mesa_add_attribute(shProg->Attributes, name, size, attr); + + /* set the attribute as used */ + usedAttributes |= 1<<attr; } inst->SrcReg[j].Index = VERT_ATTRIB_GENERIC0 + attr; -- cgit v1.2.3 From 91b491debea274cc4dfc12130624935d4ebd9728 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 5 Jun 2008 15:23:46 -0600 Subject: insert mesa/ before include files --- src/mesa/drivers/osmesa/osmesa.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/osmesa/osmesa.c b/src/mesa/drivers/osmesa/osmesa.c index 29a6c1f4aa..f2367bbbb7 100644 --- a/src/mesa/drivers/osmesa/osmesa.c +++ b/src/mesa/drivers/osmesa/osmesa.c @@ -33,14 +33,14 @@ */ -#include "glheader.h" +#include "main/glheader.h" #include "GL/osmesa.h" -#include "context.h" -#include "extensions.h" -#include "framebuffer.h" -#include "imports.h" -#include "mtypes.h" -#include "renderbuffer.h" +#include "main/context.h" +#include "main/extensions.h" +#include "main/framebuffer.h" +#include "main/imports.h" +#include "main/mtypes.h" +#include "main/renderbuffer.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" #include "swrast/s_context.h" -- cgit v1.2.3 From 18906b7904842e8a43257b179335c600a40fe142 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 5 Jun 2008 15:36:07 -0600 Subject: Move construction of libOSMesa.so into src/mesa/drivers/osmesa/Makefile This removes some cruft from src/mesa/Makefile. Something similar could be done for stand-alone / Xlib-Mesa libGL... 
--- src/mesa/Makefile | 37 ++++++-------------- src/mesa/drivers/osmesa/Makefile | 74 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 27 deletions(-) create mode 100644 src/mesa/drivers/osmesa/Makefile (limited to 'src/mesa') diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 08d723553e..53b1caaab1 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -68,6 +68,11 @@ libmesa.a: $(SOLO_OBJECTS) mimeset -f "$@" ; \ fi +# Make archive of gl* API dispatcher functions only +libglapi.a: $(GLAPI_OBJECTS) + @ $(TOP)/bin/mklib -o glapi -static $(GLAPI_OBJECTS) + + linux-solo: depend subdirs libmesa.a cd drivers/dri && $(MAKE) @@ -97,7 +102,7 @@ fbdev: $(CORE_OBJECTS) $(FBDEV_DRIVER_OBJECTS) $(COMMON_DRIVER_OBJECTS) ###################################################################### -# Stand-alone Mesa libGL and libOSMesa +# Stand-alone Mesa libGL STAND_ALONE_DRIVER_SOURCES = \ $(COMMON_DRIVER_SOURCES) \ $(X11_DRIVER_SOURCES) @@ -108,19 +113,13 @@ STAND_ALONE_OBJECTS = \ $(CORE_OBJECTS) \ $(STAND_ALONE_DRIVER_OBJECTS) -# For libOSMesa16 or libOSMesa32 we link _all_ the objects into the library, -# not just the osmesa.o object (i.e. we don't have a libGL). 
-OSMESA16_OBJECTS = \ - $(CORE_OBJECTS) \ - $(COMMON_DRIVER_OBJECTS) \ - $(OSMESA_DRIVER_OBJECTS) - stand-alone: depend subdirs libmesa.a \ $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) -osmesa-only: depend subdirs \ - $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME) +osmesa-only: depend subdirs libmesa.a libglapi.a + cd drivers/osmesa && $(MAKE) + # Make the GL library $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(STAND_ALONE_OBJECTS) @@ -129,22 +128,6 @@ $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(STAND_ALONE_OBJECTS) -install $(TOP)/$(LIB_DIR) \ $(MKLIB_OPTIONS) $(GL_LIB_DEPS) $(STAND_ALONE_OBJECTS) -# Make the OSMesa library -$(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME): $(OSMESA_DRIVER_OBJECTS) $(OSMESA16_OBJECTS) - @ if [ "${DRIVER_DIRS}" = "osmesa" ] ; then \ - $(TOP)/bin/mklib -o $(OSMESA_LIB) -linker '$(CC)' \ - -ldflags '$(LDFLAGS)' -major $(MESA_MAJOR) \ - -minor $(MESA_MINOR) -patch $(MESA_TINY) \ - -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ - $(OSMESA_LIB_DEPS) $(OSMESA16_OBJECTS) ; \ - else \ - $(TOP)/bin/mklib -o $(OSMESA_LIB) -linker '$(CC)' \ - -ldflags '$(LDFLAGS)' -major $(MESA_MAJOR) \ - -minor $(MESA_MINOR) -patch $(GL_TINY) \ - -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ - $(OSMESA_LIB_DEPS) $(OSMESA_DRIVER_OBJECTS) ; \ - fi - ###################################################################### # Generic stuff @@ -205,7 +188,7 @@ tags: clean: -rm -f */*.o -rm -f */*/*.o - -rm -f depend depend.bak libmesa.a + -rm -f depend depend.bak libmesa.a libglapi.a -rm -f drivers/*/*.o -@cd drivers/dri && $(MAKE) clean -@cd drivers/xorg && $(MAKE) clean diff --git a/src/mesa/drivers/osmesa/Makefile b/src/mesa/drivers/osmesa/Makefile new file mode 100644 index 0000000000..fa8dffcb3e --- /dev/null +++ b/src/mesa/drivers/osmesa/Makefile @@ -0,0 +1,74 @@ +# src/mesa/drivers/osmesa/Makefile for libOSMesa.so + +# Note that we may generate libOSMesa.so or libOSMesa16.so or libOSMesa32.so +# with this Makefile + + +TOP = ../../../.. 
+ +include $(TOP)/configs/current + + + +SOURCES = osmesa.c + +OBJECTS = $(SOURCES:.c=.o) + +INCLUDE_DIRS = \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main + +CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mesa/libglapi.a + + +.PHONY: osmesa8 +.PHONY: osmesa16 + + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + + +default: +# $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME) + @ if [ "${DRIVER_DIRS}" = "osmesa" ] ; then \ + $(MAKE) osmesa16 ; \ + else \ + $(MAKE) osmesa8 ; \ + fi + + + + +# The normal libOSMesa is used in conjunction with libGL +osmesa8: $(TOP)/lib/$(OSMESA_LIB_NAME) + +$(TOP)/lib/$(OSMESA_LIB_NAME): $(OBJECTS) + $(TOP)/bin/mklib -o $(OSMESA_LIB) \ + -linker "$(CC)" \ + -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ + -install $(TOP)/$(LIB_DIR) \ + $(MKLIB_OPTIONS) \ + $(OSMESA_LIB_DEPS) $(OBJECTS) + + + + +# The libOSMesa16/libOSMesa32 libraries do not use libGL but rather are built +# with all the other Mesa sources (compiled with -DCHAN_BITS=16/32) +osmesa16: $(OBJECTS) $(CORE_MESA) + $(TOP)/bin/mklib -o $(OSMESA_LIB) \ + -linker "$(CC)" \ + -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ + -install $(TOP)/$(LIB_DIR) \ + $(MKLIB_OPTIONS) \ + $(OSMESA_LIB_DEPS) $(OBJECTS) $(CORE_MESA) + + + +clean: + -rm -f *.o *~ + + +# XXX todo install rule? 
-- cgit v1.2.3 From bf5e573926b2b007cb238afae86207e075603f1e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 5 Jun 2008 16:05:02 -0600 Subject: prefix some includes with mesa/ or glapi/ --- src/mesa/drivers/x11/glxapi.c | 4 ++-- src/mesa/drivers/x11/xm_api.c | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/x11/glxapi.c b/src/mesa/drivers/x11/glxapi.c index 309a0008d7..c2ccce6f52 100644 --- a/src/mesa/drivers/x11/glxapi.c +++ b/src/mesa/drivers/x11/glxapi.c @@ -34,8 +34,8 @@ #include #include #include -#include "glheader.h" -#include "glapi.h" +#include "main/glheader.h" +#include "glapi/glapi.h" #include "glxapi.h" diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c index 8941817531..5bae69a472 100644 --- a/src/mesa/drivers/x11/xm_api.c +++ b/src/mesa/drivers/x11/xm_api.c @@ -65,14 +65,14 @@ #include "glxheader.h" #include "GL/xmesa.h" #include "xmesaP.h" -#include "context.h" -#include "extensions.h" -#include "framebuffer.h" -#include "glthread.h" -#include "imports.h" -#include "macros.h" -#include "renderbuffer.h" -#include "teximage.h" +#include "main/context.h" +#include "main/extensions.h" +#include "main/framebuffer.h" +#include "glapi/glthread.h" +#include "main/imports.h" +#include "main/macros.h" +#include "main/renderbuffer.h" +#include "main/teximage.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" #include "vbo/vbo.h" -- cgit v1.2.3 From ea7eb9a374bec4160b07b3c2315c00d9416daf7f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 5 Jun 2008 16:06:17 -0600 Subject: new Makefile for building "stand-alone", Xlib-based libGL --- src/mesa/drivers/x11/Makefile | 59 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 src/mesa/drivers/x11/Makefile (limited to 'src/mesa') diff --git a/src/mesa/drivers/x11/Makefile b/src/mesa/drivers/x11/Makefile new file mode 100644 index 
0000000000..3c7219af26 --- /dev/null +++ b/src/mesa/drivers/x11/Makefile @@ -0,0 +1,59 @@ +# src/mesa/drivers/x11/Makefile for libGL.so + +# This builds "stand-alone" Mesa, a version of libGL that does not need the +# GLX extension. All rendering is converted to Xlib calls. No hardware +# acceleration. + + +TOP = ../../../.. + +include $(TOP)/configs/current + + +GL_MAJOR = 1 +GL_MINOR = 5 +GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) + + +SOURCES = \ + fakeglx.c \ + glxapi.c \ + realglx.c \ + xfonts.c \ + xm_api.c \ + xm_buffer.c \ + xm_dd.c \ + xm_glide.c \ + xm_image.c \ + xm_line.c \ + xm_span.c \ + xm_tri.c + +OBJECTS = $(SOURCES:.c=.o) + +INCLUDE_DIRS = \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main + +CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mesa/libglapi.a + + + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + + +default: $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) + + +$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(OBJECTS) $(CORE_MESA) + @ $(TOP)/bin/mklib -o $(GL_LIB) \ + -linker '$(CC)' -ldflags '$(LDFLAGS)' \ + -major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \ + -install $(TOP)/$(LIB_DIR) \ + $(MKLIB_OPTIONS) $(GL_LIB_DEPS) $(OBJECTS) + + +clean: + -rm -f *.o *~ -- cgit v1.2.3 From 2b84b22bad33e14227abcfab53f93100efac976f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 5 Jun 2008 16:08:42 -0600 Subject: Move building of stand-alone Mesa into drivers/x11/Makefile Also, some re-org, renaming, and general clean-up. We're just a few steps away from removing all the special-case rules for building drivers. 
--- src/mesa/Makefile | 70 ++++++++++++++++++++++++------------------------------- 1 file changed, 31 insertions(+), 39 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 53b1caaab1..c4d151822a 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -27,12 +27,12 @@ GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) default: depend @for driver in $(DRIVER_DIRS) ; do \ case "$$driver" in \ - x11) $(MAKE) stand-alone || exit 1 ;; \ - dri) $(MAKE) linux-solo || exit 1 ;; \ - osmesa) $(MAKE) osmesa-only || exit 1 ;; \ - beos) $(MAKE) beos || exit 1 ;; \ - directfb) $(MAKE) directfb || exit 1 ;; \ - fbdev) $(MAKE) fbdev || exit 1 ;; \ + x11) $(MAKE) xlib-driver || exit 1 ;; \ + dri) $(MAKE) dri-drivers || exit 1 ;; \ + osmesa) $(MAKE) osmesa-driver || exit 1 ;; \ + beos) $(MAKE) beos-driver || exit 1 ;; \ + directfb) $(MAKE) directfb-driver || exit 1 ;; \ + fbdev) $(MAKE) fbdev-driver || exit 1 ;; \ *) echo "$$driver is invalid in DRIVER_DIRS" >&2; exit 1;; \ esac ; \ done @@ -52,16 +52,9 @@ install: default ###################################################################### -# BeOS driver target - -beos: depend subdirs libmesa.a - cd drivers/beos && $(MAKE) +# Helper libraries used by many drivers: - -###################################################################### -# Linux DRI drivers - -# Make archive of core object files +# Make archive of core mesa object files libmesa.a: $(SOLO_OBJECTS) @ $(TOP)/bin/mklib -o mesa -static $(SOLO_OBJECTS); @if [ "${CONFIG_NAME}" = "beos" ] ; then \ @@ -73,27 +66,38 @@ libglapi.a: $(GLAPI_OBJECTS) @ $(TOP)/bin/mklib -o glapi -static $(GLAPI_OBJECTS) -linux-solo: depend subdirs libmesa.a + +###################################################################### +# BeOS driver target + +beos-driver: depend subdirs libmesa.a + cd drivers/beos && $(MAKE) + + +###################################################################### +# DRI drivers + +dri-drivers: depend subdirs libmesa.a 
cd drivers/dri && $(MAKE) ##################################################################### -# Stand-alone Mesa libGL, no built-in drivers (DirectFB) +# DirectFB driver (libGL.so) -libgl-core: $(CORE_OBJECTS) +directfb-libgl: $(CORE_OBJECTS) @ $(TOP)/bin/mklib -o $(GL_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) $(CORE_OBJECTS) \ $(GL_LIB_DEPS) -directfb: depend subdirs libgl-core +directfb-driver: depend subdirs directfb-libgl cd drivers/directfb && $(MAKE) ##################################################################### -# fbdev Mesa driver (libGL.so) +# fbdev driver (libGL.so) -fbdev: $(CORE_OBJECTS) $(FBDEV_DRIVER_OBJECTS) $(COMMON_DRIVER_OBJECTS) +fbdev-driver: $(CORE_OBJECTS) $(FBDEV_DRIVER_OBJECTS) $(COMMON_DRIVER_OBJECTS) @ $(TOP)/bin/mklib -o $(GL_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ @@ -102,31 +106,19 @@ fbdev: $(CORE_OBJECTS) $(FBDEV_DRIVER_OBJECTS) $(COMMON_DRIVER_OBJECTS) ###################################################################### -# Stand-alone Mesa libGL -STAND_ALONE_DRIVER_SOURCES = \ - $(COMMON_DRIVER_SOURCES) \ - $(X11_DRIVER_SOURCES) - -STAND_ALONE_DRIVER_OBJECTS = $(STAND_ALONE_DRIVER_SOURCES:.c=.o) +# Xlib driver (libGL.so) -STAND_ALONE_OBJECTS = \ - $(CORE_OBJECTS) \ - $(STAND_ALONE_DRIVER_OBJECTS) +xlib-driver: depend subdirs libmesa.a libglapi.a + cd drivers/x11 && $(MAKE) -stand-alone: depend subdirs libmesa.a \ - $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) +###################################################################### +# osmesa driver (libOSMesa.so) -osmesa-only: depend subdirs libmesa.a libglapi.a +osmesa-driver: depend subdirs libmesa.a libglapi.a cd drivers/osmesa && $(MAKE) -# Make the GL library -$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(STAND_ALONE_OBJECTS) - @ $(TOP)/bin/mklib -o $(GL_LIB) -linker 
'$(CC)' -ldflags '$(LDFLAGS)' \ - -major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \ - -install $(TOP)/$(LIB_DIR) \ - $(MKLIB_OPTIONS) $(GL_LIB_DEPS) $(STAND_ALONE_OBJECTS) ###################################################################### -- cgit v1.2.3 From e89e94e01a40bbaf9e77055c6cddddba0ddbc188 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 5 Jun 2008 16:26:15 -0600 Subject: added dependencies --- src/mesa/drivers/x11/Makefile | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/x11/Makefile b/src/mesa/drivers/x11/Makefile index 3c7219af26..8ee9b61808 100644 --- a/src/mesa/drivers/x11/Makefile +++ b/src/mesa/drivers/x11/Makefile @@ -15,6 +15,15 @@ GL_MINOR = 5 GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) +HEADERS = \ + glxapi.h \ + glxheader.h \ + realglx.h \ + xfonts.h \ + xmesaP.h \ + xm_glide.h \ + xm_image.h + SOURCES = \ fakeglx.c \ glxapi.c \ @@ -55,5 +64,18 @@ $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(OBJECTS) $(CORE_MESA) $(MKLIB_OPTIONS) $(GL_LIB_DEPS) $(OBJECTS) + clean: -rm -f *.o *~ + -rm -f depend depend.bak + + + +depend: $(SOURCES) $(HEADERS) + @ echo "running $(MKDEP)" + @ touch depend + @$(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(SOURCES) $(HEADERS) \ + > /dev/null 2>/dev/null + + +-include depend -- cgit v1.2.3 From 4a68b62b86249307ade3ca5a9ca531e283ab7dcb Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 5 Jun 2008 16:27:43 -0600 Subject: remove X11, Glide, OSMesa sources --- src/mesa/sources | 33 +-------------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/sources b/src/mesa/sources index 054f667a25..477f76bb3d 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -241,33 +241,6 @@ SPARC_API = \ COMMON_DRIVER_SOURCES = \ drivers/common/driverfuncs.c -X11_DRIVER_SOURCES = \ - drivers/x11/glxapi.c \ - drivers/x11/fakeglx.c \ - drivers/x11/xfonts.c \ - drivers/x11/xm_api.c \ - 
drivers/x11/xm_buffer.c \ - drivers/x11/xm_dd.c \ - drivers/x11/xm_glide.c \ - drivers/x11/xm_line.c \ - drivers/x11/xm_span.c \ - drivers/x11/xm_tri.c - -OSMESA_DRIVER_SOURCES = \ - drivers/osmesa/osmesa.c - -GLIDE_DRIVER_SOURCES = \ - drivers/glide/fxapi.c \ - drivers/glide/fxdd.c \ - drivers/glide/fxddspan.c \ - drivers/glide/fxddtex.c \ - drivers/glide/fxsetup.c \ - drivers/glide/fxtexman.c \ - drivers/glide/fxtris.c \ - drivers/glide/fxvb.c \ - drivers/glide/fxglidew.c \ - drivers/glide/fxg.c - SVGA_DRIVER_SOURCES = \ drivers/svga/svgamesa.c \ drivers/svga/svgamesa8.c \ @@ -287,9 +260,7 @@ ALL_SOURCES = \ $(SOLO_SOURCES) \ $(ASM_SOURCES) \ $(COMMON_DRIVER_SOURCES)\ - $(X11_DRIVER_SOURCES) \ - $(FBDEV_DRIVER_SOURCES) \ - $(OSMESA_DRIVER_SOURCES) + $(FBDEV_DRIVER_SOURCES) SOLO_SOURCES = \ $(MAIN_SOURCES) \ @@ -319,8 +290,6 @@ GLAPI_OBJECTS = \ CORE_OBJECTS = $(SOLO_OBJECTS) $(GLAPI_OBJECTS) -OSMESA_DRIVER_OBJECTS = $(OSMESA_DRIVER_SOURCES:.c=.o) - COMMON_DRIVER_OBJECTS = $(COMMON_DRIVER_SOURCES:.c=.o) FBDEV_DRIVER_OBJECTS = $(FBDEV_DRIVER_SOURCES:.c=.o) -- cgit v1.2.3 From b55694512a1a762ddbe171775cfe2639a12f5606 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 5 Jun 2008 16:28:00 -0600 Subject: reorder some lines, more make clean cmds --- src/mesa/Makefile | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/Makefile b/src/mesa/Makefile index c4d151822a..defc96bfea 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -81,6 +81,20 @@ dri-drivers: depend subdirs libmesa.a cd drivers/dri && $(MAKE) +###################################################################### +# Xlib driver (libGL.so) + +xlib-driver: depend subdirs libmesa.a libglapi.a + cd drivers/x11 && $(MAKE) + + +###################################################################### +# osmesa driver (libOSMesa.so) + +osmesa-driver: depend subdirs libmesa.a libglapi.a + cd drivers/osmesa && $(MAKE) + + 
##################################################################### # DirectFB driver (libGL.so) @@ -105,21 +119,6 @@ fbdev-driver: $(CORE_OBJECTS) $(FBDEV_DRIVER_OBJECTS) $(COMMON_DRIVER_OBJECTS) $(COMMON_DRIVER_OBJECTS) $(GL_LIB_DEPS) -###################################################################### -# Xlib driver (libGL.so) - -xlib-driver: depend subdirs libmesa.a libglapi.a - cd drivers/x11 && $(MAKE) - - -###################################################################### -# osmesa driver (libOSMesa.so) - -osmesa-driver: depend subdirs libmesa.a libglapi.a - cd drivers/osmesa && $(MAKE) - - - ###################################################################### # Generic stuff @@ -184,6 +183,8 @@ clean: -rm -f drivers/*/*.o -@cd drivers/dri && $(MAKE) clean -@cd drivers/xorg && $(MAKE) clean + -@cd drivers/x11 && $(MAKE) clean + -@cd drivers/osmesa && $(MAKE) clean -@cd x86 && $(MAKE) clean -@cd x86-64 && $(MAKE) clean -- cgit v1.2.3 From f8563ddddb9edf056c3c0a5f676f93620c4ce37d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 5 Jun 2008 16:44:11 -0600 Subject: Move fbdev driver construction into src/mesa/drivers/fbdev/Makefile --- src/mesa/Makefile | 19 +++++++------------ src/mesa/drivers/fbdev/Makefile | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 12 deletions(-) create mode 100644 src/mesa/drivers/fbdev/Makefile (limited to 'src/mesa') diff --git a/src/mesa/Makefile b/src/mesa/Makefile index defc96bfea..b6485f39d3 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -95,6 +95,13 @@ osmesa-driver: depend subdirs libmesa.a libglapi.a cd drivers/osmesa && $(MAKE) +##################################################################### +# fbdev driver (libGL.so) + +fbdev-driver: depend subdirs libmesa.a libglapi.a + cd drivers/fbdev && $(MAKE) + + ##################################################################### # DirectFB driver (libGL.so) @@ -108,18 +115,6 @@ directfb-driver: depend subdirs directfb-libgl 
cd drivers/directfb && $(MAKE) -##################################################################### -# fbdev driver (libGL.so) - -fbdev-driver: $(CORE_OBJECTS) $(FBDEV_DRIVER_OBJECTS) $(COMMON_DRIVER_OBJECTS) - @ $(TOP)/bin/mklib -o $(GL_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ - -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ - -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ - $(CORE_OBJECTS) $(FBDEV_DRIVER_OBJECTS) \ - $(COMMON_DRIVER_OBJECTS) $(GL_LIB_DEPS) - - - ###################################################################### # Generic stuff diff --git a/src/mesa/drivers/fbdev/Makefile b/src/mesa/drivers/fbdev/Makefile new file mode 100644 index 0000000000..ac43dfdb67 --- /dev/null +++ b/src/mesa/drivers/fbdev/Makefile @@ -0,0 +1,36 @@ +# src/mesa/drivers/fbdev/Makefile for libGL.so + +TOP = ../../../.. + +include $(TOP)/configs/current + + +SOURCES = glfbdev.c + +OBJECTS = $(SOURCES:.c=.o) + +INCLUDE_DIRS = \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main + +CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mesa/libglapi.a + + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + + +default: $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) + + +$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(CORE_OBJECTS) $(OBJECTS) + @ $(TOP)/bin/mklib -o $(GL_LIB) \ + -linker '$(CC)' -ldflags '$(LDFLAGS)' \ + -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ + -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ + $(CORE_MESA) $(OBJECTS) $(GL_LIB_DEPS) + + +clean: + -rm -f $(OBJECTS) -- cgit v1.2.3 From c9739b20555847c398e7b52fb0e580d86f958430 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 6 Jun 2008 12:09:01 -0600 Subject: move directfb-libgl into drivers/directfb/Makefile --- src/mesa/Makefile | 6 ------ src/mesa/drivers/directfb/Makefile | 15 ++++++++++++++- 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/Makefile b/src/mesa/Makefile index b6485f39d3..d5866bb9f0 100644 --- a/src/mesa/Makefile 
+++ b/src/mesa/Makefile @@ -105,12 +105,6 @@ fbdev-driver: depend subdirs libmesa.a libglapi.a ##################################################################### # DirectFB driver (libGL.so) -directfb-libgl: $(CORE_OBJECTS) - @ $(TOP)/bin/mklib -o $(GL_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ - -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ - -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) $(CORE_OBJECTS) \ - $(GL_LIB_DEPS) - directfb-driver: depend subdirs directfb-libgl cd drivers/directfb && $(MAKE) diff --git a/src/mesa/drivers/directfb/Makefile b/src/mesa/drivers/directfb/Makefile index dc71b385ca..945eb8b3d9 100644 --- a/src/mesa/drivers/directfb/Makefile +++ b/src/mesa/drivers/directfb/Makefile @@ -25,11 +25,24 @@ DIRECTFBGL_MESA_OBJECTS = $(DIRECTFBGL_MESA_SOURCES:.c=.o) DIRECTFBGL_MESA = libidirectfbgl_mesa.so +LIBS = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mesa/libglapi.a + + .c.o: $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $(DFB_CFLAGS) $< -o $@ -default: directfbgl_mesa +default: directfb-libgl directfbgl_mesa + + +# XXX this used to be in src/mesa/Makefile and is probably broken now +directfb-libgl: $(CORE_OBJECTS) + @ $(TOP)/bin/mklib -o $(GL_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ + -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ + -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) $(CORE_OBJECTS) \ + $(GL_LIB_DEPS) + + # Mesa DirectFBGL module directfbgl_mesa: $(DIRECTFBGL_MESA_OBJECTS) -- cgit v1.2.3 From 04c6ca99dd194aac085c81044f876c17cd27baee Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 6 Jun 2008 12:09:55 -0600 Subject: new drivers/Makefile with simple default/clean/install rules --- src/mesa/drivers/Makefile | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 src/mesa/drivers/Makefile (limited to 'src/mesa') diff --git a/src/mesa/drivers/Makefile b/src/mesa/drivers/Makefile new file mode 100644 index 0000000000..c5998413e8 --- /dev/null +++ b/src/mesa/drivers/Makefile @@ -0,0 
+1,29 @@ +# src/mesa/drivers/Makefile + +TOP = ../../.. +include $(TOP)/configs/current + + +default: + @for dir in $(DRIVER_DIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1; \ + fi \ + done + + +clean: + @for dir in $(DRIVER_DIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE) clean) || exit 1; \ + fi \ + done + + +install: + @for dir in $(DRIVER_DIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE) install) || exit 1; \ + fi \ + done + -- cgit v1.2.3 From 358dcd71783beb69c4e3923138cf92c27b871159 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 6 Jun 2008 12:12:59 -0600 Subject: remove more old stuff, insert new driver_subdir build rules (but not enabled yet) --- src/mesa/Makefile | 50 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 18 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/Makefile b/src/mesa/Makefile index d5866bb9f0..726bb4b9d0 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -6,11 +6,6 @@ include $(TOP)/configs/current include sources -GL_MAJOR = 1 -GL_MINOR = 5 -GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) - - .SUFFIXES : .cpp .c.o: @@ -37,6 +32,7 @@ default: depend esac ; \ done +# XXX replace this with new_install above someday install: default @for driver in $(DRIVER_DIRS) ; do \ case "$$driver" in \ @@ -51,6 +47,21 @@ install: default done + +# default: build dependencies, then mesa subdirs, then convenience +# libs (.a) and finally the device drivers: +new_default: depend subdirs libmesa.a libglapi.a driver_subdirs + + +driver_subdirs: + (cd drivers && $(MAKE)) + + +# this doesn't work yet but is probably the way to go in the future +new_install: + (cd drivers && $(MAKE) install) + + ###################################################################### # Helper libraries used by many drivers: @@ -66,7 +77,6 @@ libglapi.a: $(GLAPI_OBJECTS) @ $(TOP)/bin/mklib -o glapi -static $(GLAPI_OBJECTS) - 
###################################################################### # BeOS driver target @@ -110,15 +120,7 @@ directfb-driver: depend subdirs directfb-libgl ###################################################################### -# Generic stuff - -depend: $(ALL_SOURCES) - @ echo "running $(MKDEP)" - @ touch depend - @$(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(ALL_SOURCES) \ - > /dev/null 2>/dev/null - - +# Assembly subdirs subdirs: @ if echo "$(ASM_FLAGS)" | grep -q USE_X86_ASM ; then \ (cd x86 && $(MAKE)) || exit 1 ; \ @@ -128,6 +130,20 @@ subdirs: (cd x86-64 && $(MAKE)) || exit 1 ; \ fi + +###################################################################### +# Dependency generation + +depend: $(ALL_SOURCES) + @ echo "running $(MKDEP)" + @ touch depend + @$(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(ALL_SOURCES) \ + > /dev/null 2>/dev/null + + +###################################################################### +# Installation rules + pcedit = sed \ -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \ -e 's,@LIB_DIR@,$(LIB_DIR),' \ @@ -156,15 +172,13 @@ install-osmesa: default install-dri: cd drivers/dri && $(MAKE) install -## NOT INSTALLED YET: -## $(INSTALL) -d $(DESTDIR)$(INSTALL_DIR)/include/GLES -## $(INSTALL) -m 644 include/GLES/*.h $(DESTDIR)$(INSTALL_DIR)/include/GLES # Emacs tags tags: etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h + clean: -rm -f */*.o -rm -f */*/*.o -- cgit v1.2.3 From dd750e0763d4a03a41d8a4ebde9489ad9c4aa82f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 6 Jun 2008 12:18:40 -0600 Subject: Remove the old, complicated default build rule; use the new driver_subdirs rule Also, all the old driver-specific rules are now gone. 
--- src/mesa/Makefile | 79 +++++++++---------------------------------------------- 1 file changed, 13 insertions(+), 66 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 726bb4b9d0..06b4e9cceb 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -18,19 +18,16 @@ include sources $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ -# Figure out what to make here -default: depend - @for driver in $(DRIVER_DIRS) ; do \ - case "$$driver" in \ - x11) $(MAKE) xlib-driver || exit 1 ;; \ - dri) $(MAKE) dri-drivers || exit 1 ;; \ - osmesa) $(MAKE) osmesa-driver || exit 1 ;; \ - beos) $(MAKE) beos-driver || exit 1 ;; \ - directfb) $(MAKE) directfb-driver || exit 1 ;; \ - fbdev) $(MAKE) fbdev-driver || exit 1 ;; \ - *) echo "$$driver is invalid in DRIVER_DIRS" >&2; exit 1;; \ - esac ; \ - done + +# Default: build dependencies, then mesa subdirs, then convenience +# libs (.a) and finally the device drivers: +default: depend subdirs libmesa.a libglapi.a driver_subdirs + + +# this doesn't work yet but is probably the way to go in the future +new_install: + (cd drivers && $(MAKE) install) + # XXX replace this with new_install above someday install: default @@ -48,20 +45,6 @@ install: default -# default: build dependencies, then mesa subdirs, then convenience -# libs (.a) and finally the device drivers: -new_default: depend subdirs libmesa.a libglapi.a driver_subdirs - - -driver_subdirs: - (cd drivers && $(MAKE)) - - -# this doesn't work yet but is probably the way to go in the future -new_install: - (cd drivers && $(MAKE) install) - - ###################################################################### # Helper libraries used by many drivers: @@ -78,45 +61,9 @@ libglapi.a: $(GLAPI_OBJECTS) ###################################################################### -# BeOS driver target - -beos-driver: depend subdirs libmesa.a - cd drivers/beos && $(MAKE) - - -###################################################################### -# DRI drivers 
- -dri-drivers: depend subdirs libmesa.a - cd drivers/dri && $(MAKE) - - -###################################################################### -# Xlib driver (libGL.so) - -xlib-driver: depend subdirs libmesa.a libglapi.a - cd drivers/x11 && $(MAKE) - - -###################################################################### -# osmesa driver (libOSMesa.so) - -osmesa-driver: depend subdirs libmesa.a libglapi.a - cd drivers/osmesa && $(MAKE) - - -##################################################################### -# fbdev driver (libGL.so) - -fbdev-driver: depend subdirs libmesa.a libglapi.a - cd drivers/fbdev && $(MAKE) - - -##################################################################### -# DirectFB driver (libGL.so) - -directfb-driver: depend subdirs directfb-libgl - cd drivers/directfb && $(MAKE) +# Device drivers +driver_subdirs: + (cd drivers && $(MAKE)) ###################################################################### -- cgit v1.2.3 From 847f54e2fa3e1b92ee97e7c2895a71ed20ce227b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 6 Jun 2008 12:24:44 -0600 Subject: move a beos-ism down into drivers/beos/Makefile --- src/mesa/Makefile | 3 --- src/mesa/drivers/beos/Makefile | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 06b4e9cceb..8c5e3c894e 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -51,9 +51,6 @@ install: default # Make archive of core mesa object files libmesa.a: $(SOLO_OBJECTS) @ $(TOP)/bin/mklib -o mesa -static $(SOLO_OBJECTS); - @if [ "${CONFIG_NAME}" = "beos" ] ; then \ - mimeset -f "$@" ; \ - fi # Make archive of gl* API dispatcher functions only libglapi.a: $(GLAPI_OBJECTS) diff --git a/src/mesa/drivers/beos/Makefile b/src/mesa/drivers/beos/Makefile index 0448650a8c..342d7ce024 100644 --- a/src/mesa/drivers/beos/Makefile +++ b/src/mesa/drivers/beos/Makefile @@ -169,8 +169,10 @@ OBJECTS := $(DRIVER_OBJECTS:.cpp=.o) default: depend 
$(TOP)/$(LIB_DIR) $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) +# XXX FIXME: mesa.a might be libmesa.a now $(MESA_MODULES): cd $(TOP)/src/mesa && $(MAKE) mesa.a ; + mimeset -f "$@" $(GLU_MODULES): cd $(GLU_DIR) && $(MAKE) $(subst $(GLU_DIR)/,,$(GLU_MODULES)) ; -- cgit v1.2.3 From bde6b57e48693ee87676f4ff1e0bef54b426eeb5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 6 Jun 2008 12:25:24 -0600 Subject: remove stray semicolon --- src/mesa/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 8c5e3c894e..5cdc7d5751 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -50,7 +50,7 @@ install: default # Make archive of core mesa object files libmesa.a: $(SOLO_OBJECTS) - @ $(TOP)/bin/mklib -o mesa -static $(SOLO_OBJECTS); + @ $(TOP)/bin/mklib -o mesa -static $(SOLO_OBJECTS) # Make archive of gl* API dispatcher functions only libglapi.a: $(GLAPI_OBJECTS) -- cgit v1.2.3 From 1d5b51abc8e9b3ba53a0854c27694309e740f256 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 6 Jun 2008 12:27:14 -0600 Subject: move the install rules --- src/mesa/Makefile | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 5cdc7d5751..061a7bf4f0 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -24,26 +24,6 @@ include sources default: depend subdirs libmesa.a libglapi.a driver_subdirs -# this doesn't work yet but is probably the way to go in the future -new_install: - (cd drivers && $(MAKE) install) - - -# XXX replace this with new_install above someday -install: default - @for driver in $(DRIVER_DIRS) ; do \ - case "$$driver" in \ - osmesa) if [ "$(DRIVER_DIRS)" = osmesa ]; then \ - $(MAKE) install-headers install-osmesa || exit 1 ; \ - else \ - $(MAKE) install-osmesa || exit 1 ; \ - fi ;; \ - dri) $(MAKE) install-libgl install-dri || exit 1 ;; \ - *) $(MAKE) install-libgl || exit 1 
;; \ - esac ; \ - done - - ###################################################################### # Helper libraries used by many drivers: @@ -88,6 +68,26 @@ depend: $(ALL_SOURCES) ###################################################################### # Installation rules +# this isn't fleshed out yet but is probably the way to go in the future +new_install: + (cd drivers && $(MAKE) install) + + +# XXX replace this with new_install above someday +install: default + @for driver in $(DRIVER_DIRS) ; do \ + case "$$driver" in \ + osmesa) if [ "$(DRIVER_DIRS)" = osmesa ]; then \ + $(MAKE) install-headers install-osmesa || exit 1 ; \ + else \ + $(MAKE) install-osmesa || exit 1 ; \ + fi ;; \ + dri) $(MAKE) install-libgl install-dri || exit 1 ;; \ + *) $(MAKE) install-libgl || exit 1 ;; \ + esac ; \ + done + + pcedit = sed \ -e 's,@INSTALL_DIR@,$(INSTALL_DIR),' \ -e 's,@LIB_DIR@,$(LIB_DIR),' \ -- cgit v1.2.3 From 0b72fada6aa3e15cee62c806057ebf986f34433d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 6 Jun 2008 12:27:55 -0600 Subject: s/subdirs/asm_subdirs/ --- src/mesa/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 061a7bf4f0..d39f282b33 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -19,9 +19,9 @@ include sources -# Default: build dependencies, then mesa subdirs, then convenience +# Default: build dependencies, then asm_subdirs, then convenience # libs (.a) and finally the device drivers: -default: depend subdirs libmesa.a libglapi.a driver_subdirs +default: depend asm_subdirs libmesa.a libglapi.a driver_subdirs @@ -45,7 +45,7 @@ driver_subdirs: ###################################################################### # Assembly subdirs -subdirs: +asm_subdirs: @ if echo "$(ASM_FLAGS)" | grep -q USE_X86_ASM ; then \ (cd x86 && $(MAKE)) || exit 1 ; \ fi -- cgit v1.2.3 From 665605234d2aed2baa22fa621fa02478b2c08a4d Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle 
Date: Fri, 6 Jun 2008 21:13:12 +0200 Subject: r300: Remove unused variable r300_tex_obj::dirty_state and related defines This variable was only ever written to, but nobody used its value. --- src/mesa/drivers/dri/r300/r300_context.h | 7 ------- src/mesa/drivers/dri/r300/r300_tex.c | 6 +----- src/mesa/drivers/dri/r300/r300_texmem.c | 4 ---- src/mesa/drivers/dri/r300/r300_texstate.c | 5 ----- src/mesa/drivers/dri/r300/radeon_context.h | 10 ---------- 5 files changed, 1 insertion(+), 31 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index a5ec5ee46e..b33dfeafdd 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -179,13 +179,6 @@ struct r300_tex_obj { GLuint bufAddr; /* Offset to start of locally shared texture block */ - GLuint dirty_state; /* Flags (1 per texunit) for - whether or not this texobj - has dirty hardware state - (pp_*) that needs to be - brought into the - texunit. 
*/ - drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; /* Six, for the cube faces */ diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 0b4acec044..606c3f8a90 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -1077,7 +1077,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, break; case GL_DEPTH_TEXTURE_MODE: - if (texObj->Image[0][texObj->BaseLevel]->TexFormat->BaseFormat + if (texObj->Image[0][texObj->BaseLevel]->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) { r300SetDepthTexMode(texObj); break; @@ -1092,10 +1092,6 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, default: return; } - - /* Mark this texobj as dirty (one bit per tex unit) - */ - t->dirty_state = TEX_ALL; } static void r300BindTexture(GLcontext * ctx, GLenum target, diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c index 723601ac4a..e0fda86ac8 100644 --- a/src/mesa/drivers/dri/r300/r300_texmem.c +++ b/src/mesa/drivers/dri/r300/r300_texmem.c @@ -534,10 +534,6 @@ int r300UploadTexImages(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face) /* hope it's safe to add that here... */ t->offset |= t->tile_bits; } - - /* Mark this texobj as dirty on all units: - */ - t->dirty_state = TEX_ALL; } /* Let the world know we've used this memory recently. 
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index 78fa75228e..131d958005 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -428,10 +428,6 @@ static void r300SetTexImages(r300ContextPtr rmesa, if (tObj->Image[0][t->base.firstLevel]->Height > 2048) t->pitch_reg |= R500_TXHEIGHT_BIT11; } - - t->dirty_state = TEX_ALL; - - /* FYI: r300UploadTexImages( rmesa, t ) used to be called here */ } /* ================================================================ @@ -568,7 +564,6 @@ static GLboolean r300UpdateTexture(GLcontext * ctx, int unit) rmesa->state.texture.unit[unit].texobj = t; t->base.bound |= (1 << unit); - t->dirty_state |= 1 << unit; driUpdateTextureLRU((driTextureObject *) t); /* XXX: should be locked! */ } diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h index 38d8930601..7458d63723 100644 --- a/src/mesa/drivers/dri/r300/radeon_context.h +++ b/src/mesa/drivers/dri/r300/radeon_context.h @@ -53,16 +53,6 @@ struct radeon_context; typedef struct radeon_context radeonContextRec; typedef struct radeon_context *radeonContextPtr; -#define TEX_0 0x1 -#define TEX_1 0x2 -#define TEX_2 0x4 -#define TEX_3 0x8 -#define TEX_4 0x10 -#define TEX_5 0x20 -#define TEX_6 0x40 -#define TEX_7 0x80 -#define TEX_ALL 0xff - /* Rasterizing fallbacks */ /* See correponding strings in r200_swtcl.c */ #define RADEON_FALLBACK_TEXTURE 0x0001 -- cgit v1.2.3 From 03a1144cd1de9425a0bc5ceec98f9c49fa19ac6d Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Fri, 6 Jun 2008 21:28:08 +0200 Subject: r300: Cleanup TX_MIN_FILTER defines This commit should not affect the functionality at all, just cleanup some of the original texture filter guesswork using information from the register documentation. 
--- src/mesa/drivers/dri/r300/r300_reg.h | 29 +++++--------- src/mesa/drivers/dri/r300/r300_state.c | 2 +- src/mesa/drivers/dri/r300/r300_tex.c | 71 ++++++++++++++-------------------- 3 files changed, 39 insertions(+), 63 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 21e1dc29de..999e8bc1b4 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1374,20 +1374,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TX_MAG_FILTER_MASK (3 << 9) # define R300_TX_MIN_FILTER_NEAREST (1 << 11) # define R300_TX_MIN_FILTER_LINEAR (2 << 11) -# define R300_TX_MIN_FILTER_NEAREST_MIP_NEAREST (5 << 11) /* TODO: use spec */ -# define R300_TX_MIN_FILTER_NEAREST_MIP_LINEAR (9 << 11) /* TODO: use spec */ -# define R300_TX_MIN_FILTER_LINEAR_MIP_NEAREST (6 << 11) /* TODO: use spec */ -# define R300_TX_MIN_FILTER_LINEAR_MIP_LINEAR (10 << 11) /* TODO: use spec */ - -/* NOTE: NEAREST doesnt seem to exist. - * Im not seting MAG_FILTER_MASK and (3 << 11) on for all - * anisotropy modes because that would void selected mag filter - */ -# define R300_TX_MIN_FILTER_ANISO_NEAREST (0 << 13) -# define R300_TX_MIN_FILTER_ANISO_LINEAR (0 << 13) -# define R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST (1 << 13) -# define R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR (2 << 13) -# define R300_TX_MIN_FILTER_MASK ( (15 << 11) | (3 << 13) ) +# define R300_TX_MIN_FILTER_MASK (3 << 11) +# define R300_TX_MIN_FILTER_MIP_NONE (0 << 13) +# define R300_TX_MIN_FILTER_MIP_NEAREST (1 << 13) +# define R300_TX_MIN_FILTER_MIP_LINEAR (2 << 13) +# define R300_TX_MIN_FILTER_MIP_MASK (3 << 13) # define R300_TX_MAX_ANISO_1_TO_1 (0 << 21) # define R300_TX_MAX_ANISO_2_TO_1 (2 << 21) # define R300_TX_MAX_ANISO_4_TO_1 (4 << 21) @@ -1432,9 +1423,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
They are given meanings as R, G, B and Alpha by the swizzle specification */ # define R300_TX_FORMAT_X8 0x0 -# define R500_TX_FORMAT_X1 0x0 // bit set in format 2 +# define R500_TX_FORMAT_X1 0x0 // bit set in format 2 # define R300_TX_FORMAT_X16 0x1 -# define R500_TX_FORMAT_X1_REV 0x0 // bit set in format 2 +# define R500_TX_FORMAT_X1_REV 0x0 // bit set in format 2 # define R300_TX_FORMAT_Y4X4 0x2 # define R300_TX_FORMAT_Y8X8 0x3 # define R300_TX_FORMAT_Y16X16 0x4 @@ -2238,7 +2229,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_22 (1 << 1) # define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_SAMPLE0 (0 << 2) # define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE (1 << 2) - + /* Discard src pixels less than or equal to threshold. */ #define R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD 0x4ea0 @@ -3179,7 +3170,7 @@ enum { * 2 to end: Up to 16380 dwords of vertex data. */ #define R300_PACKET3_3D_DRAW_INDX 0x00002A00 - + /* Specify the full set of vertex arrays as (address, stride). * The first parameter is the number of vertex arrays specified. @@ -3209,7 +3200,7 @@ enum { /* Same as R300_PACKET3_3D_DRAW_INDX but without VAP_VTX_FMT */ #define R300_PACKET3_3D_DRAW_INDX_2 0x00003600 -/* Clears a portion of hierachical Z RAM +/* Clears a portion of hierachical Z RAM * 3 dword parameters * 0. START * 1. 
COUNT: 13:0 (max is 0x3FFF) diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 550f710854..7602f12d81 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1284,7 +1284,7 @@ static unsigned long gen_fixed_filter(unsigned long f) return f; mag = f & R300_TX_MAG_FILTER_MASK; - min = f & R300_TX_MIN_FILTER_MASK; + min = f & (R300_TX_MIN_FILTER_MASK|R300_TX_MIN_FILTER_MIP_MASK); /* TODO: Check for anisto filters too */ if ((mag != R300_TX_MAG_FILTER_NEAREST) diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 606c3f8a90..2ea65f8260 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -185,53 +185,38 @@ static void r300SetTexMaxAnisotropy(r300TexObjPtr t, GLfloat max) * \param minf Texture minification mode * \param magf Texture magnification mode */ - static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf) { - GLuint anisotropy = (t->filter & R300_TX_MAX_ANISO_MASK); + t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK); - t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MAG_FILTER_MASK); + switch (minf) { + case GL_NEAREST: + t->filter |= R300_TX_MIN_FILTER_NEAREST; + break; + case GL_LINEAR: + t->filter |= R300_TX_MIN_FILTER_LINEAR; + break; + case GL_NEAREST_MIPMAP_NEAREST: + t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST; + break; + case GL_NEAREST_MIPMAP_LINEAR: + t->filter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR; + break; + case GL_LINEAR_MIPMAP_NEAREST: + t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST; + break; + case GL_LINEAR_MIPMAP_LINEAR: + t->filter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR; + break; + } - if (anisotropy == R300_TX_MAX_ANISO_1_TO_1) { - switch (minf) { - case GL_NEAREST: - t->filter |= R300_TX_MIN_FILTER_NEAREST; - break; 
- case GL_LINEAR: - t->filter |= R300_TX_MIN_FILTER_LINEAR; - break; - case GL_NEAREST_MIPMAP_NEAREST: - t->filter |= R300_TX_MIN_FILTER_NEAREST_MIP_NEAREST; - break; - case GL_NEAREST_MIPMAP_LINEAR: - t->filter |= R300_TX_MIN_FILTER_NEAREST_MIP_LINEAR; - break; - case GL_LINEAR_MIPMAP_NEAREST: - t->filter |= R300_TX_MIN_FILTER_LINEAR_MIP_NEAREST; - break; - case GL_LINEAR_MIPMAP_LINEAR: - t->filter |= R300_TX_MIN_FILTER_LINEAR_MIP_LINEAR; - break; - } - } else { - switch (minf) { - case GL_NEAREST: - t->filter |= R300_TX_MIN_FILTER_ANISO_NEAREST; - break; - case GL_LINEAR: - t->filter |= R300_TX_MIN_FILTER_ANISO_LINEAR; - break; - case GL_NEAREST_MIPMAP_NEAREST: - case GL_LINEAR_MIPMAP_NEAREST: - t->filter |= - R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_NEAREST; - break; - case GL_NEAREST_MIPMAP_LINEAR: - case GL_LINEAR_MIPMAP_LINEAR: - t->filter |= - R300_TX_MIN_FILTER_ANISO_NEAREST_MIP_LINEAR; - break; - } + /* Note that EXT_texture_filter_anisotropic is extremely vague about + * how anisotropic filtering interacts with the "normal" filter modes. + * When anisotropic filtering is enabled, we zero the filter setting + * inside a mip level. + */ + if (t->filter & R300_TX_MAX_ANISO_MASK) { + t->filter &= ~R300_TX_MIN_FILTER_MASK; } /* Note we don't have 3D mipmaps so only use the mag filter setting -- cgit v1.2.3 From fa1bc0d7d600b25ec1b9fce157a8c8ddc6723564 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Fri, 6 Jun 2008 23:47:46 +0200 Subject: r300: Improve texture layout calculations The texture layout calculations for mipmapped cubemaps used to be completely wrong, since the GPU expects images to be grouped by miplevel instead of by face number. This has been fixed now, though the memory layout is still slightly incorrect for the smaller miplevels. Unfortunately, the docs are lacking in that area. 
--- src/mesa/drivers/dri/r300/r300_texmem.c | 2 +- src/mesa/drivers/dri/r300/r300_texstate.c | 228 ++++++++++++++++-------------- 2 files changed, 120 insertions(+), 110 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c index e0fda86ac8..69847a4022 100644 --- a/src/mesa/drivers/dri/r300/r300_texmem.c +++ b/src/mesa/drivers/dri/r300/r300_texmem.c @@ -349,7 +349,7 @@ static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t, imageWidth = texImage->Width; imageHeight = texImage->Height; - offset = t->bufAddr + t->base.totalSize / 6 * face; + offset = t->bufAddr; if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) { GLint imageX = 0; diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index 131d958005..2589ec572e 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -189,6 +189,112 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj) } +/** + * Compute sizes and fill in offset and blit information for the given + * image (determined by \p face and \p level). + * + * \param curOffset points to the offset at which the image is to be stored + * and is updated by this function according to the size of the image. 
+ */ +static void compute_tex_image_offset( + struct gl_texture_object *tObj, + GLuint face, + GLint level, + GLint* curOffset) +{ + r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; + const struct gl_texture_image* texImage; + GLuint blitWidth = R300_BLIT_WIDTH_BYTES; + GLuint texelBytes; + GLuint size; + + texImage = tObj->Image[0][level + t->base.firstLevel]; + if (!texImage) + return; + + texelBytes = texImage->TexFormat->TexelBytes; + + /* find image size in bytes */ + if (texImage->IsCompressed) { + if ((t->format & R300_TX_FORMAT_DXT1) == + R300_TX_FORMAT_DXT1) { + // fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format); + if ((texImage->Width + 3) < 8) /* width one block */ + size = texImage->CompressedSize * 4; + else if ((texImage->Width + 3) < 16) + size = texImage->CompressedSize * 2; + else + size = texImage->CompressedSize; + } else { + /* DXT3/5, 16 bytes per block */ + WARN_ONCE + ("DXT 3/5 suffers from multitexturing problems!\n"); + // fprintf(stderr,"DXT 3/5 %d\n", texImage->Width); + if ((texImage->Width + 3) < 8) + size = texImage->CompressedSize * 2; + else + size = texImage->CompressedSize; + } + } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { + size = + ((texImage->Width * texelBytes + + 63) & ~63) * texImage->Height; + blitWidth = 64 / texelBytes; + } else if (t->tile_bits & R300_TXO_MICRO_TILE) { + /* tile pattern is 16 bytes x2. 
mipmaps stay 32 byte aligned, + though the actual offset may be different (if texture is less than + 32 bytes width) to the untiled case */ + int w = (texImage->Width * texelBytes * 2 + 31) & ~31; + size = + (w * ((texImage->Height + 1) / 2)) * + texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); + } else { + int w = (texImage->Width * texelBytes + 31) & ~31; + size = w * texImage->Height * texImage->Depth; + blitWidth = MAX2(texImage->Width, 64 / texelBytes); + } + assert(size > 0); + + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n", + texImage->Width, texImage->Height, + texImage->Depth, + texImage->TexFormat->TexelBytes, + texImage->InternalFormat); + + /* All images are aligned to a 32-byte offset */ + *curOffset = (*curOffset + 0x1f) & ~0x1f; + + if (texelBytes) { + /* fix x and y coords up later together with offset */ + t->image[face][level].x = *curOffset; + t->image[face][level].y = 0; + t->image[face][level].width = + MIN2(size / texelBytes, blitWidth); + t->image[face][level].height = + (size / texelBytes) / t->image[face][level].width; + } else { + t->image[face][level].x = *curOffset % R300_BLIT_WIDTH_BYTES; + t->image[face][level].y = *curOffset / R300_BLIT_WIDTH_BYTES; + t->image[face][level].width = + MIN2(size, R300_BLIT_WIDTH_BYTES); + t->image[face][level].height = size / t->image[face][level].width; + } + + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, + "level %d, face %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n", + level, face, texImage->Width, texImage->Height, + t->image[face][level].x, t->image[face][level].y, + t->image[face][level].width, t->image[face][level].height, + size, *curOffset); + + *curOffset += size; +} + + + /** * This function computes the number of bytes of storage needed for * the given texture object (all mipmap levels, all cube faces). 
@@ -206,7 +312,7 @@ static void r300SetTexImages(r300ContextPtr rmesa, r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; - GLint curOffset, blitWidth; + GLint curOffset; GLint i, texelBytes; GLint numLevels; GLint log2Width, log2Height, log2Depth; @@ -245,8 +351,6 @@ static void r300SetTexImages(r300ContextPtr rmesa, * The idea is that we lay out the mipmap levels within a block of * memory organized as a rectangle of width BLIT_WIDTH_BYTES. */ - curOffset = 0; - blitWidth = R300_BLIT_WIDTH_BYTES; t->tile_bits = 0; /* figure out if this texture is suitable for tiling. */ @@ -276,94 +380,20 @@ static void r300SetTexImages(r300ContextPtr rmesa, } #endif - for (i = 0; i < numLevels; i++) { - const struct gl_texture_image *texImage; - GLuint size; - - texImage = tObj->Image[0][i + t->base.firstLevel]; - if (!texImage) - break; - - /* find image size in bytes */ - if (texImage->IsCompressed) { - if ((t->format & R300_TX_FORMAT_DXT1) == - R300_TX_FORMAT_DXT1) { - // fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format); - if ((texImage->Width + 3) < 8) /* width one block */ - size = texImage->CompressedSize * 4; - else if ((texImage->Width + 3) < 16) - size = texImage->CompressedSize * 2; - else - size = texImage->CompressedSize; - } else { - /* DXT3/5, 16 bytes per block */ - WARN_ONCE - ("DXT 3/5 suffers from multitexturing problems!\n"); - // fprintf(stderr,"DXT 3/5 %d\n", texImage->Width); - if ((texImage->Width + 3) < 8) - size = texImage->CompressedSize * 2; - else - size = texImage->CompressedSize; - } - } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - size = - ((texImage->Width * texelBytes + - 63) & ~63) * texImage->Height; - blitWidth = 64 / texelBytes; - } else if (t->tile_bits & R300_TXO_MICRO_TILE) { - /* tile pattern is 16 bytes x2. 
mipmaps stay 32 byte aligned, - though the actual offset may be different (if texture is less than - 32 bytes width) to the untiled case */ - int w = (texImage->Width * texelBytes * 2 + 31) & ~31; - size = - (w * ((texImage->Height + 1) / 2)) * - texImage->Depth; - blitWidth = MAX2(texImage->Width, 64 / texelBytes); - } else { - int w = (texImage->Width * texelBytes + 31) & ~31; - size = w * texImage->Height * texImage->Depth; - blitWidth = MAX2(texImage->Width, 64 / texelBytes); - } - assert(size > 0); - - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n", - texImage->Width, texImage->Height, - texImage->Depth, - texImage->TexFormat->TexelBytes, - texImage->InternalFormat); - - /* Align to 32-byte offset. It is faster to do this unconditionally - * (no branch penalty). - */ + curOffset = 0; - curOffset = (curOffset + 0x1f) & ~0x1f; + if (tObj->Target == GL_TEXTURE_CUBE_MAP) { + ASSERT(log2Width == log2Height); + t->format |= R300_TX_FORMAT_CUBIC_MAP; - if (texelBytes) { - /* fix x and y coords up later together with offset */ - t->image[0][i].x = curOffset; - t->image[0][i].y = 0; - t->image[0][i].width = - MIN2(size / texelBytes, blitWidth); - t->image[0][i].height = - (size / texelBytes) / t->image[0][i].width; - } else { - t->image[0][i].x = curOffset % R300_BLIT_WIDTH_BYTES; - t->image[0][i].y = curOffset / R300_BLIT_WIDTH_BYTES; - t->image[0][i].width = - MIN2(size, R300_BLIT_WIDTH_BYTES); - t->image[0][i].height = size / t->image[0][i].width; + for(i = 0; i < numLevels; i++) { + GLuint face; + for(face = 0; face < 6; face++) + compute_tex_image_offset(tObj, face, i, &curOffset); } - - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, - "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n", - i, texImage->Width, texImage->Height, - t->image[0][i].x, t->image[0][i].y, - t->image[0][i].width, t->image[0][i].height, - size, curOffset); - - curOffset += size; + } else { + for (i = 0; i < numLevels; i++) + 
compute_tex_image_offset(tObj, 0, i, &curOffset); } /* Align the total size of texture memory block. @@ -371,26 +401,6 @@ static void r300SetTexImages(r300ContextPtr rmesa, t->base.totalSize = (curOffset + RADEON_OFFSET_MASK) & ~RADEON_OFFSET_MASK; - /* Setup remaining cube face blits, if needed */ - if (tObj->Target == GL_TEXTURE_CUBE_MAP) { - GLuint face; - for (face = 1; face < 6; face++) { - for (i = 0; i < numLevels; i++) { - t->image[face][i].x = t->image[0][i].x; - t->image[face][i].y = t->image[0][i].y; - t->image[face][i].width = t->image[0][i].width; - t->image[face][i].height = - t->image[0][i].height; - } - } - t->base.totalSize *= 6; /* total texmem needed */ - } - - if (tObj->Target == GL_TEXTURE_CUBE_MAP) { - ASSERT(log2Width == log2Height); - t->format |= R300_TX_FORMAT_CUBIC_MAP; - } - t->size = (((tObj->Image[0][t->base.firstLevel]->Width - 1) << R300_TX_WIDTHMASK_SHIFT) @@ -408,7 +418,7 @@ static void r300SetTexImages(r300ContextPtr rmesa, t->pitch |= (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - unsigned int align = blitWidth - 1; + unsigned int align = (64 / texelBytes) - 1; t->pitch |= ((tObj->Image[0][t->base.firstLevel]->Width * texelBytes) + 63) & ~(63); t->size |= R300_TX_SIZE_TXPITCH_EN; -- cgit v1.2.3 From f39780242e0dc7060d716fe255977a0a89734945 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 6 Jun 2008 21:53:05 -0700 Subject: r5xx: Fix speedy LIT once and for all. To do: Add a slightly more accurate LIT. Will do later. 
--- src/mesa/drivers/dri/r300/r500_fragprog.c | 113 ++++++++++++++---------------- 1 file changed, 52 insertions(+), 61 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index b967aa2d73..26513e74ae 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -743,13 +743,10 @@ static GLboolean parse_program(struct r500_fragment_program *fp) emit_sop(fp, counter, fpi, OPCODE_LG2, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); break; case OPCODE_LIT: - /* To be honest, I have no idea how I came up with the following. - * All I know is that it's based on the r3xx stuff, and was - * concieved with the help of NyQuil. Mmm, MyQuil. */ - - /* First instruction */ src[0] = make_src(fp, fpi->SrcReg[0]); src[1] = emit_const4fv(fp, LIT); + /* First inst: MAX temp, input, [0, 0, 0, -128] + * Write: RG, A */ fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT | (R500_WRITEMASK_ARG << 11); fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); @@ -764,77 +761,71 @@ static GLboolean parse_program(struct r500_fragment_program *fp) fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)); counter++; - /* Second instruction */ - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AB << 11); + /* Second inst: MIN temp, temp, [x, x, x, 128] + * Write: A */ + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_A << 11); fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)) | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); - /* Select [w, w, w, y] */ - temp_swiz = 3 | (3 << 3) | (3 << 6); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(temp_swiz) - | R500_ALU_RGB_SEL_B_SRC1 - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 = R500_ALPHA_OP_LN2 - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | 
R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_G; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)) | R500_ALPHA_ADDR1(src[1]); + /* fp->inst[counter].inst3; */ + fp->inst[counter].inst4 = R500_ALPHA_OP_MAX + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX + | R500_ALU_RGBA_ADDRD(dest); counter++; - /* Third instruction */ - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AG << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); - /* Select [x, x, x, z] */ - temp_swiz = 0; + /* Third-fifth insts: POW temp, temp.y, temp.w + * Write: B */ + emit_sop(fp, counter, fpi, OPCODE_LG2, get_temp(fp, 0), SWIZZLE_Y, get_temp(fp, 1)); + fp->inst[counter].inst0 |= (R500_WRITEMASK_ARGB << 11); + counter++; + fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1)) + | R500_RGB_ADDR1(get_temp(fp, 0)); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1)) + | R500_ALPHA_ADDR1(get_temp(fp, 0)); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(temp_swiz) - | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); fp->inst[counter].inst4 = R500_ALPHA_OP_MAD | R500_ALPHA_ADDRD(get_temp(fp, 1)) | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_B; + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)) | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | R500_ALU_RGBA_A_SWIZ_0; - counter++; - /* Fourth instruction */ - fp->inst[counter].inst0 = 
R500_INST_TYPE_ALU | (R500_WRITEMASK_AR << 11); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst3 = MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); - fp->inst[counter].inst4 = R500_ALPHA_OP_EX2 - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); counter++; - /* Fifth instruction */ - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_AB << 11); + emit_sop(fp, counter, fpi, OPCODE_EX2, get_temp(fp, 1), SWIZZLE_W, get_temp(fp, 0)); + fp->inst[counter].inst0 |= (R500_WRITEMASK_B << 11); + counter++; + /* Sixth inst: CMP dest, temp.xxxx, temp.[1, x, z, 1], temp.[1, x, 0, 1]; + * Write: ARGB + * This inst's selects need to be swapped as follows: + * 0 -> C ; 1 -> B ; 2 -> A */ + emit_alu(fp, counter, fpi); fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); - /* Select [w, w, w] */ - temp_swiz = 3 | (3 << 3) | (3 << 6); + fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO) + | R500_ALU_RGB_R_SWIZ_A_1 + | R500_ALU_RGB_G_SWIZ_A_R + | R500_ALU_RGB_B_SWIZ_A_B | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(temp_swiz); - fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SWIZ_A_1 - | R500_ALPHA_SWIZ_B_1; - /* Select [-y, -y, -y] */ - temp_swiz = 1 | (1 << 3) | (1 << 6); + | R500_ALU_RGB_R_SWIZ_B_1 + | R500_ALU_RGB_G_SWIZ_B_R + | R500_ALU_RGB_B_SWIZ_B_0; + fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_1 + | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_1; fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) - | MAKE_SWIZ_RGBA_C(temp_swiz) - | 
R500_ALU_RGBA_MOD_C_NEG - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - /* Final instruction */ - emit_mov(fp, counter, fpi, get_temp(fp, 0), SWIZZLE_NOOP, dest); + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC0 + | R500_ALU_RGBA_ALPHA_SEL_C_SRC0 + | R500_ALU_RGBA_R_SWIZ_R + | R500_ALU_RGBA_G_SWIZ_R + | R500_ALU_RGBA_B_SWIZ_R + | R500_ALU_RGBA_A_SWIZ_R; break; case OPCODE_LRP: /* src0 * src1 + INV(src0) * src2 -- cgit v1.2.3 From 5e58e5d77792891fac953ff0ec30990f3e0ca854 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 7 Jun 2008 01:37:35 -0700 Subject: r5xx: Unbreak anisotropic filtering? Not quite finished, maybe? Not sure. --- src/mesa/drivers/dri/r300/r300_reg.h | 3 +++ src/mesa/drivers/dri/r300/r300_tex.c | 19 +++++++++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 999e8bc1b4..79f2fbfde7 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1371,13 +1371,16 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_TX_MAG_FILTER_4 (0 << 9) # define R300_TX_MAG_FILTER_NEAREST (1 << 9) # define R300_TX_MAG_FILTER_LINEAR (2 << 9) +# define R300_TX_MAG_FILTER_ANISO (3 << 9) # define R300_TX_MAG_FILTER_MASK (3 << 9) # define R300_TX_MIN_FILTER_NEAREST (1 << 11) # define R300_TX_MIN_FILTER_LINEAR (2 << 11) +# define R300_TX_MIN_FILTER_ANISO (3 << 11) # define R300_TX_MIN_FILTER_MASK (3 << 11) # define R300_TX_MIN_FILTER_MIP_NONE (0 << 13) # define R300_TX_MIN_FILTER_MIP_NEAREST (1 << 13) # define R300_TX_MIN_FILTER_MIP_LINEAR (2 << 13) +# define R300_TX_MIN_FILTER_MIP_ANISO (3 << 13) # define R300_TX_MIN_FILTER_MIP_MASK (3 << 13) # define R300_TX_MAX_ANISO_1_TO_1 (0 << 21) # define R300_TX_MAX_ANISO_2_TO_1 (2 << 21) diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 2ea65f8260..55243dc7a4 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -184,11 +184,20 @@ static void r300SetTexMaxAnisotropy(r300TexObjPtr t, GLfloat max) * \param t Texture whose filter modes are to be set * \param minf Texture minification mode * \param magf Texture magnification mode + * \param anisotropy Maximum anisotropy level */ -static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf) +static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy) { t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK); + if (anisotropy > 1.0) { + t->filter |= R300_TX_MAG_FILTER_ANISO + | R300_TX_MIN_FILTER_ANISO + | R300_TX_MIN_FILTER_MIP_ANISO; + r300SetTexMaxAnisotropy(t, anisotropy); + return; + } + switch (minf) { case GL_NEAREST: t->filter |= R300_TX_MIN_FILTER_NEAREST; @@ -216,7 +225,7 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf) * inside a mip level. 
*/ if (t->filter & R300_TX_MAX_ANISO_MASK) { - t->filter &= ~R300_TX_MIN_FILTER_MASK; + /* t->filter &= ~R300_TX_MIN_FILTER_MASK; */ } /* Note we don't have 3D mipmaps so only use the mag filter setting @@ -263,8 +272,7 @@ static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj) make_empty_list(&t->base); r300SetTexWrap(t, texObj->WrapS, texObj->WrapT, texObj->WrapR); - r300SetTexMaxAnisotropy(t, texObj->MaxAnisotropy); - r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter); + r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy); r300SetTexBorderColor(t, texObj->_BorderChan); } @@ -1035,8 +1043,7 @@ static void r300TexParameter(GLcontext * ctx, GLenum target, case GL_TEXTURE_MIN_FILTER: case GL_TEXTURE_MAG_FILTER: case GL_TEXTURE_MAX_ANISOTROPY_EXT: - r300SetTexMaxAnisotropy(t, texObj->MaxAnisotropy); - r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter); + r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy); break; case GL_TEXTURE_WRAP_S: -- cgit v1.2.3 From bf1a7c884d02d4a59ad51a446dec3736959d8239 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 7 Jun 2008 14:17:36 +0200 Subject: r300: Update LOD_BIAS register constants --- src/mesa/drivers/dri/r300/r300_reg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 79f2fbfde7..5de5477242 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1395,7 +1395,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_CHROMA_KEY_BLEND 2 # define R300_MC_ROUND_NORMAL (0<<2) # define R300_MC_ROUND_MPEG4 (1<<2) -# define R300_LOD_BIAS_MASK 0x1fff +# define R300_LOD_BIAS_SHIFT 3 +# define R300_LOD_BIAS_MASK 0x1ff8 # define R300_EDGE_ANISO_EDGE_DIAG (0<<13) # define R300_EDGE_ANISO_EDGE_ONLY (1<<13) # define R300_MC_COORD_TRUNCATE_DISABLE (0<<14) -- cgit v1.2.3 From 75bfe630ff9df8b827cbdbf88b08e5da5d3eccfa Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 7 Jun 2008 18:54:35 +0200 Subject: r300: Further anisotropic filtering fixes Thanks to Corbin for the initial cut today. Fixed some minor stuff (in particular, make sure we still use a MIP_LINEAR filtering mode; anisotropy without MIP_LINEAR filtering is not the truly pleasing anisotropy). --- src/mesa/drivers/dri/r300/r300_reg.h | 13 +++++----- src/mesa/drivers/dri/r300/r300_tex.c | 46 ++++++++++++++++-------------------- 2 files changed, 26 insertions(+), 33 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 5de5477242..8b00f9958c 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1380,14 +1380,13 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_TX_MIN_FILTER_MIP_NONE (0 << 13) # define R300_TX_MIN_FILTER_MIP_NEAREST (1 << 13) # define R300_TX_MIN_FILTER_MIP_LINEAR (2 << 13) -# define R300_TX_MIN_FILTER_MIP_ANISO (3 << 13) # define R300_TX_MIN_FILTER_MIP_MASK (3 << 13) -# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21) -# define R300_TX_MAX_ANISO_2_TO_1 (2 << 21) -# define R300_TX_MAX_ANISO_4_TO_1 (4 << 21) -# define R300_TX_MAX_ANISO_8_TO_1 (6 << 21) -# define R300_TX_MAX_ANISO_16_TO_1 (8 << 21) -# define R300_TX_MAX_ANISO_MASK (14 << 21) +# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21) +# define R300_TX_MAX_ANISO_2_TO_1 (1 << 21) +# define R300_TX_MAX_ANISO_4_TO_1 (2 << 21) +# define R300_TX_MAX_ANISO_8_TO_1 (3 << 21) +# define R300_TX_MAX_ANISO_16_TO_1 (4 << 21) +# define R300_TX_MAX_ANISO_MASK (7 << 21) #define R300_TX_FILTER1_0 0x4440 # define R300_CHROMA_KEY_MODE_DISABLE 0 diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 55243dc7a4..a12b0293e5 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -160,21 +160,18 @@ static void r300SetTexWrap(r300TexObjPtr t, GLenum swrap, GLenum twrap, t->filter |= hw_qwrap << R300_TX_WRAP_Q_SHIFT; } -static void r300SetTexMaxAnisotropy(r300TexObjPtr t, GLfloat max) +static GLuint aniso_filter(GLfloat anisotropy) { - - t->filter &= ~R300_TX_MAX_ANISO_MASK; - - if (max <= 1.0) { - t->filter |= R300_TX_MAX_ANISO_1_TO_1; - } else if (max <= 2.0) { - t->filter |= R300_TX_MAX_ANISO_2_TO_1; - } else if (max <= 4.0) { - t->filter |= R300_TX_MAX_ANISO_4_TO_1; - } else if (max <= 8.0) { - t->filter |= R300_TX_MAX_ANISO_8_TO_1; + if (anisotropy >= 16.0) { + return R300_TX_MAX_ANISO_16_TO_1; + } else if (anisotropy >= 8.0) { + return R300_TX_MAX_ANISO_8_TO_1; + } else if (anisotropy >= 4.0) { + return R300_TX_MAX_ANISO_4_TO_1; + } else if (anisotropy >= 2.0) { + return R300_TX_MAX_ANISO_2_TO_1; } else { - t->filter |= R300_TX_MAX_ANISO_16_TO_1; + return R300_TX_MAX_ANISO_1_TO_1; } } @@ 
-188,13 +185,19 @@ static void r300SetTexMaxAnisotropy(r300TexObjPtr t, GLfloat max) */ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy) { - t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK); + t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK); + t->filter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY; - if (anisotropy > 1.0) { + /* Note that EXT_texture_filter_anisotropic is extremely vague about + * how anisotropic filtering interacts with the "normal" filter modes. + * When anisotropic filtering is enabled, we override min and mag + * filter settings. + */ + if (anisotropy >= 2.0 && (minf != GL_NEAREST && minf != GL_LINEAR)) { t->filter |= R300_TX_MAG_FILTER_ANISO | R300_TX_MIN_FILTER_ANISO - | R300_TX_MIN_FILTER_MIP_ANISO; - r300SetTexMaxAnisotropy(t, anisotropy); + | R300_TX_MIN_FILTER_MIP_LINEAR + | aniso_filter(anisotropy); return; } @@ -219,15 +222,6 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat break; } - /* Note that EXT_texture_filter_anisotropic is extremely vague about - * how anisotropic filtering interacts with the "normal" filter modes. - * When anisotropic filtering is enabled, we zero the filter setting - * inside a mip level. - */ - if (t->filter & R300_TX_MAX_ANISO_MASK) { - /* t->filter &= ~R300_TX_MIN_FILTER_MASK; */ - } - /* Note we don't have 3D mipmaps so only use the mag filter setting * to set the 3D texture filter mode. */ -- cgit v1.2.3 From 731686b7132ff50d78ef2dc5bfab550750925132 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 7 Jun 2008 20:25:47 +0200 Subject: r300: Fix stencil buffer clear The glean/readPixSanity test passes now. 
--- src/mesa/drivers/dri/r300/r300_ioctl.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index b0225453d3..2da0ac0e1a 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -130,8 +130,6 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) t1 |= R300_Z_ENABLE | R300_Z_WRITE_ENABLE; t2 |= (R300_ZS_ALWAYS << R300_Z_FUNC_SHIFT); - } else { //XXX - t1 |= R300_STENCIL_FRONT_BACK; // disable } if (flags & CLEARBUFFER_STENCIL) { @@ -144,20 +142,12 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) (R300_ZS_REPLACE << R300_S_FRONT_ZPASS_OP_SHIFT) | (R300_ZS_REPLACE << - R300_S_FRONT_ZFAIL_OP_SHIFT) | - (R300_ZS_ALWAYS << - R300_S_BACK_FUNC_SHIFT) | - (R300_ZS_REPLACE << - R300_S_BACK_SFAIL_OP_SHIFT) | - (R300_ZS_REPLACE << - R300_S_BACK_ZPASS_OP_SHIFT) | - (R300_ZS_REPLACE << - R300_S_BACK_ZFAIL_OP_SHIFT); + R300_S_FRONT_ZFAIL_OP_SHIFT); } e32(t1); e32(t2); - e32(r300->state.stencil.clear); + e32(R300_STENCILWRITEMASK_MASK | r300->state.stencil.clear); } cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__); @@ -307,7 +297,6 @@ static void r300EmitClearState(GLcontext * ctx) reg_start(R300_RS_INST_0, 0); e32(R300_RS_INST_COL_CN_WRITE); } else { - R300_STATECHANGE(r300, ri); reg_start(R500_RS_IP_0, 7); for (i = 0; i < 8; ++i) { -- cgit v1.2.3 From 7cb63a25de47bd941efd04c39d2758c2e91a313a Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 7 Jun 2008 20:33:28 +0200 Subject: r300: Apply writemask when clearing stencil buffer This fixes the glean/maskedClear test. 
--- src/mesa/drivers/dri/r300/r300_ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 2da0ac0e1a..fbe6920745 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -147,7 +147,8 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) e32(t1); e32(t2); - e32(R300_STENCILWRITEMASK_MASK | r300->state.stencil.clear); + e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) | + (r300->state.stencil.clear & R300_STENCILREF_MASK)); } cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__); -- cgit v1.2.3 From cb1687660844d42f929e11a2261c0eeb3fe859be Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 7 Jun 2008 21:07:28 +0200 Subject: r300: Some cleanups in depth and stencil state handling This also fixes a bug with Zfunc set to GL_NEVER in glean/paths. 
--- src/mesa/drivers/dri/r300/r300_context.h | 2 -- src/mesa/drivers/dri/r300/r300_ioctl.c | 2 +- src/mesa/drivers/dri/r300/r300_state.c | 37 ++++++-------------------------- 3 files changed, 8 insertions(+), 33 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index b33dfeafdd..3ac87d173b 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -574,9 +574,7 @@ struct r300_depthbuffer_state { }; struct r300_stencilbuffer_state { - GLuint clear; GLboolean hw_stencil; - }; /* Vertex shader state */ diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index fbe6920745..6af23300f2 100644 --- a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -148,7 +148,7 @@ static void r300ClearBuffer(r300ContextPtr r300, int flags, int buffer) e32(t1); e32(t2); e32(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << R300_STENCILWRITEMASK_SHIFT) | - (r300->state.stencil.clear & R300_STENCILREF_MASK)); + (ctx->Stencil.Clear & R300_STENCILREF_MASK)); } cmd2 = (drm_r300_cmd_header_t *) r300AllocCmdBuf(r300, 9, __FUNCTION__); diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 7602f12d81..b36ca7aef8 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -525,24 +525,15 @@ static void r300SetDepthState(GLcontext * ctx) r300ContextPtr r300 = R300_CONTEXT(ctx); R300_STATECHANGE(r300, zs); - r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_STENCIL_ENABLE; // XXX - r300->hw.zs.cmd[R300_ZS_CNTL_1] &= - ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT); + r300->hw.zs.cmd[R300_ZS_CNTL_0] &= R300_STENCIL_ENABLE|R300_STENCIL_FRONT_BACK; + r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT); - if (ctx->Depth.Test && ctx->Depth.Func != GL_NEVER) { + if (ctx->Depth.Test) { + 
r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_ENABLE; if (ctx->Depth.Mask) - r300->hw.zs.cmd[R300_ZS_CNTL_0] |= - R300_Z_ENABLE | R300_Z_WRITE_ENABLE | R300_STENCIL_FRONT_BACK; // XXX - else - r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_ENABLE | R300_STENCIL_FRONT_BACK; // XXX - + r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_WRITE_ENABLE; r300->hw.zs.cmd[R300_ZS_CNTL_1] |= - translate_func(ctx->Depth. - Func) << R300_Z_FUNC_SHIFT; - } else { - r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_STENCIL_FRONT_BACK; // XXX - r300->hw.zs.cmd[R300_ZS_CNTL_1] |= - translate_func(GL_NEVER) << R300_Z_FUNC_SHIFT; + translate_func(ctx->Depth.Func) << R300_Z_FUNC_SHIFT; } r300SetEarlyZState(ctx); @@ -925,7 +916,7 @@ static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face, GLuint flag; R300_STATECHANGE(rmesa, zs); - + rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_STENCIL_FRONT_BACK; rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= ~((R300_ZS_MASK << R300_S_FRONT_FUNC_SHIFT) | (R300_ZS_MASK << @@ -1000,17 +991,6 @@ static void r300StencilOpSeparate(GLcontext * ctx, GLenum face, } } -static void r300ClearStencil(GLcontext * ctx, GLint s) -{ - r300ContextPtr rmesa = R300_CONTEXT(ctx); - - rmesa->state.stencil.clear = - ((GLuint) (ctx->Stencil.Clear & R300_STENCILREF_MASK) | - (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT) | - ((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) << - R300_STENCILMASK_SHIFT)); -} - /* ============================================================= * Window position and viewport transformation */ @@ -2637,12 +2617,10 @@ void r300InitState(r300ContextPtr r300) case 16: r300->state.depth.scale = 1.0 / (GLfloat) 0xffff; depth_fmt = R300_DEPTHFORMAT_16BIT_INT_Z; - r300->state.stencil.clear = 0x00000000; break; case 24: r300->state.depth.scale = 1.0 / (GLfloat) 0xffffff; depth_fmt = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; - r300->state.stencil.clear = 0x00ff0000; break; default: fprintf(stderr, "Error: Unsupported depth %d... 
exiting\n", @@ -2706,7 +2684,6 @@ void r300InitStateFuncs(struct dd_function_table *functions) functions->ShadeModel = r300ShadeModel; /* Stencil related */ - functions->ClearStencil = r300ClearStencil; functions->StencilFuncSeparate = r300StencilFuncSeparate; functions->StencilMaskSeparate = r300StencilMaskSeparate; functions->StencilOpSeparate = r300StencilOpSeparate; -- cgit v1.2.3 From 21f50818b09c1ab3b5b1dc797b34c23b9b1634dc Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 7 Jun 2008 12:37:10 -0700 Subject: r300: Allow driconf to set a default anisotropy. If an app does not use GL_EXT_tex_filter_aniso, this lets driconf set it instead. --- src/mesa/drivers/dri/r300/r300_tex.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index a12b0293e5..2c3c28e638 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -191,13 +191,15 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat /* Note that EXT_texture_filter_anisotropic is extremely vague about * how anisotropic filtering interacts with the "normal" filter modes. * When anisotropic filtering is enabled, we override min and mag - * filter settings. + * filter settings completely. This includes driconf's settings. 
*/ - if (anisotropy >= 2.0 && (minf != GL_NEAREST && minf != GL_LINEAR)) { + if (anisotropy >= 2.0) { t->filter |= R300_TX_MAG_FILTER_ANISO | R300_TX_MIN_FILTER_ANISO | R300_TX_MIN_FILTER_MIP_LINEAR | aniso_filter(anisotropy); + if (RADEON_DEBUG & DEBUG_TEXTURE) + fprintf(stderr, "Using maximum anisotropy of %f\n", anisotropy); return; } -- cgit v1.2.3 From 28c28f72fd9ed192467a3cf913b344951d0bc805 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Sun, 8 Jun 2008 14:04:39 +0200 Subject: i965: fix OPCODE_TEX when additional ops are needed --- src/mesa/drivers/dri/i965/brw_wm_fp.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c index 7e80724130..ef90260e2a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_fp.c +++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c @@ -737,7 +737,8 @@ static void precalc_tex( struct brw_wm_compile *c, release_temp(c, tmp); } - if (inst->TexSrcTarget == GL_TEXTURE_RECTANGLE_NV) + if ((inst->TexSrcTarget == TEXTURE_RECT_INDEX) || + (inst->TexSrcTarget == TEXTURE_CUBE_INDEX)) release_temp(c, tmpcoord); } @@ -1019,7 +1020,11 @@ void brw_wm_pass_fp( struct brw_wm_compile *c ) case OPCODE_LIT: precalc_lit(c, inst); break; - + + case OPCODE_TEX: + precalc_tex(c, inst); + break; + case OPCODE_TXP: precalc_txp(c, inst); break; -- cgit v1.2.3 From f440b0ddd9690a2f8d7b7eed9b56ff77407b9114 Mon Sep 17 00:00:00 2001 From: Brian Date: Sun, 8 Jun 2008 11:25:30 -0600 Subject: remove realglx.[ch] from build --- src/mesa/drivers/x11/Makefile | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/x11/Makefile b/src/mesa/drivers/x11/Makefile index 8ee9b61808..ce5366e332 100644 --- a/src/mesa/drivers/x11/Makefile +++ b/src/mesa/drivers/x11/Makefile @@ -18,7 +18,6 @@ GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) HEADERS = \ glxapi.h \ glxheader.h \ - realglx.h \ xfonts.h \ xmesaP.h \ xm_glide.h \ 
@@ -27,7 +26,6 @@ HEADERS = \ SOURCES = \ fakeglx.c \ glxapi.c \ - realglx.c \ xfonts.c \ xm_api.c \ xm_buffer.c \ -- cgit v1.2.3 From 00099731195b2e5b57b8bca6342a8a711e0e427a Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sun, 8 Jun 2008 22:36:20 +0200 Subject: r300: Workaround hardware readcache problem This workaround is similar to the one found in r200_span.c. It seems like some part of the read hardware doesn't realize that VRAM has changed. By reading from an arbitrary position, this is fixed. The piglit test bugs/r300-readcache is a regression test for this bug. --- src/mesa/drivers/dri/r300/radeon_span.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c index eae09d6b35..f1bc56ea6a 100644 --- a/src/mesa/drivers/dri/r300/radeon_span.c +++ b/src/mesa/drivers/dri/r300/radeon_span.c @@ -282,6 +282,30 @@ static void radeonSpanRenderStart(GLcontext * ctx) #endif LOCK_HARDWARE(rmesa); radeonWaitForIdleLocked(rmesa); + + /* Read the first pixel in the frame buffer. This should + * be a noop, right? In fact without this conform fails as reading + * from the framebuffer sometimes produces old results -- the + * on-card read cache gets mixed up and doesn't notice that the + * framebuffer has been updated. + * + * Note that we should probably be reading some otherwise unused + * region of VRAM, otherwise we might get incorrect results when + * reading pixels from the top left of the screen. + * + * I found this problem on an R420 with glean's texCube test. + * Note that the R200 span code also *writes* the first pixel in the + * framebuffer, but I've found this to be unnecessary. 
+ * -- Nicolai Hähnle, June 2008 + */ + { + int p; + driRenderbuffer *drb = + (driRenderbuffer *) ctx->WinSysDrawBuffer->_ColorDrawBuffers[0]; + volatile int *buf = + (volatile int *)(rmesa->dri.screen->pFB + drb->offset); + p = *buf; + } } static void radeonSpanRenderFinish(GLcontext * ctx) -- cgit v1.2.3 From 4c7e02298f68b84d929cd1d3af8b3120281c2e32 Mon Sep 17 00:00:00 2001 From: George Sapountzis Date: Mon, 9 Jun 2008 17:46:39 +0300 Subject: dri/swrast: minor cosmetic --- src/mesa/drivers/dri/swrast/swrast.c | 102 ++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 50 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c index c4dba59198..f41e331809 100644 --- a/src/mesa/drivers/dri/swrast/swrast.c +++ b/src/mesa/drivers/dri/swrast/swrast.c @@ -326,7 +326,7 @@ swrast_alloc_back_storage(GLcontext *ctx, struct gl_renderbuffer *rb, _mesa_free(rb->Data); - (void) swrast_alloc_front_storage(ctx, rb, internalFormat, width, height); + swrast_alloc_front_storage(ctx, rb, internalFormat, width, height); rb->Data = _mesa_malloc(height * xrb->pitch); @@ -341,59 +341,61 @@ swrast_new_renderbuffer(const GLvisual *visual, GLboolean front) TRACE; - if (xrb) { - _mesa_init_renderbuffer(&xrb->Base, 0); + if (!xrb) + return NULL; - pixel_format = choose_pixel_format(visual); + _mesa_init_renderbuffer(&xrb->Base, 0); - xrb->Base.Delete = swrast_delete_renderbuffer; - if (front) { - xrb->Base.AllocStorage = swrast_alloc_front_storage; - swrast_set_span_funcs_front(xrb, pixel_format); - } - else { - xrb->Base.AllocStorage = swrast_alloc_back_storage; - swrast_set_span_funcs_back(xrb, pixel_format); - } + pixel_format = choose_pixel_format(visual); - switch (pixel_format) { - case PF_A8R8G8B8: - xrb->Base.InternalFormat = GL_RGBA; - xrb->Base._BaseFormat = GL_RGBA; - xrb->Base.DataType = GL_UNSIGNED_BYTE; - xrb->Base.RedBits = 8 * sizeof(GLubyte); - xrb->Base.GreenBits = 8 * sizeof(GLubyte); - 
xrb->Base.BlueBits = 8 * sizeof(GLubyte); - xrb->Base.AlphaBits = 8 * sizeof(GLubyte); - break; - case PF_R5G6B5: - xrb->Base.InternalFormat = GL_RGB; - xrb->Base._BaseFormat = GL_RGB; - xrb->Base.DataType = GL_UNSIGNED_BYTE; - xrb->Base.RedBits = 5 * sizeof(GLubyte); - xrb->Base.GreenBits = 6 * sizeof(GLubyte); - xrb->Base.BlueBits = 5 * sizeof(GLubyte); - xrb->Base.AlphaBits = 0; - break; - case PF_R3G3B2: - xrb->Base.InternalFormat = GL_RGB; - xrb->Base._BaseFormat = GL_RGB; - xrb->Base.DataType = GL_UNSIGNED_BYTE; - xrb->Base.RedBits = 3 * sizeof(GLubyte); - xrb->Base.GreenBits = 3 * sizeof(GLubyte); - xrb->Base.BlueBits = 2 * sizeof(GLubyte); - xrb->Base.AlphaBits = 0; - break; - case PF_CI8: - xrb->Base.InternalFormat = GL_COLOR_INDEX8_EXT; - xrb->Base._BaseFormat = GL_COLOR_INDEX; - xrb->Base.DataType = GL_UNSIGNED_BYTE; - xrb->Base.IndexBits = 8 * sizeof(GLubyte); - break; - default: - return NULL; - } + xrb->Base.Delete = swrast_delete_renderbuffer; + if (front) { + xrb->Base.AllocStorage = swrast_alloc_front_storage; + swrast_set_span_funcs_front(xrb, pixel_format); + } + else { + xrb->Base.AllocStorage = swrast_alloc_back_storage; + swrast_set_span_funcs_back(xrb, pixel_format); } + + switch (pixel_format) { + case PF_A8R8G8B8: + xrb->Base.InternalFormat = GL_RGBA; + xrb->Base._BaseFormat = GL_RGBA; + xrb->Base.DataType = GL_UNSIGNED_BYTE; + xrb->Base.RedBits = 8 * sizeof(GLubyte); + xrb->Base.GreenBits = 8 * sizeof(GLubyte); + xrb->Base.BlueBits = 8 * sizeof(GLubyte); + xrb->Base.AlphaBits = 8 * sizeof(GLubyte); + break; + case PF_R5G6B5: + xrb->Base.InternalFormat = GL_RGB; + xrb->Base._BaseFormat = GL_RGB; + xrb->Base.DataType = GL_UNSIGNED_BYTE; + xrb->Base.RedBits = 5 * sizeof(GLubyte); + xrb->Base.GreenBits = 6 * sizeof(GLubyte); + xrb->Base.BlueBits = 5 * sizeof(GLubyte); + xrb->Base.AlphaBits = 0; + break; + case PF_R3G3B2: + xrb->Base.InternalFormat = GL_RGB; + xrb->Base._BaseFormat = GL_RGB; + xrb->Base.DataType = GL_UNSIGNED_BYTE; + 
xrb->Base.RedBits = 3 * sizeof(GLubyte); + xrb->Base.GreenBits = 3 * sizeof(GLubyte); + xrb->Base.BlueBits = 2 * sizeof(GLubyte); + xrb->Base.AlphaBits = 0; + break; + case PF_CI8: + xrb->Base.InternalFormat = GL_COLOR_INDEX8_EXT; + xrb->Base._BaseFormat = GL_COLOR_INDEX; + xrb->Base.DataType = GL_UNSIGNED_BYTE; + xrb->Base.IndexBits = 8 * sizeof(GLubyte); + break; + default: + return NULL; + } + return xrb; } -- cgit v1.2.3 From 5cf69fe511a67a7bcf6787974becf6a143abb507 Mon Sep 17 00:00:00 2001 From: Alan Hourihane Date: Mon, 9 Jun 2008 16:24:15 +0100 Subject: Check for NULL pointer --- src/mesa/drivers/dri/common/dri_util.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index 6efdf4312a..e2beae380e 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -602,6 +602,8 @@ dri2CreateNewDrawable(__DRIscreen *screen, const __DRIconfig *config, __DRIdrawable *pdraw; pdraw = driCreateNewDrawable(screen, config, 0, 0, NULL, data); + if (!pdraw) + return NULL; pdraw->dri2.drawable_id = drawable_id; pdraw->dri2.tail = head; -- cgit v1.2.3 From 08c07b60be7c21819c78a3281dc9b5a9b547431b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 9 Jun 2008 13:50:11 -0600 Subject: Makefile clean-ups --- src/mesa/drivers/fbdev/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/fbdev/Makefile b/src/mesa/drivers/fbdev/Makefile index ac43dfdb67..c0ef54f604 100644 --- a/src/mesa/drivers/fbdev/Makefile +++ b/src/mesa/drivers/fbdev/Makefile @@ -24,7 +24,7 @@ CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mesa/libglapi.a default: $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) -$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(CORE_OBJECTS) $(OBJECTS) +$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(CORE_MESA) $(OBJECTS) @ $(TOP)/bin/mklib -o $(GL_LIB) \ -linker '$(CC)' -ldflags '$(LDFLAGS)' \ -major $(MESA_MAJOR) 
-minor $(MESA_MINOR) -patch $(MESA_TINY) \ -- cgit v1.2.3 From a80ccaadc77acbf835aa7139d75ea166efd8553f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 9 Jun 2008 13:50:31 -0600 Subject: more old stuff, rename SOLO->MESA, etc --- src/mesa/sources | 33 ++++++++------------------------- 1 file changed, 8 insertions(+), 25 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/sources b/src/mesa/sources index 477f76bb3d..122fdd1c04 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -241,28 +241,11 @@ SPARC_API = \ COMMON_DRIVER_SOURCES = \ drivers/common/driverfuncs.c -SVGA_DRIVER_SOURCES = \ - drivers/svga/svgamesa.c \ - drivers/svga/svgamesa8.c \ - drivers/svga/svgamesa15.c \ - drivers/svga/svgamesa16.c \ - drivers/svga/svgamesa24.c \ - drivers/svga/svgamesa32.c - -FBDEV_DRIVER_SOURCES = \ - drivers/fbdev/glfbdev.c ### All the core C sources -ALL_SOURCES = \ - $(GLAPI_SOURCES) \ - $(SOLO_SOURCES) \ - $(ASM_SOURCES) \ - $(COMMON_DRIVER_SOURCES)\ - $(FBDEV_DRIVER_SOURCES) - -SOLO_SOURCES = \ +MESA_SOURCES = \ $(MAIN_SOURCES) \ $(MATH_SOURCES) \ $(VBO_SOURCES) \ @@ -273,27 +256,27 @@ SOLO_SOURCES = \ $(ASM_C_SOURCES) \ $(SLANG_SOURCES) -CORE_SOURCES = \ +ALL_SOURCES = \ + $(MESA_SOURCES) \ $(GLAPI_SOURCES) \ - $(SOLO_SOURCES) + $(ASM_SOURCES) \ + $(COMMON_DRIVER_SOURCES) + ### Object files -SOLO_OBJECTS = \ - $(SOLO_SOURCES:.c=.o) \ +MESA_OBJECTS = \ + $(MESA_SOURCES:.c=.o) \ $(ASM_SOURCES:.S=.o) GLAPI_OBJECTS = \ $(GLAPI_SOURCES:.c=.o) \ $(ASM_API:.S=.o) -CORE_OBJECTS = $(SOLO_OBJECTS) $(GLAPI_OBJECTS) COMMON_DRIVER_OBJECTS = $(COMMON_DRIVER_SOURCES:.c=.o) -FBDEV_DRIVER_OBJECTS = $(FBDEV_DRIVER_SOURCES:.c=.o) - ### Include directories -- cgit v1.2.3 From 910b3bfaeb372ef0e20c6b05c64c3721b739a442 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 9 Jun 2008 13:50:44 -0600 Subject: Makefile clean-ups --- src/mesa/drivers/directfb/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git 
a/src/mesa/drivers/directfb/Makefile b/src/mesa/drivers/directfb/Makefile index 945eb8b3d9..ece0457cff 100644 --- a/src/mesa/drivers/directfb/Makefile +++ b/src/mesa/drivers/directfb/Makefile @@ -36,10 +36,10 @@ default: directfb-libgl directfbgl_mesa # XXX this used to be in src/mesa/Makefile and is probably broken now -directfb-libgl: $(CORE_OBJECTS) +directfb-libgl: $(LIBS) @ $(TOP)/bin/mklib -o $(GL_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ - -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) $(CORE_OBJECTS) \ + -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) $(LIBS) \ $(GL_LIB_DEPS) -- cgit v1.2.3 From f6246f85d7c87180b464f84f330385680a82a601 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 9 Jun 2008 14:09:09 -0600 Subject: s/SOLO/MESA/ --- src/mesa/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/Makefile b/src/mesa/Makefile index d39f282b33..a301625179 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -29,8 +29,8 @@ default: depend asm_subdirs libmesa.a libglapi.a driver_subdirs # Helper libraries used by many drivers: # Make archive of core mesa object files -libmesa.a: $(SOLO_OBJECTS) - @ $(TOP)/bin/mklib -o mesa -static $(SOLO_OBJECTS) +libmesa.a: $(MESA_OBJECTS) + @ $(TOP)/bin/mklib -o mesa -static $(MESA_OBJECTS) # Make archive of gl* API dispatcher functions only libglapi.a: $(GLAPI_OBJECTS) -- cgit v1.2.3 From 72f87b69471649ff02e80a89f902b69980f3d025 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 9 Jun 2008 16:06:44 -0600 Subject: fix Xlib libGL.so build problem. Also, build driverfuncs.c into libmesa.a since it's always needed. 
--- src/mesa/drivers/dri/Makefile.template | 1 - src/mesa/drivers/x11/Makefile | 2 +- src/mesa/sources | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/Makefile.template b/src/mesa/drivers/dri/Makefile.template index 53f9d80689..07e181e2cd 100644 --- a/src/mesa/drivers/dri/Makefile.template +++ b/src/mesa/drivers/dri/Makefile.template @@ -3,7 +3,6 @@ MESA_MODULES = $(TOP)/src/mesa/libmesa.a COMMON_SOURCES = \ - ../../common/driverfuncs.c \ ../common/utils.c \ ../common/texmem.c \ ../common/vblank.c \ diff --git a/src/mesa/drivers/x11/Makefile b/src/mesa/drivers/x11/Makefile index ce5366e332..51226eeae1 100644 --- a/src/mesa/drivers/x11/Makefile +++ b/src/mesa/drivers/x11/Makefile @@ -59,7 +59,7 @@ $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(OBJECTS) $(CORE_MESA) -linker '$(CC)' -ldflags '$(LDFLAGS)' \ -major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \ -install $(TOP)/$(LIB_DIR) \ - $(MKLIB_OPTIONS) $(GL_LIB_DEPS) $(OBJECTS) + $(MKLIB_OPTIONS) $(GL_LIB_DEPS) $(OBJECTS) $(CORE_MESA) diff --git a/src/mesa/sources b/src/mesa/sources index 122fdd1c04..2db1843239 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -253,6 +253,7 @@ MESA_SOURCES = \ $(SHADER_SOURCES) \ $(SWRAST_SOURCES) \ $(SWRAST_SETUP_SOURCES) \ + $(COMMON_DRIVER_SOURCES) \ $(ASM_C_SOURCES) \ $(SLANG_SOURCES) -- cgit v1.2.3 From a742bed99ae840d806198172005f6b25399ec573 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 10 Jun 2008 16:31:36 +0800 Subject: i965: apply commit 6c1a98e97affb2163e776551eb3a9e669ff99bbf to glsl --- src/mesa/drivers/dri/i965/brw_wm_glsl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index b2ffc82ed2..faa2837fa0 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -978,7 +978,7 @@ static void emit_wpos_xy(struct brw_wm_compile *c, 
brw_ADD(p, dst[0], retype(src0[0], BRW_REGISTER_TYPE_W), - brw_imm_d(- c->key.origin_x)); + brw_imm_d(0 - c->key.origin_x)); } if (mask & WRITEMASK_Y) { -- cgit v1.2.3 From c233b4245f7fcb667260278f3007d21044e62142 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 10 Jun 2008 10:09:03 -0600 Subject: add some missing rule dependencies --- src/mesa/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/Makefile b/src/mesa/Makefile index a301625179..5f84f72bb2 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -29,17 +29,17 @@ default: depend asm_subdirs libmesa.a libglapi.a driver_subdirs # Helper libraries used by many drivers: # Make archive of core mesa object files -libmesa.a: $(MESA_OBJECTS) +libmesa.a: $(MESA_OBJECTS) asm_subdirs @ $(TOP)/bin/mklib -o mesa -static $(MESA_OBJECTS) # Make archive of gl* API dispatcher functions only -libglapi.a: $(GLAPI_OBJECTS) +libglapi.a: $(GLAPI_OBJECTS) asm_subdirs @ $(TOP)/bin/mklib -o glapi -static $(GLAPI_OBJECTS) ###################################################################### # Device drivers -driver_subdirs: +driver_subdirs: libmesa.a libglapi.a (cd drivers && $(MAKE)) -- cgit v1.2.3 From 9563449c14f06b0927adecf36711c034b561e121 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 10 Jun 2008 12:08:30 -0600 Subject: remove unnecessary swrast, tnl includes --- src/mesa/vbo/vbo_save_loopback.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/vbo/vbo_save_loopback.c b/src/mesa/vbo/vbo_save_loopback.c index f2cef698fb..92ca4ea95d 100644 --- a/src/mesa/vbo/vbo_save_loopback.c +++ b/src/mesa/vbo/vbo_save_loopback.c @@ -25,9 +25,6 @@ * **************************************************************************/ -#include "swrast_setup/swrast_setup.h" -#include "swrast/swrast.h" -#include "tnl/tnl.h" #include "main/context.h" #include "main/glheader.h" #include "main/enums.h" -- cgit v1.2.3 From 
8d976aedc4654bc66b80f22690e3674f1ef46183 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 11 Jun 2008 19:33:14 -0600 Subject: comments --- src/mesa/drivers/dri/intel/intel_screen.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index 52e062eece..5233e58fc9 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -291,6 +291,10 @@ intelUpdateScreenFromSAREA(intelScreenPrivate * intelScreen, intelPrintSAREA(sarea); } + +/** + * DRI2 entrypoint + */ static void intelHandleDrawableConfig(__DRIdrawablePrivate *dPriv, __DRIcontextPrivate *pcp, @@ -332,6 +336,9 @@ intelHandleDrawableConfig(__DRIdrawablePrivate *dPriv, #define BUFFER_FLAG_TILED 0x0100 +/** + * DRI2 entrypoint + */ static void intelHandleBufferAttach(__DRIdrawablePrivate *dPriv, __DRIcontextPrivate *pcp, @@ -728,6 +735,7 @@ intelFillInModes(__DRIscreenPrivate *psp, /** * This is the driver specific part of the createNewScreen entry point. + * Called when using legacy DRI. * * \todo maybe fold this into intelInitDriver * @@ -791,7 +799,8 @@ struct intel_context *intelScreenContext(intelScreenPrivate *intelScreen) /** * This is the driver specific part of the createNewScreen entry point. - * + * Called when using DRI2. + * * \return the __GLcontextModes supported by this driver */ static const -- cgit v1.2.3 From 3f3e4f645d389672e6d72215e60b812a887cc66d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 11 Jun 2008 19:33:30 -0600 Subject: remove obsoleted doxygen \param comments Someone changed the function parameters but didn't bother to update the comments. Also, whitespace changes, clean-ups. 
--- src/mesa/drivers/dri/common/dri_util.c | 41 +++++++++++++++------------------- 1 file changed, 18 insertions(+), 23 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index e2beae380e..10d2e801dc 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -277,9 +277,9 @@ __driUtilUpdateDrawableInfo(__DRIdrawablePrivate *pdp) pdp->pStamp = &(psp->pSAREA->drawableTable[pdp->index].stamp); DRM_SPINLOCK(&psp->pSAREA->drawable_lock, psp->drawLockID); - } + int __driParseEvents(__DRIcontextPrivate *pcp, __DRIdrawablePrivate *pdp) { @@ -471,6 +471,7 @@ static int driDrawableGetMSC( __DRIscreen *sPriv, __DRIdrawable *dPriv, return sPriv->DriverAPI.GetDrawableMSC(sPriv, dPriv, msc); } + static int driWaitForMSC(__DRIdrawable *dPriv, int64_t target_msc, int64_t divisor, int64_t remainder, int64_t * msc, int64_t * sbc) @@ -478,7 +479,6 @@ static int driWaitForMSC(__DRIdrawable *dPriv, int64_t target_msc, __DRIswapInfo sInfo; int status; - status = dPriv->driScreenPriv->DriverAPI.WaitForMSC( dPriv, target_msc, divisor, remainder, msc ); @@ -496,12 +496,14 @@ static int driWaitForMSC(__DRIdrawable *dPriv, int64_t target_msc, return status; } + const __DRImediaStreamCounterExtension driMediaStreamCounterExtension = { { __DRI_MEDIA_STREAM_COUNTER, __DRI_MEDIA_STREAM_COUNTER_VERSION }, driWaitForMSC, driDrawableGetMSC, }; + static void driCopySubBuffer(__DRIdrawable *dPriv, int x, int y, int w, int h) { @@ -595,6 +597,7 @@ driCreateNewDrawable(__DRIscreen *psp, const __DRIconfig *config, return pdp; } + static __DRIdrawable * dri2CreateNewDrawable(__DRIscreen *screen, const __DRIconfig *config, unsigned int drawable_id, unsigned int head, void *data) @@ -644,8 +647,6 @@ driDestroyDrawable(__DRIdrawable *pdp) /** * Destroy the per-context private information. * - * \param contextPrivate opaque pointer to the per-drawable private info. 
- * * \internal * This function calls __DriverAPIRec::DestroyContext on \p contextPrivate, calls * drmDestroyContext(), and finally frees \p contextPrivate. @@ -663,13 +664,9 @@ driDestroyContext(__DRIcontext *pcp) /** * Create the per-drawable private driver information. * - * \param dpy The display handle. - * \param modes Mode used to create the new context. * \param render_type Type of rendering target. \c GLX_RGBA is the only * type likely to ever be supported for direct-rendering. - * \param shared The shared context dependent methods or \c NULL if - * non-existent. - * \param pctx DRI context to receive the context dependent methods. + * \param shared Context with which to share textures, etc. or NULL * * \returns An opaque pointer to the per-context private information on * success, or \c NULL on failure. @@ -718,6 +715,7 @@ driCreateNewContext(__DRIscreen *psp, const __DRIconfig *config, return pcp; } + static __DRIcontext * dri2CreateNewContext(__DRIscreen *screen, const __DRIconfig *config, __DRIcontext *shared, void *data) @@ -736,6 +734,7 @@ dri2CreateNewContext(__DRIscreen *screen, const __DRIconfig *config, return driCreateNewContext(screen, config, 0, shared, hwContext, data); } + static int driCopyContext(__DRIcontext *dest, __DRIcontext *src, unsigned long mask) { @@ -753,10 +752,6 @@ driCopyContext(__DRIcontext *dest, __DRIcontext *src, unsigned long mask) /** * Destroy the per-screen private information. * - * \param dpy the display handle. - * \param scrn the screen number. - * \param screenPrivate opaque pointer to the per-screen private information. - * * \internal * This function calls __DriverAPIRec::DestroyScreen on \p screenPrivate, calls * drmClose(), and finally frees \p screenPrivate. @@ -811,15 +806,10 @@ setupLoaderExtensions(__DRIscreen *psp, * This routine also fills in the linked list pointed to by \c driver_modes * with the \c __GLcontextModes that the driver can support for windows or * pbuffers. + * + * For legacy DRI. 
* * \param scrn Index of the screen - * \param psc DRI screen data (not driver private) - * \param modes Linked list of known display modes. This list is, at a - * minimum, a list of modes based on the current display mode. - * These roughly match the set of available X11 visuals, but it - * need not be limited to X11! The calling libGL should create - * a list that will inform the driver of the current display - * mode (i.e., color buffer depth, depth buffer depth, etc.). * \param ddx_version Version of the 2D DDX. This may not be meaningful for * all drivers. * \param dri_version Version of the "server-side" DRI. @@ -828,9 +818,9 @@ setupLoaderExtensions(__DRIscreen *psp, * framebuffer. * \param pSAREA Pointer the the SAREA. * \param fd Device handle for the DRM. - * \param internal_api_version Version of the internal interface between the - * driver and libGL. - * \param driverAPI Driver API functions used by other routines in dri_util.c. + * \param extensions ?? + * \param driver_modes Returns modes suppoted by the driver + * \param loaderPrivate ?? * * \note There is no need to check the minimum API version in this * function. 
Since the name of this function is versioned, it is @@ -903,6 +893,9 @@ driCreateNewScreen(int scrn, } +/** + * DRI2 + */ static __DRIscreen * dri2CreateNewScreen(int scrn, int fd, unsigned int sarea_handle, const __DRIextension **extensions, @@ -986,6 +979,7 @@ static const __DRIextension **driGetExtensions(__DRIscreen *psp) return psp->extensions; } +/** Legacy DRI interface */ const __DRIlegacyExtension driLegacyExtension = { { __DRI_LEGACY, __DRI_LEGACY_VERSION }, driCreateNewScreen, @@ -993,6 +987,7 @@ const __DRIlegacyExtension driLegacyExtension = { driCreateNewContext }; +/** DRI2 interface */ const __DRIcoreExtension driCoreExtension = { { __DRI_CORE, __DRI_CORE_VERSION }, dri2CreateNewScreen, -- cgit v1.2.3 From 74964ff04d89be430944dd8106bf5c97a9933f85 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 10 Jun 2008 16:59:44 -0400 Subject: glsl: implement variable array indexes --- src/mesa/shader/arbprogparse.c | 2 +- src/mesa/shader/prog_print.c | 17 +++++++++++------ src/mesa/shader/slang/slang_emit.c | 16 +++++++++++----- src/mesa/shader/slang/slang_ir.h | 1 + 4 files changed, 24 insertions(+), 12 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/shader/arbprogparse.c b/src/mesa/shader/arbprogparse.c index 60aaabe679..97bda9c6b3 100644 --- a/src/mesa/shader/arbprogparse.c +++ b/src/mesa/shader/arbprogparse.c @@ -3869,7 +3869,7 @@ _mesa_parse_arb_fragment_program(GLcontext* ctx, GLenum target, program->Base.Parameters = ap.Base.Parameters; #if DEBUG_FP - _mesa_printf("____________Fragment program %u ________\n", program->Base.ID); + _mesa_printf("____________Fragment program %u ________\n", program->Base.Id); _mesa_print_program(&program->Base); #endif } diff --git a/src/mesa/shader/prog_print.c b/src/mesa/shader/prog_print.c index 1c35ce3fec..09bf15f004 100644 --- a/src/mesa/shader/prog_print.c +++ b/src/mesa/shader/prog_print.c @@ -206,7 +206,7 @@ arb_output_attrib_string(GLint index, GLenum progType) */ static const char * reg_string(enum 
register_file f, GLint index, gl_prog_print_mode mode, - const struct gl_program *prog) + GLint relAddr, const struct gl_program *prog) { static char str[100]; @@ -214,7 +214,10 @@ reg_string(enum register_file f, GLint index, gl_prog_print_mode mode, switch (mode) { case PROG_PRINT_DEBUG: - sprintf(str, "%s[%d]", file_string(f, mode), index); + if (relAddr) + sprintf(str, "%s[ADDR%s%d]", file_string(f, mode), (index > 0) ? "+" : "", index); + else + sprintf(str, "%s[%d]", file_string(f, mode), index); break; case PROG_PRINT_ARB: @@ -401,7 +404,7 @@ print_dst_reg(const struct prog_dst_register *dstReg, gl_prog_print_mode mode, { _mesa_printf("%s%s", reg_string((enum register_file) dstReg->File, - dstReg->Index, mode, prog), + dstReg->Index, mode, GL_FALSE, prog), writemask_string(dstReg->WriteMask)); if (dstReg->CondMask != COND_TR) { @@ -424,9 +427,9 @@ print_src_reg(const struct prog_src_register *srcReg, gl_prog_print_mode mode, { _mesa_printf("%s%s", reg_string((enum register_file) srcReg->File, - srcReg->Index, mode, prog), + srcReg->Index, mode, srcReg->RelAddr, prog), _mesa_swizzle_string(srcReg->Swizzle, - srcReg->NegateBase, GL_FALSE)); + srcReg->NegateBase, GL_FALSE)); #if 0 _mesa_printf("%s[%d]%s", file_string((enum register_file) srcReg->File, mode), @@ -590,7 +593,9 @@ _mesa_print_instruction_opt(const struct prog_instruction *inst, GLint indent, break; case OPCODE_ARL: - _mesa_printf("ARL addr.x, "); + _mesa_printf("ARL "); + print_dst_reg(&inst->DstReg, mode, prog); + _mesa_printf(", "); print_src_reg(&inst->SrcReg[0], mode, prog); print_comment(inst); break; diff --git a/src/mesa/shader/slang/slang_emit.c b/src/mesa/shader/slang/slang_emit.c index ff63e05dd2..93256f8647 100644 --- a/src/mesa/shader/slang/slang_emit.c +++ b/src/mesa/shader/slang/slang_emit.c @@ -223,6 +223,7 @@ storage_to_src_reg(struct prog_src_register *src, const slang_ir_storage *st) assert(st->Size <= 4); src->File = st->File; src->Index = st->Index; + src->RelAddr = 
st->RelAddr; if (st->Swizzle != SWIZZLE_NOOP) src->Swizzle = st->Swizzle; else @@ -1488,11 +1489,16 @@ emit_array_element(slang_emit_info *emitInfo, slang_ir_node *n) n->Store->Index = arrayAddr + index; } else { - /* Variable index - PROBLEM */ - const GLint arrayAddr = n->Children[0]->Store->Index; - const GLint index = 0; - _mesa_problem(NULL, "variable array indexes not supported yet!"); - n->Store->Index = arrayAddr + index; + /* Variable index*/ + struct prog_instruction *inst; + inst = new_instruction(emitInfo, OPCODE_ARL); + storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask); + storage_to_src_reg(&inst->SrcReg[0], n->Children[1]->Store); + inst->DstReg.File = PROGRAM_ADDRESS; + inst->Comment = _mesa_strdup("ARL ADDR"); + n->Store->RelAddr = GL_TRUE; + n->Store->Index = inst->DstReg.Index;/*index of the array*/ + inst->DstReg.Index = 0; /*addr index is always 0*/ } return NULL; /* no instruction */ } diff --git a/src/mesa/shader/slang/slang_ir.h b/src/mesa/shader/slang/slang_ir.h index c7c0ddbf9a..ba0735d64d 100644 --- a/src/mesa/shader/slang/slang_ir.h +++ b/src/mesa/shader/slang/slang_ir.h @@ -146,6 +146,7 @@ struct _slang_ir_storage GLint Size; /**< number of floats */ GLuint Swizzle; GLint RefCount; /**< Used during IR tree delete */ + GLboolean RelAddr; }; typedef struct _slang_ir_storage slang_ir_storage; -- cgit v1.2.3 From 2542c59c55589bdeaf56b9cf91625b0adf75e3ab Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 12 Jun 2008 00:03:35 -0400 Subject: shaders: fix allocation of arrays of parameters --- src/mesa/shader/prog_parameter.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/shader/prog_parameter.c b/src/mesa/shader/prog_parameter.c index 152bd79f69..afdd7e5489 100644 --- a/src/mesa/shader/prog_parameter.c +++ b/src/mesa/shader/prog_parameter.c @@ -62,6 +62,28 @@ _mesa_free_parameter_list(struct gl_program_parameter_list *paramList) } +static GLint 
+_mesa_fit_type_in_vec4(GLenum type) +{ + switch (type) { + case GL_FLOAT: + case GL_INT: + return 4; + break; + case GL_FLOAT_VEC2: + case GL_INT_VEC2: + return 2; + break; + case GL_FLOAT_VEC3: + case GL_INT_VEC3: + return 1; + break; + case GL_FLOAT_VEC4: + case GL_INT_VEC4: + default: + return 1; + } +} /** * Add a new parameter to a parameter list. @@ -272,7 +294,7 @@ _mesa_add_uniform(struct gl_program_parameter_list *paramList, } else { i = _mesa_add_parameter(paramList, PROGRAM_UNIFORM, name, - size, datatype, NULL, NULL); + size * _mesa_fit_type_in_vec4(datatype), datatype, NULL, NULL); return i; } } -- cgit v1.2.3 From 19659a50f9356023c59942794bd4e6e075f120bd Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 12 Jun 2008 14:19:10 -0400 Subject: almost forgot - RelAddr is a boolean so use it here --- src/mesa/shader/prog_print.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/shader/prog_print.c b/src/mesa/shader/prog_print.c index 09bf15f004..2c5e03acc2 100644 --- a/src/mesa/shader/prog_print.c +++ b/src/mesa/shader/prog_print.c @@ -206,7 +206,7 @@ arb_output_attrib_string(GLint index, GLenum progType) */ static const char * reg_string(enum register_file f, GLint index, gl_prog_print_mode mode, - GLint relAddr, const struct gl_program *prog) + GLboolean relAddr, const struct gl_program *prog) { static char str[100]; -- cgit v1.2.3 From effb73befa56099ba78ef2d93be4f980dad157b3 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sun, 8 Jun 2008 23:05:15 -0400 Subject: glsl: fix array size initializers using const variables e.g.
const int kernelSize = 9; uniform vec2 kernel[kernelSize]; --- src/mesa/shader/slang/slang_compile.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/shader/slang/slang_compile.c b/src/mesa/shader/slang/slang_compile.c index 46b5c54bbe..f2e8aab8b0 100644 --- a/src/mesa/shader/slang/slang_compile.c +++ b/src/mesa/shader/slang/slang_compile.c @@ -258,9 +258,33 @@ parse_array_len(slang_parse_ctx * C, slang_output_ctx * O, GLuint * len) /* evaluate compile-time expression which is array size */ _slang_simplify(&array_size, &space, C->atoms); - result = (array_size.type == SLANG_OPER_LITERAL_INT); - *len = (GLint) array_size.literal[0]; + if (array_size.type == SLANG_OPER_LITERAL_INT) { + result = GL_TRUE; + *len = (GLint) array_size.literal[0]; + } else if (array_size.type == SLANG_OPER_IDENTIFIER) { + slang_variable *var = _slang_locate_variable(array_size.locals, array_size.a_id, GL_TRUE); + if (!var) { + slang_info_log_error(C->L, "undefined variable '%s'", + (char *) array_size.a_id); + result = GL_FALSE; + } else if (var->type.qualifier == SLANG_QUAL_CONST && + var->type.specifier.type == SLANG_SPEC_INT) { + if (var->initializer && + var->initializer->type == SLANG_OPER_LITERAL_INT) { + *len = (GLint) var->initializer->literal[0]; + result = GL_TRUE; + } else { + slang_info_log_error(C->L, "unable to parse array size declaration"); + result = GL_FALSE; + } + } else { + slang_info_log_error(C->L, "unable to parse array size declaration"); + result = GL_FALSE; + } + } else { + result = GL_FALSE; + } slang_operation_destruct(&array_size); return result; -- cgit v1.2.3 From 53f5b9741a5453f9166aee8da6e1f8bf5b10d59e Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 12 Jun 2008 19:37:26 -0400 Subject: glsl: allow uniforms --- src/mesa/shader/prog_execute.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/shader/prog_execute.c 
b/src/mesa/shader/prog_execute.c index 8ce2ca3964..4575a069ea 100644 --- a/src/mesa/shader/prog_execute.c +++ b/src/mesa/shader/prog_execute.c @@ -90,7 +90,8 @@ get_register_pointer(const struct prog_src_register *source, const struct gl_program_parameter_list *params; ASSERT(source->File == PROGRAM_LOCAL_PARAM || source->File == PROGRAM_CONSTANT || - source->File == PROGRAM_STATE_VAR); + source->File == PROGRAM_STATE_VAR || + source->File == PROGRAM_UNIFORM); params = machine->CurProgram->Parameters; if (reg < 0 || reg >= params->NumParameters) return ZeroVec; -- cgit v1.2.3 From 928ec5cd3be5c7d6f3d9df0ace18371b1ec8c68c Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 12 Jun 2008 18:53:52 -0400 Subject: glsl: make sure we replace all output reads with temporaries test in if.glsl --- src/mesa/shader/programopt.c | 12 +++++++----- src/mesa/shader/programopt.h | 2 +- src/mesa/shader/slang/slang_compile.c | 3 ++- 3 files changed, 10 insertions(+), 7 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/shader/programopt.c b/src/mesa/shader/programopt.c index 7d560c74a5..da4e3fcfcf 100644 --- a/src/mesa/shader/programopt.c +++ b/src/mesa/shader/programopt.c @@ -367,20 +367,22 @@ _mesa_count_texture_instructions(struct gl_program *prog) /** - * Scan/rewrite program to remove reads of varying (output) registers. + * Scan/rewrite program to remove reads of custom (output) registers. + * The passed type has to be either PROGRAM_VARYING or PROGRAM_OUTPUT. * In GLSL vertex shaders, varying vars can be read and written. * Normally, vertex varying vars are implemented as output registers. * On some hardware, trying to read an output register causes trouble. * So, rewrite the program to use a temporary register in this case. 
*/ void -_mesa_remove_varying_reads(struct gl_program *prog) +_mesa_remove_output_reads(struct gl_program *prog, enum register_file type) { GLuint i; GLint outputMap[VERT_RESULT_MAX]; GLuint numVaryingReads = 0; assert(prog->Target == GL_VERTEX_PROGRAM_ARB); + assert(type == PROGRAM_UNIFORM || type == PROGRAM_OUTPUT); for (i = 0; i < VERT_RESULT_MAX; i++) outputMap[i] = -1; @@ -391,7 +393,7 @@ _mesa_remove_varying_reads(struct gl_program *prog) const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); GLuint j; for (j = 0; j < numSrc; j++) { - if (inst->SrcReg[j].File == PROGRAM_VARYING) { + if (inst->SrcReg[j].File == type) { /* replace the read with a temp reg */ const GLuint var = inst->SrcReg[j].Index; if (outputMap[var] == -1) { @@ -414,7 +416,7 @@ _mesa_remove_varying_reads(struct gl_program *prog) const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); GLuint j; for (j = 0; j < numSrc; j++) { - if (inst->DstReg.File == PROGRAM_VARYING && + if (inst->DstReg.File == type && outputMap[inst->DstReg.Index] >= 0) { /* change inst to write to the temp reg, instead of the varying */ inst->DstReg.File = PROGRAM_TEMPORARY; @@ -447,7 +449,7 @@ _mesa_remove_varying_reads(struct gl_program *prog) if (outputMap[var] >= 0) { /* MOV VAR[var], TEMP[tmp]; */ inst->Opcode = OPCODE_MOV; - inst->DstReg.File = PROGRAM_VARYING; + inst->DstReg.File = type; inst->DstReg.Index = var; inst->SrcReg[0].File = PROGRAM_TEMPORARY; inst->SrcReg[0].Index = outputMap[var]; diff --git a/src/mesa/shader/programopt.h b/src/mesa/shader/programopt.h index 47ff2f0c7b..11572e64f5 100644 --- a/src/mesa/shader/programopt.h +++ b/src/mesa/shader/programopt.h @@ -40,6 +40,6 @@ extern void _mesa_count_texture_instructions(struct gl_program *prog); extern void -_mesa_remove_varying_reads(struct gl_program *prog); +_mesa_remove_output_reads(struct gl_program *prog, enum register_file type); #endif /* PROGRAMOPT_H */ diff --git a/src/mesa/shader/slang/slang_compile.c 
b/src/mesa/shader/slang/slang_compile.c index f2e8aab8b0..cdea1c5128 100644 --- a/src/mesa/shader/slang/slang_compile.c +++ b/src/mesa/shader/slang/slang_compile.c @@ -2212,7 +2212,8 @@ _slang_compile(GLcontext *ctx, struct gl_shader *shader) printf("Pre-remove output reads:\n"); _mesa_print_program(shader->Programs[0]); #endif - _mesa_remove_varying_reads(shader->Programs[0]); + _mesa_remove_output_reads(shader->Programs[0], PROGRAM_VARYING); + _mesa_remove_output_reads(shader->Programs[0], PROGRAM_OUTPUT); #if 0 printf("Post-remove output reads:\n"); _mesa_print_program(shader->Programs[0]); -- cgit v1.2.3 From 95b34baa8f981b08e21a36f9163d98a426559839 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 12 Jun 2008 19:41:54 -0400 Subject: silly bug: it's PROGRAM_VARYING --- src/mesa/shader/programopt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/shader/programopt.c b/src/mesa/shader/programopt.c index da4e3fcfcf..f3511ba00e 100644 --- a/src/mesa/shader/programopt.c +++ b/src/mesa/shader/programopt.c @@ -382,7 +382,7 @@ _mesa_remove_output_reads(struct gl_program *prog, enum register_file type) GLuint numVaryingReads = 0; assert(prog->Target == GL_VERTEX_PROGRAM_ARB); - assert(type == PROGRAM_UNIFORM || type == PROGRAM_OUTPUT); + assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT); for (i = 0; i < VERT_RESULT_MAX; i++) outputMap[i] = -1; -- cgit v1.2.3 From 341987f68842d662168aa6386723ef9ac8080a93 Mon Sep 17 00:00:00 2001 From: George Sapountzis Date: Fri, 13 Jun 2008 15:48:34 +0300 Subject: dri/swrast: add OpenGL 2.1 functions --- src/mesa/drivers/dri/swrast/swrast.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c index f41e331809..282db7f86c 100644 --- a/src/mesa/drivers/dri/swrast/swrast.c +++ b/src/mesa/drivers/dri/swrast/swrast.c @@ -43,6 +43,7 @@ #include "tnl/t_pipeline.h" 
#include "vbo/vbo.h" #include "drivers/common/driverfuncs.h" +#include "utils.h" #include "swrast_priv.h" @@ -51,6 +52,7 @@ #define need_GL_VERSION_1_4 #define need_GL_VERSION_1_5 #define need_GL_VERSION_2_0 +#define need_GL_VERSION_2_1 /* sw extensions for imaging */ #define need_GL_EXT_blend_color @@ -75,7 +77,6 @@ #define need_GL_NV_fragment_program #include "extension_helper.h" -#include "utils.h" const struct dri_extension card_extensions[] = { @@ -83,6 +84,7 @@ const struct dri_extension card_extensions[] = { "GL_VERSION_1_4", GL_VERSION_1_4_functions }, { "GL_VERSION_1_5", GL_VERSION_1_5_functions }, { "GL_VERSION_2_0", GL_VERSION_2_0_functions }, + { "GL_VERSION_2_1", GL_VERSION_2_1_functions }, { "GL_EXT_blend_color", GL_EXT_blend_color_functions }, { "GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions }, -- cgit v1.2.3 From 7ffb1230b3287a72d9ac59c5d830f7a4155cbdf9 Mon Sep 17 00:00:00 2001 From: George Sapountzis Date: Fri, 13 Jun 2008 15:52:32 +0300 Subject: dri/swrast: make unbind a noop This is for loading swrast_dri.so from libGL. MakeContextCurrent() seems to unbind the context right after binding it and DRI drivers also have a noop DriverAPI.UnbindContext ... 
--- src/mesa/drivers/dri/swrast/swrast.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c index 282db7f86c..304d07729e 100644 --- a/src/mesa/drivers/dri/swrast/swrast.c +++ b/src/mesa/drivers/dri/swrast/swrast.c @@ -695,7 +695,6 @@ static int driUnbindContext(__DRIcontext *ctx) { TRACE; (void) ctx; - _mesa_make_current(NULL, NULL, NULL); return GL_TRUE; } -- cgit v1.2.3 From b1056b6127d2bd6fa39ba7d88b5a49cd2765c469 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 7 Jun 2008 14:17:56 +0200 Subject: r300: Reenable t->filter_1 (though currently not used) --- src/mesa/drivers/dri/r300/r300_state.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index b36ca7aef8..c3f1cc9e28 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1441,8 +1441,7 @@ static void r300SetupTextures(GLcontext * ctx) r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 + hw_tmu] = gen_fixed_filter(t->filter) | (hw_tmu << 28); - /* Currently disabled! */ - r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0; //0x20501f80; + r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->filter_1; r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->size; r300->hw.tex.format.cmd[R300_TEX_VALUE_0 + -- cgit v1.2.3 From 4919cc3a2c5e779834a3ff362e417f36f12b2b17 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Fri, 13 Jun 2008 18:41:16 +0200 Subject: r300: Fix depth texture GL_LUMINANCE vs. 
GL_INTENSITY mixup --- src/mesa/drivers/dri/r300/r300_texstate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index 2589ec572e..bdd20b18e4 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -127,18 +127,18 @@ void r300SetDepthTexMode(struct gl_texture_object *tObj) { static const GLuint formats[3][3] = { { - R300_EASY_TX_FORMAT(X, X, X, X, X16), R300_EASY_TX_FORMAT(X, X, X, ONE, X16), + R300_EASY_TX_FORMAT(X, X, X, X, X16), R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X16), }, { - R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8), R300_EASY_TX_FORMAT(X, X, X, ONE, X24_Y8), + R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8), R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X24_Y8), }, { - R300_EASY_TX_FORMAT(X, X, X, X, X32), R300_EASY_TX_FORMAT(X, X, X, ONE, X32), + R300_EASY_TX_FORMAT(X, X, X, X, X32), R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X32), }, }; -- cgit v1.2.3 From b7669e4a8637a9680bcef0d0db82ae5e1984741c Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Fri, 13 Jun 2008 20:49:52 +0200 Subject: r300/r500: Separate fragprog compiler structures r500 code still used r300_pfs_compile_state, which contains some fields that really only make sense on r3xx type hardware. 
In order to allow both fragprog implementations to go forward without disturbing each other, I've pushed this structure down into the respective r[3|5]00_fragprog.c --- src/mesa/drivers/dri/r300/r300_context.h | 83 ++------------------------ src/mesa/drivers/dri/r300/r300_fragprog.c | 92 ++++++++++++++++++++++++++--- src/mesa/drivers/dri/r300/r500_fragprog.c | 98 +++++++++++++++++++++++++++---- 3 files changed, 176 insertions(+), 97 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 3ac87d173b..2d51bad52e 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -654,83 +654,7 @@ struct r300_vertex_program_cont { #define PFS_NUM_TEMP_REGS 32 #define PFS_NUM_CONST_REGS 16 -/* Mapping Mesa registers to R300 temporaries */ -struct reg_acc { - int reg; /* Assigned hw temp */ - unsigned int refcount; /* Number of uses by mesa program */ -}; - -/** - * Describe the current lifetime information for an R300 temporary - */ -struct reg_lifetime { - /* Index of the first slot where this register is free in the sense - that it can be used as a new destination register. - This is -1 if the register has been assigned to a Mesa register - and the last access to the register has not yet been emitted */ - int free; - - /* Index of the first slot where this register is currently reserved. - This is used to stop e.g. a scalar operation from being moved - before the allocation time of a register that was first allocated - for a vector operation. */ - int reserved; - - /* Index of the first slot in which the register can be used as a - source without losing the value that is written by the last - emitted instruction that writes to the register */ - int vector_valid; - int scalar_valid; - - /* Index to the slot where the register was last read. 
- This is also the first slot in which the register may be written again */ - int vector_lastread; - int scalar_lastread; -}; - -/** - * Store usage information about an ALU instruction slot during the - * compilation of a fragment program. - */ -#define SLOT_SRC_VECTOR (1<<0) -#define SLOT_SRC_SCALAR (1<<3) -#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR) -#define SLOT_OP_VECTOR (1<<16) -#define SLOT_OP_SCALAR (1<<17) -#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR) - -struct r300_pfs_compile_slot { - /* Bitmask indicating which parts of the slot are used, using SLOT_ constants - defined above */ - unsigned int used; - - /* Selected sources */ - int vsrc[3]; - int ssrc[3]; -}; - -/** - * Store information during compilation of fragment programs. - */ -struct r300_pfs_compile_state { - int nrslots; /* number of ALU slots used so far */ - - /* Track which (parts of) slots are already filled with instructions */ - struct r300_pfs_compile_slot slot[PFS_MAX_ALU_INST]; - - /* Track the validity of R300 temporaries */ - struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS]; - - /* Used to map Mesa's inputs/temps onto hardware temps */ - int temp_in_use; - struct reg_acc temps[PFS_NUM_TEMP_REGS]; - struct reg_acc inputs[32]; /* don't actually need 32... */ - - /* Track usage of hardware temps, for register allocation, - * indirection detection, etc. 
*/ - GLuint used_in_node; - GLuint dest_in_node; -}; +struct r300_pfs_compile_state; /** * Store everything about a fragment program that is needed @@ -789,13 +713,15 @@ struct r300_fragment_program { GLuint optimization; }; +struct r500_pfs_compile_state; + struct r500_fragment_program { struct gl_fragment_program mesa_program; GLcontext *ctx; GLboolean translated; GLboolean error; - struct r300_pfs_compile_state *cs; + struct r500_pfs_compile_state *cs; struct { GLuint inst0; @@ -840,7 +766,6 @@ struct r300_state { struct r300_texture_state texture; int sw_tcl_inputs[VERT_ATTRIB_MAX]; struct r300_vertex_shader_state vertex_shader; - struct r300_pfs_compile_state pfs_compile; struct r300_dma_region aos[R300_MAX_AOS_ARRAYS]; int aos_count; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 54b80d20a1..df8ab47b0f 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -52,6 +52,85 @@ #include "r300_reg.h" #include "r300_state.h" +/* Mapping Mesa registers to R300 temporaries */ +struct reg_acc { + int reg; /* Assigned hw temp */ + unsigned int refcount; /* Number of uses by mesa program */ +}; + +/** + * Describe the current lifetime information for an R300 temporary + */ +struct reg_lifetime { + /* Index of the first slot where this register is free in the sense + that it can be used as a new destination register. + This is -1 if the register has been assigned to a Mesa register + and the last access to the register has not yet been emitted */ + int free; + + /* Index of the first slot where this register is currently reserved. + This is used to stop e.g. a scalar operation from being moved + before the allocation time of a register that was first allocated + for a vector operation. 
*/ + int reserved; + + /* Index of the first slot in which the register can be used as a + source without losing the value that is written by the last + emitted instruction that writes to the register */ + int vector_valid; + int scalar_valid; + + /* Index to the slot where the register was last read. + This is also the first slot in which the register may be written again */ + int vector_lastread; + int scalar_lastread; +}; + +/** + * Store usage information about an ALU instruction slot during the + * compilation of a fragment program. + */ +#define SLOT_SRC_VECTOR (1<<0) +#define SLOT_SRC_SCALAR (1<<3) +#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR) +#define SLOT_OP_VECTOR (1<<16) +#define SLOT_OP_SCALAR (1<<17) +#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR) + +struct r300_pfs_compile_slot { + /* Bitmask indicating which parts of the slot are used, using SLOT_ constants + defined above */ + unsigned int used; + + /* Selected sources */ + int vsrc[3]; + int ssrc[3]; +}; + +/** + * Store information during compilation of fragment programs. + */ +struct r300_pfs_compile_state { + int nrslots; /* number of ALU slots used so far */ + + /* Track which (parts of) slots are already filled with instructions */ + struct r300_pfs_compile_slot slot[PFS_MAX_ALU_INST]; + + /* Track the validity of R300 temporaries */ + struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS]; + + /* Used to map Mesa's inputs/temps onto hardware temps */ + int temp_in_use; + struct reg_acc temps[PFS_NUM_TEMP_REGS]; + struct reg_acc inputs[32]; /* don't actually need 32... */ + + /* Track usage of hardware temps, for register allocation, + * indirection detection, etc. 
*/ + GLuint used_in_node; + GLuint dest_in_node; +}; + + /* * Usefull macros and values */ @@ -2093,7 +2172,7 @@ static void insert_wpos(struct gl_program *prog) */ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) { - struct r300_pfs_compile_state *cs = NULL; + COMPILE_STATE; struct gl_fragment_program *mp = &fp->mesa_program; struct prog_instruction *fpi; GLuint InputsRead = mp->Base.InputsRead; @@ -2105,7 +2184,6 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) driQueryOptioni(&r300->radeon.optionCache, "fp_optimization"); fp->translated = GL_FALSE; fp->error = GL_FALSE; - fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile); fp->WritesDepth = GL_FALSE; fp->tex.length = 0; fp->cur_node = 0; @@ -2227,13 +2305,11 @@ static void update_params(struct r300_fragment_program *fp) void r300TranslateFragmentShader(r300ContextPtr r300, struct r300_fragment_program *fp) { - - struct r300_pfs_compile_state *cs = NULL; - if (!fp->translated) { + struct r300_pfs_compile_state cs; + fp->cs = &cs; init_program(r300, fp); - cs = fp->cs; if (parse_program(fp) == GL_FALSE) { dump_program(fp); @@ -2242,11 +2318,11 @@ void r300TranslateFragmentShader(r300ContextPtr r300, /* Finish off */ fp->node[fp->cur_node].alu_end = - cs->nrslots - fp->node[fp->cur_node].alu_offset - 1; + cs.nrslots - fp->node[fp->cur_node].alu_offset - 1; if (fp->node[fp->cur_node].tex_end < 0) fp->node[fp->cur_node].tex_end = 0; fp->alu_offset = 0; - fp->alu_end = cs->nrslots - 1; + fp->alu_end = cs.nrslots - 1; fp->tex_offset = 0; fp->tex_end = fp->tex.length ? 
fp->tex.length - 1 : 0; assert(fp->node[fp->cur_node].alu_end >= 0); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 26513e74ae..70e45f3ea8 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -57,6 +57,85 @@ #include "r300_reg.h" #include "r300_state.h" +/* Mapping Mesa registers to R500 temporaries */ +struct reg_acc { + int reg; /* Assigned hw temp */ + unsigned int refcount; /* Number of uses by mesa program */ +}; + +/** + * Describe the current lifetime information for an R300 temporary + */ +struct reg_lifetime { + /* Index of the first slot where this register is free in the sense + that it can be used as a new destination register. + This is -1 if the register has been assigned to a Mesa register + and the last access to the register has not yet been emitted */ + int free; + + /* Index of the first slot where this register is currently reserved. + This is used to stop e.g. a scalar operation from being moved + before the allocation time of a register that was first allocated + for a vector operation. */ + int reserved; + + /* Index of the first slot in which the register can be used as a + source without losing the value that is written by the last + emitted instruction that writes to the register */ + int vector_valid; + int scalar_valid; + + /* Index to the slot where the register was last read. + This is also the first slot in which the register may be written again */ + int vector_lastread; + int scalar_lastread; +}; + +/** + * Store usage information about an ALU instruction slot during the + * compilation of a fragment program. 
+ */ +#define SLOT_SRC_VECTOR (1<<0) +#define SLOT_SRC_SCALAR (1<<3) +#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR) +#define SLOT_OP_VECTOR (1<<16) +#define SLOT_OP_SCALAR (1<<17) +#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR) + +struct r500_pfs_compile_slot { + /* Bitmask indicating which parts of the slot are used, using SLOT_ constants + defined above */ + unsigned int used; + + /* Selected sources */ + int vsrc[3]; + int ssrc[3]; +}; + +/** + * Store information during compilation of fragment programs. + */ +struct r500_pfs_compile_state { + int nrslots; /* number of ALU slots used so far */ + + /* Track which (parts of) slots are already filled with instructions */ + struct r500_pfs_compile_slot slot[PFS_MAX_ALU_INST]; + + /* Track the validity of R300 temporaries */ + struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS]; + + /* Used to map Mesa's inputs/temps onto hardware temps */ + int temp_in_use; + struct reg_acc temps[PFS_NUM_TEMP_REGS]; + struct reg_acc inputs[32]; /* don't actually need 32... */ + + /* Track usage of hardware temps, for register allocation, + * indirection detection, etc. 
*/ + GLuint used_in_node; + GLuint dest_in_node; +}; + + /* * Useful macros and values */ @@ -66,7 +145,7 @@ fp->error = GL_TRUE; \ } while(0) -#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs +#define COMPILE_STATE struct r500_pfs_compile_state *cs = fp->cs #define R500_US_NUM_TEMP_REGS 128 #define R500_US_NUM_CONST_REGS 256 @@ -293,7 +372,7 @@ static void emit_tex(struct r500_fragment_program *fp, fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit) | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - + if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) fp->inst[counter].inst1 |= R500_TEX_UNSCALED; @@ -1227,7 +1306,7 @@ static GLboolean parse_program(struct r500_fragment_program *fp) static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) { - struct r300_pfs_compile_state *cs = NULL; + struct r500_pfs_compile_state *cs = fp->cs; struct gl_fragment_program *mp = &fp->mesa_program; struct prog_instruction *fpi; GLuint InputsRead = mp->Base.InputsRead; @@ -1239,7 +1318,6 @@ static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) driQueryOptioni(&r300->radeon.optionCache, "fp_optimization"); fp->translated = GL_FALSE; fp->error = GL_FALSE; - fp->cs = cs = &(R300_CONTEXT(fp->ctx)->state.pfs_compile); fp->const_nr = 0; /* Size of pixel stack, plus 1. */ fp->max_temp_idx = 1; @@ -1420,23 +1498,21 @@ static void dumb_shader(struct r500_fragment_program *fp) void r500TranslateFragmentShader(r300ContextPtr r300, struct r500_fragment_program *fp) { - - struct r300_pfs_compile_state *cs = NULL; - if (!fp->translated) { + struct r500_pfs_compile_state cs; + fp->cs = &cs; init_program(r300, fp); - cs = fp->cs; if (parse_program(fp) == GL_FALSE) { ERROR("Huh. Couldn't parse program. 
There should be additional errors explaining why.\nUsing dumb shader...\n"); dumb_shader(fp); fp->inst_offset = 0; - fp->inst_end = cs->nrslots - 1; + fp->inst_end = cs.nrslots - 1; return; } fp->inst_offset = 0; - fp->inst_end = cs->nrslots - 1; + fp->inst_end = cs.nrslots - 1; fp->translated = GL_TRUE; if (RADEON_DEBUG & DEBUG_PIXEL) { @@ -1449,6 +1525,8 @@ void r500TranslateFragmentShader(r300ContextPtr r300, r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); + + fp->cs = 0; } update_params(fp); -- cgit v1.2.3 From defadd9c03c726d1e79bd911de07a2682bf78b01 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Fri, 13 Jun 2008 20:03:17 +0200 Subject: r300_fragprog: Remove dead code --- src/mesa/drivers/dri/r300/r300_fragprog.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index df8ab47b0f..90fd9a7cfe 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -1738,19 +1738,9 @@ static GLboolean parse_program(struct r300_fragment_program *fp) /* src0.xyz1 -> temp * DP4 dest, temp, src1 */ -#if 0 - temp[0] = get_temp_reg(fp); - src[0].s_swz = SWIZZLE_ONE; - emit_arith(fp, PFS_OP_MAD, temp[0], mask, - src[0], pfs_one, pfs_zero, 0); - emit_arith(fp, PFS_OP_DP4, dest, mask, - temp[0], src[1], undef, flags); - free_temp(fp, temp[0]); -#else emit_arith(fp, PFS_OP_DP4, dest, mask, swizzle(src[0], X, Y, Z, ONE), src[1], undef, flags); -#endif break; case OPCODE_DST: src[0] = t_src(fp, fpi->SrcReg[0]); -- cgit v1.2.3 From 4ba1c7d84826aaa07114872560cab3a428949499 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Fri, 13 Jun 2008 21:43:09 +0200 Subject: r300_fragprog: Refactoring and cleanup Refactor so that r300_pfs_compile_state "owns"/holds a pointer to r300_fragment_program instead of the other way round. This is more natural from an object orientation point of view. 
Move the compiled hardware state into r300_fragment_program_code, in anticipation of on-the-fly program recompilation based on external OpenGL state. --- src/mesa/drivers/dri/r300/r300_context.h | 25 +- src/mesa/drivers/dri/r300/r300_fragprog.c | 717 +++++++++++++++--------------- src/mesa/drivers/dri/r300/r300_shader.c | 1 - src/mesa/drivers/dri/r300/r300_state.c | 73 +-- 4 files changed, 417 insertions(+), 399 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 2d51bad52e..05efb813e5 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -657,17 +657,9 @@ struct r300_vertex_program_cont { struct r300_pfs_compile_state; /** - * Store everything about a fragment program that is needed - * to render with that program. + * Stores an R300 fragment program in its compiled-to-hardware form. */ -struct r300_fragment_program { - struct gl_fragment_program mesa_program; - - GLcontext *ctx; - GLboolean translated; - GLboolean error; - struct r300_pfs_compile_state *cs; - +struct r300_fragment_program_code { struct { int length; GLuint inst[PFS_MAX_TEX_INST]; @@ -708,6 +700,19 @@ struct r300_fragment_program { int const_nr; int max_temp_idx; +}; + +/** + * Store everything about a fragment program that is needed + * to render with that program. + */ +struct r300_fragment_program { + struct gl_fragment_program mesa_program; + + GLboolean translated; + GLboolean error; + + struct r300_fragment_program_code code; GLboolean WritesDepth; GLuint optimization; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 90fd9a7cfe..ba88b88f45 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -111,6 +111,9 @@ struct r300_pfs_compile_slot { * Store information during compilation of fragment programs. 
*/ struct r300_pfs_compile_state { + r300ContextPtr r300; + struct r300_fragment_program *fp; + int nrslots; /* number of ALU slots used so far */ /* Track which (parts of) slots are already filled with instructions */ @@ -141,7 +144,10 @@ struct r300_pfs_compile_state { } while(0) #define PFS_INVAL 0xFFFFFFFF -#define COMPILE_STATE struct r300_pfs_compile_state *cs = fp->cs +#define COMPILE_STATE \ + struct r300_fragment_program *fp = cs->fp; \ + struct r300_fragment_program_code *code = &fp->code; \ + (void)code #define SWIZZLE_XYZ 0 #define SWIZZLE_XXX 1 @@ -155,7 +161,7 @@ struct r300_pfs_compile_state { #define SWIZZLE_000 9 #define SWIZZLE_HHH 10 -#define swizzle(r, x, y, z, w) do_swizzle(fp, r, \ +#define swizzle(r, x, y, z, w) do_swizzle(cs, r, \ ((SWIZZLE_##x<<0)| \ (SWIZZLE_##y<<3)| \ (SWIZZLE_##z<<6)| \ @@ -380,15 +386,16 @@ static const GLuint pfs_zero = REG(REG_TYPE_CONST, /* * Common functions prototypes */ -static void dump_program(struct r300_fragment_program *fp); -static void emit_arith(struct r300_fragment_program *fp, int op, +static void dump_program(struct r300_fragment_program *fp, + struct r300_fragment_program_code *code); +static void emit_arith(struct r300_pfs_compile_state *cs, int op, GLuint dest, int mask, GLuint src0, GLuint src1, GLuint src2, int flags); /** * Get an R300 temporary that can be written to in the given slot. */ -static int get_hw_temp(struct r300_fragment_program *fp, int slot) +static int get_hw_temp(struct r300_pfs_compile_state *cs, int slot) { COMPILE_STATE; int r; @@ -418,8 +425,8 @@ static int get_hw_temp(struct r300_fragment_program *fp, int slot) cs->hwtemps[r].vector_valid = 0; cs->hwtemps[r].scalar_valid = 0; - if (r > fp->max_temp_idx) - fp->max_temp_idx = r; + if (r > fp->code.max_temp_idx) + fp->code.max_temp_idx = r; return r; } @@ -427,7 +434,7 @@ static int get_hw_temp(struct r300_fragment_program *fp, int slot) /** * Get an R300 temporary that will act as a TEX destination register. 
*/ -static int get_hw_temp_tex(struct r300_fragment_program *fp) +static int get_hw_temp_tex(struct r300_pfs_compile_state *cs) { COMPILE_STATE; int r; @@ -442,7 +449,7 @@ static int get_hw_temp_tex(struct r300_fragment_program *fp) } if (r >= PFS_NUM_TEMP_REGS) - return get_hw_temp(fp, 0); /* Will cause an indirection */ + return get_hw_temp(cs, 0); /* Will cause an indirection */ cs->hwtemps[r].reserved = cs->hwtemps[r].free; cs->hwtemps[r].free = -1; @@ -454,8 +461,8 @@ static int get_hw_temp_tex(struct r300_fragment_program *fp) cs->hwtemps[r].vector_valid = cs->nrslots; cs->hwtemps[r].scalar_valid = cs->nrslots; - if (r > fp->max_temp_idx) - fp->max_temp_idx = r; + if (r > code->max_temp_idx) + code->max_temp_idx = r; return r; } @@ -463,10 +470,8 @@ static int get_hw_temp_tex(struct r300_fragment_program *fp) /** * Mark the given hardware register as free. */ -static void free_hw_temp(struct r300_fragment_program *fp, int idx) +static void free_hw_temp(struct r300_pfs_compile_state *cs, int idx) { - COMPILE_STATE; - // Be very careful here. Consider sequences like // MAD r0, r1,r2,r3 // TEX r4, ... @@ -482,7 +487,7 @@ static void free_hw_temp(struct r300_fragment_program *fp, int idx) /** * Create a new Mesa temporary register. */ -static GLuint get_temp_reg(struct r300_fragment_program *fp) +static GLuint get_temp_reg(struct r300_pfs_compile_state *cs) { COMPILE_STATE; GLuint r = undef; @@ -508,7 +513,7 @@ static GLuint get_temp_reg(struct r300_fragment_program *fp) * Create a new Mesa temporary register that will act as the destination * register for a texture read. 
*/ -static GLuint get_temp_reg_tex(struct r300_fragment_program *fp) +static GLuint get_temp_reg_tex(struct r300_pfs_compile_state *cs) { COMPILE_STATE; GLuint r = undef; @@ -522,7 +527,7 @@ static GLuint get_temp_reg_tex(struct r300_fragment_program *fp) cs->temp_in_use |= (1 << --index); cs->temps[index].refcount = 0xFFFFFFFF; - cs->temps[index].reg = get_hw_temp_tex(fp); + cs->temps[index].reg = get_hw_temp_tex(cs); REG_SET_TYPE(r, REG_TYPE_TEMP); REG_SET_INDEX(r, index); @@ -533,20 +538,19 @@ static GLuint get_temp_reg_tex(struct r300_fragment_program *fp) /** * Free a Mesa temporary and the associated R300 temporary. */ -static void free_temp(struct r300_fragment_program *fp, GLuint r) +static void free_temp(struct r300_pfs_compile_state *cs, GLuint r) { - COMPILE_STATE; GLuint index = REG_GET_INDEX(r); if (!(cs->temp_in_use & (1 << index))) return; if (REG_GET_TYPE(r) == REG_TYPE_TEMP) { - free_hw_temp(fp, cs->temps[index].reg); + free_hw_temp(cs, cs->temps[index].reg); cs->temps[index].reg = -1; cs->temp_in_use &= ~(1 << index); } else if (REG_GET_TYPE(r) == REG_TYPE_INPUT) { - free_hw_temp(fp, cs->inputs[index].reg); + free_hw_temp(cs, cs->inputs[index].reg); cs->inputs[index].reg = -1; } } @@ -560,25 +564,26 @@ static void free_temp(struct r300_fragment_program *fp, GLuint r) * of the fragment program (actually, up until the next time the fragment * program is translated). 
*/ -static GLuint emit_const4fv(struct r300_fragment_program *fp, +static GLuint emit_const4fv(struct r300_pfs_compile_state *cs, const GLfloat * cp) { + COMPILE_STATE; GLuint reg = undef; int index; - for (index = 0; index < fp->const_nr; ++index) { - if (fp->constant[index] == cp) + for (index = 0; index < code->const_nr; ++index) { + if (code->constant[index] == cp) break; } - if (index >= fp->const_nr) { + if (index >= code->const_nr) { if (index >= PFS_NUM_CONST_REGS) { ERROR("Out of hw constants!\n"); return reg; } - fp->const_nr++; - fp->constant[index] = cp; + code->const_nr++; + code->constant[index] = cp; } REG_SET_TYPE(reg, REG_TYPE_CONST); @@ -609,9 +614,11 @@ static inline GLuint absolute(GLuint r) return r; } -static int swz_native(struct r300_fragment_program *fp, +static int swz_native(struct r300_pfs_compile_state *cs, GLuint src, GLuint * r, GLuint arbneg) { + COMPILE_STATE; + /* Native swizzle, handle negation */ src = (src & ~REG_NEGS_MASK) | (((arbneg >> 3) & 1) << REG_NEGS_SHIFT); @@ -623,13 +630,13 @@ static int swz_native(struct r300_fragment_program *fp, *r = src; } else { if (!REG_GET_VALID(*r)) - *r = get_temp_reg(fp); + *r = get_temp_reg(cs); src |= REG_NEGV_MASK; - emit_arith(fp, + emit_arith(cs, PFS_OP_MAD, *r, arbneg & 0x7, keep(src), pfs_one, pfs_zero, 0); src = src & ~REG_NEGV_MASK; - emit_arith(fp, + emit_arith(cs, PFS_OP_MAD, *r, (arbneg ^ 0x7) | WRITEMASK_W, @@ -639,15 +646,16 @@ static int swz_native(struct r300_fragment_program *fp, return 3; } -static int swz_emit_partial(struct r300_fragment_program *fp, +static int swz_emit_partial(struct r300_pfs_compile_state *cs, GLuint src, GLuint * r, int mask, int mc, GLuint arbneg) { + COMPILE_STATE; GLuint tmp; GLuint wmask = 0; if (!REG_GET_VALID(*r)) - *r = get_temp_reg(fp); + *r = get_temp_reg(cs); /* A partial match, VSWZ/mask define what parts of the * desired swizzle we match @@ -661,7 +669,7 @@ static int swz_emit_partial(struct r300_fragment_program *fp, if (tmp) { tmp = tmp 
^ s_mask[mask].mask; if (tmp) { - emit_arith(fp, + emit_arith(cs, PFS_OP_MAD, *r, arbneg & s_mask[mask].mask, @@ -672,7 +680,7 @@ static int swz_emit_partial(struct r300_fragment_program *fp, } else { REG_SET_NO_USE(src, GL_FALSE); } - emit_arith(fp, + emit_arith(cs, PFS_OP_MAD, *r, tmp | wmask, src, pfs_one, pfs_zero, 0); } else { @@ -681,7 +689,7 @@ static int swz_emit_partial(struct r300_fragment_program *fp, } else { REG_SET_NO_USE(src, GL_FALSE); } - emit_arith(fp, + emit_arith(cs, PFS_OP_MAD, *r, (arbneg & s_mask[mask].mask) | wmask, @@ -693,7 +701,7 @@ static int swz_emit_partial(struct r300_fragment_program *fp, } else { REG_SET_NO_USE(src, GL_FALSE); } - emit_arith(fp, PFS_OP_MAD, + emit_arith(cs, PFS_OP_MAD, *r, s_mask[mask].mask | wmask, src, pfs_one, pfs_zero, 0); @@ -702,9 +710,10 @@ static int swz_emit_partial(struct r300_fragment_program *fp, return s_mask[mask].count; } -static GLuint do_swizzle(struct r300_fragment_program *fp, +static GLuint do_swizzle(struct r300_pfs_compile_state *cs, GLuint src, GLuint arbswz, GLuint arbneg) { + COMPILE_STATE; GLuint r = undef; GLuint vswz; int c_mask = 0; @@ -759,10 +768,10 @@ static GLuint do_swizzle(struct r300_fragment_program *fp, if (chash == (arbswz & s_mask[c_mask].hash)) { if (s_mask[c_mask].count == 3) { - v_match += swz_native(fp, + v_match += swz_native(cs, src, &r, arbneg); } else { - v_match += swz_emit_partial(fp, + v_match += swz_emit_partial(cs, src, &r, c_mask, @@ -787,9 +796,10 @@ static GLuint do_swizzle(struct r300_fragment_program *fp, return r; } -static GLuint t_src(struct r300_fragment_program *fp, +static GLuint t_src(struct r300_pfs_compile_state *cs, struct prog_src_register fpsrc) { + COMPILE_STATE; GLuint r = undef; switch (fpsrc.File) { @@ -804,19 +814,18 @@ static GLuint t_src(struct r300_fragment_program *fp, REG_SET_TYPE(r, REG_TYPE_INPUT); break; case PROGRAM_LOCAL_PARAM: - r = emit_const4fv(fp, + r = emit_const4fv(cs, fp->mesa_program.Base.LocalParams[fpsrc. 
Index]); break; case PROGRAM_ENV_PARAM: - r = emit_const4fv(fp, - fp->ctx->FragmentProgram.Parameters[fpsrc. - Index]); + r = emit_const4fv(cs, + cs->r300->radeon.glCtx->FragmentProgram.Parameters[fpsrc.Index]); break; case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: case PROGRAM_CONSTANT: - r = emit_const4fv(fp, + r = emit_const4fv(cs, fp->mesa_program.Base.Parameters-> ParameterValues[fpsrc.Index]); break; @@ -827,11 +836,11 @@ static GLuint t_src(struct r300_fragment_program *fp, /* no point swizzling ONE/ZERO/HALF constants... */ if (REG_GET_VSWZ(r) < SWIZZLE_111 || REG_GET_SSWZ(r) < SWIZZLE_ZERO) - r = do_swizzle(fp, r, fpsrc.Swizzle, fpsrc.NegateBase); + r = do_swizzle(cs, r, fpsrc.Swizzle, fpsrc.NegateBase); return r; } -static GLuint t_scalar_src(struct r300_fragment_program *fp, +static GLuint t_scalar_src(struct r300_pfs_compile_state *cs, struct prog_src_register fpsrc) { struct prog_src_register src = fpsrc; @@ -839,12 +848,13 @@ static GLuint t_scalar_src(struct r300_fragment_program *fp, src.Swizzle = ((sc << 0) | (sc << 3) | (sc << 6) | (sc << 9)); - return t_src(fp, src); + return t_src(cs, src); } -static GLuint t_dst(struct r300_fragment_program *fp, +static GLuint t_dst(struct r300_pfs_compile_state *cs, struct prog_dst_register dest) { + COMPILE_STATE; GLuint r = undef; switch (dest.File) { @@ -871,7 +881,7 @@ static GLuint t_dst(struct r300_fragment_program *fp, } } -static int t_hw_src(struct r300_fragment_program *fp, GLuint src, GLboolean tex) +static int t_hw_src(struct r300_pfs_compile_state *cs, GLuint src, GLboolean tex) { COMPILE_STATE; int idx; @@ -883,18 +893,18 @@ static int t_hw_src(struct r300_fragment_program *fp, GLuint src, GLboolean tex) * hasn't been written to. Undefined results. 
*/ if (cs->temps[index].reg == -1) - cs->temps[index].reg = get_hw_temp(fp, cs->nrslots); + cs->temps[index].reg = get_hw_temp(cs, cs->nrslots); idx = cs->temps[index].reg; if (!REG_GET_NO_USE(src) && (--cs->temps[index].refcount == 0)) - free_temp(fp, src); + free_temp(cs, src); break; case REG_TYPE_INPUT: idx = cs->inputs[index].reg; if (!REG_GET_NO_USE(src) && (--cs->inputs[index].refcount == 0)) - free_hw_temp(fp, cs->inputs[index].reg); + free_hw_temp(cs, cs->inputs[index].reg); break; case REG_TYPE_CONST: return (index | SRC_CONST); @@ -909,7 +919,7 @@ static int t_hw_src(struct r300_fragment_program *fp, GLuint src, GLboolean tex) return idx; } -static int t_hw_dst(struct r300_fragment_program *fp, +static int t_hw_dst(struct r300_pfs_compile_state *cs, GLuint dest, GLboolean tex, int slot) { COMPILE_STATE; @@ -921,15 +931,15 @@ static int t_hw_dst(struct r300_fragment_program *fp, case REG_TYPE_TEMP: if (cs->temps[REG_GET_INDEX(dest)].reg == -1) { if (!tex) { - cs->temps[index].reg = get_hw_temp(fp, slot); + cs->temps[index].reg = get_hw_temp(cs, slot); } else { - cs->temps[index].reg = get_hw_temp_tex(fp); + cs->temps[index].reg = get_hw_temp_tex(cs); } } idx = cs->temps[index].reg; if (!REG_GET_NO_USE(dest) && (--cs->temps[index].refcount == 0)) - free_temp(fp, dest); + free_temp(cs, dest); cs->dest_in_node |= (1 << idx); cs->used_in_node |= (1 << idx); @@ -937,13 +947,11 @@ static int t_hw_dst(struct r300_fragment_program *fp, case REG_TYPE_OUTPUT: switch (index) { case FRAG_RESULT_COLR: - fp->node[fp->cur_node].flags |= - R300_RGBA_OUT; + code->node[code->cur_node].flags |= R300_RGBA_OUT; break; case FRAG_RESULT_DEPR: fp->WritesDepth = GL_TRUE; - fp->node[fp->cur_node].flags |= - R300_W_OUT; + code->node[code->cur_node].flags |= R300_W_OUT; break; } return index; @@ -956,7 +964,7 @@ static int t_hw_dst(struct r300_fragment_program *fp, return idx; } -static void emit_nop(struct r300_fragment_program *fp) +static void emit_nop(struct 
r300_pfs_compile_state *cs) { COMPILE_STATE; @@ -965,18 +973,18 @@ static void emit_nop(struct r300_fragment_program *fp) return; } - fp->alu.inst[cs->nrslots].inst0 = NOP_INST0; - fp->alu.inst[cs->nrslots].inst1 = NOP_INST1; - fp->alu.inst[cs->nrslots].inst2 = NOP_INST2; - fp->alu.inst[cs->nrslots].inst3 = NOP_INST3; + code->alu.inst[cs->nrslots].inst0 = NOP_INST0; + code->alu.inst[cs->nrslots].inst1 = NOP_INST1; + code->alu.inst[cs->nrslots].inst2 = NOP_INST2; + code->alu.inst[cs->nrslots].inst3 = NOP_INST3; cs->nrslots++; } -static void emit_tex(struct r300_fragment_program *fp, +static void emit_tex(struct r300_pfs_compile_state *cs, struct prog_instruction *fpi, int opcode) { COMPILE_STATE; - GLuint coord = t_src(fp, fpi->SrcReg[0]); + GLuint coord = t_src(cs, fpi->SrcReg[0]); GLuint dest = undef, rdest = undef; GLuint din, uin; int unit = fpi->TexSrcUnit; @@ -1001,15 +1009,15 @@ static void emit_tex(struct r300_fragment_program *fp, tokens[2] = unit; factor_index = - _mesa_add_state_reference(fp->mesa_program.Base. + _mesa_add_state_reference(cs->fp->mesa_program.Base. 
Parameters, tokens); factorreg = - emit_const4fv(fp, - fp->mesa_program.Base.Parameters-> + emit_const4fv(cs, + cs->fp->mesa_program.Base.Parameters-> ParameterValues[factor_index]); - tempreg = keep(get_temp_reg(fp)); + tempreg = keep(get_temp_reg(cs)); - emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, + emit_arith(cs, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, coord, factorreg, pfs_zero, 0); coord = tempreg; @@ -1022,8 +1030,8 @@ static void emit_tex(struct r300_fragment_program *fp, REG_GET_SSWZ(coord) != SWIZZLE_W || coord & (REG_NEGV_MASK | REG_NEGS_MASK | REG_ABS_MASK)) { assert(tempreg == 0); - tempreg = keep(get_temp_reg(fp)); - emit_arith(fp, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, + tempreg = keep(get_temp_reg(cs)); + emit_arith(cs, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, coord, pfs_one, pfs_zero, 0); coord = tempreg; } @@ -1033,30 +1041,30 @@ static void emit_tex(struct r300_fragment_program *fp, din = cs->dest_in_node; /* Resolve source/dest to hardware registers */ - hwsrc = t_hw_src(fp, coord, GL_TRUE); + hwsrc = t_hw_src(cs, coord, GL_TRUE); if (opcode != R300_TEX_OP_KIL) { - dest = t_dst(fp, fpi->DstReg); + dest = t_dst(cs, fpi->DstReg); /* r300 doesn't seem to be able to do TEX->output reg */ if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { rdest = dest; - dest = get_temp_reg_tex(fp); + dest = get_temp_reg_tex(cs); } else if (fpi->DstReg.WriteMask != WRITEMASK_XYZW) { /* in case write mask isn't XYZW */ rdest = dest; - dest = get_temp_reg_tex(fp); + dest = get_temp_reg_tex(cs); } hwdest = - t_hw_dst(fp, dest, GL_TRUE, - fp->node[fp->cur_node].alu_offset); + t_hw_dst(cs, dest, GL_TRUE, + code->node[code->cur_node].alu_offset); /* Use a temp that hasn't been used in this node, rather * than causing an indirection */ if (uin & (1 << hwdest)) { - free_hw_temp(fp, hwdest); - hwdest = get_hw_temp_tex(fp); + free_hw_temp(cs, hwdest); + hwdest = get_hw_temp_tex(cs); cs->temps[REG_GET_INDEX(dest)].reg = hwdest; } } else { @@ -1071,32 +1079,32 @@ static void 
emit_tex(struct r300_fragment_program *fp, (din & (1 << hwsrc))) || (uin & (1 << hwdest))) { /* Finish off current node */ - if (fp->node[fp->cur_node].alu_offset == cs->nrslots) - emit_nop(fp); + if (code->node[code->cur_node].alu_offset == cs->nrslots) + emit_nop(cs); - fp->node[fp->cur_node].alu_end = - cs->nrslots - fp->node[fp->cur_node].alu_offset - 1; - assert(fp->node[fp->cur_node].alu_end >= 0); + code->node[code->cur_node].alu_end = + cs->nrslots - code->node[code->cur_node].alu_offset - 1; + assert(code->node[code->cur_node].alu_end >= 0); - if (++fp->cur_node >= PFS_MAX_TEX_INDIRECT) { + if (++code->cur_node >= PFS_MAX_TEX_INDIRECT) { ERROR("too many levels of texture indirection\n"); return; } /* Start new node */ - fp->node[fp->cur_node].tex_offset = fp->tex.length; - fp->node[fp->cur_node].alu_offset = cs->nrslots; - fp->node[fp->cur_node].tex_end = -1; - fp->node[fp->cur_node].alu_end = -1; - fp->node[fp->cur_node].flags = 0; + code->node[code->cur_node].tex_offset = code->tex.length; + code->node[code->cur_node].alu_offset = cs->nrslots; + code->node[code->cur_node].tex_end = -1; + code->node[code->cur_node].alu_end = -1; + code->node[code->cur_node].flags = 0; cs->used_in_node = 0; cs->dest_in_node = 0; } - if (fp->cur_node == 0) - fp->first_node_has_tex = 1; + if (code->cur_node == 0) + code->first_node_has_tex = 1; - fp->tex.inst[fp->tex.length++] = 0 | (hwsrc << R300_SRC_ADDR_SHIFT) + code->tex.inst[code->tex.length++] = 0 | (hwsrc << R300_SRC_ADDR_SHIFT) | (hwdest << R300_DST_ADDR_SHIFT) | (unit << R300_TEX_ID_SHIFT) | (opcode << R300_TEX_INST_SHIFT); @@ -1105,25 +1113,25 @@ static void emit_tex(struct r300_fragment_program *fp, if (REG_GET_TYPE(coord) != REG_TYPE_CONST) cs->used_in_node |= (1 << hwsrc); - fp->node[fp->cur_node].tex_end++; + code->node[code->cur_node].tex_end++; /* Copy from temp to output if needed */ if (REG_GET_VALID(rdest)) { - emit_arith(fp, PFS_OP_MAD, rdest, fpi->DstReg.WriteMask, dest, + emit_arith(cs, PFS_OP_MAD, 
rdest, fpi->DstReg.WriteMask, dest, pfs_one, pfs_zero, 0); - free_temp(fp, dest); + free_temp(cs, dest); } /* Free temp register */ if (tempreg != 0) - free_temp(fp, tempreg); + free_temp(cs, tempreg); } /** * Returns the first slot where we could possibly allow writing to dest, * according to register allocation. */ -static int get_earliest_allowed_write(struct r300_fragment_program *fp, +static int get_earliest_allowed_write(struct r300_pfs_compile_state *cs, GLuint dest, int mask) { COMPILE_STATE; @@ -1173,7 +1181,7 @@ static int get_earliest_allowed_write(struct r300_fragment_program *fp, * * @return the index of the slot */ -static int find_and_prepare_slot(struct r300_fragment_program *fp, +static int find_and_prepare_slot(struct r300_pfs_compile_state *cs, GLboolean emit_vop, GLboolean emit_sop, int argc, GLuint * src, GLuint dest, int mask) @@ -1198,10 +1206,10 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, if (emit_sop) used |= SLOT_OP_SCALAR; - pos = get_earliest_allowed_write(fp, dest, mask); + pos = get_earliest_allowed_write(cs, dest, mask); - if (fp->node[fp->cur_node].alu_offset > pos) - pos = fp->node[fp->cur_node].alu_offset; + if (code->node[code->cur_node].alu_offset > pos) + pos = code->node[code->cur_node].alu_offset; for (i = 0; i < argc; ++i) { if (!REG_GET_BUILTIN(src[i])) { if (emit_vop) @@ -1210,7 +1218,7 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, used |= s_swiz[REG_GET_SSWZ(src[i])].flags << i; } - hwsrc[i] = t_hw_src(fp, src[i], GL_FALSE); /* Note: sideeffects wrt refcounting! */ + hwsrc[i] = t_hw_src(cs, src[i], GL_FALSE); /* Note: sideeffects wrt refcounting! 
*/ regnr = hwsrc[i] & 31; if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { @@ -1236,10 +1244,10 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, return -1; } - fp->alu.inst[pos].inst0 = NOP_INST0; - fp->alu.inst[pos].inst1 = NOP_INST1; - fp->alu.inst[pos].inst2 = NOP_INST2; - fp->alu.inst[pos].inst3 = NOP_INST3; + fp->code.alu.inst[pos].inst0 = NOP_INST0; + fp->code.alu.inst[pos].inst1 = NOP_INST1; + fp->code.alu.inst[pos].inst2 = NOP_INST2; + fp->code.alu.inst[pos].inst3 = NOP_INST3; cs->nrslots++; } @@ -1316,14 +1324,14 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, } // Emit the source fetch code - fp->alu.inst[pos].inst1 &= ~R300_ALU_SRC_MASK; - fp->alu.inst[pos].inst1 |= + code->alu.inst[pos].inst1 &= ~R300_ALU_SRC_MASK; + code->alu.inst[pos].inst1 |= ((cs->slot[pos].vsrc[0] << R300_ALU_SRC0C_SHIFT) | (cs->slot[pos].vsrc[1] << R300_ALU_SRC1C_SHIFT) | (cs->slot[pos].vsrc[2] << R300_ALU_SRC2C_SHIFT)); - fp->alu.inst[pos].inst3 &= ~R300_ALU_SRC_MASK; - fp->alu.inst[pos].inst3 |= + code->alu.inst[pos].inst3 &= ~R300_ALU_SRC_MASK; + code->alu.inst[pos].inst3 |= ((cs->slot[pos].ssrc[0] << R300_ALU_SRC0A_SHIFT) | (cs->slot[pos].ssrc[1] << R300_ALU_SRC1A_SHIFT) | (cs->slot[pos].ssrc[2] << R300_ALU_SRC2A_SHIFT)); @@ -1349,10 +1357,10 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, } } - fp->alu.inst[pos].inst0 &= + code->alu.inst[pos].inst0 &= ~(R300_ALU_ARG0C_MASK | R300_ALU_ARG1C_MASK | R300_ALU_ARG2C_MASK); - fp->alu.inst[pos].inst0 |= + code->alu.inst[pos].inst0 |= (swz[0] << R300_ALU_ARG0C_SHIFT) | (swz[1] << R300_ALU_ARG1C_SHIFT) | (swz[2] << R300_ALU_ARG2C_SHIFT); @@ -1378,10 +1386,10 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, } } - fp->alu.inst[pos].inst2 &= + code->alu.inst[pos].inst2 &= ~(R300_ALU_ARG0A_MASK | R300_ALU_ARG1A_MASK | R300_ALU_ARG2A_MASK); - fp->alu.inst[pos].inst2 |= + code->alu.inst[pos].inst2 |= (swz[0] << R300_ALU_ARG0A_SHIFT) | (swz[1] << 
R300_ALU_ARG1A_SHIFT) | (swz[2] << R300_ALU_ARG2A_SHIFT); @@ -1393,7 +1401,7 @@ static int find_and_prepare_slot(struct r300_fragment_program *fp, /** * Append an ALU instruction to the instruction list. */ -static void emit_arith(struct r300_fragment_program *fp, +static void emit_arith(struct r300_pfs_compile_state *cs, int op, GLuint dest, int mask, @@ -1427,12 +1435,12 @@ static void emit_arith(struct r300_fragment_program *fp, emit_sop = GL_TRUE; pos = - find_and_prepare_slot(fp, emit_vop, emit_sop, argc, src, dest, + find_and_prepare_slot(cs, emit_vop, emit_sop, argc, src, dest, mask); if (pos < 0) return; - hwdest = t_hw_dst(fp, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */ + hwdest = t_hw_dst(cs, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */ if (flags & PFS_FLAG_SAT) { vop |= R300_ALU_OUTC_CLAMP; @@ -1441,19 +1449,19 @@ static void emit_arith(struct r300_fragment_program *fp, /* Throw the pieces together and get ALU/1 */ if (emit_vop) { - fp->alu.inst[pos].inst0 |= vop; + code->alu.inst[pos].inst0 |= vop; - fp->alu.inst[pos].inst1 |= hwdest << R300_ALU_DSTC_SHIFT; + code->alu.inst[pos].inst1 |= hwdest << R300_ALU_DSTC_SHIFT; if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { - fp->alu.inst[pos].inst1 |= + code->alu.inst[pos].inst1 |= (mask & WRITEMASK_XYZ) << R300_ALU_DSTC_OUTPUT_MASK_SHIFT; } else assert(0); } else { - fp->alu.inst[pos].inst1 |= + code->alu.inst[pos].inst1 |= (mask & WRITEMASK_XYZ) << R300_ALU_DSTC_REG_MASK_SHIFT; @@ -1463,22 +1471,22 @@ static void emit_arith(struct r300_fragment_program *fp, /* And now ALU/3 */ if (emit_sop) { - fp->alu.inst[pos].inst2 |= sop; + code->alu.inst[pos].inst2 |= sop; if (mask & WRITEMASK_W) { if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { - fp->alu.inst[pos].inst3 |= + code->alu.inst[pos].inst3 |= (hwdest << R300_ALU_DSTA_SHIFT) | R300_ALU_DSTA_OUTPUT; } else if 
(REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) { - fp->alu.inst[pos].inst3 |= + code->alu.inst[pos].inst3 |= R300_ALU_DSTA_DEPTH; } else assert(0); } else { - fp->alu.inst[pos].inst3 |= + code->alu.inst[pos].inst3 |= (hwdest << R300_ALU_DSTA_SHIFT) | R300_ALU_DSTA_REG; @@ -1548,7 +1556,7 @@ static GLfloat SinCosConsts[2][4] = { static const GLfloat LitConst[4] = { 127.999999, 127.999999, 127.999999, -127.999999 }; -static void emit_lit(struct r300_fragment_program *fp, +static void emit_lit(struct r300_pfs_compile_state *cs, GLuint dest, int mask, GLuint src, int flags) { COMPILE_STATE; @@ -1556,7 +1564,7 @@ static void emit_lit(struct r300_fragment_program *fp, int needTemporary; GLuint temp; - cnst = emit_const4fv(fp, LitConst); + cnst = emit_const4fv(cs, LitConst); needTemporary = 0; if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) { @@ -1568,7 +1576,7 @@ static void emit_lit(struct r300_fragment_program *fp, } if (needTemporary) { - temp = keep(get_temp_reg(fp)); + temp = keep(get_temp_reg(cs)); } else { temp = keep(dest); } @@ -1579,48 +1587,49 @@ static void emit_lit(struct r300_fragment_program *fp, // so swizzling between the two parts can create fake dependencies. // First slot - emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_XY, + emit_arith(cs, PFS_OP_MAX, temp, WRITEMASK_XY, keep(src), pfs_zero, undef, 0); - emit_arith(fp, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0); + emit_arith(cs, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0); // Second slot - emit_arith(fp, PFS_OP_MIN, temp, WRITEMASK_Z, + emit_arith(cs, PFS_OP_MIN, temp, WRITEMASK_Z, swizzle(temp, W, W, W, W), cnst, undef, 0); - emit_arith(fp, PFS_OP_LG2, temp, WRITEMASK_W, + emit_arith(cs, PFS_OP_LG2, temp, WRITEMASK_W, swizzle(temp, Y, Y, Y, Y), undef, undef, 0); // Third slot // If desired, we saturate the y result here. 
// This does not affect the use as a condition variable in the CMP later - emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W, + emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_W, temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0); - emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_Y, + emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_Y, swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags); // Fourth slot - emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_X, + emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_X, pfs_one, pfs_one, pfs_zero, 0); - emit_arith(fp, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0); + emit_arith(cs, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0); // Fifth slot - emit_arith(fp, PFS_OP_CMP, temp, WRITEMASK_Z, + emit_arith(cs, PFS_OP_CMP, temp, WRITEMASK_Z, pfs_zero, swizzle(temp, W, W, W, W), negate(swizzle(temp, Y, Y, Y, Y)), flags); - emit_arith(fp, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one, + emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one, pfs_zero, 0); if (needTemporary) { - emit_arith(fp, PFS_OP_MAD, dest, mask, + emit_arith(cs, PFS_OP_MAD, dest, mask, temp, pfs_one, pfs_zero, flags); - free_temp(fp, temp); + free_temp(cs, temp); } else { // Decrease refcount of the destination - t_hw_dst(fp, dest, GL_FALSE, cs->nrslots); + t_hw_dst(cs, dest, GL_FALSE, cs->nrslots); } } -static GLboolean parse_program(struct r300_fragment_program *fp) +static GLboolean parse_program(struct r300_pfs_compile_state *cs) { + COMPILE_STATE; struct gl_fragment_program *mp = &fp->mesa_program; const struct prog_instruction *inst = mp->Base.Instructions; struct prog_instruction *fpi; @@ -1640,30 +1649,30 @@ static GLboolean parse_program(struct r300_fragment_program *fp) flags = 0; if (fpi->Opcode != OPCODE_KIL) { - dest = t_dst(fp, fpi->DstReg); + dest = t_dst(cs, fpi->DstReg); mask = fpi->DstReg.WriteMask; } switch (fpi->Opcode) { case OPCODE_ABS: - src[0] = t_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + 
emit_arith(cs, PFS_OP_MAD, dest, mask, absolute(src[0]), pfs_one, pfs_zero, flags); break; case OPCODE_ADD: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_MAD, dest, mask, src[0], pfs_one, src[1], flags); break; case OPCODE_CMP: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - src[2] = t_src(fp, fpi->SrcReg[2]); + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + src[2] = t_src(cs, fpi->SrcReg[2]); /* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c * r300 - if src2.c < 0.0 ? src1.c : src0.c */ - emit_arith(fp, PFS_OP_CMP, dest, mask, + emit_arith(cs, PFS_OP_CMP, dest, mask, src[2], src[1], src[0], flags); break; case OPCODE_COS: @@ -1675,196 +1684,196 @@ static GLboolean parse_program(struct r300_fragment_program *fp) * x = (x*2*PI)-PI * result = sin(x) */ - temp[0] = get_temp_reg(fp); - const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); - const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + temp[0] = get_temp_reg(cs); + const_sin[0] = emit_const4fv(cs, SinCosConsts[0]); + const_sin[1] = emit_const4fv(cs, SinCosConsts[1]); + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); /* add 0.5*PI and do range reduction */ - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, swizzle(src[0], X, X, X, X), swizzle(const_sin[1], Z, Z, Z, Z), swizzle(const_sin[1], X, X, X, X), 0); - emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X, + emit_arith(cs, PFS_OP_FRC, temp[0], WRITEMASK_X, swizzle(temp[0], X, X, X, X), undef, undef, 0); - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI 
negate(swizzle(const_sin[0], Z, Z, Z, Z)), //-PI 0); /* SIN */ - emit_arith(fp, PFS_OP_MAD, temp[0], + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], Z, Z, Z, Z), const_sin[0], pfs_zero, 0); - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, swizzle(temp[0], Y, Y, Y, Y), absolute(swizzle(temp[0], Z, Z, Z, Z)), swizzle(temp[0], X, X, X, X), 0); - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y, + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Y, swizzle(temp[0], X, X, X, X), absolute(swizzle(temp[0], X, X, X, X)), negate(swizzle(temp[0], X, X, X, X)), 0); - emit_arith(fp, PFS_OP_MAD, dest, mask, + emit_arith(cs, PFS_OP_MAD, dest, mask, swizzle(temp[0], Y, Y, Y, Y), swizzle(const_sin[0], W, W, W, W), swizzle(temp[0], X, X, X, X), flags); - free_temp(fp, temp[0]); + free_temp(cs, temp[0]); break; case OPCODE_DP3: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_DP3, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_DP3, dest, mask, src[0], src[1], undef, flags); break; case OPCODE_DP4: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_DP4, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_DP4, dest, mask, src[0], src[1], undef, flags); break; case OPCODE_DPH: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); /* src0.xyz1 -> temp * DP4 dest, temp, src1 */ - emit_arith(fp, PFS_OP_DP4, dest, mask, + emit_arith(cs, PFS_OP_DP4, dest, mask, swizzle(src[0], X, Y, Z, ONE), src[1], undef, flags); break; case OPCODE_DST: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); /* 
dest.y = src0.y * src1.y */ if (mask & WRITEMASK_Y) - emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Y, + emit_arith(cs, PFS_OP_MAD, dest, WRITEMASK_Y, keep(src[0]), keep(src[1]), pfs_zero, flags); /* dest.z = src0.z */ if (mask & WRITEMASK_Z) - emit_arith(fp, PFS_OP_MAD, dest, WRITEMASK_Z, + emit_arith(cs, PFS_OP_MAD, dest, WRITEMASK_Z, src[0], pfs_one, pfs_zero, flags); /* result.x = 1.0 * result.w = src1.w */ if (mask & WRITEMASK_XW) { REG_SET_VSWZ(src[1], SWIZZLE_111); /*Cheat */ - emit_arith(fp, PFS_OP_MAD, dest, + emit_arith(cs, PFS_OP_MAD, dest, mask & WRITEMASK_XW, src[1], pfs_one, pfs_zero, flags); } break; case OPCODE_EX2: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_EX2, dest, mask, + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_EX2, dest, mask, src[0], undef, undef, flags); break; case OPCODE_FLR: - src[0] = t_src(fp, fpi->SrcReg[0]); - temp[0] = get_temp_reg(fp); + src[0] = t_src(cs, fpi->SrcReg[0]); + temp[0] = get_temp_reg(cs); /* FRC temp, src0 * MAD dest, src0, 1.0, -temp */ - emit_arith(fp, PFS_OP_FRC, temp[0], mask, + emit_arith(cs, PFS_OP_FRC, temp[0], mask, keep(src[0]), undef, undef, 0); - emit_arith(fp, PFS_OP_MAD, dest, mask, + emit_arith(cs, PFS_OP_MAD, dest, mask, src[0], pfs_one, negate(temp[0]), flags); - free_temp(fp, temp[0]); + free_temp(cs, temp[0]); break; case OPCODE_FRC: - src[0] = t_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_FRC, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_FRC, dest, mask, src[0], undef, undef, flags); break; case OPCODE_KIL: - emit_tex(fp, fpi, R300_TEX_OP_KIL); + emit_tex(cs, fpi, R300_TEX_OP_KIL); break; case OPCODE_LG2: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_LG2, dest, mask, + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_LG2, dest, mask, src[0], undef, undef, flags); break; case OPCODE_LIT: - src[0] = t_src(fp, fpi->SrcReg[0]); - emit_lit(fp, dest, mask, src[0], flags); + src[0] = 
t_src(cs, fpi->SrcReg[0]); + emit_lit(cs, dest, mask, src[0], flags); break; case OPCODE_LRP: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - src[2] = t_src(fp, fpi->SrcReg[2]); + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + src[2] = t_src(cs, fpi->SrcReg[2]); /* result = tmp0tmp1 + (1 - tmp0)tmp2 * = tmp0tmp1 + tmp2 + (-tmp0)tmp2 * MAD temp, -tmp0, tmp2, tmp2 * MAD result, tmp0, tmp1, temp */ - temp[0] = get_temp_reg(fp); - emit_arith(fp, PFS_OP_MAD, temp[0], mask, + temp[0] = get_temp_reg(cs); + emit_arith(cs, PFS_OP_MAD, temp[0], mask, negate(keep(src[0])), keep(src[2]), src[2], 0); - emit_arith(fp, PFS_OP_MAD, dest, mask, + emit_arith(cs, PFS_OP_MAD, dest, mask, src[0], src[1], temp[0], flags); - free_temp(fp, temp[0]); + free_temp(cs, temp[0]); break; case OPCODE_MAD: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - src[2] = t_src(fp, fpi->SrcReg[2]); - emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + src[2] = t_src(cs, fpi->SrcReg[2]); + emit_arith(cs, PFS_OP_MAD, dest, mask, src[0], src[1], src[2], flags); break; case OPCODE_MAX: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MAX, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_MAX, dest, mask, src[0], src[1], undef, flags); break; case OPCODE_MIN: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MIN, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_MIN, dest, mask, src[0], src[1], undef, flags); break; case OPCODE_MOV: case OPCODE_SWZ: - src[0] = t_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_MAD, dest, mask, src[0], pfs_one, pfs_zero, flags); 
break; case OPCODE_MUL: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_MAD, dest, mask, src[0], src[1], pfs_zero, flags); break; case OPCODE_POW: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - src[1] = t_scalar_src(fp, fpi->SrcReg[1]); - temp[0] = get_temp_reg(fp); - emit_arith(fp, PFS_OP_LG2, temp[0], WRITEMASK_W, + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); + src[1] = t_scalar_src(cs, fpi->SrcReg[1]); + temp[0] = get_temp_reg(cs); + emit_arith(cs, PFS_OP_LG2, temp[0], WRITEMASK_W, src[0], undef, undef, 0); - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W, + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_W, temp[0], src[1], pfs_zero, 0); - emit_arith(fp, PFS_OP_EX2, dest, fpi->DstReg.WriteMask, + emit_arith(cs, PFS_OP_EX2, dest, fpi->DstReg.WriteMask, temp[0], undef, undef, 0); - free_temp(fp, temp[0]); + free_temp(cs, temp[0]); break; case OPCODE_RCP: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_RCP, dest, mask, + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_RCP, dest, mask, src[0], undef, undef, flags); break; case OPCODE_RSQ: - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); - emit_arith(fp, PFS_OP_RSQ, dest, mask, + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_RSQ, dest, mask, absolute(src[0]), pfs_zero, pfs_zero, flags); break; case OPCODE_SCS: @@ -1875,51 +1884,51 @@ static GLboolean parse_program(struct r300_fragment_program *fp) * result.y = sin(x) (sin) * */ - temp[0] = get_temp_reg(fp); - temp[1] = get_temp_reg(fp); - const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); - const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + temp[0] = get_temp_reg(cs); + temp[1] = get_temp_reg(cs); + const_sin[0] = emit_const4fv(cs, SinCosConsts[0]); + const_sin[1] = emit_const4fv(cs, 
SinCosConsts[1]); + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); /* x = -abs(x)+0.5*PI */ - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(const_sin[0], Z, Z, Z, Z), //PI + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(const_sin[0], Z, Z, Z, Z), //PI pfs_half, negate(abs (swizzle(keep(src[0]), X, X, X, X))), 0); /* C*x (sin) */ - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_W, + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_W, swizzle(const_sin[0], Y, Y, Y, Y), swizzle(keep(src[0]), X, X, X, X), pfs_zero, 0); /* B*x, C*x (cos) */ - emit_arith(fp, PFS_OP_MAD, temp[0], + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], Z, Z, Z, Z), const_sin[0], pfs_zero, 0); /* B*x (sin) */ - emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W, + emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_W, swizzle(const_sin[0], X, X, X, X), keep(src[0]), pfs_zero, 0); /* y = B*x + C*x*abs(x) (sin) */ - emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_Z, + emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_Z, absolute(src[0]), swizzle(temp[0], W, W, W, W), swizzle(temp[1], W, W, W, W), 0); /* y = B*x + C*x*abs(x) (cos) */ - emit_arith(fp, PFS_OP_MAD, temp[1], WRITEMASK_W, + emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_W, swizzle(temp[0], Y, Y, Y, Y), absolute(swizzle(temp[0], Z, Z, Z, Z)), swizzle(temp[0], X, X, X, X), 0); /* y*abs(y) - y (cos), y*abs(y) - y (sin) */ - emit_arith(fp, PFS_OP_MAD, temp[0], + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X | WRITEMASK_Y, swizzle(temp[1], W, Z, Y, X), @@ -1927,26 +1936,26 @@ static GLboolean parse_program(struct r300_fragment_program *fp) negate(swizzle(temp[1], W, Z, Y, X)), 0); /* dest.xy = mad(temp.xy, P, temp2.wz) */ - emit_arith(fp, PFS_OP_MAD, dest, + emit_arith(cs, PFS_OP_MAD, dest, mask & (WRITEMASK_X | WRITEMASK_Y), temp[0], swizzle(const_sin[0], W, W, W, W), swizzle(temp[1], W, Z, Y, X), flags); - free_temp(fp, temp[0]); - free_temp(fp, temp[1]); + free_temp(cs, temp[0]); + free_temp(cs, temp[1]); 
break; case OPCODE_SGE: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - temp[0] = get_temp_reg(fp); + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + temp[0] = get_temp_reg(cs); /* temp = src0 - src1 * dest.c = (temp.c < 0.0) ? 0 : 1 */ - emit_arith(fp, PFS_OP_MAD, temp[0], mask, + emit_arith(cs, PFS_OP_MAD, temp[0], mask, src[0], pfs_one, negate(src[1]), 0); - emit_arith(fp, PFS_OP_CMP, dest, mask, + emit_arith(cs, PFS_OP_CMP, dest, mask, pfs_one, pfs_zero, temp[0], 0); - free_temp(fp, temp[0]); + free_temp(cs, temp[0]); break; case OPCODE_SIN: /* @@ -1956,85 +1965,85 @@ static GLboolean parse_program(struct r300_fragment_program *fp) * itself squared. */ - temp[0] = get_temp_reg(fp); - const_sin[0] = emit_const4fv(fp, SinCosConsts[0]); - const_sin[1] = emit_const4fv(fp, SinCosConsts[1]); - src[0] = t_scalar_src(fp, fpi->SrcReg[0]); + temp[0] = get_temp_reg(cs); + const_sin[0] = emit_const4fv(cs, SinCosConsts[0]); + const_sin[1] = emit_const4fv(cs, SinCosConsts[1]); + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); /* do range reduction */ - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, swizzle(keep(src[0]), X, X, X, X), swizzle(const_sin[1], Z, Z, Z, Z), pfs_half, 0); - emit_arith(fp, PFS_OP_FRC, temp[0], WRITEMASK_X, + emit_arith(cs, PFS_OP_FRC, temp[0], WRITEMASK_X, swizzle(temp[0], X, X, X, X), undef, undef, 0); - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI negate(swizzle(const_sin[0], Z, Z, Z, Z)), //PI 0); /* SIN */ - emit_arith(fp, PFS_OP_MAD, temp[0], + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], Z, Z, Z, Z), const_sin[0], pfs_zero, 0); - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_X, + emit_arith(cs, PFS_OP_MAD, 
temp[0], WRITEMASK_X, swizzle(temp[0], Y, Y, Y, Y), absolute(swizzle(temp[0], Z, Z, Z, Z)), swizzle(temp[0], X, X, X, X), 0); - emit_arith(fp, PFS_OP_MAD, temp[0], WRITEMASK_Y, + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Y, swizzle(temp[0], X, X, X, X), absolute(swizzle(temp[0], X, X, X, X)), negate(swizzle(temp[0], X, X, X, X)), 0); - emit_arith(fp, PFS_OP_MAD, dest, mask, + emit_arith(cs, PFS_OP_MAD, dest, mask, swizzle(temp[0], Y, Y, Y, Y), swizzle(const_sin[0], W, W, W, W), swizzle(temp[0], X, X, X, X), flags); - free_temp(fp, temp[0]); + free_temp(cs, temp[0]); break; case OPCODE_SLT: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - temp[0] = get_temp_reg(fp); + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + temp[0] = get_temp_reg(cs); /* temp = src0 - src1 * dest.c = (temp.c < 0.0) ? 1 : 0 */ - emit_arith(fp, PFS_OP_MAD, temp[0], mask, + emit_arith(cs, PFS_OP_MAD, temp[0], mask, src[0], pfs_one, negate(src[1]), 0); - emit_arith(fp, PFS_OP_CMP, dest, mask, + emit_arith(cs, PFS_OP_CMP, dest, mask, pfs_zero, pfs_one, temp[0], 0); - free_temp(fp, temp[0]); + free_temp(cs, temp[0]); break; case OPCODE_SUB: - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - emit_arith(fp, PFS_OP_MAD, dest, mask, + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_MAD, dest, mask, src[0], pfs_one, negate(src[1]), flags); break; case OPCODE_TEX: - emit_tex(fp, fpi, R300_TEX_OP_LD); + emit_tex(cs, fpi, R300_TEX_OP_LD); break; case OPCODE_TXB: - emit_tex(fp, fpi, R300_TEX_OP_TXB); + emit_tex(cs, fpi, R300_TEX_OP_TXB); break; case OPCODE_TXP: - emit_tex(fp, fpi, R300_TEX_OP_TXP); + emit_tex(cs, fpi, R300_TEX_OP_TXP); break; case OPCODE_XPD:{ - src[0] = t_src(fp, fpi->SrcReg[0]); - src[1] = t_src(fp, fpi->SrcReg[1]); - temp[0] = get_temp_reg(fp); + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + temp[0] = get_temp_reg(cs); /* 
temp = src0.zxy * src1.yzx */ - emit_arith(fp, PFS_OP_MAD, temp[0], + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_XYZ, swizzle(keep(src[0]), Z, X, Y, W), swizzle(keep(src[1]), Y, Z, X, W), @@ -2042,14 +2051,14 @@ static GLboolean parse_program(struct r300_fragment_program *fp) /* dest.xyz = src0.yzx * src1.zxy - temp * dest.w = undefined * */ - emit_arith(fp, PFS_OP_MAD, dest, + emit_arith(cs, PFS_OP_MAD, dest, mask & WRITEMASK_XYZ, swizzle(src[0], Y, Z, X, W), swizzle(src[1], Z, X, Y, W), negate(temp[0]), flags); /* cleanup */ - free_temp(fp, temp[0]); + free_temp(cs, temp[0]); break; } default: @@ -2160,7 +2169,7 @@ static void insert_wpos(struct gl_program *prog) /* - Init structures * - Determine what hwregs each input corresponds to */ -static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) +static void init_program(struct r300_pfs_compile_state *cs) { COMPILE_STATE; struct gl_fragment_program *mp = &fp->mesa_program; @@ -2171,19 +2180,18 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) /* New compile, reset tracking data */ fp->optimization = - driQueryOptioni(&r300->radeon.optionCache, "fp_optimization"); + driQueryOptioni(&cs->r300->radeon.optionCache, "fp_optimization"); fp->translated = GL_FALSE; fp->error = GL_FALSE; fp->WritesDepth = GL_FALSE; - fp->tex.length = 0; - fp->cur_node = 0; - fp->first_node_has_tex = 0; - fp->const_nr = 0; - fp->max_temp_idx = 0; - fp->node[0].alu_end = -1; - fp->node[0].tex_end = -1; - - _mesa_memset(cs, 0, sizeof(*fp->cs)); + code->tex.length = 0; + code->cur_node = 0; + code->first_node_has_tex = 0; + code->const_nr = 0; + code->max_temp_idx = 0; + code->node[0].alu_end = -1; + code->node[0].tex_end = -1; + for (i = 0; i < PFS_MAX_ALU_INST; i++) { for (j = 0; j < 3; j++) { cs->slot[i].vsrc[j] = SRC_CONST; @@ -2200,11 +2208,11 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) */ /* Texcoords come first */ - for (i = 0; i < 
fp->ctx->Const.MaxTextureUnits; i++) { + for (i = 0; i < cs->r300->radeon.glCtx->Const.MaxTextureUnits; i++) { if (InputsRead & (FRAG_BIT_TEX0 << i)) { cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0; cs->inputs[FRAG_ATTRIB_TEX0 + i].reg = - get_hw_temp(fp, 0); + get_hw_temp(cs, 0); } } InputsRead &= ~FRAG_BITS_TEX_ANY; @@ -2212,7 +2220,7 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) /* fragment position treated as a texcoord */ if (InputsRead & FRAG_BIT_WPOS) { cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; - cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(fp, 0); + cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(cs, 0); insert_wpos(&mp->Base); } InputsRead &= ~FRAG_BIT_WPOS; @@ -2220,14 +2228,14 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) /* Then primary colour */ if (InputsRead & FRAG_BIT_COL0) { cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(fp, 0); + cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(cs, 0); } InputsRead &= ~FRAG_BIT_COL0; /* Secondary color */ if (InputsRead & FRAG_BIT_COL1) { cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(fp, 0); + cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(cs, 0); } InputsRead &= ~FRAG_BIT_COL1; @@ -2283,13 +2291,13 @@ static void init_program(r300ContextPtr r300, struct r300_fragment_program *fp) cs->temp_in_use = temps_used; } -static void update_params(struct r300_fragment_program *fp) +static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp) { struct gl_fragment_program *mp = &fp->mesa_program; /* Ask Mesa nicely to fill in ParameterValues for us */ if (mp->Base.Parameters) - _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters); + _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters); } void r300TranslateFragmentShader(r300ContextPtr r300, @@ -2298,37 +2306,40 @@ void r300TranslateFragmentShader(r300ContextPtr r300, if 
(!fp->translated) { struct r300_pfs_compile_state cs; - fp->cs = &cs; - init_program(r300, fp); + _mesa_memset(&cs, 0, sizeof(cs)); + cs.r300 = r300; + cs.fp = fp; + init_program(&cs); - if (parse_program(fp) == GL_FALSE) { - dump_program(fp); + if (parse_program(&cs) == GL_FALSE) { + dump_program(fp, &fp->code); return; } /* Finish off */ - fp->node[fp->cur_node].alu_end = - cs.nrslots - fp->node[fp->cur_node].alu_offset - 1; - if (fp->node[fp->cur_node].tex_end < 0) - fp->node[fp->cur_node].tex_end = 0; - fp->alu_offset = 0; - fp->alu_end = cs.nrslots - 1; - fp->tex_offset = 0; - fp->tex_end = fp->tex.length ? fp->tex.length - 1 : 0; - assert(fp->node[fp->cur_node].alu_end >= 0); - assert(fp->alu_end >= 0); + fp->code.node[fp->code.cur_node].alu_end = + cs.nrslots - fp->code.node[fp->code.cur_node].alu_offset - 1; + if (fp->code.node[fp->code.cur_node].tex_end < 0) + fp->code.node[fp->code.cur_node].tex_end = 0; + fp->code.alu_offset = 0; + fp->code.alu_end = cs.nrslots - 1; + fp->code.tex_offset = 0; + fp->code.tex_end = fp->code.tex.length ? fp->code.tex.length - 1 : 0; + assert(fp->code.node[fp->code.cur_node].alu_end >= 0); + assert(fp->code.alu_end >= 0); fp->translated = GL_TRUE; if (RADEON_DEBUG & DEBUG_PIXEL) - dump_program(fp); - r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); + dump_program(fp, &fp->code); + r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); } - update_params(fp); + update_params(r300, fp); } /* just some random things... 
*/ -static void dump_program(struct r300_fragment_program *fp) +static void dump_program(struct r300_fragment_program *fp, + struct r300_fragment_program_code *code) { int n, i, j; static int pc = 0; @@ -2343,21 +2354,21 @@ static void dump_program(struct r300_fragment_program *fp) fprintf(stderr, "Hardware program\n"); fprintf(stderr, "----------------\n"); - for (n = 0; n < (fp->cur_node + 1); n++) { + for (n = 0; n < (code->cur_node + 1); n++) { fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " "alu_end: %d, tex_end: %d\n", n, - fp->node[n].alu_offset, - fp->node[n].tex_offset, - fp->node[n].alu_end, fp->node[n].tex_end); + code->node[n].alu_offset, + code->node[n].tex_offset, + code->node[n].alu_end, code->node[n].tex_end); - if (fp->tex.length) { + if (code->tex.length) { fprintf(stderr, " TEX:\n"); - for (i = fp->node[n].tex_offset; - i <= fp->node[n].tex_offset + fp->node[n].tex_end; + for (i = code->node[n].tex_offset; + i <= code->node[n].tex_offset + code->node[n].tex_end; ++i) { const char *instr; - switch ((fp->tex. + switch ((code->tex. inst[i] >> R300_TEX_INST_SHIFT) & 15) { case R300_TEX_OP_LD: @@ -2379,20 +2390,20 @@ static void dump_program(struct r300_fragment_program *fp) fprintf(stderr, " %s t%i, %c%i, texture[%i] (%08x)\n", instr, - (fp->tex. + (code->tex. inst[i] >> R300_DST_ADDR_SHIFT) & 31, 't', - (fp->tex. + (code->tex. inst[i] >> R300_SRC_ADDR_SHIFT) & 31, - (fp->tex. + (code->tex. 
inst[i] & R300_TEX_ID_MASK) >> R300_TEX_ID_SHIFT, - fp->tex.inst[i]); + code->tex.inst[i]); } } - for (i = fp->node[n].alu_offset; - i <= fp->node[n].alu_offset + fp->node[n].alu_end; ++i) { + for (i = code->node[n].alu_offset; + i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) { char srcc[3][10], dstc[20]; char srca[3][10], dsta[20]; char argc[3][20]; @@ -2400,8 +2411,8 @@ static void dump_program(struct r300_fragment_program *fp) char flags[5], tmp[10]; for (j = 0; j < 3; ++j) { - int regc = fp->alu.inst[i].inst1 >> (j * 6); - int rega = fp->alu.inst[i].inst3 >> (j * 6); + int regc = code->alu.inst[i].inst1 >> (j * 6); + int rega = code->alu.inst[i].inst3 >> (j * 6); sprintf(srcc[j], "%c%i", (regc & 32) ? 'c' : 't', regc & 31); @@ -2411,46 +2422,46 @@ static void dump_program(struct r300_fragment_program *fp) dstc[0] = 0; sprintf(flags, "%s%s%s", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst1 & R300_ALU_DSTC_REG_X) ? "x" : "", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst1 & R300_ALU_DSTC_REG_Z) ? "z" : ""); if (flags[0] != 0) { sprintf(dstc, "t%i.%s ", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst1 >> R300_ALU_DSTC_SHIFT) & 31, flags); } sprintf(flags, "%s%s%s", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); if (flags[0] != 0) { sprintf(tmp, "o%i.%s", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst1 >> R300_ALU_DSTC_SHIFT) & 31, flags); strcat(dstc, tmp); } dsta[0] = 0; - if (fp->alu.inst[i].inst3 & R300_ALU_DSTA_REG) { + if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) { sprintf(dsta, "t%i.w ", - (fp->alu.inst[i]. + (code->alu.inst[i]. 
inst3 >> R300_ALU_DSTA_SHIFT) & 31); } - if (fp->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) { + if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) { sprintf(tmp, "o%i.w ", - (fp->alu.inst[i]. + (code->alu.inst[i]. inst3 >> R300_ALU_DSTA_SHIFT) & 31); strcat(dsta, tmp); } - if (fp->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) { + if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) { strcat(dsta, "Z"); } @@ -2458,12 +2469,12 @@ static void dump_program(struct r300_fragment_program *fp) "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" " w: %3s %3s %3s -> %-20s (%08x)\n", i, srcc[0], srcc[1], srcc[2], dstc, - fp->alu.inst[i].inst1, srca[0], srca[1], - srca[2], dsta, fp->alu.inst[i].inst3); + code->alu.inst[i].inst1, srca[0], srca[1], + srca[2], dsta, code->alu.inst[i].inst3); for (j = 0; j < 3; ++j) { - int regc = fp->alu.inst[i].inst0 >> (j * 7); - int rega = fp->alu.inst[i].inst2 >> (j * 7); + int regc = code->alu.inst[i].inst0 >> (j * 7); + int rega = code->alu.inst[i].inst2 >> (j * 7); int d; char buf[20]; @@ -2545,8 +2556,8 @@ static void dump_program(struct r300_fragment_program *fp) fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" " w: %8s %8s %8s op: %08x\n", argc[0], argc[1], argc[2], - fp->alu.inst[i].inst0, arga[0], arga[1], - arga[2], fp->alu.inst[i].inst2); + code->alu.inst[i].inst0, arga[0], arga[1], + arga[2], code->alu.inst[i].inst2); } } } diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c index 5c8fd8a5e5..f30fd986e0 100644 --- a/src/mesa/drivers/dri/r300/r300_shader.c +++ b/src/mesa/drivers/dri/r300/r300_shader.c @@ -28,7 +28,6 @@ static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target, target, id); } else { r300_fp = CALLOC_STRUCT(r300_fragment_program); - r300_fp->ctx = ctx; return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program, target, id); } diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index c3f1cc9e28..1dcf9e0cab 100644 --- 
a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1308,18 +1308,19 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) int i; struct r300_fragment_program *fp = (struct r300_fragment_program *) (char *)ctx->FragmentProgram._Current; + struct r300_fragment_program_code *code = &fp->code; R300_STATECHANGE(r300, fpt); - for (i = 0; i < fp->tex.length; i++) { + for (i = 0; i < code->tex.length; i++) { int unit; int opcode; unsigned long val; - unit = fp->tex.inst[i] >> R300_TEX_ID_SHIFT; + unit = code->tex.inst[i] >> R300_TEX_ID_SHIFT; unit &= 15; - val = fp->tex.inst[i]; + val = code->tex.inst[i]; val &= ~R300_TEX_ID_MASK; opcode = @@ -1341,7 +1342,7 @@ static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) } r300->hw.fpt.cmd[R300_FPT_CMD_0] = - cmdpacket0(R300_US_TEX_INST_0, fp->tex.length); + cmdpacket0(R300_US_TEX_INST_0, code->tex.length); } static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) @@ -2405,6 +2406,7 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) GLcontext *ctx = rmesa->radeon.glCtx; struct r300_fragment_program *fp = (struct r300_fragment_program *) (char *)ctx->FragmentProgram._Current; + struct r300_fragment_program_code *code; int i, k; if (!fp) /* should only happenen once, just after context is created */ @@ -2416,62 +2418,63 @@ static void r300SetupPixelShader(r300ContextPtr rmesa) __FUNCTION__); return; } + code = &fp->code; r300SetupTextures(ctx); R300_STATECHANGE(rmesa, fpi[0]); - rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, fp->alu_end + 1); - for (i = 0; i <= fp->alu_end; i++) { - rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst0; + rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_INST_0, code->alu_end + 1); + for (i = 0; i <= code->alu_end; i++) { + rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0; } R300_STATECHANGE(rmesa, 
fpi[1]); - rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, fp->alu_end + 1); - for (i = 0; i <= fp->alu_end; i++) { - rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst1; + rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_RGB_ADDR_0, code->alu_end + 1); + for (i = 0; i <= code->alu_end; i++) { + rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1; } R300_STATECHANGE(rmesa, fpi[2]); - rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, fp->alu_end + 1); - for (i = 0; i <= fp->alu_end; i++) { - rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst2; + rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_INST_0, code->alu_end + 1); + for (i = 0; i <= code->alu_end; i++) { + rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst2; } R300_STATECHANGE(rmesa, fpi[3]); - rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, fp->alu_end + 1); - for (i = 0; i <= fp->alu_end; i++) { - rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = fp->alu.inst[i].inst3; + rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(R300_US_ALU_ALPHA_ADDR_0, code->alu_end + 1); + for (i = 0; i <= code->alu_end; i++) { + rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst3; } R300_STATECHANGE(rmesa, fp); - rmesa->hw.fp.cmd[R300_FP_CNTL0] = fp->cur_node | (fp->first_node_has_tex << 3); - rmesa->hw.fp.cmd[R300_FP_CNTL1] = fp->max_temp_idx; + rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->cur_node | (code->first_node_has_tex << 3); + rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->max_temp_idx; rmesa->hw.fp.cmd[R300_FP_CNTL2] = - (fp->alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | - (fp->alu_end << R300_PFS_CNTL_ALU_END_SHIFT) | - (fp->tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | - (fp->tex_end << R300_PFS_CNTL_TEX_END_SHIFT); + (code->alu_offset << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | + (code->alu_end << R300_PFS_CNTL_ALU_END_SHIFT) | + 
(code->tex_offset << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | + (code->tex_end << R300_PFS_CNTL_TEX_END_SHIFT); /* I just want to say, the way these nodes are stored.. weird.. */ - for (i = 0, k = (4 - (fp->cur_node + 1)); i < 4; i++, k++) { - if (i < (fp->cur_node + 1)) { + for (i = 0, k = (4 - (code->cur_node + 1)); i < 4; i++, k++) { + if (i < (code->cur_node + 1)) { rmesa->hw.fp.cmd[R300_FP_NODE0 + k] = - (fp->node[i].alu_offset << R300_ALU_START_SHIFT) | - (fp->node[i].alu_end << R300_ALU_SIZE_SHIFT) | - (fp->node[i].tex_offset << R300_TEX_START_SHIFT) | - (fp->node[i].tex_end << R300_TEX_SIZE_SHIFT) | - fp->node[i].flags; + (code->node[i].alu_offset << R300_ALU_START_SHIFT) | + (code->node[i].alu_end << R300_ALU_SIZE_SHIFT) | + (code->node[i].tex_offset << R300_TEX_START_SHIFT) | + (code->node[i].tex_end << R300_TEX_SIZE_SHIFT) | + code->node[i].flags; } else { rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0; } } R300_STATECHANGE(rmesa, fpp); - rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, fp->const_nr * 4); - for (i = 0; i < fp->const_nr; i++) { - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(fp->constant[i][0]); - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(fp->constant[i][1]); - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(fp->constant[i][2]); - rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(fp->constant[i][3]); + rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(R300_PFS_PARAM_0_X, code->const_nr * 4); + for (i = 0; i < code->const_nr; i++) { + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(code->constant[i][0]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(code->constant[i][1]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(code->constant[i][2]); + rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(code->constant[i][3]); } } -- cgit v1.2.3 From caeabb1ebea33828e956efed19dce46767a068b4 
Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Fri, 13 Jun 2008 22:03:26 +0200 Subject: r300_fragprog: Remove dead declarations, move NOP declarations into source --- src/mesa/drivers/dri/r300/r300_fragprog.c | 20 ++++++++++++++++++++ src/mesa/drivers/dri/r300/r300_fragprog.h | 25 ------------------------- 2 files changed, 20 insertions(+), 25 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index ba88b88f45..819615c141 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -245,6 +245,26 @@ struct r300_pfs_compile_state { #define REG_NEGS(reg) \ reg = (reg | REG_NEGS_MASK) +#define NOP_INST0 ( \ + (R300_ALU_OUTC_MAD) | \ + (R300_ALU_ARGC_ZERO << R300_ALU_ARG0C_SHIFT) | \ + (R300_ALU_ARGC_ZERO << R300_ALU_ARG1C_SHIFT) | \ + (R300_ALU_ARGC_ZERO << R300_ALU_ARG2C_SHIFT)) +#define NOP_INST1 ( \ + ((0 | SRC_CONST) << R300_ALU_SRC0C_SHIFT) | \ + ((0 | SRC_CONST) << R300_ALU_SRC1C_SHIFT) | \ + ((0 | SRC_CONST) << R300_ALU_SRC2C_SHIFT)) +#define NOP_INST2 ( \ + (R300_ALU_OUTA_MAD) | \ + (R300_ALU_ARGA_ZERO << R300_ALU_ARG0A_SHIFT) | \ + (R300_ALU_ARGA_ZERO << R300_ALU_ARG1A_SHIFT) | \ + (R300_ALU_ARGA_ZERO << R300_ALU_ARG2A_SHIFT)) +#define NOP_INST3 ( \ + ((0 | SRC_CONST) << R300_ALU_SRC0A_SHIFT) | \ + ((0 | SRC_CONST) << R300_ALU_SRC1A_SHIFT) | \ + ((0 | SRC_CONST) << R300_ALU_SRC2A_SHIFT)) + + /* * Datas structures for fragment program generation */ diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h index 573aacf19a..561d7c6423 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog.h @@ -41,12 +41,6 @@ #include "r300_context.h" -typedef struct r300_fragment_program_swizzle { - GLuint length; - GLuint src[4]; - GLuint inst[8]; -} r300_fragment_program_swizzle_t; - /* supported hw opcodes */ #define PFS_OP_MAD 0 #define PFS_OP_DP3 1 @@ 
-74,25 +68,6 @@ typedef struct r300_fragment_program_swizzle { #define SRC_MASK (63 << 0) #define SRC_STRIDE 6 -#define NOP_INST0 ( \ - (R300_ALU_OUTC_MAD) | \ - (R300_ALU_ARGC_ZERO << R300_ALU_ARG0C_SHIFT) | \ - (R300_ALU_ARGC_ZERO << R300_ALU_ARG1C_SHIFT) | \ - (R300_ALU_ARGC_ZERO << R300_ALU_ARG2C_SHIFT)) -#define NOP_INST1 ( \ - ((0 | SRC_CONST) << R300_ALU_SRC0C_SHIFT) | \ - ((0 | SRC_CONST) << R300_ALU_SRC1C_SHIFT) | \ - ((0 | SRC_CONST) << R300_ALU_SRC2C_SHIFT)) -#define NOP_INST2 ( \ - (R300_ALU_OUTA_MAD) | \ - (R300_ALU_ARGA_ZERO << R300_ALU_ARG0A_SHIFT) | \ - (R300_ALU_ARGA_ZERO << R300_ALU_ARG1A_SHIFT) | \ - (R300_ALU_ARGA_ZERO << R300_ALU_ARG2A_SHIFT)) -#define NOP_INST3 ( \ - ((0 | SRC_CONST) << R300_ALU_SRC0A_SHIFT) | \ - ((0 | SRC_CONST) << R300_ALU_SRC1A_SHIFT) | \ - ((0 | SRC_CONST) << R300_ALU_SRC2A_SHIFT)) - #define DRI_CONF_FP_OPTIMIZATION_SPEED 0 #define DRI_CONF_FP_OPTIMIZATION_QUALITY 1 -- cgit v1.2.3 From e2aa45c2f9584ff76151a99b4fcd0ecb56260473 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Fri, 13 Jun 2008 22:09:37 +0200 Subject: r300: Do not include r300_fragprog.h from r300_context.h and other cleanups --- src/mesa/drivers/dri/r300/r300_context.h | 1 - src/mesa/drivers/dri/r300/r300_fragprog.c | 34 ++++++++----------------------- src/mesa/drivers/dri/r300/r300_ioctl.c | 1 + src/mesa/drivers/dri/r300/r300_render.c | 1 + src/mesa/drivers/dri/r300/r300_state.c | 1 + 5 files changed, 11 insertions(+), 27 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 05efb813e5..a9b3b061f4 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -73,7 +73,6 @@ typedef struct r300_context *r300ContextPtr; } #include "r300_vertprog.h" -#include "r300_fragprog.h" #include "r500_fragprog.h" /** diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 
819615c141..9d7a8c6570 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -445,8 +445,8 @@ static int get_hw_temp(struct r300_pfs_compile_state *cs, int slot) cs->hwtemps[r].vector_valid = 0; cs->hwtemps[r].scalar_valid = 0; - if (r > fp->code.max_temp_idx) - fp->code.max_temp_idx = r; + if (r > code->max_temp_idx) + code->max_temp_idx = r; return r; } @@ -1029,11 +1029,11 @@ static void emit_tex(struct r300_pfs_compile_state *cs, tokens[2] = unit; factor_index = - _mesa_add_state_reference(cs->fp->mesa_program.Base. + _mesa_add_state_reference(fp->mesa_program.Base. Parameters, tokens); factorreg = emit_const4fv(cs, - cs->fp->mesa_program.Base.Parameters-> + fp->mesa_program.Base.Parameters-> ParameterValues[factor_index]); tempreg = keep(get_temp_reg(cs)); @@ -1264,10 +1264,10 @@ static int find_and_prepare_slot(struct r300_pfs_compile_state *cs, return -1; } - fp->code.alu.inst[pos].inst0 = NOP_INST0; - fp->code.alu.inst[pos].inst1 = NOP_INST1; - fp->code.alu.inst[pos].inst2 = NOP_INST2; - fp->code.alu.inst[pos].inst3 = NOP_INST3; + code->alu.inst[pos].inst0 = NOP_INST0; + code->alu.inst[pos].inst1 = NOP_INST1; + code->alu.inst[pos].inst2 = NOP_INST2; + code->alu.inst[pos].inst3 = NOP_INST3; cs->nrslots++; } @@ -1518,24 +1518,6 @@ static void emit_arith(struct r300_pfs_compile_state *cs, return; } -#if 0 -static GLuint get_attrib(struct r300_fragment_program *fp, GLuint attr) -{ - struct gl_fragment_program *mp = &fp->mesa_program; - GLuint r = undef; - - if (!(mp->Base.InputsRead & (1 << attr))) { - ERROR("Attribute %d was not provided!\n", attr); - return undef; - } - - REG_SET_TYPE(r, REG_TYPE_INPUT); - REG_SET_INDEX(r, attr); - REG_SET_VALID(r, GL_TRUE); - return r; -} -#endif - static GLfloat SinCosConsts[2][4] = { { 1.273239545, // 4/PI diff --git a/src/mesa/drivers/dri/r300/r300_ioctl.c b/src/mesa/drivers/dri/r300/r300_ioctl.c index 6af23300f2..71821a01ea 100644 --- 
a/src/mesa/drivers/dri/r300/r300_ioctl.c +++ b/src/mesa/drivers/dri/r300/r300_ioctl.c @@ -54,6 +54,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_vertprog.h" #include "radeon_reg.h" #include "r300_emit.h" +#include "r300_fragprog.h" #include "vblank.h" diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index fc07105c56..8f74f9d785 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -74,6 +74,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_reg.h" #include "r300_tex.h" #include "r300_emit.h" +#include "r300_fragprog.h" extern int future_hw_tcl_on; /** diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 1dcf9e0cab..e82c3d9681 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -60,6 +60,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "r300_state.h" #include "r300_reg.h" #include "r300_emit.h" +#include "r300_fragprog.h" #include "r300_tex.h" #include "drirenderbuffer.h" -- cgit v1.2.3 From b5170bc9d32530ec93dae4b543d3552e83d6b4a1 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Fri, 13 Jun 2008 23:46:04 +0200 Subject: r300: Add radeon_program and trivial refactoring of r300_fragprog to use it The idea/hope is that radeon_program will serve as an intermediate representation for r3xx up to r6xx fragment and vertex programs. Right now, it is nothing more than a simplistic wrapper around Mesa's prog_instruction, together with the notion of clauses, taken from r6xx docs. The clauses will eventually be used to represent the nodes that are used in r300 family fragment programs. 
--- src/mesa/drivers/dri/r300/Makefile | 2 + src/mesa/drivers/dri/r300/r300_fragprog.c | 2251 +----------------------- src/mesa/drivers/dri/r300/r300_fragprog.h | 20 + src/mesa/drivers/dri/r300/r300_fragprog_emit.c | 2232 +++++++++++++++++++++++ src/mesa/drivers/dri/r300/radeon_program.c | 151 ++ src/mesa/drivers/dri/r300/radeon_program.h | 110 ++ 6 files changed, 2564 insertions(+), 2202 deletions(-) create mode 100644 src/mesa/drivers/dri/r300/r300_fragprog_emit.c create mode 100644 src/mesa/drivers/dri/r300/radeon_program.c create mode 100644 src/mesa/drivers/dri/r300/radeon_program.h (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 5b2bd0bc2b..7cd5647064 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -37,8 +37,10 @@ DRIVER_SOURCES = \ r300_texmem.c \ r300_tex.c \ r300_texstate.c \ + radeon_program.c \ r300_vertprog.c \ r300_fragprog.c \ + r300_fragprog_emit.c \ r500_fragprog.c \ r300_shader.c \ r300_emit.c \ diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 9d7a8c6570..94cb11afec 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -28,16 +28,14 @@ /** * \file * - * \author Ben Skeggs + * Fragment program compiler. Perform transformations on the intermediate + * \ref radeon_program representation (which is essentially the Mesa + * program representation plus the notion of clauses) until the program + * is in a form where we can translate it more or less directly into + * machine-readable form. * + * \author Ben Skeggs * \author Jerome Glisse - * - * \todo Depth write, WPOS/FOGC inputs - * - * \todo FogOption - * - * \todo Verify results of opcodes for accuracy, I've only checked them in - * specific cases. 
*/ #include "glheader.h" @@ -49,2047 +47,46 @@ #include "r300_context.h" #include "r300_fragprog.h" -#include "r300_reg.h" #include "r300_state.h" -/* Mapping Mesa registers to R300 temporaries */ -struct reg_acc { - int reg; /* Assigned hw temp */ - unsigned int refcount; /* Number of uses by mesa program */ -}; - -/** - * Describe the current lifetime information for an R300 temporary - */ -struct reg_lifetime { - /* Index of the first slot where this register is free in the sense - that it can be used as a new destination register. - This is -1 if the register has been assigned to a Mesa register - and the last access to the register has not yet been emitted */ - int free; - - /* Index of the first slot where this register is currently reserved. - This is used to stop e.g. a scalar operation from being moved - before the allocation time of a register that was first allocated - for a vector operation. */ - int reserved; - - /* Index of the first slot in which the register can be used as a - source without losing the value that is written by the last - emitted instruction that writes to the register */ - int vector_valid; - int scalar_valid; - - /* Index to the slot where the register was last read. - This is also the first slot in which the register may be written again */ - int vector_lastread; - int scalar_lastread; -}; - -/** - * Store usage information about an ALU instruction slot during the - * compilation of a fragment program. - */ -#define SLOT_SRC_VECTOR (1<<0) -#define SLOT_SRC_SCALAR (1<<3) -#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR) -#define SLOT_OP_VECTOR (1<<16) -#define SLOT_OP_SCALAR (1<<17) -#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR) - -struct r300_pfs_compile_slot { - /* Bitmask indicating which parts of the slot are used, using SLOT_ constants - defined above */ - unsigned int used; - - /* Selected sources */ - int vsrc[3]; - int ssrc[3]; -}; - -/** - * Store information during compilation of fragment programs. 
- */ -struct r300_pfs_compile_state { - r300ContextPtr r300; - struct r300_fragment_program *fp; - - int nrslots; /* number of ALU slots used so far */ - - /* Track which (parts of) slots are already filled with instructions */ - struct r300_pfs_compile_slot slot[PFS_MAX_ALU_INST]; - - /* Track the validity of R300 temporaries */ - struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS]; - - /* Used to map Mesa's inputs/temps onto hardware temps */ - int temp_in_use; - struct reg_acc temps[PFS_NUM_TEMP_REGS]; - struct reg_acc inputs[32]; /* don't actually need 32... */ - - /* Track usage of hardware temps, for register allocation, - * indirection detection, etc. */ - GLuint used_in_node; - GLuint dest_in_node; -}; - - -/* - * Usefull macros and values - */ -#define ERROR(fmt, args...) do { \ - fprintf(stderr, "%s::%s(): " fmt "\n", \ - __FILE__, __FUNCTION__, ##args); \ - fp->error = GL_TRUE; \ - } while(0) - -#define PFS_INVAL 0xFFFFFFFF -#define COMPILE_STATE \ - struct r300_fragment_program *fp = cs->fp; \ - struct r300_fragment_program_code *code = &fp->code; \ - (void)code - -#define SWIZZLE_XYZ 0 -#define SWIZZLE_XXX 1 -#define SWIZZLE_YYY 2 -#define SWIZZLE_ZZZ 3 -#define SWIZZLE_WWW 4 -#define SWIZZLE_YZX 5 -#define SWIZZLE_ZXY 6 -#define SWIZZLE_WZY 7 -#define SWIZZLE_111 8 -#define SWIZZLE_000 9 -#define SWIZZLE_HHH 10 - -#define swizzle(r, x, y, z, w) do_swizzle(cs, r, \ - ((SWIZZLE_##x<<0)| \ - (SWIZZLE_##y<<3)| \ - (SWIZZLE_##z<<6)| \ - (SWIZZLE_##w<<9)), \ - 0) - -#define REG_TYPE_INPUT 0 -#define REG_TYPE_OUTPUT 1 -#define REG_TYPE_TEMP 2 -#define REG_TYPE_CONST 3 - -#define REG_TYPE_SHIFT 0 -#define REG_INDEX_SHIFT 2 -#define REG_VSWZ_SHIFT 8 -#define REG_SSWZ_SHIFT 13 -#define REG_NEGV_SHIFT 18 -#define REG_NEGS_SHIFT 19 -#define REG_ABS_SHIFT 20 -#define REG_NO_USE_SHIFT 21 // Hack for refcounting -#define REG_VALID_SHIFT 22 // Does the register contain a defined value? -#define REG_BUILTIN_SHIFT 23 // Is it a builtin (like all zero/all one)? 
- -#define REG_TYPE_MASK (0x03 << REG_TYPE_SHIFT) -#define REG_INDEX_MASK (0x3F << REG_INDEX_SHIFT) -#define REG_VSWZ_MASK (0x1F << REG_VSWZ_SHIFT) -#define REG_SSWZ_MASK (0x1F << REG_SSWZ_SHIFT) -#define REG_NEGV_MASK (0x01 << REG_NEGV_SHIFT) -#define REG_NEGS_MASK (0x01 << REG_NEGS_SHIFT) -#define REG_ABS_MASK (0x01 << REG_ABS_SHIFT) -#define REG_NO_USE_MASK (0x01 << REG_NO_USE_SHIFT) -#define REG_VALID_MASK (0x01 << REG_VALID_SHIFT) -#define REG_BUILTIN_MASK (0x01 << REG_BUILTIN_SHIFT) - -#define REG(type, index, vswz, sswz, nouse, valid, builtin) \ - (((type << REG_TYPE_SHIFT) & REG_TYPE_MASK) | \ - ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK) | \ - ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) | \ - ((valid << REG_VALID_SHIFT) & REG_VALID_MASK) | \ - ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK) | \ - ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK) | \ - ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) -#define REG_GET_TYPE(reg) \ - ((reg & REG_TYPE_MASK) >> REG_TYPE_SHIFT) -#define REG_GET_INDEX(reg) \ - ((reg & REG_INDEX_MASK) >> REG_INDEX_SHIFT) -#define REG_GET_VSWZ(reg) \ - ((reg & REG_VSWZ_MASK) >> REG_VSWZ_SHIFT) -#define REG_GET_SSWZ(reg) \ - ((reg & REG_SSWZ_MASK) >> REG_SSWZ_SHIFT) -#define REG_GET_NO_USE(reg) \ - ((reg & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT) -#define REG_GET_VALID(reg) \ - ((reg & REG_VALID_MASK) >> REG_VALID_SHIFT) -#define REG_GET_BUILTIN(reg) \ - ((reg & REG_BUILTIN_MASK) >> REG_BUILTIN_SHIFT) -#define REG_SET_TYPE(reg, type) \ - reg = ((reg & ~REG_TYPE_MASK) | \ - ((type << REG_TYPE_SHIFT) & REG_TYPE_MASK)) -#define REG_SET_INDEX(reg, index) \ - reg = ((reg & ~REG_INDEX_MASK) | \ - ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK)) -#define REG_SET_VSWZ(reg, vswz) \ - reg = ((reg & ~REG_VSWZ_MASK) | \ - ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK)) -#define REG_SET_SSWZ(reg, sswz) \ - reg = ((reg & ~REG_SSWZ_MASK) | \ - ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) -#define REG_SET_NO_USE(reg, nouse) \ - reg = ((reg & ~REG_NO_USE_MASK) | 
\ - ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK)) -#define REG_SET_VALID(reg, valid) \ - reg = ((reg & ~REG_VALID_MASK) | \ - ((valid << REG_VALID_SHIFT) & REG_VALID_MASK)) -#define REG_SET_BUILTIN(reg, builtin) \ - reg = ((reg & ~REG_BUILTIN_MASK) | \ - ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK)) -#define REG_ABS(reg) \ - reg = (reg | REG_ABS_MASK) -#define REG_NEGV(reg) \ - reg = (reg | REG_NEGV_MASK) -#define REG_NEGS(reg) \ - reg = (reg | REG_NEGS_MASK) - -#define NOP_INST0 ( \ - (R300_ALU_OUTC_MAD) | \ - (R300_ALU_ARGC_ZERO << R300_ALU_ARG0C_SHIFT) | \ - (R300_ALU_ARGC_ZERO << R300_ALU_ARG1C_SHIFT) | \ - (R300_ALU_ARGC_ZERO << R300_ALU_ARG2C_SHIFT)) -#define NOP_INST1 ( \ - ((0 | SRC_CONST) << R300_ALU_SRC0C_SHIFT) | \ - ((0 | SRC_CONST) << R300_ALU_SRC1C_SHIFT) | \ - ((0 | SRC_CONST) << R300_ALU_SRC2C_SHIFT)) -#define NOP_INST2 ( \ - (R300_ALU_OUTA_MAD) | \ - (R300_ALU_ARGA_ZERO << R300_ALU_ARG0A_SHIFT) | \ - (R300_ALU_ARGA_ZERO << R300_ALU_ARG1A_SHIFT) | \ - (R300_ALU_ARGA_ZERO << R300_ALU_ARG2A_SHIFT)) -#define NOP_INST3 ( \ - ((0 | SRC_CONST) << R300_ALU_SRC0A_SHIFT) | \ - ((0 | SRC_CONST) << R300_ALU_SRC1A_SHIFT) | \ - ((0 | SRC_CONST) << R300_ALU_SRC2A_SHIFT)) - - -/* - * Datas structures for fragment program generation - */ - -/* description of r300 native hw instructions */ -static const struct { - const char *name; - int argc; - int v_op; - int s_op; -} r300_fpop[] = { - /* *INDENT-OFF* */ - {"MAD", 3, R300_ALU_OUTC_MAD, R300_ALU_OUTA_MAD}, - {"DP3", 2, R300_ALU_OUTC_DP3, R300_ALU_OUTA_DP4}, - {"DP4", 2, R300_ALU_OUTC_DP4, R300_ALU_OUTA_DP4}, - {"MIN", 2, R300_ALU_OUTC_MIN, R300_ALU_OUTA_MIN}, - {"MAX", 2, R300_ALU_OUTC_MAX, R300_ALU_OUTA_MAX}, - {"CMP", 3, R300_ALU_OUTC_CMP, R300_ALU_OUTA_CMP}, - {"FRC", 1, R300_ALU_OUTC_FRC, R300_ALU_OUTA_FRC}, - {"EX2", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_EX2}, - {"LG2", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_LG2}, - {"RCP", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_RCP}, - {"RSQ", 1, 
R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_RSQ}, - {"REPL_ALPHA", 1, R300_ALU_OUTC_REPL_ALPHA, PFS_INVAL}, - {"CMPH", 3, R300_ALU_OUTC_CMPH, PFS_INVAL}, - /* *INDENT-ON* */ -}; - -/* vector swizzles r300 can support natively, with a couple of - * cases we handle specially - * - * REG_VSWZ/REG_SSWZ is an index into this table - */ - -/* mapping from SWIZZLE_* to r300 native values for scalar insns */ -#define SWIZZLE_HALF 6 - -#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \ - SWIZZLE_##y, \ - SWIZZLE_##z, \ - SWIZZLE_ZERO)) -/* native swizzles */ -static const struct r300_pfs_swizzle { - GLuint hash; /* swizzle value this matches */ - GLuint base; /* base value for hw swizzle */ - GLuint stride; /* difference in base between arg0/1/2 */ - GLuint flags; -} v_swiz[] = { - /* *INDENT-OFF* */ - {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, SLOT_SRC_SCALAR}, - {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR}, - {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH}, - {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0}, - {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0}, - {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0}, - {PFS_INVAL, 0, 0, 0}, - /* *INDENT-ON* */ -}; - -/* used during matching of non-native swizzles */ -#define SWZ_X_MASK (7 << 0) -#define SWZ_Y_MASK (7 << 3) -#define SWZ_Z_MASK (7 << 6) -#define SWZ_W_MASK (7 << 9) -static const struct { - GLuint hash; /* used to mask matching swizzle components */ - int mask; /* actual outmask */ - int count; /* count of components matched */ -} s_mask[] = { - /* *INDENT-OFF* */ - {SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK, 1 | 2 | 4, 3}, 
- {SWZ_X_MASK | SWZ_Y_MASK, 1 | 2, 2}, - {SWZ_X_MASK | SWZ_Z_MASK, 1 | 4, 2}, - {SWZ_Y_MASK | SWZ_Z_MASK, 2 | 4, 2}, - {SWZ_X_MASK, 1, 1}, - {SWZ_Y_MASK, 2, 1}, - {SWZ_Z_MASK, 4, 1}, - {PFS_INVAL, PFS_INVAL, PFS_INVAL} - /* *INDENT-ON* */ -}; - -static const struct { - int base; /* hw value of swizzle */ - int stride; /* difference between SRC0/1/2 */ - GLuint flags; -} s_swiz[] = { - /* *INDENT-OFF* */ - {R300_ALU_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR}, - {R300_ALU_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR}, - {R300_ALU_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR}, - {R300_ALU_ARGA_SRC0A, 1, SLOT_SRC_SCALAR}, - {R300_ALU_ARGA_ZERO, 0, 0}, - {R300_ALU_ARGA_ONE, 0, 0}, - {R300_ALU_ARGA_HALF, 0, 0} - /* *INDENT-ON* */ -}; - -/* boiler-plate reg, for convenience */ -static const GLuint undef = REG(REG_TYPE_TEMP, - 0, - SWIZZLE_XYZ, - SWIZZLE_W, - GL_FALSE, - GL_FALSE, - GL_FALSE); - -/* constant one source */ -static const GLuint pfs_one = REG(REG_TYPE_CONST, - 0, - SWIZZLE_111, - SWIZZLE_ONE, - GL_FALSE, - GL_TRUE, - GL_TRUE); - -/* constant half source */ -static const GLuint pfs_half = REG(REG_TYPE_CONST, - 0, - SWIZZLE_HHH, - SWIZZLE_HALF, - GL_FALSE, - GL_TRUE, - GL_TRUE); - -/* constant zero source */ -static const GLuint pfs_zero = REG(REG_TYPE_CONST, - 0, - SWIZZLE_000, - SWIZZLE_ZERO, - GL_FALSE, - GL_TRUE, - GL_TRUE); - -/* - * Common functions prototypes - */ -static void dump_program(struct r300_fragment_program *fp, - struct r300_fragment_program_code *code); -static void emit_arith(struct r300_pfs_compile_state *cs, int op, - GLuint dest, int mask, - GLuint src0, GLuint src1, GLuint src2, int flags); - -/** - * Get an R300 temporary that can be written to in the given slot. 
- */ -static int get_hw_temp(struct r300_pfs_compile_state *cs, int slot) -{ - COMPILE_STATE; - int r; - - for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) { - if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= slot) - break; - } - - if (r >= PFS_NUM_TEMP_REGS) { - ERROR("Out of hardware temps\n"); - return 0; - } - // Reserved is used to avoid the following scenario: - // R300 temporary X is first assigned to Mesa temporary Y during vector ops - // R300 temporary X is then assigned to Mesa temporary Z for further vector ops - // Then scalar ops on Mesa temporary Z are emitted and move back in time - // to overwrite the value of temporary Y. - // End scenario. - cs->hwtemps[r].reserved = cs->hwtemps[r].free; - cs->hwtemps[r].free = -1; - - // Reset to some value that won't mess things up when the user - // tries to read from a temporary that hasn't been assigned a value yet. - // In the normal case, vector_valid and scalar_valid should be set to - // a sane value by the first emit that writes to this temporary. - cs->hwtemps[r].vector_valid = 0; - cs->hwtemps[r].scalar_valid = 0; - - if (r > code->max_temp_idx) - code->max_temp_idx = r; - - return r; -} - -/** - * Get an R300 temporary that will act as a TEX destination register. - */ -static int get_hw_temp_tex(struct r300_pfs_compile_state *cs) -{ - COMPILE_STATE; - int r; - - for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) { - if (cs->used_in_node & (1 << r)) - continue; - - // Note: Be very careful here - if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= 0) - break; - } - - if (r >= PFS_NUM_TEMP_REGS) - return get_hw_temp(cs, 0); /* Will cause an indirection */ - - cs->hwtemps[r].reserved = cs->hwtemps[r].free; - cs->hwtemps[r].free = -1; - - // Reset to some value that won't mess things up when the user - // tries to read from a temporary that hasn't been assigned a value yet. - // In the normal case, vector_valid and scalar_valid should be set to - // a sane value by the first emit that writes to this temporary. 
- cs->hwtemps[r].vector_valid = cs->nrslots; - cs->hwtemps[r].scalar_valid = cs->nrslots; - - if (r > code->max_temp_idx) - code->max_temp_idx = r; - - return r; -} - -/** - * Mark the given hardware register as free. - */ -static void free_hw_temp(struct r300_pfs_compile_state *cs, int idx) -{ - // Be very careful here. Consider sequences like - // MAD r0, r1,r2,r3 - // TEX r4, ... - // The TEX instruction may be moved in front of the MAD instruction - // due to the way nodes work. We don't want to alias r1 and r4 in - // this case. - // I'm certain the register allocation could be further sanitized, - // but it's tricky because of stuff that can happen inside emit_tex - // and emit_arith. - cs->hwtemps[idx].free = cs->nrslots + 1; -} - -/** - * Create a new Mesa temporary register. - */ -static GLuint get_temp_reg(struct r300_pfs_compile_state *cs) -{ - COMPILE_STATE; - GLuint r = undef; - GLuint index; - - index = ffs(~cs->temp_in_use); - if (!index) { - ERROR("Out of program temps\n"); - return r; - } - - cs->temp_in_use |= (1 << --index); - cs->temps[index].refcount = 0xFFFFFFFF; - cs->temps[index].reg = -1; - - REG_SET_TYPE(r, REG_TYPE_TEMP); - REG_SET_INDEX(r, index); - REG_SET_VALID(r, GL_TRUE); - return r; -} - -/** - * Create a new Mesa temporary register that will act as the destination - * register for a texture read. - */ -static GLuint get_temp_reg_tex(struct r300_pfs_compile_state *cs) -{ - COMPILE_STATE; - GLuint r = undef; - GLuint index; - - index = ffs(~cs->temp_in_use); - if (!index) { - ERROR("Out of program temps\n"); - return r; - } - - cs->temp_in_use |= (1 << --index); - cs->temps[index].refcount = 0xFFFFFFFF; - cs->temps[index].reg = get_hw_temp_tex(cs); - - REG_SET_TYPE(r, REG_TYPE_TEMP); - REG_SET_INDEX(r, index); - REG_SET_VALID(r, GL_TRUE); - return r; -} - -/** - * Free a Mesa temporary and the associated R300 temporary. 
- */ -static void free_temp(struct r300_pfs_compile_state *cs, GLuint r) -{ - GLuint index = REG_GET_INDEX(r); - - if (!(cs->temp_in_use & (1 << index))) - return; - - if (REG_GET_TYPE(r) == REG_TYPE_TEMP) { - free_hw_temp(cs, cs->temps[index].reg); - cs->temps[index].reg = -1; - cs->temp_in_use &= ~(1 << index); - } else if (REG_GET_TYPE(r) == REG_TYPE_INPUT) { - free_hw_temp(cs, cs->inputs[index].reg); - cs->inputs[index].reg = -1; - } -} - -/** - * Emit a hardware constant/parameter. - * - * \p cp Stable pointer to an array of 4 floats. - * The pointer must be stable in the sense that it remains to be valid - * and hold the contents of the constant/parameter throughout the lifetime - * of the fragment program (actually, up until the next time the fragment - * program is translated). - */ -static GLuint emit_const4fv(struct r300_pfs_compile_state *cs, - const GLfloat * cp) -{ - COMPILE_STATE; - GLuint reg = undef; - int index; - for (index = 0; index < code->const_nr; ++index) { - if (code->constant[index] == cp) - break; - } - - if (index >= code->const_nr) { - if (index >= PFS_NUM_CONST_REGS) { - ERROR("Out of hw constants!\n"); - return reg; - } - - code->const_nr++; - code->constant[index] = cp; - } - - REG_SET_TYPE(reg, REG_TYPE_CONST); - REG_SET_INDEX(reg, index); - REG_SET_VALID(reg, GL_TRUE); - return reg; -} - -static inline GLuint negate(GLuint r) -{ - REG_NEGS(r); - REG_NEGV(r); - return r; -} - -/* Hack, to prevent clobbering sources used multiple times when - * emulating non-native instructions - */ -static inline GLuint keep(GLuint r) -{ - REG_SET_NO_USE(r, GL_TRUE); - return r; -} - -static inline GLuint absolute(GLuint r) -{ - REG_ABS(r); - return r; -} - -static int swz_native(struct r300_pfs_compile_state *cs, - GLuint src, GLuint * r, GLuint arbneg) -{ - COMPILE_STATE; - - /* Native swizzle, handle negation */ - src = (src & ~REG_NEGS_MASK) | (((arbneg >> 3) & 1) << REG_NEGS_SHIFT); - - if ((arbneg & 0x7) == 0x0) { - src = src & ~REG_NEGV_MASK; 
- *r = src; - } else if ((arbneg & 0x7) == 0x7) { - src |= REG_NEGV_MASK; - *r = src; - } else { - if (!REG_GET_VALID(*r)) - *r = get_temp_reg(cs); - src |= REG_NEGV_MASK; - emit_arith(cs, - PFS_OP_MAD, - *r, arbneg & 0x7, keep(src), pfs_one, pfs_zero, 0); - src = src & ~REG_NEGV_MASK; - emit_arith(cs, - PFS_OP_MAD, - *r, - (arbneg ^ 0x7) | WRITEMASK_W, - src, pfs_one, pfs_zero, 0); - } - - return 3; -} - -static int swz_emit_partial(struct r300_pfs_compile_state *cs, - GLuint src, - GLuint * r, int mask, int mc, GLuint arbneg) -{ - COMPILE_STATE; - GLuint tmp; - GLuint wmask = 0; - - if (!REG_GET_VALID(*r)) - *r = get_temp_reg(cs); - - /* A partial match, VSWZ/mask define what parts of the - * desired swizzle we match - */ - if (mc + s_mask[mask].count == 3) { - wmask = WRITEMASK_W; - src |= ((arbneg >> 3) & 1) << REG_NEGS_SHIFT; - } - - tmp = arbneg & s_mask[mask].mask; - if (tmp) { - tmp = tmp ^ s_mask[mask].mask; - if (tmp) { - emit_arith(cs, - PFS_OP_MAD, - *r, - arbneg & s_mask[mask].mask, - keep(src) | REG_NEGV_MASK, - pfs_one, pfs_zero, 0); - if (!wmask) { - REG_SET_NO_USE(src, GL_TRUE); - } else { - REG_SET_NO_USE(src, GL_FALSE); - } - emit_arith(cs, - PFS_OP_MAD, - *r, tmp | wmask, src, pfs_one, pfs_zero, 0); - } else { - if (!wmask) { - REG_SET_NO_USE(src, GL_TRUE); - } else { - REG_SET_NO_USE(src, GL_FALSE); - } - emit_arith(cs, - PFS_OP_MAD, - *r, - (arbneg & s_mask[mask].mask) | wmask, - src | REG_NEGV_MASK, pfs_one, pfs_zero, 0); - } - } else { - if (!wmask) { - REG_SET_NO_USE(src, GL_TRUE); - } else { - REG_SET_NO_USE(src, GL_FALSE); - } - emit_arith(cs, PFS_OP_MAD, - *r, - s_mask[mask].mask | wmask, - src, pfs_one, pfs_zero, 0); - } - - return s_mask[mask].count; -} - -static GLuint do_swizzle(struct r300_pfs_compile_state *cs, - GLuint src, GLuint arbswz, GLuint arbneg) -{ - COMPILE_STATE; - GLuint r = undef; - GLuint vswz; - int c_mask = 0; - int v_match = 0; - - /* If swizzling from something without an XYZW native swizzle, - * emit result to a 
temp, and do new swizzle from the temp. - */ -#if 0 - if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) { - GLuint temp = get_temp_reg(fp); - emit_arith(fp, - PFS_OP_MAD, - temp, WRITEMASK_XYZW, src, pfs_one, pfs_zero, 0); - src = temp; - } -#endif - - if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) { - GLuint vsrcswz = - (v_swiz[REG_GET_VSWZ(src)]. - hash & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK)) | - REG_GET_SSWZ(src) << 9; - GLint i; - - GLuint newswz = 0; - GLuint offset; - for (i = 0; i < 4; ++i) { - offset = GET_SWZ(arbswz, i); - - newswz |= - (offset <= 3) ? GET_SWZ(vsrcswz, - offset) << i * - 3 : offset << i * 3; - } - - arbswz = newswz & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK); - REG_SET_SSWZ(src, GET_SWZ(newswz, 3)); - } else { - /* set scalar swizzling */ - REG_SET_SSWZ(src, GET_SWZ(arbswz, 3)); - - } - do { - vswz = REG_GET_VSWZ(src); - do { - int chash; - - REG_SET_VSWZ(src, vswz); - chash = v_swiz[REG_GET_VSWZ(src)].hash & - s_mask[c_mask].hash; - - if (chash == (arbswz & s_mask[c_mask].hash)) { - if (s_mask[c_mask].count == 3) { - v_match += swz_native(cs, - src, &r, arbneg); - } else { - v_match += swz_emit_partial(cs, - src, - &r, - c_mask, - v_match, - arbneg); - } - - if (v_match == 3) - return r; - - /* Fill with something invalid.. all 0's was - * wrong before, matched SWIZZLE_X. 
So all - * 1's will be okay for now - */ - arbswz |= (PFS_INVAL & s_mask[c_mask].hash); - } - } while (v_swiz[++vswz].hash != PFS_INVAL); - REG_SET_VSWZ(src, SWIZZLE_XYZ); - } while (s_mask[++c_mask].hash != PFS_INVAL); - - ERROR("should NEVER get here\n"); - return r; -} - -static GLuint t_src(struct r300_pfs_compile_state *cs, - struct prog_src_register fpsrc) -{ - COMPILE_STATE; - GLuint r = undef; - - switch (fpsrc.File) { - case PROGRAM_TEMPORARY: - REG_SET_INDEX(r, fpsrc.Index); - REG_SET_VALID(r, GL_TRUE); - REG_SET_TYPE(r, REG_TYPE_TEMP); - break; - case PROGRAM_INPUT: - REG_SET_INDEX(r, fpsrc.Index); - REG_SET_VALID(r, GL_TRUE); - REG_SET_TYPE(r, REG_TYPE_INPUT); - break; - case PROGRAM_LOCAL_PARAM: - r = emit_const4fv(cs, - fp->mesa_program.Base.LocalParams[fpsrc. - Index]); - break; - case PROGRAM_ENV_PARAM: - r = emit_const4fv(cs, - cs->r300->radeon.glCtx->FragmentProgram.Parameters[fpsrc.Index]); - break; - case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - r = emit_const4fv(cs, - fp->mesa_program.Base.Parameters-> - ParameterValues[fpsrc.Index]); - break; - default: - ERROR("unknown SrcReg->File %x\n", fpsrc.File); - return r; - } - - /* no point swizzling ONE/ZERO/HALF constants... 
*/ - if (REG_GET_VSWZ(r) < SWIZZLE_111 || REG_GET_SSWZ(r) < SWIZZLE_ZERO) - r = do_swizzle(cs, r, fpsrc.Swizzle, fpsrc.NegateBase); - return r; -} - -static GLuint t_scalar_src(struct r300_pfs_compile_state *cs, - struct prog_src_register fpsrc) -{ - struct prog_src_register src = fpsrc; - int sc = GET_SWZ(fpsrc.Swizzle, 0); /* X */ - - src.Swizzle = ((sc << 0) | (sc << 3) | (sc << 6) | (sc << 9)); - - return t_src(cs, src); -} - -static GLuint t_dst(struct r300_pfs_compile_state *cs, - struct prog_dst_register dest) -{ - COMPILE_STATE; - GLuint r = undef; - - switch (dest.File) { - case PROGRAM_TEMPORARY: - REG_SET_INDEX(r, dest.Index); - REG_SET_VALID(r, GL_TRUE); - REG_SET_TYPE(r, REG_TYPE_TEMP); - return r; - case PROGRAM_OUTPUT: - REG_SET_TYPE(r, REG_TYPE_OUTPUT); - switch (dest.Index) { - case FRAG_RESULT_COLR: - case FRAG_RESULT_DEPR: - REG_SET_INDEX(r, dest.Index); - REG_SET_VALID(r, GL_TRUE); - return r; - default: - ERROR("Bad DstReg->Index 0x%x\n", dest.Index); - return r; - } - default: - ERROR("Bad DstReg->File 0x%x\n", dest.File); - return r; - } -} - -static int t_hw_src(struct r300_pfs_compile_state *cs, GLuint src, GLboolean tex) -{ - COMPILE_STATE; - int idx; - int index = REG_GET_INDEX(src); - - switch (REG_GET_TYPE(src)) { - case REG_TYPE_TEMP: - /* NOTE: if reg==-1 here, a source is being read that - * hasn't been written to. Undefined results. 
- */ - if (cs->temps[index].reg == -1) - cs->temps[index].reg = get_hw_temp(cs, cs->nrslots); - - idx = cs->temps[index].reg; - - if (!REG_GET_NO_USE(src) && (--cs->temps[index].refcount == 0)) - free_temp(cs, src); - break; - case REG_TYPE_INPUT: - idx = cs->inputs[index].reg; - - if (!REG_GET_NO_USE(src) && (--cs->inputs[index].refcount == 0)) - free_hw_temp(cs, cs->inputs[index].reg); - break; - case REG_TYPE_CONST: - return (index | SRC_CONST); - default: - ERROR("Invalid type for source reg\n"); - return (0 | SRC_CONST); - } - - if (!tex) - cs->used_in_node |= (1 << idx); - - return idx; -} - -static int t_hw_dst(struct r300_pfs_compile_state *cs, - GLuint dest, GLboolean tex, int slot) -{ - COMPILE_STATE; - int idx; - GLuint index = REG_GET_INDEX(dest); - assert(REG_GET_VALID(dest)); - - switch (REG_GET_TYPE(dest)) { - case REG_TYPE_TEMP: - if (cs->temps[REG_GET_INDEX(dest)].reg == -1) { - if (!tex) { - cs->temps[index].reg = get_hw_temp(cs, slot); - } else { - cs->temps[index].reg = get_hw_temp_tex(cs); - } - } - idx = cs->temps[index].reg; - - if (!REG_GET_NO_USE(dest) && (--cs->temps[index].refcount == 0)) - free_temp(cs, dest); - - cs->dest_in_node |= (1 << idx); - cs->used_in_node |= (1 << idx); - break; - case REG_TYPE_OUTPUT: - switch (index) { - case FRAG_RESULT_COLR: - code->node[code->cur_node].flags |= R300_RGBA_OUT; - break; - case FRAG_RESULT_DEPR: - fp->WritesDepth = GL_TRUE; - code->node[code->cur_node].flags |= R300_W_OUT; - break; - } - return index; - break; - default: - ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); - return 0; - } - - return idx; -} - -static void emit_nop(struct r300_pfs_compile_state *cs) -{ - COMPILE_STATE; - - if (cs->nrslots >= PFS_MAX_ALU_INST) { - ERROR("Out of ALU instruction slots\n"); - return; - } - - code->alu.inst[cs->nrslots].inst0 = NOP_INST0; - code->alu.inst[cs->nrslots].inst1 = NOP_INST1; - code->alu.inst[cs->nrslots].inst2 = NOP_INST2; - code->alu.inst[cs->nrslots].inst3 = NOP_INST3; - 
cs->nrslots++; -} - -static void emit_tex(struct r300_pfs_compile_state *cs, - struct prog_instruction *fpi, int opcode) +static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp) { - COMPILE_STATE; - GLuint coord = t_src(cs, fpi->SrcReg[0]); - GLuint dest = undef, rdest = undef; - GLuint din, uin; - int unit = fpi->TexSrcUnit; - int hwsrc, hwdest; - GLuint tempreg = 0; - - /** - * Hardware uses [0..1]x[0..1] range for rectangle textures - * instead of [0..Width]x[0..Height]. - * Add a scaling instruction. - * - * \todo Refactor this once we have proper rewriting/optimization - * support for programs. - */ - if (opcode != R300_TEX_OP_KIL && fpi->TexSrcTarget == TEXTURE_RECT_INDEX) { - gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, - 0 - }; - int factor_index; - GLuint factorreg; - - tokens[2] = unit; - factor_index = - _mesa_add_state_reference(fp->mesa_program.Base. - Parameters, tokens); - factorreg = - emit_const4fv(cs, - fp->mesa_program.Base.Parameters-> - ParameterValues[factor_index]); - tempreg = keep(get_temp_reg(cs)); - - emit_arith(cs, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, - coord, factorreg, pfs_zero, 0); - - coord = tempreg; - } - - /* Texture operations do not support swizzles etc. in hardware, - * so emit an additional arithmetic operation if necessary. 
- */ - if (REG_GET_VSWZ(coord) != SWIZZLE_XYZ || - REG_GET_SSWZ(coord) != SWIZZLE_W || - coord & (REG_NEGV_MASK | REG_NEGS_MASK | REG_ABS_MASK)) { - assert(tempreg == 0); - tempreg = keep(get_temp_reg(cs)); - emit_arith(cs, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, - coord, pfs_one, pfs_zero, 0); - coord = tempreg; - } - - /* Ensure correct node indirection */ - uin = cs->used_in_node; - din = cs->dest_in_node; - - /* Resolve source/dest to hardware registers */ - hwsrc = t_hw_src(cs, coord, GL_TRUE); - - if (opcode != R300_TEX_OP_KIL) { - dest = t_dst(cs, fpi->DstReg); - - /* r300 doesn't seem to be able to do TEX->output reg */ - if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { - rdest = dest; - dest = get_temp_reg_tex(cs); - } else if (fpi->DstReg.WriteMask != WRITEMASK_XYZW) { - /* in case write mask isn't XYZW */ - rdest = dest; - dest = get_temp_reg_tex(cs); - } - hwdest = - t_hw_dst(cs, dest, GL_TRUE, - code->node[code->cur_node].alu_offset); - - /* Use a temp that hasn't been used in this node, rather - * than causing an indirection - */ - if (uin & (1 << hwdest)) { - free_hw_temp(cs, hwdest); - hwdest = get_hw_temp_tex(cs); - cs->temps[REG_GET_INDEX(dest)].reg = hwdest; - } - } else { - hwdest = 0; - unit = 0; - } - - /* Indirection if source has been written in this node, or if the - * dest has been read/written in this node - */ - if ((REG_GET_TYPE(coord) != REG_TYPE_CONST && - (din & (1 << hwsrc))) || (uin & (1 << hwdest))) { - - /* Finish off current node */ - if (code->node[code->cur_node].alu_offset == cs->nrslots) - emit_nop(cs); - - code->node[code->cur_node].alu_end = - cs->nrslots - code->node[code->cur_node].alu_offset - 1; - assert(code->node[code->cur_node].alu_end >= 0); - - if (++code->cur_node >= PFS_MAX_TEX_INDIRECT) { - ERROR("too many levels of texture indirection\n"); - return; - } - - /* Start new node */ - code->node[code->cur_node].tex_offset = code->tex.length; - code->node[code->cur_node].alu_offset = cs->nrslots; - 
code->node[code->cur_node].tex_end = -1; - code->node[code->cur_node].alu_end = -1; - code->node[code->cur_node].flags = 0; - cs->used_in_node = 0; - cs->dest_in_node = 0; - } - - if (code->cur_node == 0) - code->first_node_has_tex = 1; - - code->tex.inst[code->tex.length++] = 0 | (hwsrc << R300_SRC_ADDR_SHIFT) - | (hwdest << R300_DST_ADDR_SHIFT) - | (unit << R300_TEX_ID_SHIFT) - | (opcode << R300_TEX_INST_SHIFT); - - cs->dest_in_node |= (1 << hwdest); - if (REG_GET_TYPE(coord) != REG_TYPE_CONST) - cs->used_in_node |= (1 << hwsrc); - - code->node[code->cur_node].tex_end++; - - /* Copy from temp to output if needed */ - if (REG_GET_VALID(rdest)) { - emit_arith(cs, PFS_OP_MAD, rdest, fpi->DstReg.WriteMask, dest, - pfs_one, pfs_zero, 0); - free_temp(cs, dest); - } + struct gl_fragment_program *mp = &fp->mesa_program; - /* Free temp register */ - if (tempreg != 0) - free_temp(cs, tempreg); + /* Ask Mesa nicely to fill in ParameterValues for us */ + if (mp->Base.Parameters) + _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters); } -/** - * Returns the first slot where we could possibly allow writing to dest, - * according to register allocation. 
- */ -static int get_earliest_allowed_write(struct r300_pfs_compile_state *cs, - GLuint dest, int mask) -{ - COMPILE_STATE; - int idx; - int pos; - GLuint index = REG_GET_INDEX(dest); - assert(REG_GET_VALID(dest)); - - switch (REG_GET_TYPE(dest)) { - case REG_TYPE_TEMP: - if (cs->temps[index].reg == -1) - return 0; - - idx = cs->temps[index].reg; - break; - case REG_TYPE_OUTPUT: - return 0; - default: - ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); - return 0; - } - - pos = cs->hwtemps[idx].reserved; - if (mask & WRITEMASK_XYZ) { - if (pos < cs->hwtemps[idx].vector_lastread) - pos = cs->hwtemps[idx].vector_lastread; - } - if (mask & WRITEMASK_W) { - if (pos < cs->hwtemps[idx].scalar_lastread) - pos = cs->hwtemps[idx].scalar_lastread; - } - - return pos; -} /** - * Allocates a slot for an ALU instruction that can consist of - * a vertex part or a scalar part or both. + * Transform the program to support fragment.position. * - * Sources from src (src[0] to src[argc-1]) are added to the slot in the - * appropriate position (vector and/or scalar), and their positions are - * recorded in the srcpos array. + * Introduce a small fragment at the start of the program that will be + * the only code that directly reads the FRAG_ATTRIB_WPOS input. + * All other code pieces that reference that input will be rewritten + * to read from a newly allocated temporary. * - * This function emits instruction code for the source fetch and the - * argument selection. It does not emit instruction code for the - * opcode or the destination selection. 
- * - * @return the index of the slot - */ -static int find_and_prepare_slot(struct r300_pfs_compile_state *cs, - GLboolean emit_vop, - GLboolean emit_sop, - int argc, GLuint * src, GLuint dest, int mask) -{ - COMPILE_STATE; - int hwsrc[3]; - int srcpos[3]; - unsigned int used; - int tempused; - int tempvsrc[3]; - int tempssrc[3]; - int pos; - int regnr; - int i, j; - - // Determine instruction slots, whether sources are required on - // vector or scalar side, and the smallest slot number where - // all source registers are available - used = 0; - if (emit_vop) - used |= SLOT_OP_VECTOR; - if (emit_sop) - used |= SLOT_OP_SCALAR; - - pos = get_earliest_allowed_write(cs, dest, mask); - - if (code->node[code->cur_node].alu_offset > pos) - pos = code->node[code->cur_node].alu_offset; - for (i = 0; i < argc; ++i) { - if (!REG_GET_BUILTIN(src[i])) { - if (emit_vop) - used |= v_swiz[REG_GET_VSWZ(src[i])].flags << i; - if (emit_sop) - used |= s_swiz[REG_GET_SSWZ(src[i])].flags << i; - } - - hwsrc[i] = t_hw_src(cs, src[i], GL_FALSE); /* Note: sideeffects wrt refcounting! */ - regnr = hwsrc[i] & 31; - - if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { - if (used & (SLOT_SRC_VECTOR << i)) { - if (cs->hwtemps[regnr].vector_valid > pos) - pos = cs->hwtemps[regnr].vector_valid; - } - if (used & (SLOT_SRC_SCALAR << i)) { - if (cs->hwtemps[regnr].scalar_valid > pos) - pos = cs->hwtemps[regnr].scalar_valid; - } - } - } - - // Find a slot that fits - for (;; ++pos) { - if (cs->slot[pos].used & used & SLOT_OP_BOTH) - continue; - - if (pos >= cs->nrslots) { - if (cs->nrslots >= PFS_MAX_ALU_INST) { - ERROR("Out of ALU instruction slots\n"); - return -1; - } - - code->alu.inst[pos].inst0 = NOP_INST0; - code->alu.inst[pos].inst1 = NOP_INST1; - code->alu.inst[pos].inst2 = NOP_INST2; - code->alu.inst[pos].inst3 = NOP_INST3; - - cs->nrslots++; - } - // Note: When we need both parts (vector and scalar) of a source, - // we always try to put them into the same position. 
This makes the - // code easier to read, and it is optimal (i.e. one doesn't gain - // anything by splitting the parts). - // It also avoids headaches with swizzles that access both parts (i.e WXY) - tempused = cs->slot[pos].used; - for (i = 0; i < 3; ++i) { - tempvsrc[i] = cs->slot[pos].vsrc[i]; - tempssrc[i] = cs->slot[pos].ssrc[i]; - } - - for (i = 0; i < argc; ++i) { - int flags = (used >> i) & SLOT_SRC_BOTH; - - if (!flags) { - srcpos[i] = 0; - continue; - } - - for (j = 0; j < 3; ++j) { - if ((tempused >> j) & flags & SLOT_SRC_VECTOR) { - if (tempvsrc[j] != hwsrc[i]) - continue; - } - - if ((tempused >> j) & flags & SLOT_SRC_SCALAR) { - if (tempssrc[j] != hwsrc[i]) - continue; - } - - break; - } - - if (j == 3) - break; - - srcpos[i] = j; - tempused |= flags << j; - if (flags & SLOT_SRC_VECTOR) - tempvsrc[j] = hwsrc[i]; - if (flags & SLOT_SRC_SCALAR) - tempssrc[j] = hwsrc[i]; - } - - if (i == argc) - break; - } - - // Found a slot, reserve it - cs->slot[pos].used = tempused | (used & SLOT_OP_BOTH); - for (i = 0; i < 3; ++i) { - cs->slot[pos].vsrc[i] = tempvsrc[i]; - cs->slot[pos].ssrc[i] = tempssrc[i]; - } - - for (i = 0; i < argc; ++i) { - if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { - int regnr = hwsrc[i] & 31; - - if (used & (SLOT_SRC_VECTOR << i)) { - if (cs->hwtemps[regnr].vector_lastread < pos) - cs->hwtemps[regnr].vector_lastread = - pos; - } - if (used & (SLOT_SRC_SCALAR << i)) { - if (cs->hwtemps[regnr].scalar_lastread < pos) - cs->hwtemps[regnr].scalar_lastread = - pos; - } - } - } - - // Emit the source fetch code - code->alu.inst[pos].inst1 &= ~R300_ALU_SRC_MASK; - code->alu.inst[pos].inst1 |= - ((cs->slot[pos].vsrc[0] << R300_ALU_SRC0C_SHIFT) | - (cs->slot[pos].vsrc[1] << R300_ALU_SRC1C_SHIFT) | - (cs->slot[pos].vsrc[2] << R300_ALU_SRC2C_SHIFT)); - - code->alu.inst[pos].inst3 &= ~R300_ALU_SRC_MASK; - code->alu.inst[pos].inst3 |= - ((cs->slot[pos].ssrc[0] << R300_ALU_SRC0A_SHIFT) | - (cs->slot[pos].ssrc[1] << R300_ALU_SRC1A_SHIFT) | - 
(cs->slot[pos].ssrc[2] << R300_ALU_SRC2A_SHIFT)); - - // Emit the argument selection code - if (emit_vop) { - int swz[3]; - - for (i = 0; i < 3; ++i) { - if (i < argc) { - swz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base + - (srcpos[i] * - v_swiz[REG_GET_VSWZ(src[i])]. - stride)) | ((src[i] & REG_NEGV_MASK) - ? ARG_NEG : 0) | ((src[i] - & - REG_ABS_MASK) - ? - ARG_ABS - : 0); - } else { - swz[i] = R300_ALU_ARGC_ZERO; - } - } - - code->alu.inst[pos].inst0 &= - ~(R300_ALU_ARG0C_MASK | R300_ALU_ARG1C_MASK | - R300_ALU_ARG2C_MASK); - code->alu.inst[pos].inst0 |= - (swz[0] << R300_ALU_ARG0C_SHIFT) | (swz[1] << - R300_ALU_ARG1C_SHIFT) - | (swz[2] << R300_ALU_ARG2C_SHIFT); - } - - if (emit_sop) { - int swz[3]; - - for (i = 0; i < 3; ++i) { - if (i < argc) { - swz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base + - (srcpos[i] * - s_swiz[REG_GET_SSWZ(src[i])]. - stride)) | ((src[i] & REG_NEGV_MASK) - ? ARG_NEG : 0) | ((src[i] - & - REG_ABS_MASK) - ? - ARG_ABS - : 0); - } else { - swz[i] = R300_ALU_ARGA_ZERO; - } - } - - code->alu.inst[pos].inst2 &= - ~(R300_ALU_ARG0A_MASK | R300_ALU_ARG1A_MASK | - R300_ALU_ARG2A_MASK); - code->alu.inst[pos].inst2 |= - (swz[0] << R300_ALU_ARG0A_SHIFT) | (swz[1] << - R300_ALU_ARG1A_SHIFT) - | (swz[2] << R300_ALU_ARG2A_SHIFT); - } - - return pos; -} - -/** - * Append an ALU instruction to the instruction list. + * \todo if/when r5xx supports the radeon_program architecture, this is a + * likely candidate for code sharing. 
*/ -static void emit_arith(struct r300_pfs_compile_state *cs, - int op, - GLuint dest, - int mask, - GLuint src0, GLuint src1, GLuint src2, int flags) +static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) { - COMPILE_STATE; - GLuint src[3] = { src0, src1, src2 }; - int hwdest; - GLboolean emit_vop, emit_sop; - int vop, sop, argc; - int pos; - - vop = r300_fpop[op].v_op; - sop = r300_fpop[op].s_op; - argc = r300_fpop[op].argc; + GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead; - if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT && - REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) { - if (mask & WRITEMASK_Z) { - mask = WRITEMASK_W; - } else { - return; - } - } - - emit_vop = GL_FALSE; - emit_sop = GL_FALSE; - if ((mask & WRITEMASK_XYZ) || vop == R300_ALU_OUTC_DP3) - emit_vop = GL_TRUE; - if ((mask & WRITEMASK_W) || vop == R300_ALU_OUTC_REPL_ALPHA) - emit_sop = GL_TRUE; - - pos = - find_and_prepare_slot(cs, emit_vop, emit_sop, argc, src, dest, - mask); - if (pos < 0) + if (!(InputsRead & FRAG_BIT_WPOS)) return; - hwdest = t_hw_dst(cs, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */ - - if (flags & PFS_FLAG_SAT) { - vop |= R300_ALU_OUTC_CLAMP; - sop |= R300_ALU_OUTA_CLAMP; - } - - /* Throw the pieces together and get ALU/1 */ - if (emit_vop) { - code->alu.inst[pos].inst0 |= vop; - - code->alu.inst[pos].inst1 |= hwdest << R300_ALU_DSTC_SHIFT; - - if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { - if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { - code->alu.inst[pos].inst1 |= - (mask & WRITEMASK_XYZ) << - R300_ALU_DSTC_OUTPUT_MASK_SHIFT; - } else - assert(0); - } else { - code->alu.inst[pos].inst1 |= - (mask & WRITEMASK_XYZ) << - R300_ALU_DSTC_REG_MASK_SHIFT; - - cs->hwtemps[hwdest].vector_valid = pos + 1; - } - } - - /* And now ALU/3 */ - if (emit_sop) { - code->alu.inst[pos].inst2 |= sop; - - if (mask & WRITEMASK_W) { - if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { - if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { - 
code->alu.inst[pos].inst3 |= - (hwdest << R300_ALU_DSTA_SHIFT) | - R300_ALU_DSTA_OUTPUT; - } else if (REG_GET_INDEX(dest) == - FRAG_RESULT_DEPR) { - code->alu.inst[pos].inst3 |= - R300_ALU_DSTA_DEPTH; - } else - assert(0); - } else { - code->alu.inst[pos].inst3 |= - (hwdest << R300_ALU_DSTA_SHIFT) | - R300_ALU_DSTA_REG; - - cs->hwtemps[hwdest].scalar_valid = pos + 1; - } - } - } - - return; -} - -static GLfloat SinCosConsts[2][4] = { - { - 1.273239545, // 4/PI - -0.405284735, // -4/(PI*PI) - 3.141592654, // PI - 0.2225 // weight - }, - { - 0.75, - 0.0, - 0.159154943, // 1/(2*PI) - 6.283185307 // 2*PI - } -}; - -/** - * Emit a LIT instruction. - * \p flags may be PFS_FLAG_SAT - * - * Definition of LIT (from ARB_fragment_program): - * tmp = VectorLoad(op0); - * if (tmp.x < 0) tmp.x = 0; - * if (tmp.y < 0) tmp.y = 0; - * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); - * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; - * result.x = 1.0; - * result.y = tmp.x; - * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; - * result.w = 1.0; - * - * The longest path of computation is the one leading to result.z, - * consisting of 5 operations. This implementation of LIT takes - * 5 slots. So unless there's some special undocumented opcode, - * this implementation is potentially optimal. Unfortunately, - * emit_arith is a bit too conservative because it doesn't understand - * partial writes to the vector component. 
- */ -static const GLfloat LitConst[4] = - { 127.999999, 127.999999, 127.999999, -127.999999 }; - -static void emit_lit(struct r300_pfs_compile_state *cs, - GLuint dest, int mask, GLuint src, int flags) -{ - COMPILE_STATE; - GLuint cnst; - int needTemporary; - GLuint temp; - - cnst = emit_const4fv(cs, LitConst); - - needTemporary = 0; - if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) { - needTemporary = 1; - } else if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { - // LIT is typically followed by DP3/DP4, so there's no point - // in creating special code for this case - needTemporary = 1; - } - - if (needTemporary) { - temp = keep(get_temp_reg(cs)); - } else { - temp = keep(dest); - } - - // Note: The order of emit_arith inside the slots is relevant, - // because emit_arith only looks at scalar vs. vector when resolving - // dependencies, and it does not consider individual vector components, - // so swizzling between the two parts can create fake dependencies. - - // First slot - emit_arith(cs, PFS_OP_MAX, temp, WRITEMASK_XY, - keep(src), pfs_zero, undef, 0); - emit_arith(cs, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0); - - // Second slot - emit_arith(cs, PFS_OP_MIN, temp, WRITEMASK_Z, - swizzle(temp, W, W, W, W), cnst, undef, 0); - emit_arith(cs, PFS_OP_LG2, temp, WRITEMASK_W, - swizzle(temp, Y, Y, Y, Y), undef, undef, 0); - - // Third slot - // If desired, we saturate the y result here. 
- // This does not affect the use as a condition variable in the CMP later - emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_W, - temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0); - emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_Y, - swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags); - - // Fourth slot - emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_X, - pfs_one, pfs_one, pfs_zero, 0); - emit_arith(cs, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0); - - // Fifth slot - emit_arith(cs, PFS_OP_CMP, temp, WRITEMASK_Z, - pfs_zero, swizzle(temp, W, W, W, W), - negate(swizzle(temp, Y, Y, Y, Y)), flags); - emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one, - pfs_zero, 0); - - if (needTemporary) { - emit_arith(cs, PFS_OP_MAD, dest, mask, - temp, pfs_one, pfs_zero, flags); - free_temp(cs, temp); - } else { - // Decrease refcount of the destination - t_hw_dst(cs, dest, GL_FALSE, cs->nrslots); - } -} - -static GLboolean parse_program(struct r300_pfs_compile_state *cs) -{ - COMPILE_STATE; - struct gl_fragment_program *mp = &fp->mesa_program; - const struct prog_instruction *inst = mp->Base.Instructions; - struct prog_instruction *fpi; - GLuint src[3], dest, temp[2]; - int flags, mask = 0; - int const_sin[2]; - - if (!inst || inst[0].Opcode == OPCODE_END) { - ERROR("empty program?\n"); - return GL_FALSE; - } - - for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { - if (fpi->SaturateMode == SATURATE_ZERO_ONE) - flags = PFS_FLAG_SAT; - else - flags = 0; - - if (fpi->Opcode != OPCODE_KIL) { - dest = t_dst(cs, fpi->DstReg); - mask = fpi->DstReg.WriteMask; - } - - switch (fpi->Opcode) { - case OPCODE_ABS: - src[0] = t_src(cs, fpi->SrcReg[0]); - emit_arith(cs, PFS_OP_MAD, dest, mask, - absolute(src[0]), pfs_one, pfs_zero, flags); - break; - case OPCODE_ADD: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - emit_arith(cs, PFS_OP_MAD, dest, mask, - src[0], pfs_one, src[1], flags); - break; - case OPCODE_CMP: - src[0] = t_src(cs, 
fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - src[2] = t_src(cs, fpi->SrcReg[2]); - /* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c - * r300 - if src2.c < 0.0 ? src1.c : src0.c - */ - emit_arith(cs, PFS_OP_CMP, dest, mask, - src[2], src[1], src[0], flags); - break; - case OPCODE_COS: - /* - * cos using a parabola (see SIN): - * cos(x): - * x = (x/(2*PI))+0.75 - * x = frac(x) - * x = (x*2*PI)-PI - * result = sin(x) - */ - temp[0] = get_temp_reg(cs); - const_sin[0] = emit_const4fv(cs, SinCosConsts[0]); - const_sin[1] = emit_const4fv(cs, SinCosConsts[1]); - src[0] = t_scalar_src(cs, fpi->SrcReg[0]); - - /* add 0.5*PI and do range reduction */ - - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, - swizzle(src[0], X, X, X, X), - swizzle(const_sin[1], Z, Z, Z, Z), - swizzle(const_sin[1], X, X, X, X), 0); - - emit_arith(cs, PFS_OP_FRC, temp[0], WRITEMASK_X, - swizzle(temp[0], X, X, X, X), - undef, undef, 0); - - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI - negate(swizzle(const_sin[0], Z, Z, Z, Z)), //-PI - 0); - - /* SIN */ - - emit_arith(cs, PFS_OP_MAD, temp[0], - WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], - Z, Z, Z, - Z), - const_sin[0], pfs_zero, 0); - - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, - swizzle(temp[0], Y, Y, Y, Y), - absolute(swizzle(temp[0], Z, Z, Z, Z)), - swizzle(temp[0], X, X, X, X), 0); - - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Y, - swizzle(temp[0], X, X, X, X), - absolute(swizzle(temp[0], X, X, X, X)), - negate(swizzle(temp[0], X, X, X, X)), 0); - - emit_arith(cs, PFS_OP_MAD, dest, mask, - swizzle(temp[0], Y, Y, Y, Y), - swizzle(const_sin[0], W, W, W, W), - swizzle(temp[0], X, X, X, X), flags); - - free_temp(cs, temp[0]); - break; - case OPCODE_DP3: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - emit_arith(cs, PFS_OP_DP3, dest, mask, - src[0], src[1], undef, flags); - break; - case OPCODE_DP4: - src[0] = t_src(cs, 
fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - emit_arith(cs, PFS_OP_DP4, dest, mask, - src[0], src[1], undef, flags); - break; - case OPCODE_DPH: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - /* src0.xyz1 -> temp - * DP4 dest, temp, src1 - */ - emit_arith(cs, PFS_OP_DP4, dest, mask, - swizzle(src[0], X, Y, Z, ONE), src[1], - undef, flags); - break; - case OPCODE_DST: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - /* dest.y = src0.y * src1.y */ - if (mask & WRITEMASK_Y) - emit_arith(cs, PFS_OP_MAD, dest, WRITEMASK_Y, - keep(src[0]), keep(src[1]), - pfs_zero, flags); - /* dest.z = src0.z */ - if (mask & WRITEMASK_Z) - emit_arith(cs, PFS_OP_MAD, dest, WRITEMASK_Z, - src[0], pfs_one, pfs_zero, flags); - /* result.x = 1.0 - * result.w = src1.w */ - if (mask & WRITEMASK_XW) { - REG_SET_VSWZ(src[1], SWIZZLE_111); /*Cheat */ - emit_arith(cs, PFS_OP_MAD, dest, - mask & WRITEMASK_XW, - src[1], pfs_one, pfs_zero, flags); - } - break; - case OPCODE_EX2: - src[0] = t_scalar_src(cs, fpi->SrcReg[0]); - emit_arith(cs, PFS_OP_EX2, dest, mask, - src[0], undef, undef, flags); - break; - case OPCODE_FLR: - src[0] = t_src(cs, fpi->SrcReg[0]); - temp[0] = get_temp_reg(cs); - /* FRC temp, src0 - * MAD dest, src0, 1.0, -temp - */ - emit_arith(cs, PFS_OP_FRC, temp[0], mask, - keep(src[0]), undef, undef, 0); - emit_arith(cs, PFS_OP_MAD, dest, mask, - src[0], pfs_one, negate(temp[0]), flags); - free_temp(cs, temp[0]); - break; - case OPCODE_FRC: - src[0] = t_src(cs, fpi->SrcReg[0]); - emit_arith(cs, PFS_OP_FRC, dest, mask, - src[0], undef, undef, flags); - break; - case OPCODE_KIL: - emit_tex(cs, fpi, R300_TEX_OP_KIL); - break; - case OPCODE_LG2: - src[0] = t_scalar_src(cs, fpi->SrcReg[0]); - emit_arith(cs, PFS_OP_LG2, dest, mask, - src[0], undef, undef, flags); - break; - case OPCODE_LIT: - src[0] = t_src(cs, fpi->SrcReg[0]); - emit_lit(cs, dest, mask, src[0], flags); - break; - case OPCODE_LRP: - src[0] = t_src(cs, 
fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - src[2] = t_src(cs, fpi->SrcReg[2]); - /* result = tmp0tmp1 + (1 - tmp0)tmp2 - * = tmp0tmp1 + tmp2 + (-tmp0)tmp2 - * MAD temp, -tmp0, tmp2, tmp2 - * MAD result, tmp0, tmp1, temp - */ - temp[0] = get_temp_reg(cs); - emit_arith(cs, PFS_OP_MAD, temp[0], mask, - negate(keep(src[0])), keep(src[2]), src[2], - 0); - emit_arith(cs, PFS_OP_MAD, dest, mask, - src[0], src[1], temp[0], flags); - free_temp(cs, temp[0]); - break; - case OPCODE_MAD: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - src[2] = t_src(cs, fpi->SrcReg[2]); - emit_arith(cs, PFS_OP_MAD, dest, mask, - src[0], src[1], src[2], flags); - break; - case OPCODE_MAX: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - emit_arith(cs, PFS_OP_MAX, dest, mask, - src[0], src[1], undef, flags); - break; - case OPCODE_MIN: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - emit_arith(cs, PFS_OP_MIN, dest, mask, - src[0], src[1], undef, flags); - break; - case OPCODE_MOV: - case OPCODE_SWZ: - src[0] = t_src(cs, fpi->SrcReg[0]); - emit_arith(cs, PFS_OP_MAD, dest, mask, - src[0], pfs_one, pfs_zero, flags); - break; - case OPCODE_MUL: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - emit_arith(cs, PFS_OP_MAD, dest, mask, - src[0], src[1], pfs_zero, flags); - break; - case OPCODE_POW: - src[0] = t_scalar_src(cs, fpi->SrcReg[0]); - src[1] = t_scalar_src(cs, fpi->SrcReg[1]); - temp[0] = get_temp_reg(cs); - emit_arith(cs, PFS_OP_LG2, temp[0], WRITEMASK_W, - src[0], undef, undef, 0); - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_W, - temp[0], src[1], pfs_zero, 0); - emit_arith(cs, PFS_OP_EX2, dest, fpi->DstReg.WriteMask, - temp[0], undef, undef, 0); - free_temp(cs, temp[0]); - break; - case OPCODE_RCP: - src[0] = t_scalar_src(cs, fpi->SrcReg[0]); - emit_arith(cs, PFS_OP_RCP, dest, mask, - src[0], undef, undef, flags); - break; - case OPCODE_RSQ: - src[0] = 
t_scalar_src(cs, fpi->SrcReg[0]); - emit_arith(cs, PFS_OP_RSQ, dest, mask, - absolute(src[0]), pfs_zero, pfs_zero, flags); - break; - case OPCODE_SCS: - /* - * scs using a parabola : - * scs(x): - * result.x = sin(-abs(x)+0.5*PI) (cos) - * result.y = sin(x) (sin) - * - */ - temp[0] = get_temp_reg(cs); - temp[1] = get_temp_reg(cs); - const_sin[0] = emit_const4fv(cs, SinCosConsts[0]); - const_sin[1] = emit_const4fv(cs, SinCosConsts[1]); - src[0] = t_scalar_src(cs, fpi->SrcReg[0]); - - /* x = -abs(x)+0.5*PI */ - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(const_sin[0], Z, Z, Z, Z), //PI - pfs_half, - negate(abs - (swizzle(keep(src[0]), X, X, X, X))), - 0); - - /* C*x (sin) */ - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_W, - swizzle(const_sin[0], Y, Y, Y, Y), - swizzle(keep(src[0]), X, X, X, X), - pfs_zero, 0); - - /* B*x, C*x (cos) */ - emit_arith(cs, PFS_OP_MAD, temp[0], - WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], - Z, Z, Z, - Z), - const_sin[0], pfs_zero, 0); - - /* B*x (sin) */ - emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_W, - swizzle(const_sin[0], X, X, X, X), - keep(src[0]), pfs_zero, 0); - - /* y = B*x + C*x*abs(x) (sin) */ - emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_Z, - absolute(src[0]), - swizzle(temp[0], W, W, W, W), - swizzle(temp[1], W, W, W, W), 0); - - /* y = B*x + C*x*abs(x) (cos) */ - emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_W, - swizzle(temp[0], Y, Y, Y, Y), - absolute(swizzle(temp[0], Z, Z, Z, Z)), - swizzle(temp[0], X, X, X, X), 0); - - /* y*abs(y) - y (cos), y*abs(y) - y (sin) */ - emit_arith(cs, PFS_OP_MAD, temp[0], - WRITEMASK_X | WRITEMASK_Y, swizzle(temp[1], - W, Z, Y, - X), - absolute(swizzle(temp[1], W, Z, Y, X)), - negate(swizzle(temp[1], W, Z, Y, X)), 0); - - /* dest.xy = mad(temp.xy, P, temp2.wz) */ - emit_arith(cs, PFS_OP_MAD, dest, - mask & (WRITEMASK_X | WRITEMASK_Y), temp[0], - swizzle(const_sin[0], W, W, W, W), - swizzle(temp[1], W, Z, Y, X), flags); - - free_temp(cs, temp[0]); - free_temp(cs, temp[1]); 
- break; - case OPCODE_SGE: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - temp[0] = get_temp_reg(cs); - /* temp = src0 - src1 - * dest.c = (temp.c < 0.0) ? 0 : 1 - */ - emit_arith(cs, PFS_OP_MAD, temp[0], mask, - src[0], pfs_one, negate(src[1]), 0); - emit_arith(cs, PFS_OP_CMP, dest, mask, - pfs_one, pfs_zero, temp[0], 0); - free_temp(cs, temp[0]); - break; - case OPCODE_SIN: - /* - * using a parabola: - * sin(x) = 4/pi * x + -4/(pi*pi) * x * abs(x) - * extra precision is obtained by weighting against - * itself squared. - */ - - temp[0] = get_temp_reg(cs); - const_sin[0] = emit_const4fv(cs, SinCosConsts[0]); - const_sin[1] = emit_const4fv(cs, SinCosConsts[1]); - src[0] = t_scalar_src(cs, fpi->SrcReg[0]); - - /* do range reduction */ - - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, - swizzle(keep(src[0]), X, X, X, X), - swizzle(const_sin[1], Z, Z, Z, Z), - pfs_half, 0); - - emit_arith(cs, PFS_OP_FRC, temp[0], WRITEMASK_X, - swizzle(temp[0], X, X, X, X), - undef, undef, 0); - - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI - negate(swizzle(const_sin[0], Z, Z, Z, Z)), //PI - 0); - - /* SIN */ - - emit_arith(cs, PFS_OP_MAD, temp[0], - WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], - Z, Z, Z, - Z), - const_sin[0], pfs_zero, 0); - - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, - swizzle(temp[0], Y, Y, Y, Y), - absolute(swizzle(temp[0], Z, Z, Z, Z)), - swizzle(temp[0], X, X, X, X), 0); - - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Y, - swizzle(temp[0], X, X, X, X), - absolute(swizzle(temp[0], X, X, X, X)), - negate(swizzle(temp[0], X, X, X, X)), 0); - - emit_arith(cs, PFS_OP_MAD, dest, mask, - swizzle(temp[0], Y, Y, Y, Y), - swizzle(const_sin[0], W, W, W, W), - swizzle(temp[0], X, X, X, X), flags); - - free_temp(cs, temp[0]); - break; - case OPCODE_SLT: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - temp[0] = get_temp_reg(cs); 
- /* temp = src0 - src1 - * dest.c = (temp.c < 0.0) ? 1 : 0 - */ - emit_arith(cs, PFS_OP_MAD, temp[0], mask, - src[0], pfs_one, negate(src[1]), 0); - emit_arith(cs, PFS_OP_CMP, dest, mask, - pfs_zero, pfs_one, temp[0], 0); - free_temp(cs, temp[0]); - break; - case OPCODE_SUB: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - emit_arith(cs, PFS_OP_MAD, dest, mask, - src[0], pfs_one, negate(src[1]), flags); - break; - case OPCODE_TEX: - emit_tex(cs, fpi, R300_TEX_OP_LD); - break; - case OPCODE_TXB: - emit_tex(cs, fpi, R300_TEX_OP_TXB); - break; - case OPCODE_TXP: - emit_tex(cs, fpi, R300_TEX_OP_TXP); - break; - case OPCODE_XPD:{ - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - temp[0] = get_temp_reg(cs); - /* temp = src0.zxy * src1.yzx */ - emit_arith(cs, PFS_OP_MAD, temp[0], - WRITEMASK_XYZ, swizzle(keep(src[0]), - Z, X, Y, W), - swizzle(keep(src[1]), Y, Z, X, W), - pfs_zero, 0); - /* dest.xyz = src0.yzx * src1.zxy - temp - * dest.w = undefined - * */ - emit_arith(cs, PFS_OP_MAD, dest, - mask & WRITEMASK_XYZ, swizzle(src[0], - Y, Z, - X, W), - swizzle(src[1], Z, X, Y, W), - negate(temp[0]), flags); - /* cleanup */ - free_temp(cs, temp[0]); - break; - } - default: - ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); - break; - } - - if (fp->error) - return GL_FALSE; - - } - - return GL_TRUE; -} - -static void insert_wpos(struct gl_program *prog) -{ static gl_state_index tokens[STATE_LENGTH] = { STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 }; struct prog_instruction *fpi; GLuint window_index; int i = 0; - GLuint tempregi = prog->NumTemporaries; - /* should do something else if no temps left... 
*/ - prog->NumTemporaries++; + GLuint tempregi = radeonCompilerAllocateTemporary(&compiler->compiler); - fpi = _mesa_alloc_instructions(prog->NumInstructions + 3); - _mesa_init_instructions(fpi, prog->NumInstructions + 3); + fpi = radeonClauseInsertInstructions(&compiler->compiler, &compiler->compiler.Clauses[0], 0, 3); /* perspective divide */ fpi[i].Opcode = OPCODE_RCP; @@ -2121,7 +118,7 @@ static void insert_wpos(struct gl_program *prog) i++; /* viewport transformation */ - window_index = _mesa_add_state_reference(prog->Parameters, tokens); + window_index = _mesa_add_state_reference(compiler->fp->mesa_program.Base.Parameters, tokens); fpi[i].Opcode = OPCODE_MAD; @@ -2146,193 +143,42 @@ static void insert_wpos(struct gl_program *prog) MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); i++; - _mesa_copy_instructions(&fpi[i], prog->Instructions, - prog->NumInstructions); - - free(prog->Instructions); - - prog->Instructions = fpi; - - prog->NumInstructions += i; - fpi = &prog->Instructions[prog->NumInstructions - 1]; - - assert(fpi->Opcode == OPCODE_END); - - for (fpi = &prog->Instructions[3]; fpi->Opcode != OPCODE_END; fpi++) { - for (i = 0; i < 3; i++) - if (fpi->SrcReg[i].File == PROGRAM_INPUT && - fpi->SrcReg[i].Index == FRAG_ATTRIB_WPOS) { - fpi->SrcReg[i].File = PROGRAM_TEMPORARY; - fpi->SrcReg[i].Index = tempregi; - } - } -} - -/* - Init structures - * - Determine what hwregs each input corresponds to - */ -static void init_program(struct r300_pfs_compile_state *cs) -{ - COMPILE_STATE; - struct gl_fragment_program *mp = &fp->mesa_program; - struct prog_instruction *fpi; - GLuint InputsRead = mp->Base.InputsRead; - GLuint temps_used = 0; /* for fp->temps[] */ - int i, j; - - /* New compile, reset tracking data */ - fp->optimization = - driQueryOptioni(&cs->r300->radeon.optionCache, "fp_optimization"); - fp->translated = GL_FALSE; - fp->error = GL_FALSE; - fp->WritesDepth = GL_FALSE; - code->tex.length = 0; - code->cur_node = 0; - 
code->first_node_has_tex = 0; - code->const_nr = 0; - code->max_temp_idx = 0; - code->node[0].alu_end = -1; - code->node[0].tex_end = -1; - - for (i = 0; i < PFS_MAX_ALU_INST; i++) { - for (j = 0; j < 3; j++) { - cs->slot[i].vsrc[j] = SRC_CONST; - cs->slot[i].ssrc[j] = SRC_CONST; - } - } - - /* Work out what temps the Mesa inputs correspond to, this must match - * what setup_rs_unit does, which shouldn't be a problem as rs_unit - * configures itself based on the fragprog's InputsRead - * - * NOTE: this depends on get_hw_temp() allocating registers in order, - * starting from register 0. - */ - - /* Texcoords come first */ - for (i = 0; i < cs->r300->radeon.glCtx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) { - cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0; - cs->inputs[FRAG_ATTRIB_TEX0 + i].reg = - get_hw_temp(cs, 0); - } - } - InputsRead &= ~FRAG_BITS_TEX_ANY; - - /* fragment position treated as a texcoord */ - if (InputsRead & FRAG_BIT_WPOS) { - cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; - cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(cs, 0); - insert_wpos(&mp->Base); - } - InputsRead &= ~FRAG_BIT_WPOS; - - /* Then primary colour */ - if (InputsRead & FRAG_BIT_COL0) { - cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(cs, 0); - } - InputsRead &= ~FRAG_BIT_COL0; - - /* Secondary color */ - if (InputsRead & FRAG_BIT_COL1) { - cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(cs, 0); - } - InputsRead &= ~FRAG_BIT_COL1; - - /* Anything else */ - if (InputsRead) { - WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead); - /* force read from hwreg 0 for now */ - for (i = 0; i < 32; i++) - if (InputsRead & (1 << i)) - cs->inputs[i].reg = 0; - } - - /* Pre-parse the mesa program, grabbing refcounts on input/temp regs. 
- * That way, we can free up the reg when it's no longer needed - */ - if (!mp->Base.Instructions) { - ERROR("No instructions found in program\n"); - return; - } - - for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { - int idx; - - for (i = 0; i < 3; i++) { - idx = fpi->SrcReg[i].Index; - switch (fpi->SrcReg[i].File) { - case PROGRAM_TEMPORARY: - if (!(temps_used & (1 << idx))) { - cs->temps[idx].reg = -1; - cs->temps[idx].refcount = 1; - temps_used |= (1 << idx); - } else - cs->temps[idx].refcount++; - break; - case PROGRAM_INPUT: - cs->inputs[idx].refcount++; - break; - default: - break; + for (; i < compiler->compiler.Clauses[0].NumInstructions; ++i) { + int reg; + for (reg = 0; reg < 3; reg++) { + if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && + fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { + fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[reg].Index = tempregi; } } - - idx = fpi->DstReg.Index; - if (fpi->DstReg.File == PROGRAM_TEMPORARY) { - if (!(temps_used & (1 << idx))) { - cs->temps[idx].reg = -1; - cs->temps[idx].refcount = 1; - temps_used |= (1 << idx); - } else - cs->temps[idx].refcount++; - } } - cs->temp_in_use = temps_used; } -static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp) -{ - struct gl_fragment_program *mp = &fp->mesa_program; - - /* Ask Mesa nicely to fill in ParameterValues for us */ - if (mp->Base.Parameters) - _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters); -} void r300TranslateFragmentShader(r300ContextPtr r300, struct r300_fragment_program *fp) { if (!fp->translated) { - struct r300_pfs_compile_state cs; + struct r300_fragment_program_compiler compiler; - _mesa_memset(&cs, 0, sizeof(cs)); - cs.r300 = r300; - cs.fp = fp; - init_program(&cs); + compiler.r300 = r300; + compiler.fp = fp; + compiler.code = &fp->code; - if (parse_program(&cs) == GL_FALSE) { - dump_program(fp, &fp->code); - return; - } + radeonCompilerInit(&compiler.compiler, 
r300->radeon.glCtx, &fp->mesa_program.Base); + + insert_WPOS_trailer(&compiler); + + if (!r300FragmentProgramEmit(&compiler)) + fp->error = GL_TRUE; - /* Finish off */ - fp->code.node[fp->code.cur_node].alu_end = - cs.nrslots - fp->code.node[fp->code.cur_node].alu_offset - 1; - if (fp->code.node[fp->code.cur_node].tex_end < 0) - fp->code.node[fp->code.cur_node].tex_end = 0; - fp->code.alu_offset = 0; - fp->code.alu_end = cs.nrslots - 1; - fp->code.tex_offset = 0; - fp->code.tex_end = fp->code.tex.length ? fp->code.tex.length - 1 : 0; - assert(fp->code.node[fp->code.cur_node].alu_end >= 0); - assert(fp->code.alu_end >= 0); + radeonCompilerCleanup(&compiler.compiler); - fp->translated = GL_TRUE; - if (RADEON_DEBUG & DEBUG_PIXEL) - dump_program(fp, &fp->code); + if (!fp->error) + fp->translated = GL_TRUE; + if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL)) + r300FragmentProgramDump(fp, &fp->code); r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); } @@ -2340,8 +186,9 @@ void r300TranslateFragmentShader(r300ContextPtr r300, } /* just some random things... */ -static void dump_program(struct r300_fragment_program *fp, - struct r300_fragment_program_code *code) +void r300FragmentProgramDump( + struct r300_fragment_program *fp, + struct r300_fragment_program_code *code) { int n, i, j; static int pc = 0; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h index 561d7c6423..8c836c4bda 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog.h @@ -40,6 +40,7 @@ #include "shader/prog_instruction.h" #include "r300_context.h" +#include "radeon_program.h" /* supported hw opcodes */ #define PFS_OP_MAD 0 @@ -136,4 +137,23 @@ struct r300_fragment_program; extern void r300TranslateFragmentShader(r300ContextPtr r300, struct r300_fragment_program *fp); + +/** + * Used internally by the r300 fragment program code to store compile-time + * only data. 
+ */ +struct r300_fragment_program_compiler { + r300ContextPtr r300; + struct r300_fragment_program *fp; + struct r300_fragment_program_code *code; + struct radeon_compiler compiler; +}; + +extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler); + + +extern void r300FragmentProgramDump( + struct r300_fragment_program *fp, + struct r300_fragment_program_code *code); + #endif diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c new file mode 100644 index 0000000000..fe8a347a62 --- /dev/null +++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c @@ -0,0 +1,2232 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * \file + * + * Emit the r300_fragment_program_code that can be understood by the hardware. 
+ * Input is a pre-transformed radeon_program. + * + * \author Ben Skeggs + * + * \author Jerome Glisse + * + * \todo FogOption + * + * \todo Verify results of opcodes for accuracy, I've only checked them in + * specific cases. + */ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "shader/prog_instruction.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + +#include "r300_context.h" +#include "r300_fragprog.h" +#include "r300_reg.h" +#include "r300_state.h" + +/* Mapping Mesa registers to R300 temporaries */ +struct reg_acc { + int reg; /* Assigned hw temp */ + unsigned int refcount; /* Number of uses by mesa program */ +}; + +/** + * Describe the current lifetime information for an R300 temporary + */ +struct reg_lifetime { + /* Index of the first slot where this register is free in the sense + that it can be used as a new destination register. + This is -1 if the register has been assigned to a Mesa register + and the last access to the register has not yet been emitted */ + int free; + + /* Index of the first slot where this register is currently reserved. + This is used to stop e.g. a scalar operation from being moved + before the allocation time of a register that was first allocated + for a vector operation. */ + int reserved; + + /* Index of the first slot in which the register can be used as a + source without losing the value that is written by the last + emitted instruction that writes to the register */ + int vector_valid; + int scalar_valid; + + /* Index to the slot where the register was last read. + This is also the first slot in which the register may be written again */ + int vector_lastread; + int scalar_lastread; +}; + +/** + * Store usage information about an ALU instruction slot during the + * compilation of a fragment program. 
+ */ +#define SLOT_SRC_VECTOR (1<<0) +#define SLOT_SRC_SCALAR (1<<3) +#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR) +#define SLOT_OP_VECTOR (1<<16) +#define SLOT_OP_SCALAR (1<<17) +#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR) + +struct r300_pfs_compile_slot { + /* Bitmask indicating which parts of the slot are used, using SLOT_ constants + defined above */ + unsigned int used; + + /* Selected sources */ + int vsrc[3]; + int ssrc[3]; +}; + +/** + * Store information during compilation of fragment programs. + */ +struct r300_pfs_compile_state { + struct r300_fragment_program_compiler *compiler; + + int nrslots; /* number of ALU slots used so far */ + + /* Track which (parts of) slots are already filled with instructions */ + struct r300_pfs_compile_slot slot[PFS_MAX_ALU_INST]; + + /* Track the validity of R300 temporaries */ + struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS]; + + /* Used to map Mesa's inputs/temps onto hardware temps */ + int temp_in_use; + struct reg_acc temps[PFS_NUM_TEMP_REGS]; + struct reg_acc inputs[32]; /* don't actually need 32... */ + + /* Track usage of hardware temps, for register allocation, + * indirection detection, etc. */ + GLuint used_in_node; + GLuint dest_in_node; +}; + + +/* + * Usefull macros and values + */ +#define ERROR(fmt, args...) 
do { \ + fprintf(stderr, "%s::%s(): " fmt "\n", \ + __FILE__, __FUNCTION__, ##args); \ + fp->error = GL_TRUE; \ + } while(0) + +#define PFS_INVAL 0xFFFFFFFF +#define COMPILE_STATE \ + struct r300_fragment_program *fp = cs->compiler->fp; \ + struct r300_fragment_program_code *code = cs->compiler->code; \ + (void)code; (void)fp + +#define SWIZZLE_XYZ 0 +#define SWIZZLE_XXX 1 +#define SWIZZLE_YYY 2 +#define SWIZZLE_ZZZ 3 +#define SWIZZLE_WWW 4 +#define SWIZZLE_YZX 5 +#define SWIZZLE_ZXY 6 +#define SWIZZLE_WZY 7 +#define SWIZZLE_111 8 +#define SWIZZLE_000 9 +#define SWIZZLE_HHH 10 + +#define swizzle(r, x, y, z, w) do_swizzle(cs, r, \ + ((SWIZZLE_##x<<0)| \ + (SWIZZLE_##y<<3)| \ + (SWIZZLE_##z<<6)| \ + (SWIZZLE_##w<<9)), \ + 0) + +#define REG_TYPE_INPUT 0 +#define REG_TYPE_OUTPUT 1 +#define REG_TYPE_TEMP 2 +#define REG_TYPE_CONST 3 + +#define REG_TYPE_SHIFT 0 +#define REG_INDEX_SHIFT 2 +#define REG_VSWZ_SHIFT 8 +#define REG_SSWZ_SHIFT 13 +#define REG_NEGV_SHIFT 18 +#define REG_NEGS_SHIFT 19 +#define REG_ABS_SHIFT 20 +#define REG_NO_USE_SHIFT 21 // Hack for refcounting +#define REG_VALID_SHIFT 22 // Does the register contain a defined value? +#define REG_BUILTIN_SHIFT 23 // Is it a builtin (like all zero/all one)? 
+ +#define REG_TYPE_MASK (0x03 << REG_TYPE_SHIFT) +#define REG_INDEX_MASK (0x3F << REG_INDEX_SHIFT) +#define REG_VSWZ_MASK (0x1F << REG_VSWZ_SHIFT) +#define REG_SSWZ_MASK (0x1F << REG_SSWZ_SHIFT) +#define REG_NEGV_MASK (0x01 << REG_NEGV_SHIFT) +#define REG_NEGS_MASK (0x01 << REG_NEGS_SHIFT) +#define REG_ABS_MASK (0x01 << REG_ABS_SHIFT) +#define REG_NO_USE_MASK (0x01 << REG_NO_USE_SHIFT) +#define REG_VALID_MASK (0x01 << REG_VALID_SHIFT) +#define REG_BUILTIN_MASK (0x01 << REG_BUILTIN_SHIFT) + +#define REG(type, index, vswz, sswz, nouse, valid, builtin) \ + (((type << REG_TYPE_SHIFT) & REG_TYPE_MASK) | \ + ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK) | \ + ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK) | \ + ((valid << REG_VALID_SHIFT) & REG_VALID_MASK) | \ + ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK) | \ + ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK) | \ + ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) +#define REG_GET_TYPE(reg) \ + ((reg & REG_TYPE_MASK) >> REG_TYPE_SHIFT) +#define REG_GET_INDEX(reg) \ + ((reg & REG_INDEX_MASK) >> REG_INDEX_SHIFT) +#define REG_GET_VSWZ(reg) \ + ((reg & REG_VSWZ_MASK) >> REG_VSWZ_SHIFT) +#define REG_GET_SSWZ(reg) \ + ((reg & REG_SSWZ_MASK) >> REG_SSWZ_SHIFT) +#define REG_GET_NO_USE(reg) \ + ((reg & REG_NO_USE_MASK) >> REG_NO_USE_SHIFT) +#define REG_GET_VALID(reg) \ + ((reg & REG_VALID_MASK) >> REG_VALID_SHIFT) +#define REG_GET_BUILTIN(reg) \ + ((reg & REG_BUILTIN_MASK) >> REG_BUILTIN_SHIFT) +#define REG_SET_TYPE(reg, type) \ + reg = ((reg & ~REG_TYPE_MASK) | \ + ((type << REG_TYPE_SHIFT) & REG_TYPE_MASK)) +#define REG_SET_INDEX(reg, index) \ + reg = ((reg & ~REG_INDEX_MASK) | \ + ((index << REG_INDEX_SHIFT) & REG_INDEX_MASK)) +#define REG_SET_VSWZ(reg, vswz) \ + reg = ((reg & ~REG_VSWZ_MASK) | \ + ((vswz << REG_VSWZ_SHIFT) & REG_VSWZ_MASK)) +#define REG_SET_SSWZ(reg, sswz) \ + reg = ((reg & ~REG_SSWZ_MASK) | \ + ((sswz << REG_SSWZ_SHIFT) & REG_SSWZ_MASK)) +#define REG_SET_NO_USE(reg, nouse) \ + reg = ((reg & ~REG_NO_USE_MASK) | 
\ + ((nouse << REG_NO_USE_SHIFT) & REG_NO_USE_MASK)) +#define REG_SET_VALID(reg, valid) \ + reg = ((reg & ~REG_VALID_MASK) | \ + ((valid << REG_VALID_SHIFT) & REG_VALID_MASK)) +#define REG_SET_BUILTIN(reg, builtin) \ + reg = ((reg & ~REG_BUILTIN_MASK) | \ + ((builtin << REG_BUILTIN_SHIFT) & REG_BUILTIN_MASK)) +#define REG_ABS(reg) \ + reg = (reg | REG_ABS_MASK) +#define REG_NEGV(reg) \ + reg = (reg | REG_NEGV_MASK) +#define REG_NEGS(reg) \ + reg = (reg | REG_NEGS_MASK) + +#define NOP_INST0 ( \ + (R300_ALU_OUTC_MAD) | \ + (R300_ALU_ARGC_ZERO << R300_ALU_ARG0C_SHIFT) | \ + (R300_ALU_ARGC_ZERO << R300_ALU_ARG1C_SHIFT) | \ + (R300_ALU_ARGC_ZERO << R300_ALU_ARG2C_SHIFT)) +#define NOP_INST1 ( \ + ((0 | SRC_CONST) << R300_ALU_SRC0C_SHIFT) | \ + ((0 | SRC_CONST) << R300_ALU_SRC1C_SHIFT) | \ + ((0 | SRC_CONST) << R300_ALU_SRC2C_SHIFT)) +#define NOP_INST2 ( \ + (R300_ALU_OUTA_MAD) | \ + (R300_ALU_ARGA_ZERO << R300_ALU_ARG0A_SHIFT) | \ + (R300_ALU_ARGA_ZERO << R300_ALU_ARG1A_SHIFT) | \ + (R300_ALU_ARGA_ZERO << R300_ALU_ARG2A_SHIFT)) +#define NOP_INST3 ( \ + ((0 | SRC_CONST) << R300_ALU_SRC0A_SHIFT) | \ + ((0 | SRC_CONST) << R300_ALU_SRC1A_SHIFT) | \ + ((0 | SRC_CONST) << R300_ALU_SRC2A_SHIFT)) + + +/* + * Datas structures for fragment program generation + */ + +/* description of r300 native hw instructions */ +static const struct { + const char *name; + int argc; + int v_op; + int s_op; +} r300_fpop[] = { + /* *INDENT-OFF* */ + {"MAD", 3, R300_ALU_OUTC_MAD, R300_ALU_OUTA_MAD}, + {"DP3", 2, R300_ALU_OUTC_DP3, R300_ALU_OUTA_DP4}, + {"DP4", 2, R300_ALU_OUTC_DP4, R300_ALU_OUTA_DP4}, + {"MIN", 2, R300_ALU_OUTC_MIN, R300_ALU_OUTA_MIN}, + {"MAX", 2, R300_ALU_OUTC_MAX, R300_ALU_OUTA_MAX}, + {"CMP", 3, R300_ALU_OUTC_CMP, R300_ALU_OUTA_CMP}, + {"FRC", 1, R300_ALU_OUTC_FRC, R300_ALU_OUTA_FRC}, + {"EX2", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_EX2}, + {"LG2", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_LG2}, + {"RCP", 1, R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_RCP}, + {"RSQ", 1, 
R300_ALU_OUTC_REPL_ALPHA, R300_ALU_OUTA_RSQ}, + {"REPL_ALPHA", 1, R300_ALU_OUTC_REPL_ALPHA, PFS_INVAL}, + {"CMPH", 3, R300_ALU_OUTC_CMPH, PFS_INVAL}, + /* *INDENT-ON* */ +}; + +/* vector swizzles r300 can support natively, with a couple of + * cases we handle specially + * + * REG_VSWZ/REG_SSWZ is an index into this table + */ + +/* mapping from SWIZZLE_* to r300 native values for scalar insns */ +#define SWIZZLE_HALF 6 + +#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, \ + SWIZZLE_##y, \ + SWIZZLE_##z, \ + SWIZZLE_ZERO)) +/* native swizzles */ +static const struct r300_pfs_swizzle { + GLuint hash; /* swizzle value this matches */ + GLuint base; /* base value for hw swizzle */ + GLuint stride; /* difference in base between arg0/1/2 */ + GLuint flags; +} v_swiz[] = { + /* *INDENT-OFF* */ + {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1, SLOT_SRC_SCALAR}, + {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1, SLOT_SRC_VECTOR}, + {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1, SLOT_SRC_BOTH}, + {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0, 0}, + {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0, 0}, + {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0, 0}, + {PFS_INVAL, 0, 0, 0}, + /* *INDENT-ON* */ +}; + +/* used during matching of non-native swizzles */ +#define SWZ_X_MASK (7 << 0) +#define SWZ_Y_MASK (7 << 3) +#define SWZ_Z_MASK (7 << 6) +#define SWZ_W_MASK (7 << 9) +static const struct { + GLuint hash; /* used to mask matching swizzle components */ + int mask; /* actual outmask */ + int count; /* count of components matched */ +} s_mask[] = { + /* *INDENT-OFF* */ + {SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK, 1 | 2 | 4, 3}, 
+ {SWZ_X_MASK | SWZ_Y_MASK, 1 | 2, 2}, + {SWZ_X_MASK | SWZ_Z_MASK, 1 | 4, 2}, + {SWZ_Y_MASK | SWZ_Z_MASK, 2 | 4, 2}, + {SWZ_X_MASK, 1, 1}, + {SWZ_Y_MASK, 2, 1}, + {SWZ_Z_MASK, 4, 1}, + {PFS_INVAL, PFS_INVAL, PFS_INVAL} + /* *INDENT-ON* */ +}; + +static const struct { + int base; /* hw value of swizzle */ + int stride; /* difference between SRC0/1/2 */ + GLuint flags; +} s_swiz[] = { + /* *INDENT-OFF* */ + {R300_ALU_ARGA_SRC0C_X, 3, SLOT_SRC_VECTOR}, + {R300_ALU_ARGA_SRC0C_Y, 3, SLOT_SRC_VECTOR}, + {R300_ALU_ARGA_SRC0C_Z, 3, SLOT_SRC_VECTOR}, + {R300_ALU_ARGA_SRC0A, 1, SLOT_SRC_SCALAR}, + {R300_ALU_ARGA_ZERO, 0, 0}, + {R300_ALU_ARGA_ONE, 0, 0}, + {R300_ALU_ARGA_HALF, 0, 0} + /* *INDENT-ON* */ +}; + +/* boiler-plate reg, for convenience */ +static const GLuint undef = REG(REG_TYPE_TEMP, + 0, + SWIZZLE_XYZ, + SWIZZLE_W, + GL_FALSE, + GL_FALSE, + GL_FALSE); + +/* constant one source */ +static const GLuint pfs_one = REG(REG_TYPE_CONST, + 0, + SWIZZLE_111, + SWIZZLE_ONE, + GL_FALSE, + GL_TRUE, + GL_TRUE); + +/* constant half source */ +static const GLuint pfs_half = REG(REG_TYPE_CONST, + 0, + SWIZZLE_HHH, + SWIZZLE_HALF, + GL_FALSE, + GL_TRUE, + GL_TRUE); + +/* constant zero source */ +static const GLuint pfs_zero = REG(REG_TYPE_CONST, + 0, + SWIZZLE_000, + SWIZZLE_ZERO, + GL_FALSE, + GL_TRUE, + GL_TRUE); + +/* + * Common functions prototypes + */ +static void emit_arith(struct r300_pfs_compile_state *cs, int op, + GLuint dest, int mask, + GLuint src0, GLuint src1, GLuint src2, int flags); + +/** + * Get an R300 temporary that can be written to in the given slot. 
+ */ +static int get_hw_temp(struct r300_pfs_compile_state *cs, int slot) +{ + COMPILE_STATE; + int r; + + for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) { + if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= slot) + break; + } + + if (r >= PFS_NUM_TEMP_REGS) { + ERROR("Out of hardware temps\n"); + return 0; + } + // Reserved is used to avoid the following scenario: + // R300 temporary X is first assigned to Mesa temporary Y during vector ops + // R300 temporary X is then assigned to Mesa temporary Z for further vector ops + // Then scalar ops on Mesa temporary Z are emitted and move back in time + // to overwrite the value of temporary Y. + // End scenario. + cs->hwtemps[r].reserved = cs->hwtemps[r].free; + cs->hwtemps[r].free = -1; + + // Reset to some value that won't mess things up when the user + // tries to read from a temporary that hasn't been assigned a value yet. + // In the normal case, vector_valid and scalar_valid should be set to + // a sane value by the first emit that writes to this temporary. + cs->hwtemps[r].vector_valid = 0; + cs->hwtemps[r].scalar_valid = 0; + + if (r > code->max_temp_idx) + code->max_temp_idx = r; + + return r; +} + +/** + * Get an R300 temporary that will act as a TEX destination register. + */ +static int get_hw_temp_tex(struct r300_pfs_compile_state *cs) +{ + COMPILE_STATE; + int r; + + for (r = 0; r < PFS_NUM_TEMP_REGS; ++r) { + if (cs->used_in_node & (1 << r)) + continue; + + // Note: Be very careful here + if (cs->hwtemps[r].free >= 0 && cs->hwtemps[r].free <= 0) + break; + } + + if (r >= PFS_NUM_TEMP_REGS) + return get_hw_temp(cs, 0); /* Will cause an indirection */ + + cs->hwtemps[r].reserved = cs->hwtemps[r].free; + cs->hwtemps[r].free = -1; + + // Reset to some value that won't mess things up when the user + // tries to read from a temporary that hasn't been assigned a value yet. + // In the normal case, vector_valid and scalar_valid should be set to + // a sane value by the first emit that writes to this temporary. 
+ cs->hwtemps[r].vector_valid = cs->nrslots; + cs->hwtemps[r].scalar_valid = cs->nrslots; + + if (r > code->max_temp_idx) + code->max_temp_idx = r; + + return r; +} + +/** + * Mark the given hardware register as free. + */ +static void free_hw_temp(struct r300_pfs_compile_state *cs, int idx) +{ + // Be very careful here. Consider sequences like + // MAD r0, r1,r2,r3 + // TEX r4, ... + // The TEX instruction may be moved in front of the MAD instruction + // due to the way nodes work. We don't want to alias r1 and r4 in + // this case. + // I'm certain the register allocation could be further sanitized, + // but it's tricky because of stuff that can happen inside emit_tex + // and emit_arith. + cs->hwtemps[idx].free = cs->nrslots + 1; +} + +/** + * Create a new Mesa temporary register. + */ +static GLuint get_temp_reg(struct r300_pfs_compile_state *cs) +{ + COMPILE_STATE; + GLuint r = undef; + GLuint index; + + index = ffs(~cs->temp_in_use); + if (!index) { + ERROR("Out of program temps\n"); + return r; + } + + cs->temp_in_use |= (1 << --index); + cs->temps[index].refcount = 0xFFFFFFFF; + cs->temps[index].reg = -1; + + REG_SET_TYPE(r, REG_TYPE_TEMP); + REG_SET_INDEX(r, index); + REG_SET_VALID(r, GL_TRUE); + return r; +} + +/** + * Create a new Mesa temporary register that will act as the destination + * register for a texture read. + */ +static GLuint get_temp_reg_tex(struct r300_pfs_compile_state *cs) +{ + COMPILE_STATE; + GLuint r = undef; + GLuint index; + + index = ffs(~cs->temp_in_use); + if (!index) { + ERROR("Out of program temps\n"); + return r; + } + + cs->temp_in_use |= (1 << --index); + cs->temps[index].refcount = 0xFFFFFFFF; + cs->temps[index].reg = get_hw_temp_tex(cs); + + REG_SET_TYPE(r, REG_TYPE_TEMP); + REG_SET_INDEX(r, index); + REG_SET_VALID(r, GL_TRUE); + return r; +} + +/** + * Free a Mesa temporary and the associated R300 temporary. 
+ */ +static void free_temp(struct r300_pfs_compile_state *cs, GLuint r) +{ + GLuint index = REG_GET_INDEX(r); + + if (!(cs->temp_in_use & (1 << index))) + return; + + if (REG_GET_TYPE(r) == REG_TYPE_TEMP) { + free_hw_temp(cs, cs->temps[index].reg); + cs->temps[index].reg = -1; + cs->temp_in_use &= ~(1 << index); + } else if (REG_GET_TYPE(r) == REG_TYPE_INPUT) { + free_hw_temp(cs, cs->inputs[index].reg); + cs->inputs[index].reg = -1; + } +} + +/** + * Emit a hardware constant/parameter. + * + * \p cp Stable pointer to an array of 4 floats. + * The pointer must be stable in the sense that it remains to be valid + * and hold the contents of the constant/parameter throughout the lifetime + * of the fragment program (actually, up until the next time the fragment + * program is translated). + */ +static GLuint emit_const4fv(struct r300_pfs_compile_state *cs, + const GLfloat * cp) +{ + COMPILE_STATE; + GLuint reg = undef; + int index; + + for (index = 0; index < code->const_nr; ++index) { + if (code->constant[index] == cp) + break; + } + + if (index >= code->const_nr) { + if (index >= PFS_NUM_CONST_REGS) { + ERROR("Out of hw constants!\n"); + return reg; + } + + code->const_nr++; + code->constant[index] = cp; + } + + REG_SET_TYPE(reg, REG_TYPE_CONST); + REG_SET_INDEX(reg, index); + REG_SET_VALID(reg, GL_TRUE); + return reg; +} + +static inline GLuint negate(GLuint r) +{ + REG_NEGS(r); + REG_NEGV(r); + return r; +} + +/* Hack, to prevent clobbering sources used multiple times when + * emulating non-native instructions + */ +static inline GLuint keep(GLuint r) +{ + REG_SET_NO_USE(r, GL_TRUE); + return r; +} + +static inline GLuint absolute(GLuint r) +{ + REG_ABS(r); + return r; +} + +static int swz_native(struct r300_pfs_compile_state *cs, + GLuint src, GLuint * r, GLuint arbneg) +{ + COMPILE_STATE; + + /* Native swizzle, handle negation */ + src = (src & ~REG_NEGS_MASK) | (((arbneg >> 3) & 1) << REG_NEGS_SHIFT); + + if ((arbneg & 0x7) == 0x0) { + src = src & 
~REG_NEGV_MASK; + *r = src; + } else if ((arbneg & 0x7) == 0x7) { + src |= REG_NEGV_MASK; + *r = src; + } else { + if (!REG_GET_VALID(*r)) + *r = get_temp_reg(cs); + src |= REG_NEGV_MASK; + emit_arith(cs, + PFS_OP_MAD, + *r, arbneg & 0x7, keep(src), pfs_one, pfs_zero, 0); + src = src & ~REG_NEGV_MASK; + emit_arith(cs, + PFS_OP_MAD, + *r, + (arbneg ^ 0x7) | WRITEMASK_W, + src, pfs_one, pfs_zero, 0); + } + + return 3; +} + +static int swz_emit_partial(struct r300_pfs_compile_state *cs, + GLuint src, + GLuint * r, int mask, int mc, GLuint arbneg) +{ + COMPILE_STATE; + GLuint tmp; + GLuint wmask = 0; + + if (!REG_GET_VALID(*r)) + *r = get_temp_reg(cs); + + /* A partial match, VSWZ/mask define what parts of the + * desired swizzle we match + */ + if (mc + s_mask[mask].count == 3) { + wmask = WRITEMASK_W; + src |= ((arbneg >> 3) & 1) << REG_NEGS_SHIFT; + } + + tmp = arbneg & s_mask[mask].mask; + if (tmp) { + tmp = tmp ^ s_mask[mask].mask; + if (tmp) { + emit_arith(cs, + PFS_OP_MAD, + *r, + arbneg & s_mask[mask].mask, + keep(src) | REG_NEGV_MASK, + pfs_one, pfs_zero, 0); + if (!wmask) { + REG_SET_NO_USE(src, GL_TRUE); + } else { + REG_SET_NO_USE(src, GL_FALSE); + } + emit_arith(cs, + PFS_OP_MAD, + *r, tmp | wmask, src, pfs_one, pfs_zero, 0); + } else { + if (!wmask) { + REG_SET_NO_USE(src, GL_TRUE); + } else { + REG_SET_NO_USE(src, GL_FALSE); + } + emit_arith(cs, + PFS_OP_MAD, + *r, + (arbneg & s_mask[mask].mask) | wmask, + src | REG_NEGV_MASK, pfs_one, pfs_zero, 0); + } + } else { + if (!wmask) { + REG_SET_NO_USE(src, GL_TRUE); + } else { + REG_SET_NO_USE(src, GL_FALSE); + } + emit_arith(cs, PFS_OP_MAD, + *r, + s_mask[mask].mask | wmask, + src, pfs_one, pfs_zero, 0); + } + + return s_mask[mask].count; +} + +static GLuint do_swizzle(struct r300_pfs_compile_state *cs, + GLuint src, GLuint arbswz, GLuint arbneg) +{ + COMPILE_STATE; + GLuint r = undef; + GLuint vswz; + int c_mask = 0; + int v_match = 0; + + /* If swizzling from something without an XYZW native swizzle, + * 
emit result to a temp, and do new swizzle from the temp. + */ +#if 0 + if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) { + GLuint temp = get_temp_reg(fp); + emit_arith(fp, + PFS_OP_MAD, + temp, WRITEMASK_XYZW, src, pfs_one, pfs_zero, 0); + src = temp; + } +#endif + + if (REG_GET_VSWZ(src) != SWIZZLE_XYZ || REG_GET_SSWZ(src) != SWIZZLE_W) { + GLuint vsrcswz = + (v_swiz[REG_GET_VSWZ(src)]. + hash & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK)) | + REG_GET_SSWZ(src) << 9; + GLint i; + + GLuint newswz = 0; + GLuint offset; + for (i = 0; i < 4; ++i) { + offset = GET_SWZ(arbswz, i); + + newswz |= + (offset <= 3) ? GET_SWZ(vsrcswz, + offset) << i * + 3 : offset << i * 3; + } + + arbswz = newswz & (SWZ_X_MASK | SWZ_Y_MASK | SWZ_Z_MASK); + REG_SET_SSWZ(src, GET_SWZ(newswz, 3)); + } else { + /* set scalar swizzling */ + REG_SET_SSWZ(src, GET_SWZ(arbswz, 3)); + + } + do { + vswz = REG_GET_VSWZ(src); + do { + int chash; + + REG_SET_VSWZ(src, vswz); + chash = v_swiz[REG_GET_VSWZ(src)].hash & + s_mask[c_mask].hash; + + if (chash == (arbswz & s_mask[c_mask].hash)) { + if (s_mask[c_mask].count == 3) { + v_match += swz_native(cs, + src, &r, arbneg); + } else { + v_match += swz_emit_partial(cs, + src, + &r, + c_mask, + v_match, + arbneg); + } + + if (v_match == 3) + return r; + + /* Fill with something invalid.. all 0's was + * wrong before, matched SWIZZLE_X. 
So all + * 1's will be okay for now + */ + arbswz |= (PFS_INVAL & s_mask[c_mask].hash); + } + } while (v_swiz[++vswz].hash != PFS_INVAL); + REG_SET_VSWZ(src, SWIZZLE_XYZ); + } while (s_mask[++c_mask].hash != PFS_INVAL); + + ERROR("should NEVER get here\n"); + return r; +} + +static GLuint t_src(struct r300_pfs_compile_state *cs, + struct prog_src_register fpsrc) +{ + COMPILE_STATE; + GLuint r = undef; + + switch (fpsrc.File) { + case PROGRAM_TEMPORARY: + REG_SET_INDEX(r, fpsrc.Index); + REG_SET_VALID(r, GL_TRUE); + REG_SET_TYPE(r, REG_TYPE_TEMP); + break; + case PROGRAM_INPUT: + REG_SET_INDEX(r, fpsrc.Index); + REG_SET_VALID(r, GL_TRUE); + REG_SET_TYPE(r, REG_TYPE_INPUT); + break; + case PROGRAM_LOCAL_PARAM: + r = emit_const4fv(cs, + fp->mesa_program.Base.LocalParams[fpsrc. + Index]); + break; + case PROGRAM_ENV_PARAM: + r = emit_const4fv(cs, + cs->compiler->r300->radeon.glCtx->FragmentProgram.Parameters[fpsrc.Index]); + break; + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + r = emit_const4fv(cs, + fp->mesa_program.Base.Parameters-> + ParameterValues[fpsrc.Index]); + break; + default: + ERROR("unknown SrcReg->File %x\n", fpsrc.File); + return r; + } + + /* no point swizzling ONE/ZERO/HALF constants... 
*/ + if (REG_GET_VSWZ(r) < SWIZZLE_111 || REG_GET_SSWZ(r) < SWIZZLE_ZERO) + r = do_swizzle(cs, r, fpsrc.Swizzle, fpsrc.NegateBase); + return r; +} + +static GLuint t_scalar_src(struct r300_pfs_compile_state *cs, + struct prog_src_register fpsrc) +{ + struct prog_src_register src = fpsrc; + int sc = GET_SWZ(fpsrc.Swizzle, 0); /* X */ + + src.Swizzle = ((sc << 0) | (sc << 3) | (sc << 6) | (sc << 9)); + + return t_src(cs, src); +} + +static GLuint t_dst(struct r300_pfs_compile_state *cs, + struct prog_dst_register dest) +{ + COMPILE_STATE; + GLuint r = undef; + + switch (dest.File) { + case PROGRAM_TEMPORARY: + REG_SET_INDEX(r, dest.Index); + REG_SET_VALID(r, GL_TRUE); + REG_SET_TYPE(r, REG_TYPE_TEMP); + return r; + case PROGRAM_OUTPUT: + REG_SET_TYPE(r, REG_TYPE_OUTPUT); + switch (dest.Index) { + case FRAG_RESULT_COLR: + case FRAG_RESULT_DEPR: + REG_SET_INDEX(r, dest.Index); + REG_SET_VALID(r, GL_TRUE); + return r; + default: + ERROR("Bad DstReg->Index 0x%x\n", dest.Index); + return r; + } + default: + ERROR("Bad DstReg->File 0x%x\n", dest.File); + return r; + } +} + +static int t_hw_src(struct r300_pfs_compile_state *cs, GLuint src, GLboolean tex) +{ + COMPILE_STATE; + int idx; + int index = REG_GET_INDEX(src); + + switch (REG_GET_TYPE(src)) { + case REG_TYPE_TEMP: + /* NOTE: if reg==-1 here, a source is being read that + * hasn't been written to. Undefined results. 
+ */ + if (cs->temps[index].reg == -1) + cs->temps[index].reg = get_hw_temp(cs, cs->nrslots); + + idx = cs->temps[index].reg; + + if (!REG_GET_NO_USE(src) && (--cs->temps[index].refcount == 0)) + free_temp(cs, src); + break; + case REG_TYPE_INPUT: + idx = cs->inputs[index].reg; + + if (!REG_GET_NO_USE(src) && (--cs->inputs[index].refcount == 0)) + free_hw_temp(cs, cs->inputs[index].reg); + break; + case REG_TYPE_CONST: + return (index | SRC_CONST); + default: + ERROR("Invalid type for source reg\n"); + return (0 | SRC_CONST); + } + + if (!tex) + cs->used_in_node |= (1 << idx); + + return idx; +} + +static int t_hw_dst(struct r300_pfs_compile_state *cs, + GLuint dest, GLboolean tex, int slot) +{ + COMPILE_STATE; + int idx; + GLuint index = REG_GET_INDEX(dest); + assert(REG_GET_VALID(dest)); + + switch (REG_GET_TYPE(dest)) { + case REG_TYPE_TEMP: + if (cs->temps[REG_GET_INDEX(dest)].reg == -1) { + if (!tex) { + cs->temps[index].reg = get_hw_temp(cs, slot); + } else { + cs->temps[index].reg = get_hw_temp_tex(cs); + } + } + idx = cs->temps[index].reg; + + if (!REG_GET_NO_USE(dest) && (--cs->temps[index].refcount == 0)) + free_temp(cs, dest); + + cs->dest_in_node |= (1 << idx); + cs->used_in_node |= (1 << idx); + break; + case REG_TYPE_OUTPUT: + switch (index) { + case FRAG_RESULT_COLR: + code->node[code->cur_node].flags |= R300_RGBA_OUT; + break; + case FRAG_RESULT_DEPR: + fp->WritesDepth = GL_TRUE; + code->node[code->cur_node].flags |= R300_W_OUT; + break; + } + return index; + break; + default: + ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); + return 0; + } + + return idx; +} + +static void emit_nop(struct r300_pfs_compile_state *cs) +{ + COMPILE_STATE; + + if (cs->nrslots >= PFS_MAX_ALU_INST) { + ERROR("Out of ALU instruction slots\n"); + return; + } + + code->alu.inst[cs->nrslots].inst0 = NOP_INST0; + code->alu.inst[cs->nrslots].inst1 = NOP_INST1; + code->alu.inst[cs->nrslots].inst2 = NOP_INST2; + code->alu.inst[cs->nrslots].inst3 = NOP_INST3; + 
cs->nrslots++; +} + +static void emit_tex(struct r300_pfs_compile_state *cs, + struct prog_instruction *fpi, int opcode) +{ + COMPILE_STATE; + GLuint coord = t_src(cs, fpi->SrcReg[0]); + GLuint dest = undef, rdest = undef; + GLuint din, uin; + int unit = fpi->TexSrcUnit; + int hwsrc, hwdest; + GLuint tempreg = 0; + + /** + * Hardware uses [0..1]x[0..1] range for rectangle textures + * instead of [0..Width]x[0..Height]. + * Add a scaling instruction. + * + * \todo Refactor this once we have proper rewriting/optimization + * support for programs. + */ + if (opcode != R300_TEX_OP_KIL && fpi->TexSrcTarget == TEXTURE_RECT_INDEX) { + gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, + 0 + }; + int factor_index; + GLuint factorreg; + + tokens[2] = unit; + factor_index = + _mesa_add_state_reference(fp->mesa_program.Base. + Parameters, tokens); + factorreg = + emit_const4fv(cs, + fp->mesa_program.Base.Parameters-> + ParameterValues[factor_index]); + tempreg = keep(get_temp_reg(cs)); + + emit_arith(cs, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, + coord, factorreg, pfs_zero, 0); + + coord = tempreg; + } + + /* Texture operations do not support swizzles etc. in hardware, + * so emit an additional arithmetic operation if necessary. 
+ */ + if (REG_GET_VSWZ(coord) != SWIZZLE_XYZ || + REG_GET_SSWZ(coord) != SWIZZLE_W || + coord & (REG_NEGV_MASK | REG_NEGS_MASK | REG_ABS_MASK)) { + assert(tempreg == 0); + tempreg = keep(get_temp_reg(cs)); + emit_arith(cs, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, + coord, pfs_one, pfs_zero, 0); + coord = tempreg; + } + + /* Ensure correct node indirection */ + uin = cs->used_in_node; + din = cs->dest_in_node; + + /* Resolve source/dest to hardware registers */ + hwsrc = t_hw_src(cs, coord, GL_TRUE); + + if (opcode != R300_TEX_OP_KIL) { + dest = t_dst(cs, fpi->DstReg); + + /* r300 doesn't seem to be able to do TEX->output reg */ + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + rdest = dest; + dest = get_temp_reg_tex(cs); + } else if (fpi->DstReg.WriteMask != WRITEMASK_XYZW) { + /* in case write mask isn't XYZW */ + rdest = dest; + dest = get_temp_reg_tex(cs); + } + hwdest = + t_hw_dst(cs, dest, GL_TRUE, + code->node[code->cur_node].alu_offset); + + /* Use a temp that hasn't been used in this node, rather + * than causing an indirection + */ + if (uin & (1 << hwdest)) { + free_hw_temp(cs, hwdest); + hwdest = get_hw_temp_tex(cs); + cs->temps[REG_GET_INDEX(dest)].reg = hwdest; + } + } else { + hwdest = 0; + unit = 0; + } + + /* Indirection if source has been written in this node, or if the + * dest has been read/written in this node + */ + if ((REG_GET_TYPE(coord) != REG_TYPE_CONST && + (din & (1 << hwsrc))) || (uin & (1 << hwdest))) { + + /* Finish off current node */ + if (code->node[code->cur_node].alu_offset == cs->nrslots) + emit_nop(cs); + + code->node[code->cur_node].alu_end = + cs->nrslots - code->node[code->cur_node].alu_offset - 1; + assert(code->node[code->cur_node].alu_end >= 0); + + if (++code->cur_node >= PFS_MAX_TEX_INDIRECT) { + ERROR("too many levels of texture indirection\n"); + return; + } + + /* Start new node */ + code->node[code->cur_node].tex_offset = code->tex.length; + code->node[code->cur_node].alu_offset = cs->nrslots; + 
code->node[code->cur_node].tex_end = -1; + code->node[code->cur_node].alu_end = -1; + code->node[code->cur_node].flags = 0; + cs->used_in_node = 0; + cs->dest_in_node = 0; + } + + if (code->cur_node == 0) + code->first_node_has_tex = 1; + + code->tex.inst[code->tex.length++] = 0 | (hwsrc << R300_SRC_ADDR_SHIFT) + | (hwdest << R300_DST_ADDR_SHIFT) + | (unit << R300_TEX_ID_SHIFT) + | (opcode << R300_TEX_INST_SHIFT); + + cs->dest_in_node |= (1 << hwdest); + if (REG_GET_TYPE(coord) != REG_TYPE_CONST) + cs->used_in_node |= (1 << hwsrc); + + code->node[code->cur_node].tex_end++; + + /* Copy from temp to output if needed */ + if (REG_GET_VALID(rdest)) { + emit_arith(cs, PFS_OP_MAD, rdest, fpi->DstReg.WriteMask, dest, + pfs_one, pfs_zero, 0); + free_temp(cs, dest); + } + + /* Free temp register */ + if (tempreg != 0) + free_temp(cs, tempreg); +} + +/** + * Returns the first slot where we could possibly allow writing to dest, + * according to register allocation. + */ +static int get_earliest_allowed_write(struct r300_pfs_compile_state *cs, + GLuint dest, int mask) +{ + COMPILE_STATE; + int idx; + int pos; + GLuint index = REG_GET_INDEX(dest); + assert(REG_GET_VALID(dest)); + + switch (REG_GET_TYPE(dest)) { + case REG_TYPE_TEMP: + if (cs->temps[index].reg == -1) + return 0; + + idx = cs->temps[index].reg; + break; + case REG_TYPE_OUTPUT: + return 0; + default: + ERROR("invalid dest reg type %d\n", REG_GET_TYPE(dest)); + return 0; + } + + pos = cs->hwtemps[idx].reserved; + if (mask & WRITEMASK_XYZ) { + if (pos < cs->hwtemps[idx].vector_lastread) + pos = cs->hwtemps[idx].vector_lastread; + } + if (mask & WRITEMASK_W) { + if (pos < cs->hwtemps[idx].scalar_lastread) + pos = cs->hwtemps[idx].scalar_lastread; + } + + return pos; +} + +/** + * Allocates a slot for an ALU instruction that can consist of + * a vertex part or a scalar part or both. 
+ * + * Sources from src (src[0] to src[argc-1]) are added to the slot in the + * appropriate position (vector and/or scalar), and their positions are + * recorded in the srcpos array. + * + * This function emits instruction code for the source fetch and the + * argument selection. It does not emit instruction code for the + * opcode or the destination selection. + * + * @return the index of the slot + */ +static int find_and_prepare_slot(struct r300_pfs_compile_state *cs, + GLboolean emit_vop, + GLboolean emit_sop, + int argc, GLuint * src, GLuint dest, int mask) +{ + COMPILE_STATE; + int hwsrc[3]; + int srcpos[3]; + unsigned int used; + int tempused; + int tempvsrc[3]; + int tempssrc[3]; + int pos; + int regnr; + int i, j; + + // Determine instruction slots, whether sources are required on + // vector or scalar side, and the smallest slot number where + // all source registers are available + used = 0; + if (emit_vop) + used |= SLOT_OP_VECTOR; + if (emit_sop) + used |= SLOT_OP_SCALAR; + + pos = get_earliest_allowed_write(cs, dest, mask); + + if (code->node[code->cur_node].alu_offset > pos) + pos = code->node[code->cur_node].alu_offset; + for (i = 0; i < argc; ++i) { + if (!REG_GET_BUILTIN(src[i])) { + if (emit_vop) + used |= v_swiz[REG_GET_VSWZ(src[i])].flags << i; + if (emit_sop) + used |= s_swiz[REG_GET_SSWZ(src[i])].flags << i; + } + + hwsrc[i] = t_hw_src(cs, src[i], GL_FALSE); /* Note: sideeffects wrt refcounting! 
*/ + regnr = hwsrc[i] & 31; + + if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { + if (used & (SLOT_SRC_VECTOR << i)) { + if (cs->hwtemps[regnr].vector_valid > pos) + pos = cs->hwtemps[regnr].vector_valid; + } + if (used & (SLOT_SRC_SCALAR << i)) { + if (cs->hwtemps[regnr].scalar_valid > pos) + pos = cs->hwtemps[regnr].scalar_valid; + } + } + } + + // Find a slot that fits + for (;; ++pos) { + if (cs->slot[pos].used & used & SLOT_OP_BOTH) + continue; + + if (pos >= cs->nrslots) { + if (cs->nrslots >= PFS_MAX_ALU_INST) { + ERROR("Out of ALU instruction slots\n"); + return -1; + } + + code->alu.inst[pos].inst0 = NOP_INST0; + code->alu.inst[pos].inst1 = NOP_INST1; + code->alu.inst[pos].inst2 = NOP_INST2; + code->alu.inst[pos].inst3 = NOP_INST3; + + cs->nrslots++; + } + // Note: When we need both parts (vector and scalar) of a source, + // we always try to put them into the same position. This makes the + // code easier to read, and it is optimal (i.e. one doesn't gain + // anything by splitting the parts). 
+ // It also avoids headaches with swizzles that access both parts (i.e WXY) + tempused = cs->slot[pos].used; + for (i = 0; i < 3; ++i) { + tempvsrc[i] = cs->slot[pos].vsrc[i]; + tempssrc[i] = cs->slot[pos].ssrc[i]; + } + + for (i = 0; i < argc; ++i) { + int flags = (used >> i) & SLOT_SRC_BOTH; + + if (!flags) { + srcpos[i] = 0; + continue; + } + + for (j = 0; j < 3; ++j) { + if ((tempused >> j) & flags & SLOT_SRC_VECTOR) { + if (tempvsrc[j] != hwsrc[i]) + continue; + } + + if ((tempused >> j) & flags & SLOT_SRC_SCALAR) { + if (tempssrc[j] != hwsrc[i]) + continue; + } + + break; + } + + if (j == 3) + break; + + srcpos[i] = j; + tempused |= flags << j; + if (flags & SLOT_SRC_VECTOR) + tempvsrc[j] = hwsrc[i]; + if (flags & SLOT_SRC_SCALAR) + tempssrc[j] = hwsrc[i]; + } + + if (i == argc) + break; + } + + // Found a slot, reserve it + cs->slot[pos].used = tempused | (used & SLOT_OP_BOTH); + for (i = 0; i < 3; ++i) { + cs->slot[pos].vsrc[i] = tempvsrc[i]; + cs->slot[pos].ssrc[i] = tempssrc[i]; + } + + for (i = 0; i < argc; ++i) { + if (REG_GET_TYPE(src[i]) == REG_TYPE_TEMP) { + int regnr = hwsrc[i] & 31; + + if (used & (SLOT_SRC_VECTOR << i)) { + if (cs->hwtemps[regnr].vector_lastread < pos) + cs->hwtemps[regnr].vector_lastread = + pos; + } + if (used & (SLOT_SRC_SCALAR << i)) { + if (cs->hwtemps[regnr].scalar_lastread < pos) + cs->hwtemps[regnr].scalar_lastread = + pos; + } + } + } + + // Emit the source fetch code + code->alu.inst[pos].inst1 &= ~R300_ALU_SRC_MASK; + code->alu.inst[pos].inst1 |= + ((cs->slot[pos].vsrc[0] << R300_ALU_SRC0C_SHIFT) | + (cs->slot[pos].vsrc[1] << R300_ALU_SRC1C_SHIFT) | + (cs->slot[pos].vsrc[2] << R300_ALU_SRC2C_SHIFT)); + + code->alu.inst[pos].inst3 &= ~R300_ALU_SRC_MASK; + code->alu.inst[pos].inst3 |= + ((cs->slot[pos].ssrc[0] << R300_ALU_SRC0A_SHIFT) | + (cs->slot[pos].ssrc[1] << R300_ALU_SRC1A_SHIFT) | + (cs->slot[pos].ssrc[2] << R300_ALU_SRC2A_SHIFT)); + + // Emit the argument selection code + if (emit_vop) { + int swz[3]; + + for (i 
= 0; i < 3; ++i) { + if (i < argc) { + swz[i] = (v_swiz[REG_GET_VSWZ(src[i])].base + + (srcpos[i] * + v_swiz[REG_GET_VSWZ(src[i])]. + stride)) | ((src[i] & REG_NEGV_MASK) + ? ARG_NEG : 0) | ((src[i] + & + REG_ABS_MASK) + ? + ARG_ABS + : 0); + } else { + swz[i] = R300_ALU_ARGC_ZERO; + } + } + + code->alu.inst[pos].inst0 &= + ~(R300_ALU_ARG0C_MASK | R300_ALU_ARG1C_MASK | + R300_ALU_ARG2C_MASK); + code->alu.inst[pos].inst0 |= + (swz[0] << R300_ALU_ARG0C_SHIFT) | (swz[1] << + R300_ALU_ARG1C_SHIFT) + | (swz[2] << R300_ALU_ARG2C_SHIFT); + } + + if (emit_sop) { + int swz[3]; + + for (i = 0; i < 3; ++i) { + if (i < argc) { + swz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base + + (srcpos[i] * + s_swiz[REG_GET_SSWZ(src[i])]. + stride)) | ((src[i] & REG_NEGV_MASK) + ? ARG_NEG : 0) | ((src[i] + & + REG_ABS_MASK) + ? + ARG_ABS + : 0); + } else { + swz[i] = R300_ALU_ARGA_ZERO; + } + } + + code->alu.inst[pos].inst2 &= + ~(R300_ALU_ARG0A_MASK | R300_ALU_ARG1A_MASK | + R300_ALU_ARG2A_MASK); + code->alu.inst[pos].inst2 |= + (swz[0] << R300_ALU_ARG0A_SHIFT) | (swz[1] << + R300_ALU_ARG1A_SHIFT) + | (swz[2] << R300_ALU_ARG2A_SHIFT); + } + + return pos; +} + +/** + * Append an ALU instruction to the instruction list. 
+ */ +static void emit_arith(struct r300_pfs_compile_state *cs, + int op, + GLuint dest, + int mask, + GLuint src0, GLuint src1, GLuint src2, int flags) +{ + COMPILE_STATE; + GLuint src[3] = { src0, src1, src2 }; + int hwdest; + GLboolean emit_vop, emit_sop; + int vop, sop, argc; + int pos; + + vop = r300_fpop[op].v_op; + sop = r300_fpop[op].s_op; + argc = r300_fpop[op].argc; + + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT && + REG_GET_INDEX(dest) == FRAG_RESULT_DEPR) { + if (mask & WRITEMASK_Z) { + mask = WRITEMASK_W; + } else { + return; + } + } + + emit_vop = GL_FALSE; + emit_sop = GL_FALSE; + if ((mask & WRITEMASK_XYZ) || vop == R300_ALU_OUTC_DP3) + emit_vop = GL_TRUE; + if ((mask & WRITEMASK_W) || vop == R300_ALU_OUTC_REPL_ALPHA) + emit_sop = GL_TRUE; + + pos = + find_and_prepare_slot(cs, emit_vop, emit_sop, argc, src, dest, + mask); + if (pos < 0) + return; + + hwdest = t_hw_dst(cs, dest, GL_FALSE, pos); /* Note: Side effects wrt register allocation */ + + if (flags & PFS_FLAG_SAT) { + vop |= R300_ALU_OUTC_CLAMP; + sop |= R300_ALU_OUTA_CLAMP; + } + + /* Throw the pieces together and get ALU/1 */ + if (emit_vop) { + code->alu.inst[pos].inst0 |= vop; + + code->alu.inst[pos].inst1 |= hwdest << R300_ALU_DSTC_SHIFT; + + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { + code->alu.inst[pos].inst1 |= + (mask & WRITEMASK_XYZ) << + R300_ALU_DSTC_OUTPUT_MASK_SHIFT; + } else + assert(0); + } else { + code->alu.inst[pos].inst1 |= + (mask & WRITEMASK_XYZ) << + R300_ALU_DSTC_REG_MASK_SHIFT; + + cs->hwtemps[hwdest].vector_valid = pos + 1; + } + } + + /* And now ALU/3 */ + if (emit_sop) { + code->alu.inst[pos].inst2 |= sop; + + if (mask & WRITEMASK_W) { + if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + if (REG_GET_INDEX(dest) == FRAG_RESULT_COLR) { + code->alu.inst[pos].inst3 |= + (hwdest << R300_ALU_DSTA_SHIFT) | + R300_ALU_DSTA_OUTPUT; + } else if (REG_GET_INDEX(dest) == + FRAG_RESULT_DEPR) { + code->alu.inst[pos].inst3 |= + 
R300_ALU_DSTA_DEPTH; + } else + assert(0); + } else { + code->alu.inst[pos].inst3 |= + (hwdest << R300_ALU_DSTA_SHIFT) | + R300_ALU_DSTA_REG; + + cs->hwtemps[hwdest].scalar_valid = pos + 1; + } + } + } + + return; +} + +static GLfloat SinCosConsts[2][4] = { + { + 1.273239545, // 4/PI + -0.405284735, // -4/(PI*PI) + 3.141592654, // PI + 0.2225 // weight + }, + { + 0.75, + 0.0, + 0.159154943, // 1/(2*PI) + 6.283185307 // 2*PI + } +}; + +/** + * Emit a LIT instruction. + * \p flags may be PFS_FLAG_SAT + * + * Definition of LIT (from ARB_fragment_program): + * tmp = VectorLoad(op0); + * if (tmp.x < 0) tmp.x = 0; + * if (tmp.y < 0) tmp.y = 0; + * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); + * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; + * result.x = 1.0; + * result.y = tmp.x; + * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; + * result.w = 1.0; + * + * The longest path of computation is the one leading to result.z, + * consisting of 5 operations. This implementation of LIT takes + * 5 slots. So unless there's some special undocumented opcode, + * this implementation is potentially optimal. Unfortunately, + * emit_arith is a bit too conservative because it doesn't understand + * partial writes to the vector component. 
+ */ +static const GLfloat LitConst[4] = + { 127.999999, 127.999999, 127.999999, -127.999999 }; + +static void emit_lit(struct r300_pfs_compile_state *cs, + GLuint dest, int mask, GLuint src, int flags) +{ + COMPILE_STATE; + GLuint cnst; + int needTemporary; + GLuint temp; + + cnst = emit_const4fv(cs, LitConst); + + needTemporary = 0; + if ((mask & WRITEMASK_XYZW) != WRITEMASK_XYZW) { + needTemporary = 1; + } else if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { + // LIT is typically followed by DP3/DP4, so there's no point + // in creating special code for this case + needTemporary = 1; + } + + if (needTemporary) { + temp = keep(get_temp_reg(cs)); + } else { + temp = keep(dest); + } + + // Note: The order of emit_arith inside the slots is relevant, + // because emit_arith only looks at scalar vs. vector when resolving + // dependencies, and it does not consider individual vector components, + // so swizzling between the two parts can create fake dependencies. + + // First slot + emit_arith(cs, PFS_OP_MAX, temp, WRITEMASK_XY, + keep(src), pfs_zero, undef, 0); + emit_arith(cs, PFS_OP_MAX, temp, WRITEMASK_W, src, cnst, undef, 0); + + // Second slot + emit_arith(cs, PFS_OP_MIN, temp, WRITEMASK_Z, + swizzle(temp, W, W, W, W), cnst, undef, 0); + emit_arith(cs, PFS_OP_LG2, temp, WRITEMASK_W, + swizzle(temp, Y, Y, Y, Y), undef, undef, 0); + + // Third slot + // If desired, we saturate the y result here. 
+ // This does not affect the use as a condition variable in the CMP later + emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_W, + temp, swizzle(temp, Z, Z, Z, Z), pfs_zero, 0); + emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_Y, + swizzle(temp, X, X, X, X), pfs_one, pfs_zero, flags); + + // Fourth slot + emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_X, + pfs_one, pfs_one, pfs_zero, 0); + emit_arith(cs, PFS_OP_EX2, temp, WRITEMASK_W, temp, undef, undef, 0); + + // Fifth slot + emit_arith(cs, PFS_OP_CMP, temp, WRITEMASK_Z, + pfs_zero, swizzle(temp, W, W, W, W), + negate(swizzle(temp, Y, Y, Y, Y)), flags); + emit_arith(cs, PFS_OP_MAD, temp, WRITEMASK_W, pfs_one, pfs_one, + pfs_zero, 0); + + if (needTemporary) { + emit_arith(cs, PFS_OP_MAD, dest, mask, + temp, pfs_one, pfs_zero, flags); + free_temp(cs, temp); + } else { + // Decrease refcount of the destination + t_hw_dst(cs, dest, GL_FALSE, cs->nrslots); + } +} + +static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_instruction *fpi) +{ + COMPILE_STATE; + GLuint src[3], dest, temp[2]; + int flags, mask = 0; + int const_sin[2]; + + if (fpi->SaturateMode == SATURATE_ZERO_ONE) + flags = PFS_FLAG_SAT; + else + flags = 0; + + if (fpi->Opcode != OPCODE_KIL) { + dest = t_dst(cs, fpi->DstReg); + mask = fpi->DstReg.WriteMask; + } + + switch (fpi->Opcode) { + case OPCODE_ABS: + src[0] = t_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_MAD, dest, mask, + absolute(src[0]), pfs_one, pfs_zero, flags); + break; + case OPCODE_ADD: + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_MAD, dest, mask, + src[0], pfs_one, src[1], flags); + break; + case OPCODE_CMP: + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + src[2] = t_src(cs, fpi->SrcReg[2]); + /* ARB_f_p - if src0.c < 0.0 ? src1.c : src2.c + * r300 - if src2.c < 0.0 ? 
src1.c : src0.c + */ + emit_arith(cs, PFS_OP_CMP, dest, mask, + src[2], src[1], src[0], flags); + break; + case OPCODE_COS: + /* + * cos using a parabola (see SIN): + * cos(x): + * x = (x/(2*PI))+0.75 + * x = frac(x) + * x = (x*2*PI)-PI + * result = sin(x) + */ + temp[0] = get_temp_reg(cs); + const_sin[0] = emit_const4fv(cs, SinCosConsts[0]); + const_sin[1] = emit_const4fv(cs, SinCosConsts[1]); + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); + + /* add 0.5*PI and do range reduction */ + + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(src[0], X, X, X, X), + swizzle(const_sin[1], Z, Z, Z, Z), + swizzle(const_sin[1], X, X, X, X), 0); + + emit_arith(cs, PFS_OP_FRC, temp[0], WRITEMASK_X, + swizzle(temp[0], X, X, X, X), + undef, undef, 0); + + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI + negate(swizzle(const_sin[0], Z, Z, Z, Z)), //-PI + 0); + + /* SIN */ + + emit_arith(cs, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], + Z, Z, Z, + Z), + const_sin[0], pfs_zero, 0); + + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(temp[0], Y, Y, Y, Y), + absolute(swizzle(temp[0], Z, Z, Z, Z)), + swizzle(temp[0], X, X, X, X), 0); + + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Y, + swizzle(temp[0], X, X, X, X), + absolute(swizzle(temp[0], X, X, X, X)), + negate(swizzle(temp[0], X, X, X, X)), 0); + + emit_arith(cs, PFS_OP_MAD, dest, mask, + swizzle(temp[0], Y, Y, Y, Y), + swizzle(const_sin[0], W, W, W, W), + swizzle(temp[0], X, X, X, X), flags); + + free_temp(cs, temp[0]); + break; + case OPCODE_DP3: + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_DP3, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_DP4: + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_DP4, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_DPH: + src[0] = 
t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + /* src0.xyz1 -> temp + * DP4 dest, temp, src1 + */ + emit_arith(cs, PFS_OP_DP4, dest, mask, + swizzle(src[0], X, Y, Z, ONE), src[1], + undef, flags); + break; + case OPCODE_DST: + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + /* dest.y = src0.y * src1.y */ + if (mask & WRITEMASK_Y) + emit_arith(cs, PFS_OP_MAD, dest, WRITEMASK_Y, + keep(src[0]), keep(src[1]), + pfs_zero, flags); + /* dest.z = src0.z */ + if (mask & WRITEMASK_Z) + emit_arith(cs, PFS_OP_MAD, dest, WRITEMASK_Z, + src[0], pfs_one, pfs_zero, flags); + /* result.x = 1.0 + * result.w = src1.w */ + if (mask & WRITEMASK_XW) { + REG_SET_VSWZ(src[1], SWIZZLE_111); /*Cheat */ + emit_arith(cs, PFS_OP_MAD, dest, + mask & WRITEMASK_XW, + src[1], pfs_one, pfs_zero, flags); + } + break; + case OPCODE_EX2: + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_EX2, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_FLR: + src[0] = t_src(cs, fpi->SrcReg[0]); + temp[0] = get_temp_reg(cs); + /* FRC temp, src0 + * MAD dest, src0, 1.0, -temp + */ + emit_arith(cs, PFS_OP_FRC, temp[0], mask, + keep(src[0]), undef, undef, 0); + emit_arith(cs, PFS_OP_MAD, dest, mask, + src[0], pfs_one, negate(temp[0]), flags); + free_temp(cs, temp[0]); + break; + case OPCODE_FRC: + src[0] = t_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_FRC, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_KIL: + emit_tex(cs, fpi, R300_TEX_OP_KIL); + break; + case OPCODE_LG2: + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_LG2, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_LIT: + src[0] = t_src(cs, fpi->SrcReg[0]); + emit_lit(cs, dest, mask, src[0], flags); + break; + case OPCODE_LRP: + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + src[2] = t_src(cs, fpi->SrcReg[2]); + /* result = tmp0tmp1 + (1 - tmp0)tmp2 + * = tmp0tmp1 + tmp2 + (-tmp0)tmp2 
+ * MAD temp, -tmp0, tmp2, tmp2 + * MAD result, tmp0, tmp1, temp + */ + temp[0] = get_temp_reg(cs); + emit_arith(cs, PFS_OP_MAD, temp[0], mask, + negate(keep(src[0])), keep(src[2]), src[2], + 0); + emit_arith(cs, PFS_OP_MAD, dest, mask, + src[0], src[1], temp[0], flags); + free_temp(cs, temp[0]); + break; + case OPCODE_MAD: + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + src[2] = t_src(cs, fpi->SrcReg[2]); + emit_arith(cs, PFS_OP_MAD, dest, mask, + src[0], src[1], src[2], flags); + break; + case OPCODE_MAX: + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_MAX, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_MIN: + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_MIN, dest, mask, + src[0], src[1], undef, flags); + break; + case OPCODE_MOV: + case OPCODE_SWZ: + src[0] = t_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_MAD, dest, mask, + src[0], pfs_one, pfs_zero, flags); + break; + case OPCODE_MUL: + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_MAD, dest, mask, + src[0], src[1], pfs_zero, flags); + break; + case OPCODE_POW: + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); + src[1] = t_scalar_src(cs, fpi->SrcReg[1]); + temp[0] = get_temp_reg(cs); + emit_arith(cs, PFS_OP_LG2, temp[0], WRITEMASK_W, + src[0], undef, undef, 0); + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_W, + temp[0], src[1], pfs_zero, 0); + emit_arith(cs, PFS_OP_EX2, dest, fpi->DstReg.WriteMask, + temp[0], undef, undef, 0); + free_temp(cs, temp[0]); + break; + case OPCODE_RCP: + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_RCP, dest, mask, + src[0], undef, undef, flags); + break; + case OPCODE_RSQ: + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); + emit_arith(cs, PFS_OP_RSQ, dest, mask, + absolute(src[0]), pfs_zero, pfs_zero, flags); + break; + case OPCODE_SCS: + /* + * scs using a 
parabola : + * scs(x): + * result.x = sin(-abs(x)+0.5*PI) (cos) + * result.y = sin(x) (sin) + * + */ + temp[0] = get_temp_reg(cs); + temp[1] = get_temp_reg(cs); + const_sin[0] = emit_const4fv(cs, SinCosConsts[0]); + const_sin[1] = emit_const4fv(cs, SinCosConsts[1]); + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); + + /* x = -abs(x)+0.5*PI */ + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(const_sin[0], Z, Z, Z, Z), //PI + pfs_half, + negate(abs + (swizzle(keep(src[0]), X, X, X, X))), + 0); + + /* C*x (sin) */ + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_W, + swizzle(const_sin[0], Y, Y, Y, Y), + swizzle(keep(src[0]), X, X, X, X), + pfs_zero, 0); + + /* B*x, C*x (cos) */ + emit_arith(cs, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], + Z, Z, Z, + Z), + const_sin[0], pfs_zero, 0); + + /* B*x (sin) */ + emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_W, + swizzle(const_sin[0], X, X, X, X), + keep(src[0]), pfs_zero, 0); + + /* y = B*x + C*x*abs(x) (sin) */ + emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_Z, + absolute(src[0]), + swizzle(temp[0], W, W, W, W), + swizzle(temp[1], W, W, W, W), 0); + + /* y = B*x + C*x*abs(x) (cos) */ + emit_arith(cs, PFS_OP_MAD, temp[1], WRITEMASK_W, + swizzle(temp[0], Y, Y, Y, Y), + absolute(swizzle(temp[0], Z, Z, Z, Z)), + swizzle(temp[0], X, X, X, X), 0); + + /* y*abs(y) - y (cos), y*abs(y) - y (sin) */ + emit_arith(cs, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[1], + W, Z, Y, + X), + absolute(swizzle(temp[1], W, Z, Y, X)), + negate(swizzle(temp[1], W, Z, Y, X)), 0); + + /* dest.xy = mad(temp.xy, P, temp2.wz) */ + emit_arith(cs, PFS_OP_MAD, dest, + mask & (WRITEMASK_X | WRITEMASK_Y), temp[0], + swizzle(const_sin[0], W, W, W, W), + swizzle(temp[1], W, Z, Y, X), flags); + + free_temp(cs, temp[0]); + free_temp(cs, temp[1]); + break; + case OPCODE_SGE: + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + temp[0] = get_temp_reg(cs); + /* temp = src0 - src1 + * dest.c = 
(temp.c < 0.0) ? 0 : 1 + */ + emit_arith(cs, PFS_OP_MAD, temp[0], mask, + src[0], pfs_one, negate(src[1]), 0); + emit_arith(cs, PFS_OP_CMP, dest, mask, + pfs_one, pfs_zero, temp[0], 0); + free_temp(cs, temp[0]); + break; + case OPCODE_SIN: + /* + * using a parabola: + * sin(x) = 4/pi * x + -4/(pi*pi) * x * abs(x) + * extra precision is obtained by weighting against + * itself squared. + */ + + temp[0] = get_temp_reg(cs); + const_sin[0] = emit_const4fv(cs, SinCosConsts[0]); + const_sin[1] = emit_const4fv(cs, SinCosConsts[1]); + src[0] = t_scalar_src(cs, fpi->SrcReg[0]); + + /* do range reduction */ + + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(keep(src[0]), X, X, X, X), + swizzle(const_sin[1], Z, Z, Z, Z), + pfs_half, 0); + + emit_arith(cs, PFS_OP_FRC, temp[0], WRITEMASK_X, + swizzle(temp[0], X, X, X, X), + undef, undef, 0); + + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Z, swizzle(temp[0], X, X, X, X), swizzle(const_sin[1], W, W, W, W), //2*PI + negate(swizzle(const_sin[0], Z, Z, Z, Z)), //PI + 0); + + /* SIN */ + + emit_arith(cs, PFS_OP_MAD, temp[0], + WRITEMASK_X | WRITEMASK_Y, swizzle(temp[0], + Z, Z, Z, + Z), + const_sin[0], pfs_zero, 0); + + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_X, + swizzle(temp[0], Y, Y, Y, Y), + absolute(swizzle(temp[0], Z, Z, Z, Z)), + swizzle(temp[0], X, X, X, X), 0); + + emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_Y, + swizzle(temp[0], X, X, X, X), + absolute(swizzle(temp[0], X, X, X, X)), + negate(swizzle(temp[0], X, X, X, X)), 0); + + emit_arith(cs, PFS_OP_MAD, dest, mask, + swizzle(temp[0], Y, Y, Y, Y), + swizzle(const_sin[0], W, W, W, W), + swizzle(temp[0], X, X, X, X), flags); + + free_temp(cs, temp[0]); + break; + case OPCODE_SLT: + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + temp[0] = get_temp_reg(cs); + /* temp = src0 - src1 + * dest.c = (temp.c < 0.0) ? 
1 : 0 + */ + emit_arith(cs, PFS_OP_MAD, temp[0], mask, + src[0], pfs_one, negate(src[1]), 0); + emit_arith(cs, PFS_OP_CMP, dest, mask, + pfs_zero, pfs_one, temp[0], 0); + free_temp(cs, temp[0]); + break; + case OPCODE_SUB: + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + emit_arith(cs, PFS_OP_MAD, dest, mask, + src[0], pfs_one, negate(src[1]), flags); + break; + case OPCODE_TEX: + emit_tex(cs, fpi, R300_TEX_OP_LD); + break; + case OPCODE_TXB: + emit_tex(cs, fpi, R300_TEX_OP_TXB); + break; + case OPCODE_TXP: + emit_tex(cs, fpi, R300_TEX_OP_TXP); + break; + case OPCODE_XPD:{ + src[0] = t_src(cs, fpi->SrcReg[0]); + src[1] = t_src(cs, fpi->SrcReg[1]); + temp[0] = get_temp_reg(cs); + /* temp = src0.zxy * src1.yzx */ + emit_arith(cs, PFS_OP_MAD, temp[0], + WRITEMASK_XYZ, swizzle(keep(src[0]), + Z, X, Y, W), + swizzle(keep(src[1]), Y, Z, X, W), + pfs_zero, 0); + /* dest.xyz = src0.yzx * src1.zxy - temp + * dest.w = undefined + * */ + emit_arith(cs, PFS_OP_MAD, dest, + mask & WRITEMASK_XYZ, swizzle(src[0], + Y, Z, + X, W), + swizzle(src[1], Z, X, Y, W), + negate(temp[0]), flags); + /* cleanup */ + free_temp(cs, temp[0]); + break; + } + default: + ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); + break; + } +} + +static GLboolean parse_program(struct r300_pfs_compile_state *cs) +{ + COMPILE_STATE; + int clauseidx; + + for (clauseidx = 0; clauseidx < cs->compiler->compiler.NumClauses; ++clauseidx) { + struct radeon_clause* clause = &cs->compiler->compiler.Clauses[clauseidx]; + int ip; + + for(ip = 0; ip < clause->NumInstructions; ++ip) { + emit_instruction(cs, clause->Instructions + ip); + + if (fp->error) + return GL_FALSE; + } + } + + return GL_TRUE; +} + + +/* - Init structures + * - Determine what hwregs each input corresponds to + */ +static void init_program(struct r300_pfs_compile_state *cs) +{ + COMPILE_STATE; + struct gl_fragment_program *mp = &fp->mesa_program; + GLuint InputsRead = mp->Base.InputsRead; + GLuint temps_used = 0; /* for 
fp->temps[] */ + int i, j; + + /* New compile, reset tracking data */ + fp->optimization = + driQueryOptioni(&cs->compiler->r300->radeon.optionCache, "fp_optimization"); + fp->translated = GL_FALSE; + fp->error = GL_FALSE; + fp->WritesDepth = GL_FALSE; + code->tex.length = 0; + code->cur_node = 0; + code->first_node_has_tex = 0; + code->const_nr = 0; + code->max_temp_idx = 0; + code->node[0].alu_end = -1; + code->node[0].tex_end = -1; + + for (i = 0; i < PFS_MAX_ALU_INST; i++) { + for (j = 0; j < 3; j++) { + cs->slot[i].vsrc[j] = SRC_CONST; + cs->slot[i].ssrc[j] = SRC_CONST; + } + } + + /* Work out what temps the Mesa inputs correspond to, this must match + * what setup_rs_unit does, which shouldn't be a problem as rs_unit + * configures itself based on the fragprog's InputsRead + * + * NOTE: this depends on get_hw_temp() allocating registers in order, + * starting from register 0. + */ + + /* Texcoords come first */ + for (i = 0; i < cs->compiler->r300->radeon.glCtx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0; + cs->inputs[FRAG_ATTRIB_TEX0 + i].reg = + get_hw_temp(cs, 0); + } + } + InputsRead &= ~FRAG_BITS_TEX_ANY; + + /* fragment position treated as a texcoord */ + if (InputsRead & FRAG_BIT_WPOS) { + cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; + cs->inputs[FRAG_ATTRIB_WPOS].reg = get_hw_temp(cs, 0); + } + InputsRead &= ~FRAG_BIT_WPOS; + + /* Then primary colour */ + if (InputsRead & FRAG_BIT_COL0) { + cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; + cs->inputs[FRAG_ATTRIB_COL0].reg = get_hw_temp(cs, 0); + } + InputsRead &= ~FRAG_BIT_COL0; + + /* Secondary color */ + if (InputsRead & FRAG_BIT_COL1) { + cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; + cs->inputs[FRAG_ATTRIB_COL1].reg = get_hw_temp(cs, 0); + } + InputsRead &= ~FRAG_BIT_COL1; + + /* Anything else */ + if (InputsRead) { + WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead); + /* force read from hwreg 0 for now */ + for (i 
= 0; i < 32; i++) + if (InputsRead & (1 << i)) + cs->inputs[i].reg = 0; + } + + /* Pre-parse the program, grabbing refcounts on input/temp regs. + * That way, we can free up the reg when it's no longer needed + */ + for (i = 0; i < cs->compiler->compiler.Clauses[0].NumInstructions; ++i) { + struct prog_instruction *fpi = cs->compiler->compiler.Clauses[0].Instructions + i; + int idx; + + for (j = 0; j < 3; j++) { + idx = fpi->SrcReg[j].Index; + switch (fpi->SrcReg[j].File) { + case PROGRAM_TEMPORARY: + if (!(temps_used & (1 << idx))) { + cs->temps[idx].reg = -1; + cs->temps[idx].refcount = 1; + temps_used |= (1 << idx); + } else + cs->temps[idx].refcount++; + break; + case PROGRAM_INPUT: + cs->inputs[idx].refcount++; + break; + default: + break; + } + } + + idx = fpi->DstReg.Index; + if (fpi->DstReg.File == PROGRAM_TEMPORARY) { + if (!(temps_used & (1 << idx))) { + cs->temps[idx].reg = -1; + cs->temps[idx].refcount = 1; + temps_used |= (1 << idx); + } else + cs->temps[idx].refcount++; + } + } + cs->temp_in_use = temps_used; +} + + +/** + * Final compilation step: Turn the intermediate radeon_program into + * machine-readable instructions. + */ +GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler) +{ + struct r300_pfs_compile_state cs; + struct r300_fragment_program_code *code = compiler->code; + + _mesa_memset(&cs, 0, sizeof(cs)); + cs.compiler = compiler; + init_program(&cs); + + if (!parse_program(&cs)) + return GL_FALSE; + + /* Finish off */ + code->node[code->cur_node].alu_end = + cs.nrslots - code->node[code->cur_node].alu_offset - 1; + if (code->node[code->cur_node].tex_end < 0) + code->node[code->cur_node].tex_end = 0; + code->alu_offset = 0; + code->alu_end = cs.nrslots - 1; + code->tex_offset = 0; + code->tex_end = code->tex.length ? 
code->tex.length - 1 : 0; + assert(code->node[code->cur_node].alu_end >= 0); + assert(code->alu_end >= 0); + + return GL_TRUE; +} + diff --git a/src/mesa/drivers/dri/r300/radeon_program.c b/src/mesa/drivers/dri/r300/radeon_program.c new file mode 100644 index 0000000000..7b03fa6523 --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_program.c @@ -0,0 +1,151 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "radeon_program.h" + + +/** + * Initialize a compiler structure with a single mixed clause + * containing all instructions from the source program. 
+ */ +void radeonCompilerInit( + struct radeon_compiler *compiler, + GLcontext *ctx, + struct gl_program *source) +{ + struct radeon_clause* clause; + + _mesa_memset(compiler, 0, sizeof(*compiler)); /* start from an all-zero compiler: no clauses yet */ + compiler->Source = source; + compiler->Ctx = ctx; + + compiler->NumTemporaries = source->NumTemporaries; /* radeonCompilerAllocateTemporary hands out indices from here upwards */ + + clause = radeonCompilerInsertClause(compiler, 0, CLAUSE_MIXED); + clause->NumInstructions = 0; + while(source->Instructions[clause->NumInstructions].Opcode != OPCODE_END) /* count the instructions that precede OPCODE_END */ + clause->NumInstructions++; + clause->ReservedInstructions = clause->NumInstructions; + clause->Instructions = _mesa_alloc_instructions(clause->NumInstructions); + _mesa_copy_instructions(clause->Instructions, source->Instructions, clause->NumInstructions); /* only the counted instructions are passed on; the OPCODE_END entry is not included */ +} + + +/** + * Free all data that is referenced by the compiler structure. + * However, the compiler structure itself is not freed. + * + * This is done by erasing every clause, i.e. the range [0, NumClauses), + * through radeonCompilerEraseClauses. + */ +void radeonCompilerCleanup(struct radeon_compiler *compiler) +{ + radeonCompilerEraseClauses(compiler, 0, compiler->NumClauses); +} + + +/** + * Allocate and return a unique temporary register.
+ */ +int radeonCompilerAllocateTemporary(struct radeon_compiler *compiler) +{ + if (compiler->NumTemporaries >= 256) { + _mesa_problem(compiler->Ctx, "radeonCompiler: Too many temporaries"); + return 0; + } + + return compiler->NumTemporaries++; +} + + +/** + * \p position index of the new clause; later clauses are moved + * \p type of the new clause; one of CLAUSE_XXX + * \return a pointer to the new clause + */ +struct radeon_clause* radeonCompilerInsertClause( + struct radeon_compiler *compiler, + int position, int type) +{ + struct radeon_clause* oldClauses = compiler->Clauses; + struct radeon_clause* clause; + + assert(position >= 0 && position <= compiler->NumClauses); + + compiler->Clauses = (struct radeon_clause *) + _mesa_malloc((compiler->NumClauses+1) * sizeof(struct radeon_clause)); + if (oldClauses) { + _mesa_memcpy(compiler->Clauses, oldClauses, + position*sizeof(struct radeon_clause)); + _mesa_memcpy(compiler->Clauses+position+1, oldClauses+position, + (compiler->NumClauses - position) * sizeof(struct radeon_clause)); + _mesa_free(oldClauses); + } + compiler->NumClauses++; + + clause = compiler->Clauses + position; + _mesa_memset(clause, 0, sizeof(*clause)); + clause->Type = type; + + return clause; +} + + +/** + * Remove clauses in the range [start, end) + */ +void radeonCompilerEraseClauses( + struct radeon_compiler *compiler, + int start, int end) +{ + struct radeon_clause* oldClauses = compiler->Clauses; + int i; + + assert(0 <= start); + assert(start <= end); + assert(end <= compiler->NumClauses); + + if (end == start) + return; + + for(i = start; i < end; ++i) { + struct radeon_clause* clause = oldClauses + i; + _mesa_free_instructions(clause->Instructions, clause->NumInstructions); + } + + if (start > 0 || end < compiler->NumClauses) { + compiler->Clauses = (struct radeon_clause*) + _mesa_malloc((compiler->NumClauses+start-end) * sizeof(struct radeon_clause)); + _mesa_memcpy(compiler->Clauses, oldClauses, + start * sizeof(struct 
radeon_clause)); + _mesa_memcpy(compiler->Clauses + start, oldClauses + end, + (compiler->NumClauses - end) * sizeof(struct radeon_clause)); + compiler->NumClauses -= end - start; + } else { + compiler->Clauses = 0; + compiler->NumClauses = 0; + } + + _mesa_free(oldClauses); +} diff --git a/src/mesa/drivers/dri/r300/radeon_program.h b/src/mesa/drivers/dri/r300/radeon_program.h new file mode 100644 index 0000000000..18091ac02a --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_program.h @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __RADEON_PROGRAM_H_ +#define __RADEON_PROGRAM_H_ + +#include "glheader.h" +#include "macros.h" +#include "enums.h" +#include "shader/program.h" +#include "shader/prog_instruction.h" + + +enum { + CLAUSE_MIXED = 0, + CLAUSE_ALU, + CLAUSE_TEX +}; + +/** + * A clause is simply a sequence of instructions that are executed + * in order. + */ +struct radeon_clause { + /** + * Type of this clause, one of CLAUSE_XXX. + * + * Declared unsigned on purpose: a signed 2-bit field only holds + * -2..1, so CLAUSE_TEX (2) would read back as a negative value. + */ + unsigned int Type : 2; + + /** + * Pointer to an array of NumInstructions instructions. + * + * NOTE(review): radeonCompilerInit copies only the instructions that + * precede OPCODE_END, so do not rely on an OPCODE_END terminator + * being present in this array. + */ + struct prog_instruction *Instructions; + + /** + * Number of instructions in this clause. + */ + int NumInstructions; + + /** + * Space reserved for instructions in this clause. + */ + int ReservedInstructions; +}; + +/** + * A compile object, holding the current intermediate state during compilation. + */ +struct radeon_compiler { + struct gl_program *Source; + GLcontext* Ctx; + + /** + * Number of clauses in this program. + */ + int NumClauses; + + /** + * Pointer to an array of NumClauses clauses. + */ + struct radeon_clause *Clauses; + + /** + * Number of registers in the PROGRAM_TEMPORARIES file.
+ */ + int NumTemporaries; +}; + +void radeonCompilerInit( + struct radeon_compiler *compiler, + GLcontext *ctx, + struct gl_program *source); +void radeonCompilerCleanup(struct radeon_compiler *compiler); +int radeonCompilerAllocateTemporary(struct radeon_compiler *compiler); + +struct radeon_clause *radeonCompilerInsertClause( + struct radeon_compiler *compiler, + int position, + int type); +void radeonCompilerEraseClauses( + struct radeon_compiler *compiler, + int start, + int end); + +#endif -- cgit v1.2.3 From e34dc8227c1fa8bc9ffcd311de701053a633a7ec Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 14 Jun 2008 01:46:19 +0200 Subject: r300_fragprog: Refactor TEX transformation Streamlining source and destination registers, as well as texcoord scaling for RECT textures is now done in a radeon_program based transformation. The idea is that this will allow us to optimize away unnecessary indirections more easily. --- src/mesa/drivers/dri/r300/r300_fragprog.c | 131 +++++++++++++++++++++++++ src/mesa/drivers/dri/r300/r300_fragprog.h | 1 + src/mesa/drivers/dri/r300/r300_fragprog_emit.c | 103 ++----------------- src/mesa/drivers/dri/r300/radeon_program.c | 98 ++++++++++++++++++ src/mesa/drivers/dri/r300/radeon_program.h | 53 ++++++++++ 5 files changed, 293 insertions(+), 93 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 94cb11afec..4c6289298e 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -50,6 +50,130 @@ #include "r300_state.h" +static void reset_srcreg(struct prog_src_register* reg) +{ + _mesa_bzero(reg, sizeof(*reg)); + reg->Swizzle = SWIZZLE_NOOP; +} + +/** + * Transform TEX, TXP, TXB, and KIL instructions in the following way: + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + * + * \todo If/when r5xx uses the 
radeon_program architecture, this can probably + * be reused. + */ +static GLboolean transform_TEX( + struct radeon_program_transform_context* context, + struct prog_instruction* orig_inst, void* data) +{ + struct r300_fragment_program_compiler *compiler = + (struct r300_fragment_program_compiler*)data; + struct prog_instruction inst = *orig_inst; + struct prog_instruction* tgt; + GLboolean destredirect = GL_FALSE; + + if (inst.Opcode != OPCODE_TEX && + inst.Opcode != OPCODE_TXB && + inst.Opcode != OPCODE_TXP && + inst.Opcode != OPCODE_KIL) + return GL_FALSE; + + /* Hardware uses [0..1]x[0..1] range for rectangle textures + * instead of [0..Width]x[0..Height]. + * Add a scaling instruction. + */ + if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) { + gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, + 0 + }; + + int tempreg = radeonCompilerAllocateTemporary(context->compiler); + int factor_index; + + tokens[2] = inst.TexSrcUnit; + factor_index = + _mesa_add_state_reference( + compiler->fp->mesa_program.Base.Parameters, tokens); + + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + + tgt->Opcode = OPCODE_MAD; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tempreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + tgt->SrcReg[1].File = PROGRAM_STATE_VAR; + tgt->SrcReg[1].Index = factor_index; + tgt->SrcReg[2].File = PROGRAM_BUILTIN; + tgt->SrcReg[2].Swizzle = SWIZZLE_0000; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tempreg; + } + + /* Texture operations do not support swizzles etc. in hardware, + * so emit an additional arithmetic operation if necessary. 
+ */ + if (inst.SrcReg[0].Swizzle != SWIZZLE_NOOP || + inst.SrcReg[0].Abs || inst.SrcReg[0].NegateBase || inst.SrcReg[0].NegateAbs) { + int tempreg = radeonCompilerAllocateTemporary(context->compiler); + + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + + tgt->Opcode = OPCODE_MAD; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tempreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + tgt->SrcReg[1].File = PROGRAM_BUILTIN; + tgt->SrcReg[1].Swizzle = SWIZZLE_1111; + tgt->SrcReg[2].File = PROGRAM_BUILTIN; + tgt->SrcReg[2].Swizzle = SWIZZLE_0000; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tempreg; + } + + if (inst.Opcode != OPCODE_KIL) { + if (inst.DstReg.File != PROGRAM_TEMPORARY || + inst.DstReg.WriteMask != WRITEMASK_XYZW) { + int tempreg = radeonCompilerAllocateTemporary(context->compiler); + + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = tempreg; + inst.DstReg.WriteMask = WRITEMASK_XYZW; + destredirect = GL_TRUE; + } + } + + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + _mesa_copy_instructions(tgt, &inst, 1); + + if (destredirect) { + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + + tgt->Opcode = OPCODE_MAD; + tgt->DstReg = orig_inst->DstReg; + tgt->SrcReg[0].File = PROGRAM_TEMPORARY; + tgt->SrcReg[0].Index = inst.DstReg.Index; + tgt->SrcReg[1].File = PROGRAM_BUILTIN; + tgt->SrcReg[1].Swizzle = SWIZZLE_1111; + tgt->SrcReg[2].File = PROGRAM_BUILTIN; + tgt->SrcReg[2].Swizzle = SWIZZLE_0000; + } + + return GL_TRUE; +} + + static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp) { struct gl_fragment_program *mp = &fp->mesa_program; @@ -170,6 +294,13 @@ void r300TranslateFragmentShader(r300ContextPtr r300, insert_WPOS_trailer(&compiler); + struct radeon_program_transformation 
transformations[1] = { + { &transform_TEX, &compiler } + }; + radeonClauseLocalTransform(&compiler.compiler, + &compiler.compiler.Clauses[0], + 1, transformations); + if (!r300FragmentProgramEmit(&compiler)) fp->error = GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.h b/src/mesa/drivers/dri/r300/r300_fragprog.h index 8c836c4bda..7c1e210b04 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.h +++ b/src/mesa/drivers/dri/r300/r300_fragprog.h @@ -149,6 +149,7 @@ struct r300_fragment_program_compiler { struct radeon_compiler compiler; }; +extern void r300FPTransformTextures(struct r300_fragment_program_compiler *compiler); extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler); diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c index fe8a347a62..aec202a129 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c @@ -527,32 +527,6 @@ static GLuint get_temp_reg(struct r300_pfs_compile_state *cs) return r; } -/** - * Create a new Mesa temporary register that will act as the destination - * register for a texture read. - */ -static GLuint get_temp_reg_tex(struct r300_pfs_compile_state *cs) -{ - COMPILE_STATE; - GLuint r = undef; - GLuint index; - - index = ffs(~cs->temp_in_use); - if (!index) { - ERROR("Out of program temps\n"); - return r; - } - - cs->temp_in_use |= (1 << --index); - cs->temps[index].refcount = 0xFFFFFFFF; - cs->temps[index].reg = get_hw_temp_tex(cs); - - REG_SET_TYPE(r, REG_TYPE_TEMP); - REG_SET_INDEX(r, index); - REG_SET_VALID(r, GL_TRUE); - return r; -} - /** * Free a Mesa temporary and the associated R300 temporary. 
*/ @@ -847,6 +821,15 @@ static GLuint t_src(struct r300_pfs_compile_state *cs, fp->mesa_program.Base.Parameters-> ParameterValues[fpsrc.Index]); break; + case PROGRAM_BUILTIN: + switch(fpsrc.Swizzle) { + case SWIZZLE_1111: r = pfs_one; break; + case SWIZZLE_0000: r = pfs_zero; break; + default: + ERROR("bad PROGRAM_BUILTIN swizzle %u\n", fpsrc.Swizzle); + break; + } + break; default: ERROR("unknown SrcReg->File %x\n", fpsrc.File); return r; @@ -1003,56 +986,10 @@ static void emit_tex(struct r300_pfs_compile_state *cs, { COMPILE_STATE; GLuint coord = t_src(cs, fpi->SrcReg[0]); - GLuint dest = undef, rdest = undef; + GLuint dest = undef; GLuint din, uin; int unit = fpi->TexSrcUnit; int hwsrc, hwdest; - GLuint tempreg = 0; - - /** - * Hardware uses [0..1]x[0..1] range for rectangle textures - * instead of [0..Width]x[0..Height]. - * Add a scaling instruction. - * - * \todo Refactor this once we have proper rewriting/optimization - * support for programs. - */ - if (opcode != R300_TEX_OP_KIL && fpi->TexSrcTarget == TEXTURE_RECT_INDEX) { - gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, - 0 - }; - int factor_index; - GLuint factorreg; - - tokens[2] = unit; - factor_index = - _mesa_add_state_reference(fp->mesa_program.Base. - Parameters, tokens); - factorreg = - emit_const4fv(cs, - fp->mesa_program.Base.Parameters-> - ParameterValues[factor_index]); - tempreg = keep(get_temp_reg(cs)); - - emit_arith(cs, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, - coord, factorreg, pfs_zero, 0); - - coord = tempreg; - } - - /* Texture operations do not support swizzles etc. in hardware, - * so emit an additional arithmetic operation if necessary. 
- */ - if (REG_GET_VSWZ(coord) != SWIZZLE_XYZ || - REG_GET_SSWZ(coord) != SWIZZLE_W || - coord & (REG_NEGV_MASK | REG_NEGS_MASK | REG_ABS_MASK)) { - assert(tempreg == 0); - tempreg = keep(get_temp_reg(cs)); - emit_arith(cs, PFS_OP_MAD, tempreg, WRITEMASK_XYZW, - coord, pfs_one, pfs_zero, 0); - coord = tempreg; - } /* Ensure correct node indirection */ uin = cs->used_in_node; @@ -1064,15 +1001,6 @@ static void emit_tex(struct r300_pfs_compile_state *cs, if (opcode != R300_TEX_OP_KIL) { dest = t_dst(cs, fpi->DstReg); - /* r300 doesn't seem to be able to do TEX->output reg */ - if (REG_GET_TYPE(dest) == REG_TYPE_OUTPUT) { - rdest = dest; - dest = get_temp_reg_tex(cs); - } else if (fpi->DstReg.WriteMask != WRITEMASK_XYZW) { - /* in case write mask isn't XYZW */ - rdest = dest; - dest = get_temp_reg_tex(cs); - } hwdest = t_hw_dst(cs, dest, GL_TRUE, code->node[code->cur_node].alu_offset); @@ -1132,17 +1060,6 @@ static void emit_tex(struct r300_pfs_compile_state *cs, cs->used_in_node |= (1 << hwsrc); code->node[code->cur_node].tex_end++; - - /* Copy from temp to output if needed */ - if (REG_GET_VALID(rdest)) { - emit_arith(cs, PFS_OP_MAD, rdest, fpi->DstReg.WriteMask, dest, - pfs_one, pfs_zero, 0); - free_temp(cs, dest); - } - - /* Free temp register */ - if (tempreg != 0) - free_temp(cs, tempreg); } /** diff --git a/src/mesa/drivers/dri/r300/radeon_program.c b/src/mesa/drivers/dri/r300/radeon_program.c index 7b03fa6523..41cedbe61d 100644 --- a/src/mesa/drivers/dri/r300/radeon_program.c +++ b/src/mesa/drivers/dri/r300/radeon_program.c @@ -149,3 +149,101 @@ void radeonCompilerEraseClauses( _mesa_free(oldClauses); } + + +/** + * Insert new instructions at the given position, initialize them as NOPs + * and return a pointer to the first new instruction. 
+ */ +struct prog_instruction* radeonClauseInsertInstructions( + struct radeon_compiler *compiler, + struct radeon_clause *clause, + int position, int count) +{ + int newNumInstructions = clause->NumInstructions + count; + + assert(position >= 0 && position <= clause->NumInstructions); + + if (newNumInstructions <= clause->ReservedInstructions) { + memmove(clause->Instructions + position + count, clause->Instructions + position, + (clause->NumInstructions - position) * sizeof(struct prog_instruction)); + } else { + struct prog_instruction *oldInstructions = clause->Instructions; + + clause->ReservedInstructions *= 2; + if (newNumInstructions > clause->ReservedInstructions) + clause->ReservedInstructions = newNumInstructions; + + clause->Instructions = (struct prog_instruction*) + _mesa_malloc(clause->ReservedInstructions * sizeof(struct prog_instruction)); + + if (oldInstructions) { + _mesa_memcpy(clause->Instructions, oldInstructions, + position * sizeof(struct prog_instruction)); + _mesa_memcpy(clause->Instructions + position + count, oldInstructions + position, + (clause->NumInstructions - position) * sizeof(struct prog_instruction)); + + _mesa_free(oldInstructions); + } + } + + clause->NumInstructions = newNumInstructions; + _mesa_init_instructions(clause->Instructions + position, count); + return clause->Instructions + position; +} + + +/** + * Transform the given clause in the following way: + * 1. Replace it with an empty clause + * 2. For every instruction in the original clause, try the given + * transformations in order. + * 3. If one of the transformations returns GL_TRUE, assume that it + * has emitted the appropriate instruction(s) into the new clause; + * otherwise, copy the instruction verbatim. + * + * \note The transformation is currently not recursive; in other words, + * instructions emitted by transformations are not transformed. + * + * \note The transform is called 'local' because it can only look at + * one instruction at a time. 
+ */ +void radeonClauseLocalTransform( + struct radeon_compiler *compiler, + struct radeon_clause *clause, + int num_transformations, + struct radeon_program_transformation* transformations) +{ + struct radeon_program_transform_context context; + struct radeon_clause source; + int ip; + + source = *clause; + clause->Instructions = 0; + clause->NumInstructions = 0; + clause->ReservedInstructions = 0; + + context.compiler = compiler; + context.dest = clause; + context.src = &source; + + for(ip = 0; ip < source.NumInstructions; ++ip) { + struct prog_instruction *instr = source.Instructions + ip; + int i; + + for(i = 0; i < num_transformations; ++i) { + struct radeon_program_transformation* t = transformations + i; + + if (t->function(&context, instr, t->userData)) + break; + } + + if (i >= num_transformations) { + struct prog_instruction *tgt = + radeonClauseInsertInstructions(compiler, clause, clause->NumInstructions, 1); + _mesa_copy_instructions(tgt, instr, 1); + } + } + + _mesa_free_instructions(source.Instructions, source.NumInstructions); +} diff --git a/src/mesa/drivers/dri/r300/radeon_program.h b/src/mesa/drivers/dri/r300/radeon_program.h index 18091ac02a..3cde4d4f6f 100644 --- a/src/mesa/drivers/dri/r300/radeon_program.h +++ b/src/mesa/drivers/dri/r300/radeon_program.h @@ -41,6 +41,13 @@ enum { CLAUSE_TEX }; +enum { + PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */ +}; + +#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO) +#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE) + /** * A clause is simply a sequence of instructions that are executed * in order. 
@@ -107,4 +114,50 @@ void radeonCompilerEraseClauses( int start, int end); +struct prog_instruction* radeonClauseInsertInstructions( + struct radeon_compiler *compiler, + struct radeon_clause *clause, + int position, int count); + +/** + * Context passed to every transformation function by + * radeonClauseLocalTransform. + */ +struct radeon_program_transform_context { + struct radeon_compiler *compiler; + + /** + * Destination clause where new instructions must be written. + */ + struct radeon_clause *dest; + + /** + * Original clause that is currently being transformed. + */ + struct radeon_clause *src; +}; + +/** + * A transformation that can be passed to \ref radeonClauseLocalTransform. + * + * The function will be called once for each instruction. + * It has to either emit the appropriate transformed code for the instruction + * and return GL_TRUE, or return GL_FALSE if it doesn't understand the + * instruction. + * + * The function gets passed the userData as last parameter. + */ +struct radeon_program_transformation { + GLboolean (*function)( + struct radeon_program_transform_context*, + struct prog_instruction*, + void*); + void *userData; +}; + +void radeonClauseLocalTransform( + struct radeon_compiler *compiler, + struct radeon_clause *clause, + int num_transformations, + struct radeon_program_transformation* transformations); + #endif -- cgit v1.2.3 From 2882e5162525138316db9a1ab539a17498d06da1 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 14 Jun 2008 03:34:09 +0200 Subject: r300: Add radeonCompilerDump for debugging --- src/mesa/drivers/dri/r300/r300_fragprog.c | 5 +++++ src/mesa/drivers/dri/r300/radeon_program.c | 32 ++++++++++++++++++++++++++++++ src/mesa/drivers/dri/r300/radeon_program.h | 1 + 3 files changed, 38 insertions(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 4c6289298e..814ccd3eac 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -301,6 +301,11 @@ void
r300TranslateFragmentShader(r300ContextPtr r300, &compiler.compiler.Clauses[0], 1, transformations); + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Compiler state after transformations:\n"); + radeonCompilerDump(&compiler.compiler); + } + if (!r300FragmentProgramEmit(&compiler)) fp->error = GL_TRUE; diff --git a/src/mesa/drivers/dri/r300/radeon_program.c b/src/mesa/drivers/dri/r300/radeon_program.c index 41cedbe61d..c8f40e8189 100644 --- a/src/mesa/drivers/dri/r300/radeon_program.c +++ b/src/mesa/drivers/dri/r300/radeon_program.c @@ -27,6 +27,7 @@ #include "radeon_program.h" +#include "shader/prog_print.h" /** * Initialize a compiler structure with a single mixed clause @@ -79,6 +80,37 @@ int radeonCompilerAllocateTemporary(struct radeon_compiler *compiler) } +static const char* clausename(int type) +{ + switch(type) { + case CLAUSE_MIXED: return "CLAUSE_MIXED"; + case CLAUSE_ALU: return "CLAUSE_ALU"; + case CLAUSE_TEX: return "CLAUSE_TEX"; + default: return "CLAUSE_UNKNOWN"; + } +} + + +/** + * Dump the current compiler state to the console for debugging. 
+ */ +void radeonCompilerDump(struct radeon_compiler *compiler) +{ + int i; + for(i = 0; i < compiler->NumClauses; ++i) { + struct radeon_clause *clause = &compiler->Clauses[i]; + int j; + + _mesa_printf("%2i: %s\n", i+1, clausename(clause->Type)); + + for(j = 0; j < clause->NumInstructions; ++j) { + _mesa_printf("%4i: ", j+1); + _mesa_print_instruction(&clause->Instructions[j]); + } + } +} + + /** * \p position index of the new clause; later clauses are moved * \p type of the new clause; one of CLAUSE_XXX diff --git a/src/mesa/drivers/dri/r300/radeon_program.h b/src/mesa/drivers/dri/r300/radeon_program.h index 3cde4d4f6f..25e70505b1 100644 --- a/src/mesa/drivers/dri/r300/radeon_program.h +++ b/src/mesa/drivers/dri/r300/radeon_program.h @@ -104,6 +104,7 @@ void radeonCompilerInit( struct gl_program *source); void radeonCompilerCleanup(struct radeon_compiler *compiler); int radeonCompilerAllocateTemporary(struct radeon_compiler *compiler); +void radeonCompilerDump(struct radeon_compiler *compiler); struct radeon_clause *radeonCompilerInsertClause( struct radeon_compiler *compiler, -- cgit v1.2.3 From 83ad2a756ea8dd1b0ca9746e355ce3de0f29356e Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 14 Jun 2008 02:28:58 +0200 Subject: texenvprogram: Mark textures using ARB_shadow as ShadowSamplers Since ARB_fragment_program and friends are defined to ignore the setting of the GL_TEXTURE_COMPARE_FUNC parameter, we have to explicitly enable the shadow comparison by marking the texture unit in ShadowSamplers when appropriate. 
--- src/mesa/main/texenvprogram.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/texenvprogram.c b/src/mesa/main/texenvprogram.c index af19a38c42..512d52704d 100644 --- a/src/mesa/main/texenvprogram.c +++ b/src/mesa/main/texenvprogram.c @@ -68,6 +68,7 @@ struct state_key { struct { GLuint enabled:1; GLuint source_index:3; /* one of TEXTURE_1D/2D/3D/CUBE/RECT_INDEX */ + GLuint shadow:1; GLuint ScaleShiftRGB:2; GLuint ScaleShiftA:2; @@ -219,6 +220,7 @@ static void make_state_key( GLcontext *ctx, struct state_key *key ) key->unit[i].source_index = translate_tex_src_bit(texUnit->_ReallyEnabled); + key->unit[i].shadow = texUnit->_Current->CompareMode == GL_COMPARE_R_TO_TEXTURE; key->unit[i].NumArgsRGB = texUnit->_CurrentCombine->_NumArgsRGB; key->unit[i].NumArgsA = texUnit->_CurrentCombine->_NumArgsA; @@ -945,11 +947,13 @@ static void load_texture( struct texenv_fragment_program *p, GLuint unit ) /* TODO: Use D0_MASK_XY where possible. 
*/ - if (p->state->unit[unit].enabled) + if (p->state->unit[unit].enabled) { p->src_texture[unit] = emit_texld( p, OPCODE_TXP, tmp, WRITEMASK_XYZW, unit, dim, texcoord ); - else + if (p->state->unit[unit].shadow) + p->program->Base.ShadowSamplers |= 1 << unit; + } else p->src_texture[unit] = get_zero(p); } } -- cgit v1.2.3 From a1c0c56d70308a562c90cc01982c89ed1396c830 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 14 Jun 2008 04:07:51 +0200 Subject: r300: Implement GL_ARB_shadow and GL_EXT_shadow_funcs --- src/mesa/drivers/dri/r300/r300_context.c | 2 + src/mesa/drivers/dri/r300/r300_context.h | 29 +++++++ src/mesa/drivers/dri/r300/r300_fragprog.c | 122 +++++++++++++++++++++++++++++- 3 files changed, 152 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index 31cc00a081..063d4e575e 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -98,6 +98,7 @@ const struct dri_extension card_extensions[] = { {"GL_ARB_fragment_program", NULL}, {"GL_ARB_multisample", GL_ARB_multisample_functions}, {"GL_ARB_multitexture", NULL}, + {"GL_ARB_shadow", NULL}, {"GL_ARB_texture_border_clamp", NULL}, {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions}, {"GL_ARB_texture_cube_map", NULL}, @@ -116,6 +117,7 @@ const struct dri_extension card_extensions[] = { {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions}, {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions}, {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, + {"GL_EXT_shadow_funcs", NULL}, {"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions}, {"GL_EXT_stencil_wrap", NULL}, {"GL_EXT_texture_edge_clamp", NULL}, diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index a9b3b061f4..ca22c5dd8a 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ 
b/src/mesa/drivers/dri/r300/r300_context.h @@ -655,6 +655,34 @@ struct r300_vertex_program_cont { struct r300_pfs_compile_state; + +/** + * Stores state that influences the compilation of a fragment program. + */ +struct r300_fragment_program_external_state { + struct { + /** + * If the sampler is used as a shadow sampler, + * this field is: + * 0 - GL_LUMINANCE + * 1 - GL_INTENSITY + * 2 - GL_ALPHA + * depending on the depth texture mode. + */ + GLuint depth_texture_mode : 2; + + /** + * If the sampler is used as a shadow sampler, + * this field is (texture_compare_func - GL_NEVER). + * [e.g. if compare function is GL_LEQUAL, this field is 3] + * + * Otherwise, this field is 0. + */ + GLuint texture_compare_func : 3; + } unit[16]; +}; + + /** * Stores an R300 fragment program in its compiled-to-hardware form. */ @@ -711,6 +739,7 @@ struct r300_fragment_program { GLboolean translated; GLboolean error; + struct r300_fragment_program_external_state state; struct r300_fragment_program_code code; GLboolean WritesDepth; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index 814ccd3eac..da2dedece8 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -81,6 +81,33 @@ static GLboolean transform_TEX( inst.Opcode != OPCODE_KIL) return GL_FALSE; + if (inst.Opcode != OPCODE_KIL && + compiler->fp->mesa_program.Base.ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; + + if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + + tgt->Opcode = OPCODE_MAD; + tgt->DstReg = inst.DstReg; + tgt->SrcReg[0].File = PROGRAM_BUILTIN; + tgt->SrcReg[0].Swizzle = SWIZZLE_0000; + tgt->SrcReg[1].File = PROGRAM_BUILTIN; + tgt->SrcReg[1].Swizzle = SWIZZLE_0000; + tgt->SrcReg[2].File = 
PROGRAM_BUILTIN; + tgt->SrcReg[2].Swizzle = comparefunc == GL_ALWAYS ? SWIZZLE_1111 : SWIZZLE_0000; + return GL_TRUE; + } + + int tempreg = radeonCompilerAllocateTemporary(context->compiler); + + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = tempreg; + inst.DstReg.WriteMask = WRITEMASK_XYZW; + } + + /* Hardware uses [0..1]x[0..1] range for rectangle textures * instead of [0..Width]x[0..Height]. * Add a scaling instruction. @@ -156,7 +183,51 @@ static GLboolean transform_TEX( context->dest->NumInstructions, 1); _mesa_copy_instructions(tgt, &inst, 1); - if (destredirect) { + if (inst.Opcode != OPCODE_KIL && + compiler->fp->mesa_program.Base.ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; + + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 2); + + tgt[0].Opcode = OPCODE_MAD; + tgt[0].DstReg = inst.DstReg; + tgt[0].DstReg.WriteMask = orig_inst->DstReg.WriteMask; + tgt[0].SrcReg[0].File = PROGRAM_TEMPORARY; + tgt[0].SrcReg[0].Index = inst.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + tgt[0].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; + tgt[0].SrcReg[1].File = PROGRAM_BUILTIN; + tgt[0].SrcReg[1].Swizzle = SWIZZLE_1111; + tgt[0].SrcReg[2] = inst.SrcReg[0]; + tgt[0].SrcReg[2].Swizzle = SWIZZLE_ZZZZ; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + tgt[0].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW; + else + tgt[0].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW; + + tgt[1].Opcode = OPCODE_CMP; + tgt[1].DstReg = orig_inst->DstReg; + 
tgt[1].SrcReg[0].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[0].Index = tgt[0].DstReg.Index; + tgt[1].SrcReg[1].File = PROGRAM_BUILTIN; + tgt[1].SrcReg[2].File = PROGRAM_BUILTIN; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + tgt[1].SrcReg[1].Swizzle = SWIZZLE_1111; + tgt[1].SrcReg[2].Swizzle = SWIZZLE_0000; + } else { + tgt[1].SrcReg[1].Swizzle = SWIZZLE_0000; + tgt[1].SrcReg[2].Swizzle = SWIZZLE_1111; + } + } else if (destredirect) { tgt = radeonClauseInsertInstructions(context->compiler, context->dest, context->dest->NumInstructions, 1); @@ -280,9 +351,58 @@ static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler) } +static GLuint build_dtm(GLuint depthmode) +{ + switch(depthmode) { + default: + case GL_LUMINANCE: return 0; + case GL_INTENSITY: return 1; + case GL_ALPHA: return 2; + } +} + +static GLuint build_func(GLuint comparefunc) +{ + return comparefunc - GL_NEVER; +} + + +/** + * Collect all external state that is relevant for compiling the given + * fragment program. 
+ */ +static void build_state( + r300ContextPtr r300, + struct r300_fragment_program *fp, + struct r300_fragment_program_external_state *state) +{ + int unit; + + _mesa_bzero(state, sizeof(*state)); + + for(unit = 0; unit < 16; ++unit) { + if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) { + struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; + + state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); + state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); + } + } +} + + void r300TranslateFragmentShader(r300ContextPtr r300, struct r300_fragment_program *fp) { + struct r300_fragment_program_external_state state; + + build_state(r300, fp, &state); + if (_mesa_memcmp(&fp->state, &state, sizeof(state))) { + /* TODO: cache compiled programs */ + fp->translated = GL_FALSE; + _mesa_memcpy(&fp->state, &state, sizeof(state)); + } + if (!fp->translated) { struct r300_fragment_program_compiler compiler; -- cgit v1.2.3 From 9704414d1376d449ad6a006a16be8139f82b5d81 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 14 Jun 2008 04:37:09 -0700 Subject: r3xx/r5xx: Don't force aniso. *Pulls paper bag down over head* --- src/mesa/drivers/dri/r300/r300_tex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 2c3c28e638..16321af7d4 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -193,7 +193,7 @@ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat * When anisotropic filtering is enabled, we override min and mag * filter settings completely. This includes driconf's settings. 
*/ - if (anisotropy >= 2.0) { + if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) { t->filter |= R300_TX_MAG_FILTER_ANISO | R300_TX_MIN_FILTER_ANISO | R300_TX_MIN_FILTER_MIP_LINEAR -- cgit v1.2.3 From 0a341ef29657c1ead116c4acaca138551631de16 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 14 Jun 2008 07:03:08 -0700 Subject: r5xx: FP refactor, take one. Yes, I know it's massive. Imagine how I felt, auditing 3000 lines of code. --- src/mesa/drivers/dri/r300/Makefile | 2 +- src/mesa/drivers/dri/r300/r300_context.h | 51 +- src/mesa/drivers/dri/r300/r300_state.c | 49 +- src/mesa/drivers/dri/r300/r500_fragprog.c | 1801 ++++++----------------------- src/mesa/drivers/dri/r300/r500_fragprog.h | 13 + 5 files changed, 426 insertions(+), 1490 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 7cd5647064..7b8f5f1384 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -28,7 +28,6 @@ DRIVER_SOURCES = \ radeon_span.c \ radeon_state.c \ r300_mem.c \ - \ r300_context.c \ r300_ioctl.c \ r300_cmdbuf.c \ @@ -42,6 +41,7 @@ DRIVER_SOURCES = \ r300_fragprog.c \ r300_fragprog_emit.c \ r500_fragprog.c \ + r500_fragprog_emit.c \ r300_shader.c \ r300_emit.c \ r300_swtcl.c \ diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index ca22c5dd8a..1a90f5cabb 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -748,14 +748,30 @@ struct r300_fragment_program { struct r500_pfs_compile_state; -struct r500_fragment_program { - struct gl_fragment_program mesa_program; +struct r500_fragment_program_external_state { + struct { + /** + * If the sampler is used as a shadow sampler, + * this field is: + * 0 - GL_LUMINANCE + * 1 - GL_INTENSITY + * 2 - GL_ALPHA + * depending on the depth texture mode. 
+ */ + GLuint depth_texture_mode : 2; - GLcontext *ctx; - GLboolean translated; - GLboolean error; - struct r500_pfs_compile_state *cs; + /** + * If the sampler is used as a shadow sampler, + * this field is (texture_compare_func - GL_NEVER). + * [e.g. if compare function is GL_LEQUAL, this field is 3] + * + * Otherwise, this field is 0. + */ + GLuint texture_compare_func : 3; + } unit[16]; +}; +struct r500_fragment_program_code { struct { GLuint inst0; GLuint inst1; @@ -772,17 +788,28 @@ struct r500_fragment_program { int inst_end; /* Hardware constants. - * Contains a pointer to the value. The destination of the pointer - * is supposed to be updated when GL state changes. - * Typically, this is either a pointer into - * gl_program_parameter_list::ParameterValues, or a pointer to a - * global constant (e.g. for sin/cos-approximation) - */ + * Contains a pointer to the value. The destination of the pointer + * is supposed to be updated when GL state changes. + * Typically, this is either a pointer into + * gl_program_parameter_list::ParameterValues, or a pointer to a + * global constant (e.g. 
for sin/cos-approximation) + */ const GLfloat *constant[PFS_NUM_CONST_REGS]; int const_nr; int max_temp_idx; +}; +struct r500_fragment_program { + struct gl_fragment_program mesa_program; + + GLcontext *ctx; + GLboolean translated; + GLboolean error; + + struct r500_fragment_program_external_state state; + struct r500_fragment_program_code code; + GLboolean writes_depth; GLuint optimization; diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index e82c3d9681..a86e4bc344 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1351,14 +1351,15 @@ static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) int i; struct r500_fragment_program *fp = (struct r500_fragment_program *) (char *)ctx->FragmentProgram._Current; + struct r500_fragment_program_code *code = &fp->code; /* find all the texture instructions and relocate the texture units */ - for (i = 0; i < fp->inst_end + 1; i++) { - if ((fp->inst[i].inst0 & 0x3) == R500_INST_TYPE_TEX) { + for (i = 0; i < code->inst_end + 1; i++) { + if ((code->inst[i].inst0 & 0x3) == R500_INST_TYPE_TEX) { uint32_t val; int unit, opcode, new_unit; - val = fp->inst[i].inst1; + val = code->inst[i].inst1; unit = (val >> 16) & 0xf; @@ -1375,7 +1376,7 @@ static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) } } val |= R500_TEX_ID(new_unit); - fp->inst[i].inst1 = val; + code->inst[i].inst1 = val; } } } @@ -2499,6 +2500,7 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) struct r500_fragment_program *fp = (struct r500_fragment_program *) (char *)ctx->FragmentProgram._Current; int i; + struct r500_fragment_program_code *code; if (!fp) /* should only happenen once, just after context is created */ return; @@ -2512,42 +2514,43 @@ static void r500SetupPixelShader(r300ContextPtr rmesa) __FUNCTION__); return; } + code = &fp->code; r300SetupTextures(ctx); R300_STATECHANGE(rmesa, fp); - 
rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = fp->max_temp_idx; + rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = code->max_temp_idx; rmesa->hw.fp.cmd[R500_FP_CODE_ADDR] = - R500_US_CODE_START_ADDR(fp->inst_offset) | - R500_US_CODE_END_ADDR(fp->inst_end); + R500_US_CODE_START_ADDR(code->inst_offset) | + R500_US_CODE_END_ADDR(code->inst_end); rmesa->hw.fp.cmd[R500_FP_CODE_RANGE] = - R500_US_CODE_RANGE_ADDR(fp->inst_offset) | - R500_US_CODE_RANGE_SIZE(fp->inst_end); + R500_US_CODE_RANGE_ADDR(code->inst_offset) | + R500_US_CODE_RANGE_SIZE(code->inst_end); rmesa->hw.fp.cmd[R500_FP_CODE_OFFSET] = R500_US_CODE_OFFSET_ADDR(0); /* FIXME when we add flow control */ R300_STATECHANGE(rmesa, r500fp); /* Emit our shader... */ - for (i = 0; i < fp->inst_end+1; i++) { - rmesa->hw.r500fp.cmd[i*6+1] = fp->inst[i].inst0; - rmesa->hw.r500fp.cmd[i*6+2] = fp->inst[i].inst1; - rmesa->hw.r500fp.cmd[i*6+3] = fp->inst[i].inst2; - rmesa->hw.r500fp.cmd[i*6+4] = fp->inst[i].inst3; - rmesa->hw.r500fp.cmd[i*6+5] = fp->inst[i].inst4; - rmesa->hw.r500fp.cmd[i*6+6] = fp->inst[i].inst5; + for (i = 0; i < code->inst_end+1; i++) { + rmesa->hw.r500fp.cmd[i*6+1] = code->inst[i].inst0; + rmesa->hw.r500fp.cmd[i*6+2] = code->inst[i].inst1; + rmesa->hw.r500fp.cmd[i*6+3] = code->inst[i].inst2; + rmesa->hw.r500fp.cmd[i*6+4] = code->inst[i].inst3; + rmesa->hw.r500fp.cmd[i*6+5] = code->inst[i].inst4; + rmesa->hw.r500fp.cmd[i*6+6] = code->inst[i].inst5; } - bump_r500fp_count(rmesa->hw.r500fp.cmd, (fp->inst_end + 1) * 6); + bump_r500fp_count(rmesa->hw.r500fp.cmd, (code->inst_end + 1) * 6); R300_STATECHANGE(rmesa, r500fp_const); - for (i = 0; i < fp->const_nr; i++) { - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(fp->constant[i][0]); - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(fp->constant[i][1]); - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(fp->constant[i][2]); - rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = 
r300PackFloat32(fp->constant[i][3]); + for (i = 0; i < code->const_nr; i++) { + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(code->constant[i][0]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(code->constant[i][1]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(code->constant[i][2]); + rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(code->constant[i][3]); } - bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, fp->const_nr * 4); + bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, code->const_nr * 4); } diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 70e45f3ea8..197036008a 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -1,8 +1,5 @@ /* - * Copyright (C) 2005 Ben Skeggs. - * * Copyright 2008 Corbin Simpson - * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. * * All Rights Reserved. * @@ -28,1491 +25,391 @@ * */ -/** - * \file - * - * \author Ben Skeggs - * - * \author Jerome Glisse - * - * \author Corbin Simpson - * - * \todo Depth write, WPOS/FOGC inputs - * - * \todo FogOption - * - * \todo Verify results of opcodes for accuracy, I've only checked them in - * specific cases. 
- */ - -#include "glheader.h" -#include "macros.h" -#include "enums.h" -#include "shader/prog_instruction.h" -#include "shader/prog_parameter.h" -#include "shader/prog_print.h" - -#include "r300_context.h" #include "r500_fragprog.h" -#include "r300_reg.h" -#include "r300_state.h" - -/* Mapping Mesa registers to R500 temporaries */ -struct reg_acc { - int reg; /* Assigned hw temp */ - unsigned int refcount; /* Number of uses by mesa program */ -}; -/** - * Describe the current lifetime information for an R300 temporary - */ -struct reg_lifetime { - /* Index of the first slot where this register is free in the sense - that it can be used as a new destination register. - This is -1 if the register has been assigned to a Mesa register - and the last access to the register has not yet been emitted */ - int free; - - /* Index of the first slot where this register is currently reserved. - This is used to stop e.g. a scalar operation from being moved - before the allocation time of a register that was first allocated - for a vector operation. */ - int reserved; - - /* Index of the first slot in which the register can be used as a - source without losing the value that is written by the last - emitted instruction that writes to the register */ - int vector_valid; - int scalar_valid; - - /* Index to the slot where the register was last read. - This is also the first slot in which the register may be written again */ - int vector_lastread; - int scalar_lastread; -}; - -/** - * Store usage information about an ALU instruction slot during the - * compilation of a fragment program. 
- */ -#define SLOT_SRC_VECTOR (1<<0) -#define SLOT_SRC_SCALAR (1<<3) -#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR) -#define SLOT_OP_VECTOR (1<<16) -#define SLOT_OP_SCALAR (1<<17) -#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR) - -struct r500_pfs_compile_slot { - /* Bitmask indicating which parts of the slot are used, using SLOT_ constants - defined above */ - unsigned int used; - - /* Selected sources */ - int vsrc[3]; - int ssrc[3]; -}; +static void reset_srcreg(struct prog_src_register* reg) +{ + _mesa_bzero(reg, sizeof(*reg)); + reg->Swizzle = SWIZZLE_NOOP; +} /** - * Store information during compilation of fragment programs. + * Transform TEX, TXP, TXB, and KIL instructions in the following way: + * - premultiply texture coordinates for RECT + * - extract operand swizzles + * - introduce a temporary register when write masks are needed + * + * \todo If/when r5xx uses the radeon_program architecture, this can probably + * be reused. */ -struct r500_pfs_compile_state { - int nrslots; /* number of ALU slots used so far */ - - /* Track which (parts of) slots are already filled with instructions */ - struct r500_pfs_compile_slot slot[PFS_MAX_ALU_INST]; - - /* Track the validity of R300 temporaries */ - struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS]; - - /* Used to map Mesa's inputs/temps onto hardware temps */ - int temp_in_use; - struct reg_acc temps[PFS_NUM_TEMP_REGS]; - struct reg_acc inputs[32]; /* don't actually need 32... */ - - /* Track usage of hardware temps, for register allocation, - * indirection detection, etc. 
*/ - GLuint used_in_node; - GLuint dest_in_node; -}; +static GLboolean transform_TEX( + struct radeon_program_transform_context* context, + struct prog_instruction* orig_inst, void* data) +{ + struct r500_fragment_program_compiler *compiler = + (struct r500_fragment_program_compiler*)data; + struct prog_instruction inst = *orig_inst; + struct prog_instruction* tgt; + GLboolean destredirect = GL_FALSE; + + if (inst.Opcode != OPCODE_TEX && + inst.Opcode != OPCODE_TXB && + inst.Opcode != OPCODE_TXP && + inst.Opcode != OPCODE_KIL) + return GL_FALSE; + if (inst.Opcode != OPCODE_KIL && + compiler->fp->mesa_program.Base.ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; + + if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) { + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + + tgt->Opcode = OPCODE_MAD; + tgt->DstReg = inst.DstReg; + tgt->SrcReg[0].File = PROGRAM_BUILTIN; + tgt->SrcReg[0].Swizzle = SWIZZLE_0000; + tgt->SrcReg[1].File = PROGRAM_BUILTIN; + tgt->SrcReg[1].Swizzle = SWIZZLE_0000; + tgt->SrcReg[2].File = PROGRAM_BUILTIN; + tgt->SrcReg[2].Swizzle = comparefunc == GL_ALWAYS ? SWIZZLE_1111 : SWIZZLE_0000; + return GL_TRUE; + } -/* - * Useful macros and values - */ -#define ERROR(fmt, args...) 
do { \ - fprintf(stderr, "%s::%s(): " fmt "\n", \ - __FILE__, __FUNCTION__, ##args); \ - fp->error = GL_TRUE; \ - } while(0) - -#define COMPILE_STATE struct r500_pfs_compile_state *cs = fp->cs - -#define R500_US_NUM_TEMP_REGS 128 -#define R500_US_NUM_CONST_REGS 256 - -/* "Register" flags */ -#define REG_CONSTANT (1 << 8) -#define REG_SRC_REL (1 << 9) -#define REG_DEST_REL (1 << 7) - -/* Swizzle tools */ -#define R500_SWIZZLE_ZERO 4 -#define R500_SWIZZLE_HALF 5 -#define R500_SWIZZLE_ONE 6 -#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6)) -#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6)) -#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6)) -#define R500_SWIZ_MOD_NEG 1 -#define R500_SWIZ_MOD_ABS 2 -#define R500_SWIZ_MOD_NEG_ABS 3 -/* Swizzles for inst2 */ -#define MAKE_SWIZ_TEX_STRQ(x) (x << 8) -#define MAKE_SWIZ_TEX_RGBA(x) (x << 24) -/* Swizzles for inst3 */ -#define MAKE_SWIZ_RGB_A(x) (x << 2) -#define MAKE_SWIZ_RGB_B(x) (x << 15) -/* Swizzles for inst4 */ -#define MAKE_SWIZ_ALPHA_A(x) (x << 14) -#define MAKE_SWIZ_ALPHA_B(x) (x << 21) -/* Swizzle for inst5 */ -#define MAKE_SWIZ_RGBA_C(x) (x << 14) -#define MAKE_SWIZ_ALPHA_C(x) (x << 27) - -/* Writemasks */ -#define R500_WRITEMASK_G 0x2 -#define R500_WRITEMASK_B 0x4 -#define R500_WRITEMASK_RGB 0x7 -#define R500_WRITEMASK_A 0x8 -#define R500_WRITEMASK_AR 0x9 -#define R500_WRITEMASK_AG 0xA -#define R500_WRITEMASK_ARG 0xB -#define R500_WRITEMASK_AB 0xC -#define R500_WRITEMASK_ARGB 0xF - -/* 1/(2pi), needed for quick modulus in trig insts - * Thanks to glisse for pointing out how to do it! 
*/ -static const GLfloat RCP_2PI[] = {0.15915494309189535, - 0.15915494309189535, - 0.15915494309189535, - 0.15915494309189535}; - -static const GLfloat LIT[] = {127.999999, - 127.999999, - 127.999999, - -127.999999}; - -static void dump_program(struct r500_fragment_program *fp); - -static inline GLuint make_rgb_swizzle(struct prog_src_register src) { - GLuint swiz = 0x0; - GLuint temp; - /* This could be optimized, but it should be plenty fast already. */ - int i; - for (i = 0; i < 3; i++) { - temp = GET_SWZ(src.Swizzle, i); - /* Fix SWIZZLE_ONE */ - if (temp == 5) temp++; - swiz |= temp << i*3; - } - if (src.NegateBase) - swiz |= (R500_SWIZ_MOD_NEG << 9); - return swiz; -} + int tempreg = radeonCompilerAllocateTemporary(context->compiler); -static inline GLuint make_rgba_swizzle(GLuint src) { - GLuint swiz = 0x0; - GLuint temp; - int i; - for (i = 0; i < 4; i++) { - temp = GET_SWZ(src, i); - /* Fix SWIZZLE_ONE */ - if (temp == 5) temp++; - swiz |= temp << i*3; + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = tempreg; + inst.DstReg.WriteMask = WRITEMASK_XYZW; } - return swiz; -} -static inline GLuint make_alpha_swizzle(struct prog_src_register src) { - GLuint swiz = GET_SWZ(src.Swizzle, 3); - - if (swiz == 5) swiz++; - - if (src.NegateBase) - swiz |= (R500_SWIZ_MOD_NEG << 3); - - return swiz; -} -static inline GLuint make_sop_swizzle(struct prog_src_register src) { - GLuint swiz = GET_SWZ(src.Swizzle, 0); - - if (swiz == 5) swiz++; - return swiz; -} - -static inline GLuint make_strq_swizzle(struct prog_src_register src) { - GLuint swiz = 0x0, temp = 0x0; - int i; - for (i = 0; i < 4; i++) { - temp = GET_SWZ(src.Swizzle, i) & 0x3; - swiz |= temp << i*2; + /* Hardware uses [0..1]x[0..1] range for rectangle textures + * instead of [0..Width]x[0..Height]. + * Add a scaling instruction. 
+ */ + if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) { + gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, + 0 + }; + + int tempreg = radeonCompilerAllocateTemporary(context->compiler); + int factor_index; + + tokens[2] = inst.TexSrcUnit; + factor_index = + _mesa_add_state_reference( + compiler->fp->mesa_program.Base.Parameters, tokens); + + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + + tgt->Opcode = OPCODE_MAD; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tempreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + tgt->SrcReg[1].File = PROGRAM_STATE_VAR; + tgt->SrcReg[1].Index = factor_index; + tgt->SrcReg[2].File = PROGRAM_BUILTIN; + tgt->SrcReg[2].Swizzle = SWIZZLE_0000; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tempreg; } - return swiz; -} -static int get_temp(struct r500_fragment_program *fp, int slot) { - - COMPILE_STATE; - - int r = fp->temp_reg_offset + cs->temp_in_use + slot; - - if (r > R500_US_NUM_TEMP_REGS) { - ERROR("Too many temporary registers requested, can't compile!\n"); + /* Texture operations do not support swizzles etc. in hardware, + * so emit an additional arithmetic operation if necessary. 
+ */ + if (inst.SrcReg[0].Swizzle != SWIZZLE_NOOP || + inst.SrcReg[0].Abs || inst.SrcReg[0].NegateBase || inst.SrcReg[0].NegateAbs) { + int tempreg = radeonCompilerAllocateTemporary(context->compiler); + + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + + tgt->Opcode = OPCODE_MAD; + tgt->DstReg.File = PROGRAM_TEMPORARY; + tgt->DstReg.Index = tempreg; + tgt->SrcReg[0] = inst.SrcReg[0]; + tgt->SrcReg[1].File = PROGRAM_BUILTIN; + tgt->SrcReg[1].Swizzle = SWIZZLE_1111; + tgt->SrcReg[2].File = PROGRAM_BUILTIN; + tgt->SrcReg[2].Swizzle = SWIZZLE_0000; + + reset_srcreg(&inst.SrcReg[0]); + inst.SrcReg[0].File = PROGRAM_TEMPORARY; + inst.SrcReg[0].Index = tempreg; } - return r; -} - -/* Borrowed verbatim from r300_fragprog since it hasn't changed. */ -static GLuint emit_const4fv(struct r500_fragment_program *fp, - const GLfloat * cp) -{ - GLuint reg = 0x0; - int index; + if (inst.Opcode != OPCODE_KIL) { + if (inst.DstReg.File != PROGRAM_TEMPORARY || + inst.DstReg.WriteMask != WRITEMASK_XYZW) { + int tempreg = radeonCompilerAllocateTemporary(context->compiler); - for (index = 0; index < fp->const_nr; ++index) { - if (fp->constant[index] == cp) - break; + inst.DstReg.File = PROGRAM_TEMPORARY; + inst.DstReg.Index = tempreg; + inst.DstReg.WriteMask = WRITEMASK_XYZW; + destredirect = GL_TRUE; + } } - if (index >= fp->const_nr) { - if (index >= R500_US_NUM_CONST_REGS) { - ERROR("Out of hw constants!\n"); - return reg; + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + _mesa_copy_instructions(tgt, &inst, 1); + + if (inst.Opcode != OPCODE_KIL && + compiler->fp->mesa_program.Base.ShadowSamplers & (1 << inst.TexSrcUnit)) { + GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; + GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode; + + tgt = radeonClauseInsertInstructions(context->compiler, 
context->dest, + context->dest->NumInstructions, 2); + + tgt[0].Opcode = OPCODE_MAD; + tgt[0].DstReg = inst.DstReg; + tgt[0].DstReg.WriteMask = orig_inst->DstReg.WriteMask; + tgt[0].SrcReg[0].File = PROGRAM_TEMPORARY; + tgt[0].SrcReg[0].Index = inst.DstReg.Index; + if (depthmode == 0) /* GL_LUMINANCE */ + tgt[0].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); + else if (depthmode == 2) /* GL_ALPHA */ + tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; + tgt[0].SrcReg[1].File = PROGRAM_BUILTIN; + tgt[0].SrcReg[1].Swizzle = SWIZZLE_1111; + tgt[0].SrcReg[2] = inst.SrcReg[0]; + tgt[0].SrcReg[2].Swizzle = SWIZZLE_ZZZZ; + + /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: + * r < tex <=> -tex+r < 0 + * r >= tex <=> not (-tex+r < 0 */ + if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) + tgt[0].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW; + else + tgt[0].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW; + + tgt[1].Opcode = OPCODE_CMP; + tgt[1].DstReg = orig_inst->DstReg; + tgt[1].SrcReg[0].File = PROGRAM_TEMPORARY; + tgt[1].SrcReg[0].Index = tgt[0].DstReg.Index; + tgt[1].SrcReg[1].File = PROGRAM_BUILTIN; + tgt[1].SrcReg[2].File = PROGRAM_BUILTIN; + + if (comparefunc == GL_LESS || comparefunc == GL_GREATER) { + tgt[1].SrcReg[1].Swizzle = SWIZZLE_1111; + tgt[1].SrcReg[2].Swizzle = SWIZZLE_0000; + } else { + tgt[1].SrcReg[1].Swizzle = SWIZZLE_0000; + tgt[1].SrcReg[2].Swizzle = SWIZZLE_1111; } - - fp->const_nr++; - fp->constant[index] = cp; + } else if (destredirect) { + tgt = radeonClauseInsertInstructions(context->compiler, context->dest, + context->dest->NumInstructions, 1); + + tgt->Opcode = OPCODE_MAD; + tgt->DstReg = orig_inst->DstReg; + tgt->SrcReg[0].File = PROGRAM_TEMPORARY; + tgt->SrcReg[0].Index = inst.DstReg.Index; + tgt->SrcReg[1].File = PROGRAM_BUILTIN; + tgt->SrcReg[1].Swizzle = SWIZZLE_1111; + tgt->SrcReg[2].File = PROGRAM_BUILTIN; + tgt->SrcReg[2].Swizzle = SWIZZLE_0000; } - reg = index | 
REG_CONSTANT; - return reg; -} - -static GLuint make_src(struct r500_fragment_program *fp, struct prog_src_register src) { - COMPILE_STATE; - GLuint reg; - switch (src.File) { - case PROGRAM_TEMPORARY: - reg = src.Index + fp->temp_reg_offset; - break; - case PROGRAM_INPUT: - reg = cs->inputs[src.Index].reg; - break; - case PROGRAM_LOCAL_PARAM: - reg = emit_const4fv(fp, - fp->mesa_program.Base.LocalParams[src. - Index]); - break; - case PROGRAM_ENV_PARAM: - reg = emit_const4fv(fp, - fp->ctx->FragmentProgram.Parameters[src. - Index]); - break; - case PROGRAM_STATE_VAR: - case PROGRAM_NAMED_PARAM: - case PROGRAM_CONSTANT: - reg = emit_const4fv(fp, fp->mesa_program.Base.Parameters-> - ParameterValues[src.Index]); - break; - default: - ERROR("Can't handle src.File %x\n", src.File); - reg = 0x0; - break; - } - return reg; + return GL_TRUE; } -static GLuint make_dest(struct r500_fragment_program *fp, struct prog_dst_register dest) { - GLuint reg; - switch (dest.File) { - case PROGRAM_TEMPORARY: - reg = dest.Index + fp->temp_reg_offset; - break; - case PROGRAM_OUTPUT: - /* Eventually we may need to handle multiple - * rendering targets... 
*/ - reg = dest.Index; - break; - default: - ERROR("Can't handle dest.File %x\n", dest.File); - reg = 0x0; - break; - } - return reg; -} -static void emit_tex(struct r500_fragment_program *fp, - struct prog_instruction *fpi, int dest, int counter) +static void update_params(r300ContextPtr r300, struct r500_fragment_program *fp) { - int hwsrc, hwdest; - GLuint mask; + struct gl_fragment_program *mp = &fp->mesa_program; - mask = fpi->DstReg.WriteMask << 11; - hwsrc = make_src(fp, fpi->SrcReg[0]); + /* Ask Mesa nicely to fill in ParameterValues for us */ + if (mp->Base.Parameters) + _mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters); +} - if (fpi->DstReg.File == PROGRAM_OUTPUT) { - hwdest = get_temp(fp, 0); - } else { - hwdest = dest; - } - fp->inst[counter].inst0 = R500_INST_TYPE_TEX | mask - | R500_INST_TEX_SEM_WAIT; - - fp->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit) - | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; - - if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) - fp->inst[counter].inst1 |= R500_TEX_UNSCALED; - - switch (fpi->Opcode) { - case OPCODE_KIL: - fp->inst[counter].inst1 |= R500_TEX_INST_TEXKILL; - break; - case OPCODE_TEX: - fp->inst[counter].inst1 |= R500_TEX_INST_LD; - break; - case OPCODE_TXB: - fp->inst[counter].inst1 |= R500_TEX_INST_LODBIAS; - break; - case OPCODE_TXP: - fp->inst[counter].inst1 |= R500_TEX_INST_PROJ; - break; - default: - ERROR("emit_tex can't handle opcode %x\n", fpi->Opcode); - } +/** + * Transform the program to support fragment.position. + * + * Introduce a small fragment at the start of the program that will be + * the only code that directly reads the FRAG_ATTRIB_WPOS input. + * All other code pieces that reference that input will be rewritten + * to read from a newly allocated temporary. + * + * \todo if/when r5xx supports the radeon_program architecture, this is a + * likely candidate for code sharing. 
+ */ +static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler) +{ + GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead; - fp->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc) - | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) - /* | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G - | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A */ - | R500_TEX_DST_ADDR(hwdest) - | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G - | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; - - fp->inst[counter].inst3 = 0x0; - fp->inst[counter].inst4 = 0x0; - fp->inst[counter].inst5 = 0x0; - - if (fpi->DstReg.File == PROGRAM_OUTPUT) { - counter++; - fp->inst[counter].inst0 = R500_INST_TYPE_OUT - | R500_INST_TEX_SEM_WAIT | (mask << 4); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_OMOD_DISABLE; - fp->inst[counter].inst4 = R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A) - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A) - | R500_ALPHA_OMOD_DISABLE; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - } -} + if (!(InputsRead & FRAG_BIT_WPOS)) + return; -static void emit_alu(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi) { - /* Ideally, we shouldn't have to explicitly clear memory here! 
*/ - fp->inst[counter].inst0 = 0x0; - fp->inst[counter].inst1 = 0x0; - fp->inst[counter].inst2 = 0x0; - fp->inst[counter].inst3 = 0x0; - fp->inst[counter].inst4 = 0x0; - fp->inst[counter].inst5 = 0x0; - - if (fpi->DstReg.File == PROGRAM_OUTPUT) { - fp->inst[counter].inst0 = R500_INST_TYPE_OUT; - - if (fpi->DstReg.Index == FRAG_RESULT_COLR) - fp->inst[counter].inst0 |= (fpi->DstReg.WriteMask << 15); - - if (fpi->DstReg.Index == FRAG_RESULT_DEPR) { - fp->inst[counter].inst4 |= R500_ALPHA_W_OMASK; - /* Notify the state emission! */ - fp->writes_depth = GL_TRUE; + static gl_state_index tokens[STATE_LENGTH] = { + STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0 + }; + struct prog_instruction *fpi; + GLuint window_index; + int i = 0; + GLuint tempregi = radeonCompilerAllocateTemporary(&compiler->compiler); + + fpi = radeonClauseInsertInstructions(&compiler->compiler, &compiler->compiler.Clauses[0], 0, 3); + + /* perspective divide */ + fpi[i].Opcode = OPCODE_RCP; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_W; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW; + i++; + + fpi[i].Opcode = OPCODE_MUL; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_INPUT; + fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS; + fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW; + + fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[1].Index = tempregi; + fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW; + i++; + + /* viewport transformation */ + window_index = _mesa_add_state_reference(compiler->fp->mesa_program.Base.Parameters, tokens); + + fpi[i].Opcode = OPCODE_MAD; + + fpi[i].DstReg.File = PROGRAM_TEMPORARY; + fpi[i].DstReg.Index = tempregi; + fpi[i].DstReg.WriteMask = 
WRITEMASK_XYZ; + fpi[i].DstReg.CondMask = COND_TR; + + fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[0].Index = tempregi; + fpi[i].SrcReg[0].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[1].Index = window_index; + fpi[i].SrcReg[1].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + + fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR; + fpi[i].SrcReg[2].Index = window_index; + fpi[i].SrcReg[2].Swizzle = + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO); + i++; + + for (; i < compiler->compiler.Clauses[0].NumInstructions; ++i) { + int reg; + for (reg = 0; reg < 3; reg++) { + if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT && + fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) { + fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY; + fpi[i].SrcReg[reg].Index = tempregi; + } } - } else { - fp->inst[counter].inst0 = R500_INST_TYPE_ALU - /* pixel_mask */ - | (fpi->DstReg.WriteMask << 11); } - - fp->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT; } -static void emit_mov(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, GLuint src_reg, GLuint swizzle, GLuint dest) { - /* The r3xx shader uses MAD to implement MOV. We are using CMP, since - * it is technically more accurate and recommended by ATI/AMD. */ - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src_reg); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg); - /* (De)mangle the swizzle from Mesa to R500. */ - swizzle = make_rgba_swizzle(swizzle); - /* 0x1FF is 9 bits, size of an RGB swizzle. 
*/ - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A((swizzle & 0x1ff)) - | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B((swizzle & 0x1ff)) - | R500_ALU_RGB_OMOD_DISABLE; - fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(GET_SWZ(swizzle, 3)) - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(GET_SWZ(swizzle, 3)) - | R500_ALPHA_OMOD_DISABLE; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); -} - -static void emit_mad(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, int one, int two, int three) { - /* Note: This code was all Corbin's. Corbin is a rather hackish coder. - * If you can make it pretty or fast, please do so! */ - emit_alu(fp, counter, fpi); - /* Common MAD stuff */ - fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(make_dest(fp, fpi->DstReg)); - fp->inst[counter].inst5 |= R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(make_dest(fp, fpi->DstReg)); - switch (one) { - case 0: - case 1: - case 2: - fp->inst[counter].inst1 |= R500_RGB_ADDR0(make_src(fp, fpi->SrcReg[one])); - fp->inst[counter].inst2 |= R500_ALPHA_ADDR0(make_src(fp, fpi->SrcReg[one])); - fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[one])); - fp->inst[counter].inst4 |= R500_ALPHA_SEL_A_SRC0 - | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[one])); - break; - case R500_SWIZZLE_ZERO: - fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO); - fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO); - break; - case R500_SWIZZLE_ONE: - fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE); - fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE); - break; - default: - ERROR("Bad src index in emit_mad: %d\n", one); - break; - } - switch (two) { - case 0: - case 1: - case 2: - 
fp->inst[counter].inst1 |= R500_RGB_ADDR1(make_src(fp, fpi->SrcReg[two])); - fp->inst[counter].inst2 |= R500_ALPHA_ADDR1(make_src(fp, fpi->SrcReg[two])); - fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 - | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[two])); - fp->inst[counter].inst4 |= R500_ALPHA_SEL_B_SRC1 - | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[two])); - break; - case R500_SWIZZLE_ZERO: - fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); - fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); - break; - case R500_SWIZZLE_ONE: - fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); - fp->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); - break; - default: - ERROR("Bad src index in emit_mad: %d\n", two); - break; - } - switch (three) { - case 0: - case 1: - case 2: - fp->inst[counter].inst1 |= R500_RGB_ADDR2(make_src(fp, fpi->SrcReg[three])); - fp->inst[counter].inst2 |= R500_ALPHA_ADDR2(make_src(fp, fpi->SrcReg[three])); - fp->inst[counter].inst5 |= R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[three])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[three])); - break; - case R500_SWIZZLE_ZERO: - fp->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - break; - case R500_SWIZZLE_ONE: - fp->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ONE) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ONE); - break; - default: - ERROR("Bad src index in emit_mad: %d\n", three); - break; - } -} -static void emit_sop(struct r500_fragment_program *fp, int counter, struct prog_instruction *fpi, int opcode, GLuint src, GLuint swiz, GLuint dest) { - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src); - fp->inst[counter].inst4 |= R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(swiz); - fp->inst[counter].inst5 = 
R500_ALU_RGBA_OP_SOP - | R500_ALU_RGBA_ADDRD(dest); - switch (opcode) { - case OPCODE_COS: - fp->inst[counter].inst4 |= R500_ALPHA_OP_COS; - break; - case OPCODE_EX2: - fp->inst[counter].inst4 |= R500_ALPHA_OP_EX2; - break; - case OPCODE_LG2: - fp->inst[counter].inst4 |= R500_ALPHA_OP_LN2; - break; - case OPCODE_RCP: - fp->inst[counter].inst4 |= R500_ALPHA_OP_RCP; - break; - case OPCODE_RSQ: - fp->inst[counter].inst4 |= R500_ALPHA_OP_RSQ; - break; - case OPCODE_SIN: - fp->inst[counter].inst4 |= R500_ALPHA_OP_SIN; - break; - default: - ERROR("Bad opcode in emit_sop: %d\n", opcode); - break; +static GLuint build_dtm(GLuint depthmode) +{ + switch(depthmode) { + default: + case GL_LUMINANCE: return 0; + case GL_INTENSITY: return 1; + case GL_ALPHA: return 2; } } -static GLboolean parse_program(struct r500_fragment_program *fp) +static GLuint build_func(GLuint comparefunc) { - struct gl_fragment_program *mp = &fp->mesa_program; - const struct prog_instruction *inst = mp->Base.Instructions; - struct prog_instruction *fpi; - GLuint src[3], dest = 0; - int temp_swiz, counter = 0; + return comparefunc - GL_NEVER; +} - if (!inst || inst[0].Opcode == OPCODE_END) { - ERROR("The program is empty!\n"); - return GL_FALSE; - } - for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { +/** + * Collect all external state that is relevant for compiling the given + * fragment program. 
+ */ +static void build_state( + r300ContextPtr r300, + struct r500_fragment_program *fp, + struct r500_fragment_program_external_state *state) +{ + int unit; - if (fpi->Opcode != OPCODE_KIL) { - dest = make_dest(fp, fpi->DstReg); - } + _mesa_bzero(state, sizeof(*state)); - switch (fpi->Opcode) { - case OPCODE_ABS: - emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); - fp->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS - | R500_ALU_RGB_MOD_B_ABS; - fp->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS - | R500_ALPHA_MOD_B_ABS; - break; - case OPCODE_ADD: - /* Variation on MAD: 1*src0+src1 */ - emit_mad(fp, counter, fpi, R500_SWIZZLE_ONE, 0, 1); - break; - case OPCODE_CMP: - /* This inst's selects need to be swapped as follows: - * 0 -> C ; 1 -> B ; 2 -> A */ - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - src[2] = make_src(fp, fpi->SrcReg[2]); - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[2]) - | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2]) - | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])); - break; - case OPCODE_COS: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = emit_const4fv(fp, RCP_2PI); - fp->inst[counter].inst0 = 
R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 = R500_ALPHA_OP_FRC - | R500_ALPHA_ADDRD(get_temp(fp, 1)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC - | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)); - counter++; - emit_sop(fp, counter, fpi, OPCODE_COS, get_temp(fp, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); - break; - case OPCODE_DP3: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_DP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | 
MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP3 - | R500_ALU_RGBA_ADDRD(dest); - break; - case OPCODE_DP4: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - /* Based on DP3 */ - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_DP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 - | R500_ALU_RGBA_ADDRD(dest); - break; - case OPCODE_DPH: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - /* Based on DP3 */ - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_DP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 - | R500_ALU_RGBA_ADDRD(dest); - break; - case OPCODE_DST: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - /* [1, src0.y*src1.y, src0.z, 
src1.w] - * So basically MUL with lotsa swizzling. */ - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | R500_ALU_RGB_SEL_B_SRC1; - /* Select [1, y, z, 1] */ - temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x7) | R500_SWIZZLE_ONE; - fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(temp_swiz); - /* Select [1, y, 1, w] */ - temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6); - fp->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(temp_swiz); - fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(dest) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - break; - case OPCODE_EX2: - src[0] = make_src(fp, fpi->SrcReg[0]); - emit_sop(fp, counter, fpi, OPCODE_EX2, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); - break; - case OPCODE_FLR: - src[0] = make_src(fp, fpi->SrcReg[0]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_FRC - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)); - counter++; - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(get_temp(fp, 0)); - fp->inst[counter].inst2 = 
R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(get_temp(fp, 0)); - fp->inst[counter].inst3 = MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) - | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC1 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC1 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGBA_MOD_C_NEG; - break; - case OPCODE_FRC: - src[0] = make_src(fp, fpi->SrcReg[0]); - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_FRC - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC - | R500_ALU_RGBA_ADDRD(dest); - break; - case OPCODE_LG2: - src[0] = make_src(fp, fpi->SrcReg[0]); - emit_sop(fp, counter, fpi, OPCODE_LG2, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); - break; - case OPCODE_LIT: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = emit_const4fv(fp, LIT); - /* First inst: MAX temp, input, [0, 0, 0, -128] - * Write: RG, A */ - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARG << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); - fp->inst[counter].inst4 = 
R500_ALPHA_OP_MAX - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)); - counter++; - /* Second inst: MIN temp, temp, [x, x, x, 128] - * Write: A */ - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_A << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)) | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)) | R500_ALPHA_ADDR1(src[1]); - /* fp->inst[counter].inst3; */ - fp->inst[counter].inst4 = R500_ALPHA_OP_MAX - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX - | R500_ALU_RGBA_ADDRD(dest); - counter++; - /* Third-fifth insts: POW temp, temp.y, temp.w - * Write: B */ - emit_sop(fp, counter, fpi, OPCODE_LG2, get_temp(fp, 0), SWIZZLE_Y, get_temp(fp, 1)); - fp->inst[counter].inst0 |= (R500_WRITEMASK_ARGB << 11); - counter++; - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 1)) - | R500_RGB_ADDR1(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 1)) - | R500_ALPHA_ADDR1(get_temp(fp, 0)); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(fp, 1)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - emit_sop(fp, counter, fpi, OPCODE_EX2, get_temp(fp, 1), SWIZZLE_W, 
get_temp(fp, 0)); - fp->inst[counter].inst0 |= (R500_WRITEMASK_B << 11); - counter++; - /* Sixth inst: CMP dest, temp.xxxx, temp.[1, x, z, 1], temp.[1, x, 0, 1]; - * Write: ARGB - * This inst's selects need to be swapped as follows: - * 0 -> C ; 1 -> B ; 2 -> A */ - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | R500_ALU_RGB_R_SWIZ_A_1 - | R500_ALU_RGB_G_SWIZ_A_R - | R500_ALU_RGB_B_SWIZ_A_B - | R500_ALU_RGB_SEL_B_SRC0 - | R500_ALU_RGB_R_SWIZ_B_1 - | R500_ALU_RGB_G_SWIZ_B_R - | R500_ALU_RGB_B_SWIZ_B_0; - fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_1 - | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_1; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC0 - | R500_ALU_RGBA_ALPHA_SEL_C_SRC0 - | R500_ALU_RGBA_R_SWIZ_R - | R500_ALU_RGBA_G_SWIZ_R - | R500_ALU_RGBA_B_SWIZ_R - | R500_ALU_RGBA_A_SWIZ_R; - break; - case OPCODE_LRP: - /* src0 * src1 + INV(src0) * src2 - * 1) MUL src0, src1, temp - * 2) PRE 1-src0; MAD srcp, src2, temp */ - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - src[2] = make_src(fp, fpi->SrcReg[2]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | R500_INST_NOP | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | 
R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[2]) - | R500_RGB_ADDR2(get_temp(fp, 0)) - | R500_RGB_SRCP_OP_1_MINUS_RGB0; - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[2]) - | R500_ALPHA_ADDR2(get_temp(fp, 0)) - | R500_ALPHA_SRCP_OP_1_MINUS_A0; - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC2 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2])) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); - break; - case OPCODE_MAD: - emit_mad(fp, counter, fpi, 0, 1, 2); - break; - case OPCODE_MAX: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 - | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_MAX - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | 
MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX - | R500_ALU_RGBA_ADDRD(dest); - break; - case OPCODE_MIN: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 - | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 |= R500_ALPHA_OP_MIN - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN - | R500_ALU_RGBA_ADDRD(dest); - break; - case OPCODE_MOV: - emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); - break; - case OPCODE_MUL: - /* Variation on MAD: src0*src1+0 */ - emit_mad(fp, counter, fpi, 0, 1, R500_SWIZZLE_ZERO); - break; - case OPCODE_POW: - /* POW(a,b) = EX2(LN2(a)*b) */ - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - emit_sop(fp, counter, fpi, OPCODE_LG2, src[0], make_sop_swizzle(fpi->SrcReg[0]), get_temp(fp, 0)); - fp->inst[counter].inst0 |= (R500_WRITEMASK_ARGB << 11); - counter++; - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | 
R500_ALPHA_ADDRD(get_temp(fp, 1)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - emit_sop(fp, counter, fpi, OPCODE_EX2, get_temp(fp, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); - break; - case OPCODE_RCP: - src[0] = make_src(fp, fpi->SrcReg[0]); - emit_sop(fp, counter, fpi, OPCODE_RCP, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); - break; - case OPCODE_RSQ: - src[0] = make_src(fp, fpi->SrcReg[0]); - emit_sop(fp, counter, fpi, OPCODE_RSQ, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); - break; - case OPCODE_SCS: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = emit_const4fv(fp, RCP_2PI); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); - 
fp->inst[counter].inst4 = R500_ALPHA_OP_FRC - | R500_ALPHA_ADDRD(get_temp(fp, 1)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC - | R500_ALU_RGBA_ADDRD(get_temp(fp, 1)); - counter++; - /* Do a cosine, then a sine, masking out the channels we want to protect. */ - /* Cosine only goes in R (x) channel. */ - fpi->DstReg.WriteMask = 0x1; - emit_sop(fp, counter, fpi, OPCODE_COS, get_temp(fp, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); - counter++; - /* Sine only goes in G (y) channel. */ - fpi->DstReg.WriteMask = 0x2; - emit_sop(fp, counter, fpi, OPCODE_SIN, get_temp(fp, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); - break; - case OPCODE_SGE: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) - | R500_RGB_ADDR2(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0]) - | R500_ALPHA_ADDR2(src[1]); - fp->inst[counter].inst3 = /* 1 */ - MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) - | R500_ALU_RGBA_MOD_C_NEG - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])) - | R500_ALU_RGBA_ALPHA_MOD_C_NEG; - counter++; - /* This inst's selects need to be swapped as follows: - * 0 -> C ; 1 -> B ; 2 -> A */ - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst2 = 
R500_ALPHA_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) - | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); - fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC0 - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC0 - | R500_ALU_RGBA_A_SWIZ_A; - break; - case OPCODE_SIN: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = emit_const4fv(fp, RCP_2PI); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A - | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); - fp->inst[counter].inst4 = R500_ALPHA_OP_FRC - | R500_ALPHA_ADDRD(get_temp(fp, 1)) - | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC - | 
R500_ALU_RGBA_ADDRD(get_temp(fp, 1)); - counter++; - emit_sop(fp, counter, fpi, OPCODE_SIN, get_temp(fp, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); - break; - case OPCODE_SLT: - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_ARGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) - | R500_RGB_ADDR2(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0]) - | R500_ALPHA_ADDR2(src[1]); - fp->inst[counter].inst3 = /* 1 */ - MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) - | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) - | R500_ALU_RGBA_MOD_C_NEG - | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 - | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])) - | R500_ALU_RGBA_ALPHA_MOD_C_NEG; - counter++; - /* This inst's selects need to be swapped as follows: - * 0 -> C ; 1 -> B ; 2 -> A */ - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(fp, 0)); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO) - | R500_ALU_RGB_SEL_B_SRC0 - | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); - fp->inst[counter].inst4 |= R500_ALPHA_OP_CMP - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO) - | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC0 - | 
MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) - | R500_ALU_RGBA_ALPHA_SEL_C_SRC0 - | R500_ALU_RGBA_A_SWIZ_A; - break; - case OPCODE_SUB: - /* Variation on MAD: 1*src0-src1 */ - fpi->SrcReg[1].NegateBase = 0xF; /* NEG_XYZW */ - emit_mad(fp, counter, fpi, R500_SWIZZLE_ONE, 0, 1); - break; - case OPCODE_SWZ: - /* TODO: The rarer negation masks! */ - emit_mov(fp, counter, fpi, make_src(fp, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); - break; - case OPCODE_XPD: - /* src0 * src1 - src1 * src0 - * 1) MUL temp.xyz, src0.yzx, src1.zxy - * 2) MAD src0.zxy, src1.yzx, -temp.xyz */ - src[0] = make_src(fp, fpi->SrcReg[0]); - src[1] = make_src(fp, fpi->SrcReg[1]); - fp->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT - | (R500_WRITEMASK_RGB << 11); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]); - /* Select [y, z, x] */ - temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]); - temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(temp_swiz); - /* Select [z, x, y] */ - temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]); - temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6); - fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 - | MAKE_SWIZ_RGB_B(temp_swiz); - fp->inst[counter].inst4 = R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(get_temp(fp, 0)) - | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) - | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(get_temp(fp, 0)) - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) - | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); - counter++; - emit_alu(fp, counter, fpi); - fp->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) - | R500_RGB_ADDR1(src[1]) - | 
R500_RGB_ADDR2(get_temp(fp, 0)); - fp->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) - | R500_ALPHA_ADDR1(src[1]) - | R500_ALPHA_ADDR2(get_temp(fp, 0)); - /* Select [z, x, y] */ - temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]); - temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6); - fp->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 - | MAKE_SWIZ_RGB_A(temp_swiz); - /* Select [y, z, x] */ - temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]); - temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6); - fp->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 - | MAKE_SWIZ_RGB_B(temp_swiz); - fp->inst[counter].inst4 |= R500_ALPHA_OP_MAD - | R500_ALPHA_ADDRD(dest) - | R500_ALPHA_SWIZ_A_1 - | R500_ALPHA_SWIZ_B_1; - fp->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD - | R500_ALU_RGBA_ADDRD(dest) - | R500_ALU_RGBA_SEL_C_SRC2 - | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) - | R500_ALU_RGBA_MOD_C_NEG - | R500_ALU_RGBA_A_SWIZ_0; - break; - case OPCODE_KIL: - case OPCODE_TEX: - case OPCODE_TXB: - case OPCODE_TXP: - emit_tex(fp, fpi, dest, counter); - if (fpi->DstReg.File == PROGRAM_OUTPUT) - counter++; - break; - default: - ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi->Opcode)); - break; - } + for(unit = 0; unit < 16; ++unit) { + if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) { + struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current; - /* Finishing touches */ - if (fpi->SaturateMode == SATURATE_ZERO_ONE) { - fp->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP; + state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode); + state->unit[unit].texture_compare_func = build_func(tex->CompareFunc); } - - counter++; - - if (fp->error) - return GL_FALSE; - - } - - /* Finish him! (If it's an ALU/OUT instruction...) 
*/ - if ((fp->inst[counter-1].inst0 & 0x3) == 1) { - fp->inst[counter-1].inst0 |= R500_INST_LAST; - } else { - /* We still need to put an output inst, right? */ - WARN_ONCE("Final FP instruction is not an OUT.\n"); } - - fp->cs->nrslots = counter; - - fp->max_temp_idx++; - - return GL_TRUE; } -static void init_program(r300ContextPtr r300, struct r500_fragment_program *fp) -{ - struct r500_pfs_compile_state *cs = fp->cs; - struct gl_fragment_program *mp = &fp->mesa_program; - struct prog_instruction *fpi; - GLuint InputsRead = mp->Base.InputsRead; - GLuint temps_used = 0; - int i, j; - - /* New compile, reset tracking data */ - fp->optimization = - driQueryOptioni(&r300->radeon.optionCache, "fp_optimization"); - fp->translated = GL_FALSE; - fp->error = GL_FALSE; - fp->const_nr = 0; - /* Size of pixel stack, plus 1. */ - fp->max_temp_idx = 1; - /* Temp register offset. */ - fp->temp_reg_offset = 0; - /* Whether or not we perform any depth writing. */ - fp->writes_depth = GL_FALSE; - - _mesa_memset(cs, 0, sizeof(*fp->cs)); - for (i = 0; i < PFS_MAX_ALU_INST; i++) { - for (j = 0; j < 3; j++) { - cs->slot[i].vsrc[j] = SRC_CONST; - cs->slot[i].ssrc[j] = SRC_CONST; - } - } - - /* Work out what temps the Mesa inputs correspond to, this must match - * what setup_rs_unit does, which shouldn't be a problem as rs_unit - * configures itself based on the fragprog's InputsRead - * - * NOTE: this depends on get_hw_temp() allocating registers in order, - * starting from register 0, so we're just going to do that instead. 
- */ +static void dump_program(struct r500_fragment_program_code *code); - /* Texcoords come first */ - for (i = 0; i < fp->ctx->Const.MaxTextureUnits; i++) { - if (InputsRead & (FRAG_BIT_TEX0 << i)) { - cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0; - cs->inputs[FRAG_ATTRIB_TEX0 + i].reg = - fp->temp_reg_offset; - fp->temp_reg_offset++; - } - } - InputsRead &= ~FRAG_BITS_TEX_ANY; - - /* fragment position treated as a texcoord */ - if (InputsRead & FRAG_BIT_WPOS) { - cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; - cs->inputs[FRAG_ATTRIB_WPOS].reg = - fp->temp_reg_offset; - fp->temp_reg_offset++; - } - InputsRead &= ~FRAG_BIT_WPOS; - - /* Then primary colour */ - if (InputsRead & FRAG_BIT_COL0) { - cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL0].reg = - fp->temp_reg_offset; - fp->temp_reg_offset++; - } - InputsRead &= ~FRAG_BIT_COL0; - - /* Secondary color */ - if (InputsRead & FRAG_BIT_COL1) { - cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; - cs->inputs[FRAG_ATTRIB_COL1].reg = - fp->temp_reg_offset; - fp->temp_reg_offset++; - } - InputsRead &= ~FRAG_BIT_COL1; - - /* Anything else */ - if (InputsRead) { - WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead); - /* force read from hwreg 0 for now */ - for (i = 0; i < 32; i++) - if (InputsRead & (1 << i)) - cs->inputs[i].reg = 0; - } +void r500TranslateFragmentShader(r300ContextPtr r300, + struct r500_fragment_program *fp) +{ + struct r500_fragment_program_external_state state; - if (!mp->Base.Instructions) { - ERROR("No instructions found in program, going to go die now.\n"); - return; + build_state(r300, fp, &state); + if (_mesa_memcmp(&fp->state, &state, sizeof(state))) { + /* TODO: cache compiled programs */ + fp->translated = GL_FALSE; + _mesa_memcpy(&fp->state, &state, sizeof(state)); } - for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { - for (i = 0; i < 3; i++) { - if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) { - if (fpi->SrcReg[i].Index >= temps_used) - 
temps_used = fpi->SrcReg[i].Index + 1; - } - } - } + if (!fp->translated) { + struct r500_fragment_program_compiler compiler; - cs->temp_in_use = temps_used + 1; + compiler.r300 = r300; + compiler.fp = fp; + compiler.code = &fp->code; - fp->max_temp_idx = fp->temp_reg_offset + cs->temp_in_use; + radeonCompilerInit(&compiler.compiler, r300->radeon.glCtx, &fp->mesa_program.Base); - if (RADEON_DEBUG & DEBUG_PIXEL) - fprintf(stderr, "FP temp indices: fp->max_temp_idx: %d cs->temp_in_use: %d\n", fp->max_temp_idx, cs->temp_in_use); -} + insert_WPOS_trailer(&compiler); -static void update_params(struct r500_fragment_program *fp) -{ - struct gl_fragment_program *mp = &fp->mesa_program; + struct radeon_program_transformation transformations[1] = { + { &transform_TEX, &compiler } + }; + radeonClauseLocalTransform(&compiler.compiler, + &compiler.compiler.Clauses[0], + 1, transformations); - /* Ask Mesa nicely to fill in ParameterValues for us */ - if (mp->Base.Parameters) - _mesa_load_state_parameters(fp->ctx, mp->Base.Parameters); -} - -static void dumb_shader(struct r500_fragment_program *fp) -{ - fp->inst[0].inst0 = R500_INST_TYPE_TEX - | R500_INST_TEX_SEM_WAIT - | R500_INST_RGB_WMASK_R - | R500_INST_RGB_WMASK_G - | R500_INST_RGB_WMASK_B - | R500_INST_ALPHA_WMASK - | R500_INST_RGB_CLAMP - | R500_INST_ALPHA_CLAMP; - fp->inst[0].inst1 = R500_TEX_ID(0) - | R500_TEX_INST_LD - | R500_TEX_SEM_ACQUIRE - | R500_TEX_IGNORE_UNCOVERED; - fp->inst[0].inst2 = R500_TEX_SRC_ADDR(0) - | R500_TEX_SRC_S_SWIZ_R - | R500_TEX_SRC_T_SWIZ_G - | R500_TEX_DST_ADDR(0) - | R500_TEX_DST_R_SWIZ_R - | R500_TEX_DST_G_SWIZ_G - | R500_TEX_DST_B_SWIZ_B - | R500_TEX_DST_A_SWIZ_A; - fp->inst[0].inst3 = R500_DX_ADDR(0) - | R500_DX_S_SWIZ_R - | R500_DX_T_SWIZ_R - | R500_DX_R_SWIZ_R - | R500_DX_Q_SWIZ_R - | R500_DY_ADDR(0) - | R500_DY_S_SWIZ_R - | R500_DY_T_SWIZ_R - | R500_DY_R_SWIZ_R - | R500_DY_Q_SWIZ_R; - fp->inst[0].inst4 = 0x0; - fp->inst[0].inst5 = 0x0; - - fp->inst[1].inst0 = R500_INST_TYPE_OUT | - 
R500_INST_TEX_SEM_WAIT | - R500_INST_LAST | - R500_INST_RGB_OMASK_R | - R500_INST_RGB_OMASK_G | - R500_INST_RGB_OMASK_B | - R500_INST_ALPHA_OMASK; - fp->inst[1].inst1 = R500_RGB_ADDR0(0) | - R500_RGB_ADDR1(0) | - R500_RGB_ADDR1_CONST | - R500_RGB_ADDR2(0) | - R500_RGB_ADDR2_CONST | - R500_RGB_SRCP_OP_1_MINUS_2RGB0; - fp->inst[1].inst2 = R500_ALPHA_ADDR0(0) | - R500_ALPHA_ADDR1(0) | - R500_ALPHA_ADDR1_CONST | - R500_ALPHA_ADDR2(0) | - R500_ALPHA_ADDR2_CONST | - R500_ALPHA_SRCP_OP_1_MINUS_2A0; - fp->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 | - R500_ALU_RGB_R_SWIZ_A_R | - R500_ALU_RGB_G_SWIZ_A_G | - R500_ALU_RGB_B_SWIZ_A_B | - R500_ALU_RGB_SEL_B_SRC0 | - R500_ALU_RGB_R_SWIZ_B_1 | - R500_ALU_RGB_B_SWIZ_B_1 | - R500_ALU_RGB_G_SWIZ_B_1; - fp->inst[1].inst4 = R500_ALPHA_OP_MAD | - R500_ALPHA_SWIZ_A_A | - R500_ALPHA_SWIZ_B_1; - fp->inst[1].inst5 = R500_ALU_RGBA_OP_MAD | - R500_ALU_RGBA_R_SWIZ_0 | - R500_ALU_RGBA_G_SWIZ_0 | - R500_ALU_RGBA_B_SWIZ_0 | - R500_ALU_RGBA_A_SWIZ_0; - - fp->cs->nrslots = 2; - fp->translated = GL_TRUE; -} + if (RADEON_DEBUG & DEBUG_PIXEL) { + _mesa_printf("Compiler state after transformations:\n"); + radeonCompilerDump(&compiler.compiler); + } -void r500TranslateFragmentShader(r300ContextPtr r300, - struct r500_fragment_program *fp) -{ - if (!fp->translated) { - struct r500_pfs_compile_state cs; - fp->cs = &cs; + if (!r500FragmentProgramEmit(&compiler)) + fp->error = GL_TRUE; - init_program(r300, fp); + radeonCompilerCleanup(&compiler.compiler); - if (parse_program(fp) == GL_FALSE) { - ERROR("Huh. Couldn't parse program. 
There should be additional errors explaining why.\nUsing dumb shader...\n"); - dumb_shader(fp); - fp->inst_offset = 0; - fp->inst_end = cs.nrslots - 1; - return; - } - fp->inst_offset = 0; - fp->inst_end = cs.nrslots - 1; + r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); fp->translated = GL_TRUE; if (RADEON_DEBUG & DEBUG_PIXEL) { @@ -1520,16 +417,12 @@ void r500TranslateFragmentShader(r300ContextPtr r300, fprintf(stderr, "-------------\n"); _mesa_print_program(&fp->mesa_program.Base); fflush(stdout); - dump_program(fp); + dump_program(&fp->code); } - - r300UpdateStateParameters(fp->ctx, _NEW_PROGRAM); - - fp->cs = 0; } - update_params(fp); + update_params(r300, fp); } @@ -1630,7 +523,7 @@ static char *to_texop(int val) return NULL; } -static void dump_program(struct r500_fragment_program *fp) +static void dump_program(struct r500_fragment_program_code *code) { fprintf(stderr, "R500 Fragment Program:\n--------\n"); @@ -1640,18 +533,18 @@ static void dump_program(struct r500_fragment_program *fp) uint32_t inst0; char *str = NULL; - if (fp->const_nr) { + if (code->const_nr) { fprintf(stderr, "--------\nConstants:\n"); - for (n = 0; n < fp->const_nr; n++) { + for (n = 0; n < code->const_nr; n++) { fprintf(stderr, "Constant %d: %f %f\n\t %f %f\n", n, - fp->constant[n][0], fp->constant[n][1], fp->constant[n][2], - fp->constant[n][3]); + code->constant[n][0], code->constant[n][1], code->constant[n][2], + code->constant[n][3]); } fprintf(stderr, "--------\n"); } - for (n = 0; n < fp->inst_end+1; n++) { - inst0 = inst = fp->inst[n].inst0; + for (n = 0; n < code->inst_end+1; n++) { + inst0 = inst = code->inst[n].inst0; fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst); switch(inst & 0x3) { case R500_INST_TYPE_ALU: str = "ALU"; break; @@ -1670,8 +563,8 @@ static void dump_program(struct r500_fragment_program *fp) switch(inst0 & 0x3) { case 0: case 1: - fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", fp->inst[n].inst1); - inst = fp->inst[n].inst1; + 
fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1); + inst = code->inst[n].inst1; fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", inst & 0xff, (inst & (1<<8)) ? 'c' : 't', @@ -1679,15 +572,15 @@ static void dump_program(struct r500_fragment_program *fp) (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', (inst >> 30)); - fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", fp->inst[n].inst2); - inst = fp->inst[n].inst2; + fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2); + inst = code->inst[n].inst2; fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n", inst & 0xff, (inst & (1<<8)) ? 'c' : 't', (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't', (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't', (inst >> 30)); - fprintf(stderr,"\t3 RGB_INST: 0x%08x:", fp->inst[n].inst3); - inst = fp->inst[n].inst3; + fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3); + inst = code->inst[n].inst3; fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n", (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7), (inst >> 11) & 0x3, @@ -1695,16 +588,16 @@ static void dump_program(struct r500_fragment_program *fp) (inst >> 24) & 0x3); - fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", fp->inst[n].inst4); - inst = fp->inst[n].inst4; + fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4); + inst = code->inst[n].inst4; fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf), (inst >> 4) & 0x7f, inst & (1<<11) ? 
"(rel)":"", (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3, (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3, (inst >> 31) & 0x1); - fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", fp->inst[n].inst5); - inst = fp->inst[n].inst5; + fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5); + inst = code->inst[n].inst5; fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf), (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"", (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7), @@ -1714,11 +607,11 @@ static void dump_program(struct r500_fragment_program *fp) case 2: break; case 3: - inst = fp->inst[n].inst1; + inst = code->inst[n].inst1; fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf, to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "", (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED"); - inst = fp->inst[n].inst2; + inst = code->inst[n].inst2; fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst, inst & 127, inst & (1<<7) ? 
"(rel)" : "", toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3), @@ -1727,7 +620,7 @@ static void dump_program(struct r500_fragment_program *fp) toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3), toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3)); - fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", fp->inst[n].inst3); + fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3); break; } fprintf(stderr,"\n"); diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.h b/src/mesa/drivers/dri/r300/r500_fragprog.h index 5dd2def1c4..ff6a9002c1 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/r500_fragprog.h @@ -36,10 +36,14 @@ #include "glheader.h" #include "macros.h" #include "enums.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" #include "shader/program.h" #include "shader/prog_instruction.h" #include "r300_context.h" +#include "r300_state.h" +#include "radeon_program.h" /* supported hw opcodes */ #define PFS_OP_MAD 0 @@ -76,4 +80,13 @@ struct r500_fragment_program; extern void r500TranslateFragmentShader(r300ContextPtr r300, struct r500_fragment_program *fp); +struct r500_fragment_program_compiler { + r300ContextPtr r300; + struct r500_fragment_program *fp; + struct r500_fragment_program_code *code; + struct radeon_compiler compiler; +}; + +extern GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler); + #endif -- cgit v1.2.3 From 69004fb758629d5c2d201241fc3d2907dd9afde3 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 14 Jun 2008 07:21:40 -0700 Subject: r5xx: New FP code, take two. Add the code emission source file, and comment out unneeded tex de-swizzling. 
--- src/mesa/drivers/dri/r300/r500_fragprog_emit.c | 1520 ++++++++++++++++++++++++ 1 file changed, 1520 insertions(+) create mode 100644 src/mesa/drivers/dri/r300/r500_fragprog_emit.c (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c new file mode 100644 index 0000000000..24182f18ca --- /dev/null +++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c @@ -0,0 +1,1520 @@ +/* + * Copyright (C) 2005 Ben Skeggs. + * + * Copyright 2008 Corbin Simpson + * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ */
+
+/**
+ * \file
+ *
+ * \author Ben Skeggs
+ *
+ * \author Jerome Glisse
+ *
+ * \author Corbin Simpson
+ *
+ * \todo Depth write, WPOS/FOGC inputs
+ *
+ * \todo FogOption
+ *
+ */
+
+#include "glheader.h"
+#include "macros.h"
+#include "enums.h"
+#include "shader/prog_instruction.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+
+#include "r300_context.h"
+#include "r500_fragprog.h"
+#include "r300_reg.h"
+#include "r300_state.h"
+
+/* Mapping Mesa registers to R500 temporaries
+ *
+ * The compile state below keeps arrays of these records, one array for
+ * Mesa temporaries and one for fragment program inputs. */
+struct reg_acc {
+	int reg; /* Assigned hw temp */
+	unsigned int refcount; /* Number of uses by mesa program */
+};
+
+/**
+ * Describe the current lifetime information for an R300 temporary
+ *
+ * Every field is an index into the instruction slots of the program
+ * being compiled. The compile state keeps an array of these records
+ * named hwtemps.
+ *
+ * NOTE(review): the vector_ and scalar_ variants presumably track the RGB
+ * and alpha halves of an instruction separately - confirm against users.
+ */
+struct reg_lifetime {
+	/* Index of the first slot where this register is free in the sense
+	   that it can be used as a new destination register.
+	   This is -1 if the register has been assigned to a Mesa register
+	   and the last access to the register has not yet been emitted */
+	int free;
+
+	/* Index of the first slot where this register is currently reserved.
+	   This is used to stop e.g. a scalar operation from being moved
+	   before the allocation time of a register that was first allocated
+	   for a vector operation. */
+	int reserved;
+
+	/* Index of the first slot in which the register can be used as a
+	   source without losing the value that is written by the last
+	   emitted instruction that writes to the register */
+	int vector_valid;
+	int scalar_valid;
+
+	/* Index to the slot where the register was last read.
+	   This is also the first slot in which the register may be written again */
+	int vector_lastread;
+	int scalar_lastread;
+};
+
+/**
+ * Store usage information about an ALU instruction slot during the
+ * compilation of a fragment program.
+ */
+#define SLOT_SRC_VECTOR (1<<0)
+#define SLOT_SRC_SCALAR (1<<3)
+#define SLOT_SRC_BOTH (SLOT_SRC_VECTOR | SLOT_SRC_SCALAR)
+#define SLOT_OP_VECTOR (1<<16)
+#define SLOT_OP_SCALAR (1<<17)
+#define SLOT_OP_BOTH (SLOT_OP_VECTOR | SLOT_OP_SCALAR)
+
+struct r500_pfs_compile_slot {
+	/* Bitmask indicating which parts of the slot are used, using SLOT_ constants
+	   defined above */
+	unsigned int used;
+
+	/* Selected sources */
+	int vsrc[3];
+	int ssrc[3];
+};
+
+/**
+ * Store information during compilation of fragment programs.
+ */
+struct r500_pfs_compile_state {
+	struct r500_fragment_program_compiler *compiler;
+
+	/* number of ALU slots used so far */
+	int nrslots;
+
+	/* Track which (parts of) slots are already filled with instructions */
+	struct r500_pfs_compile_slot slot[PFS_MAX_ALU_INST];
+
+	/* Track the validity of R300 temporaries */
+	struct reg_lifetime hwtemps[PFS_NUM_TEMP_REGS];
+
+	/* Used to map Mesa's inputs/temps onto hardware temps */
+	int temp_in_use;
+	struct reg_acc temps[PFS_NUM_TEMP_REGS];
+	struct reg_acc inputs[32]; /* don't actually need 32... */
+
+	/* Track usage of hardware temps, for register allocation,
+	 * indirection detection, etc. */
+	GLuint used_in_node;
+	GLuint dest_in_node;
+};
+
+/*
+ * Useful macros and values
+ */
+
+/* Print a message prefixed with file and function name, then flag the
+ * fragment program as failed. Expands to a reference to a local variable
+ * named "cs" (struct r500_pfs_compile_state *), so it can only be used
+ * where such a variable is in scope. The macro appends its own newline. */
+#define ERROR(fmt, args...) do { \
+		fprintf(stderr, "%s::%s(): " fmt "\n", \
+			__FILE__, __FUNCTION__, ##args); \
+		cs->compiler->fp->error = GL_TRUE; \
+	} while(0)
+
+/* Declares a local "code" pointer to the program code being emitted.
+ * Like ERROR, this requires "cs" to be in scope. */
+#define PROG_CODE struct r500_fragment_program_code *code = cs->compiler->code
+
+#define R500_US_NUM_TEMP_REGS 128
+#define R500_US_NUM_CONST_REGS 256
+
+/* "Register" flags */
+#define REG_CONSTANT (1 << 8)
+#define REG_SRC_REL (1 << 9)
+#define REG_DEST_REL (1 << 7)
+
+/* Swizzle tools */
+#define R500_SWIZZLE_ZERO 4
+#define R500_SWIZZLE_HALF 5
+#define R500_SWIZZLE_ONE 6
+#define R500_SWIZ_RGB_ZERO ((4 << 0) | (4 << 3) | (4 << 6))
+#define R500_SWIZ_RGB_ONE ((6 << 0) | (6 << 3) | (6 << 6))
+#define R500_SWIZ_RGB_RGB ((0 << 0) | (1 << 3) | (2 << 6))
+#define R500_SWIZ_MOD_NEG 1
+#define R500_SWIZ_MOD_ABS 2
+#define R500_SWIZ_MOD_NEG_ABS 3
+/* The MAKE_SWIZ_* helpers move an already-built swizzle value into its
+ * bit position inside the named instruction word. The argument is
+ * parenthesized so that compound expressions such as (a | b) are shifted
+ * as a whole instead of being split by operator precedence. */
+/* Swizzles for inst2 */
+#define MAKE_SWIZ_TEX_STRQ(x) ((x) << 8)
+#define MAKE_SWIZ_TEX_RGBA(x) ((x) << 24)
+/* Swizzles for inst3 */
+#define MAKE_SWIZ_RGB_A(x) ((x) << 2)
+#define MAKE_SWIZ_RGB_B(x) ((x) << 15)
+/* Swizzles for inst4 */
+#define MAKE_SWIZ_ALPHA_A(x) ((x) << 14)
+#define MAKE_SWIZ_ALPHA_B(x) ((x) << 21)
+/* Swizzle for inst5 */
+#define MAKE_SWIZ_RGBA_C(x) ((x) << 14)
+#define MAKE_SWIZ_ALPHA_C(x) ((x) << 27)
+
+/* Writemasks */
+#define R500_WRITEMASK_G 0x2
+#define R500_WRITEMASK_B 0x4
+#define R500_WRITEMASK_RGB 0x7
+#define R500_WRITEMASK_A 0x8
+#define R500_WRITEMASK_AR 0x9
+#define R500_WRITEMASK_AG 0xA
+#define R500_WRITEMASK_ARG 0xB
+#define R500_WRITEMASK_AB 0xC
+#define R500_WRITEMASK_ARGB 0xF
+
+/* 1/(2pi), needed for quick modulus in trig insts
+ * Thanks to glisse for pointing out how to do it!
*/
+static const GLfloat RCP_2PI[] = {0.15915494309189535,
+				  0.15915494309189535,
+				  0.15915494309189535,
+				  0.15915494309189535};
+
+static const GLfloat LIT[] = {127.999999,
+			      127.999999,
+			      127.999999,
+			      -127.999999};
+
+/* Build the 9-bit R500 RGB swizzle (three 3-bit selectors, x/y/z in
+ * ascending bit order) from a Mesa source register. When the source has
+ * NegateBase set, the negate modifier is placed above the selectors
+ * (bit 9 and up). */
+static inline GLuint make_rgb_swizzle(struct prog_src_register src) {
+	GLuint swiz = 0x0;
+	GLuint temp;
+	/* This could be optimized, but it should be plenty fast already. */
+	int i;
+	for (i = 0; i < 3; i++) {
+		temp = GET_SWZ(src.Swizzle, i);
+		/* Fix SWIZZLE_ONE: Mesa encodes it as 5, the hardware as
+		 * R500_SWIZZLE_ONE (6). */
+		if (temp == 5) {
+			temp = R500_SWIZZLE_ONE;
+		}
+		swiz |= temp << i*3;
+	}
+	if (src.NegateBase) {
+		swiz |= (R500_SWIZ_MOD_NEG << 9);
+	}
+	return swiz;
+}
+
+/* Same translation as make_rgb_swizzle, but for all four components of a
+ * raw Mesa swizzle word and without any negate modifier. The result packs
+ * four 3-bit selectors, x first. */
+static inline GLuint make_rgba_swizzle(GLuint src) {
+	GLuint swiz = 0x0;
+	GLuint temp;
+	int i;
+	for (i = 0; i < 4; i++) {
+		temp = GET_SWZ(src, i);
+		/* Fix SWIZZLE_ONE */
+		if (temp == 5) {
+			temp = R500_SWIZZLE_ONE;
+		}
+		swiz |= temp << i*3;
+	}
+	return swiz;
+}
+
+/* Build the alpha swizzle from the fourth (w) selector of a Mesa source
+ * register; the negate modifier, if requested, goes directly above the
+ * 3-bit selector. */
+static inline GLuint make_alpha_swizzle(struct prog_src_register src) {
+	GLuint swiz = GET_SWZ(src.Swizzle, 3);
+
+	/* Fix SWIZZLE_ONE */
+	if (swiz == 5) {
+		swiz = R500_SWIZZLE_ONE;
+	}
+
+	if (src.NegateBase) {
+		swiz |= (R500_SWIZ_MOD_NEG << 3);
+	}
+
+	return swiz;
+}
+
+/* Swizzle for scalar operations: only the first (x) selector of the
+ * source register is used, and no negate modifier is applied. */
+static inline GLuint make_sop_swizzle(struct prog_src_register src) {
+	GLuint swiz = GET_SWZ(src.Swizzle, 0);
+
+	/* Fix SWIZZLE_ONE */
+	if (swiz == 5) {
+		swiz = R500_SWIZZLE_ONE;
+	}
+	return swiz;
+}
+
+/* Build the 8-bit texture coordinate swizzle: four 2-bit selectors, one
+ * per component. Each Mesa selector is truncated to its low two bits, so
+ * only the plain x/y/z/w selectors survive unchanged.
+ * NOTE(review): constant selectors (zero/one) alias onto x/y after the
+ * mask - confirm texture sources never carry them. */
+static inline GLuint make_strq_swizzle(struct prog_src_register src) {
+	GLuint swiz = 0x0, temp = 0x0;
+	int i;
+	for (i = 0; i < 4; i++) {
+		temp = GET_SWZ(src.Swizzle, i) & 0x3;
+		swiz |= temp << i*2;
+	}
+	return swiz;
+}
+
+/* Return the hardware index of scratch temporary number "slot". Scratch
+ * temporaries live above the registers already claimed by the program
+ * (temp_reg_offset + temp_in_use).
+ *
+ * Valid hardware temporaries are 0 .. R500_US_NUM_TEMP_REGS - 1, so an
+ * index equal to R500_US_NUM_TEMP_REGS is already out of range and is
+ * reported through ERROR(), which marks the program as failed. The
+ * computed index is still returned so callers keep their control flow. */
+static int get_temp(struct r500_pfs_compile_state *cs, int slot) {
+
+	PROG_CODE;
+
+	int r = code->temp_reg_offset + cs->temp_in_use + slot;
+
+	if (r >= R500_US_NUM_TEMP_REGS) {
+		ERROR("Too many temporary registers requested, can't compile!\n");
+	}
+
+	return r;
+}
+
+/* Borrowed verbatim from r300_fragprog since it hasn't changed.
*/ +static GLuint emit_const4fv(struct r500_pfs_compile_state *cs, + const GLfloat * cp) +{ + PROG_CODE; + + GLuint reg = 0x0; + int index; + + for (index = 0; index < code->const_nr; ++index) { + if (code->constant[index] == cp) + break; + } + + if (index >= code->const_nr) { + if (index >= R500_US_NUM_CONST_REGS) { + ERROR("Out of hw constants!\n"); + return reg; + } + + code->const_nr++; + code->constant[index] = cp; + } + + reg = index | REG_CONSTANT; + return reg; +} + +static GLuint make_src(struct r500_pfs_compile_state *cs, struct prog_src_register src) { + PROG_CODE; + GLuint reg; + switch (src.File) { + case PROGRAM_TEMPORARY: + reg = src.Index + code->temp_reg_offset; + break; + case PROGRAM_INPUT: + reg = cs->inputs[src.Index].reg; + break; + case PROGRAM_LOCAL_PARAM: + reg = emit_const4fv(cs, + cs->compiler->fp->mesa_program.Base.LocalParams[src.Index]); + break; + case PROGRAM_ENV_PARAM: + reg = emit_const4fv(cs, + cs->compiler->fp->ctx->FragmentProgram.Parameters[src.Index]); + break; + case PROGRAM_STATE_VAR: + case PROGRAM_NAMED_PARAM: + case PROGRAM_CONSTANT: + reg = emit_const4fv(cs, + cs->compiler->fp->mesa_program.Base.Parameters->ParameterValues[src.Index]); + break; + default: + ERROR("Can't handle src.File %x\n", src.File); + reg = 0x0; + break; + } + return reg; +} + +static GLuint make_dest(struct r500_pfs_compile_state *cs, struct prog_dst_register dest) { + PROG_CODE; + GLuint reg; + switch (dest.File) { + case PROGRAM_TEMPORARY: + reg = dest.Index + code->temp_reg_offset; + break; + case PROGRAM_OUTPUT: + /* Eventually we may need to handle multiple + * rendering targets... 
*/ + reg = dest.Index; + break; + default: + ERROR("Can't handle dest.File %x\n", dest.File); + reg = 0x0; + break; + } + return reg; +} + +static void emit_tex(struct r500_pfs_compile_state *cs, + struct prog_instruction *fpi, int dest, int counter) +{ + PROG_CODE; + int hwsrc, hwdest; + GLuint mask; + + mask = fpi->DstReg.WriteMask << 11; + hwsrc = make_src(cs, fpi->SrcReg[0]); + + if (fpi->DstReg.File == PROGRAM_OUTPUT) { + hwdest = get_temp(cs, 0); + } else { + hwdest = dest; + } + + code->inst[counter].inst0 = R500_INST_TYPE_TEX | mask + | R500_INST_TEX_SEM_WAIT; + + code->inst[counter].inst1 = R500_TEX_ID(fpi->TexSrcUnit) + | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + + if (fpi->TexSrcTarget == TEXTURE_RECT_INDEX) + code->inst[counter].inst1 |= R500_TEX_UNSCALED; + + switch (fpi->Opcode) { + case OPCODE_KIL: + code->inst[counter].inst1 |= R500_TEX_INST_TEXKILL; + break; + case OPCODE_TEX: + code->inst[counter].inst1 |= R500_TEX_INST_LD; + break; + case OPCODE_TXB: + code->inst[counter].inst1 |= R500_TEX_INST_LODBIAS; + break; + case OPCODE_TXP: + code->inst[counter].inst1 |= R500_TEX_INST_PROJ; + break; + default: + ERROR("emit_tex can't handle opcode %x\n", fpi->Opcode); + } + + code->inst[counter].inst2 = R500_TEX_SRC_ADDR(hwsrc) + | MAKE_SWIZ_TEX_STRQ(make_strq_swizzle(fpi->SrcReg[0])) + /* | R500_TEX_SRC_S_SWIZ_R | R500_TEX_SRC_T_SWIZ_G + | R500_TEX_SRC_R_SWIZ_B | R500_TEX_SRC_Q_SWIZ_A */ + | R500_TEX_DST_ADDR(hwdest) + | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G + | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A; + + code->inst[counter].inst3 = 0x0; + code->inst[counter].inst4 = 0x0; + code->inst[counter].inst5 = 0x0; + + if (fpi->DstReg.File == PROGRAM_OUTPUT) { + counter++; + code->inst[counter].inst0 = R500_INST_TYPE_OUT + | R500_INST_TEX_SEM_WAIT | (mask << 4); + code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)); + code->inst[counter].inst3 = 
R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_OMOD_DISABLE; + code->inst[counter].inst4 = R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_ALPHA_SWIZ_A_A) + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_ALPHA_SWIZ_A_A) + | R500_ALPHA_OMOD_DISABLE; + code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + } +} + +static void emit_alu(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi) { + PROG_CODE; + /* Ideally, we shouldn't have to explicitly clear memory here! */ + code->inst[counter].inst0 = 0x0; + code->inst[counter].inst1 = 0x0; + code->inst[counter].inst2 = 0x0; + code->inst[counter].inst3 = 0x0; + code->inst[counter].inst4 = 0x0; + code->inst[counter].inst5 = 0x0; + + if (fpi->DstReg.File == PROGRAM_OUTPUT) { + code->inst[counter].inst0 = R500_INST_TYPE_OUT; + + if (fpi->DstReg.Index == FRAG_RESULT_COLR) + code->inst[counter].inst0 |= (fpi->DstReg.WriteMask << 15); + + if (fpi->DstReg.Index == FRAG_RESULT_DEPR) { + code->inst[counter].inst4 |= R500_ALPHA_W_OMASK; + /* Notify the state emission! */ + cs->compiler->fp->writes_depth = GL_TRUE; + } + } else { + code->inst[counter].inst0 = R500_INST_TYPE_ALU + /* pixel_mask */ + | (fpi->DstReg.WriteMask << 11); + } + + code->inst[counter].inst0 |= R500_INST_TEX_SEM_WAIT; +} + +static void emit_mov(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi, GLuint src_reg, GLuint swizzle, GLuint dest) { + PROG_CODE; + /* The r3xx shader uses MAD to implement MOV. We are using CMP, since + * it is technically more accurate and recommended by ATI/AMD. 
*/ + emit_alu(cs, counter, fpi); + code->inst[counter].inst1 = R500_RGB_ADDR0(src_reg); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(src_reg); + /* (De)mangle the swizzle from Mesa to R500. */ + swizzle = make_rgba_swizzle(swizzle); + /* 0x1FF is 9 bits, size of an RGB swizzle. */ + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A((swizzle & 0x1ff)) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B((swizzle & 0x1ff)) + | R500_ALU_RGB_OMOD_DISABLE; + code->inst[counter].inst4 |= R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(GET_SWZ(swizzle, 3)) + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(GET_SWZ(swizzle, 3)) + | R500_ALPHA_OMOD_DISABLE; + code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); +} + +static void emit_mad(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi, int one, int two, int three) { + PROG_CODE; + /* Note: This code was all Corbin's. Corbin is a rather hackish coder. + * If you can make it pretty or fast, please do so! 
*/ + emit_alu(cs, counter, fpi); + /* Common MAD stuff */ + code->inst[counter].inst4 |= R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(make_dest(cs, fpi->DstReg)); + code->inst[counter].inst5 |= R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(make_dest(cs, fpi->DstReg)); + switch (one) { + case 0: + case 1: + case 2: + code->inst[counter].inst1 |= R500_RGB_ADDR0(make_src(cs, fpi->SrcReg[one])); + code->inst[counter].inst2 |= R500_ALPHA_ADDR0(make_src(cs, fpi->SrcReg[one])); + code->inst[counter].inst3 |= R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[one])); + code->inst[counter].inst4 |= R500_ALPHA_SEL_A_SRC0 + | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[one])); + break; + case R500_SWIZZLE_ZERO: + code->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO); + code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO); + break; + case R500_SWIZZLE_ONE: + code->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE); + code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE); + break; + default: + ERROR("Bad src index in emit_mad: %d\n", one); + break; + } + switch (two) { + case 0: + case 1: + case 2: + code->inst[counter].inst1 |= R500_RGB_ADDR1(make_src(cs, fpi->SrcReg[two])); + code->inst[counter].inst2 |= R500_ALPHA_ADDR1(make_src(cs, fpi->SrcReg[two])); + code->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[two])); + code->inst[counter].inst4 |= R500_ALPHA_SEL_B_SRC1 + | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[two])); + break; + case R500_SWIZZLE_ZERO: + code->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); + code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); + break; + case R500_SWIZZLE_ONE: + code->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); + code->inst[counter].inst4 |= MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); + break; + default: + ERROR("Bad src index in emit_mad: %d\n", two); + break; + } + switch (three) { + case 
0: + case 1: + case 2: + code->inst[counter].inst1 |= R500_RGB_ADDR2(make_src(cs, fpi->SrcReg[three])); + code->inst[counter].inst2 |= R500_ALPHA_ADDR2(make_src(cs, fpi->SrcReg[three])); + code->inst[counter].inst5 |= R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[three])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[three])); + break; + case R500_SWIZZLE_ZERO: + code->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + break; + case R500_SWIZZLE_ONE: + code->inst[counter].inst5 |= MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ONE) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ONE); + break; + default: + ERROR("Bad src index in emit_mad: %d\n", three); + break; + } +} + +static void emit_sop(struct r500_pfs_compile_state *cs, int counter, struct prog_instruction *fpi, int opcode, GLuint src, GLuint swiz, GLuint dest) { + PROG_CODE; + emit_alu(cs, counter, fpi); + code->inst[counter].inst1 = R500_RGB_ADDR0(src); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(src); + code->inst[counter].inst4 |= R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(swiz); + code->inst[counter].inst5 = R500_ALU_RGBA_OP_SOP + | R500_ALU_RGBA_ADDRD(dest); + switch (opcode) { + case OPCODE_COS: + code->inst[counter].inst4 |= R500_ALPHA_OP_COS; + break; + case OPCODE_EX2: + code->inst[counter].inst4 |= R500_ALPHA_OP_EX2; + break; + case OPCODE_LG2: + code->inst[counter].inst4 |= R500_ALPHA_OP_LN2; + break; + case OPCODE_RCP: + code->inst[counter].inst4 |= R500_ALPHA_OP_RCP; + break; + case OPCODE_RSQ: + code->inst[counter].inst4 |= R500_ALPHA_OP_RSQ; + break; + case OPCODE_SIN: + code->inst[counter].inst4 |= R500_ALPHA_OP_SIN; + break; + default: + ERROR("Bad opcode in emit_sop: %d\n", opcode); + break; + } +} + +static int do_inst(struct r500_pfs_compile_state *cs, struct prog_instruction *fpi, int counter) { + PROG_CODE; + GLuint src[3], dest = 0; + int temp_swiz = 0; + + if 
(fpi->Opcode != OPCODE_KIL) { + dest = make_dest(cs, fpi->DstReg); + } + + switch (fpi->Opcode) { + case OPCODE_ABS: + emit_mov(cs, counter, fpi, make_src(cs, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); + code->inst[counter].inst3 |= R500_ALU_RGB_MOD_A_ABS + | R500_ALU_RGB_MOD_B_ABS; + code->inst[counter].inst4 |= R500_ALPHA_MOD_A_ABS + | R500_ALPHA_MOD_B_ABS; + break; + case OPCODE_ADD: + /* Variation on MAD: 1*src0+src1 */ + emit_mad(cs, counter, fpi, R500_SWIZZLE_ONE, 0, 1); + break; + case OPCODE_CMP: + /* This inst's selects need to be swapped as follows: + * 0 -> C ; 1 -> B ; 2 -> A */ + src[0] = make_src(cs, fpi->SrcReg[0]); + src[1] = make_src(cs, fpi->SrcReg[1]); + src[2] = make_src(cs, fpi->SrcReg[2]); + emit_alu(cs, counter, fpi); + code->inst[counter].inst1 = R500_RGB_ADDR0(src[2]) + | R500_RGB_ADDR1(src[1]) | R500_RGB_ADDR2(src[0]); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[2]) + | R500_ALPHA_ADDR1(src[1]) | R500_ALPHA_ADDR2(src[0]); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[2])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + code->inst[counter].inst4 |= R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[2])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])); + break; + case OPCODE_COS: + src[0] = make_src(cs, fpi->SrcReg[0]); + src[1] = emit_const4fv(cs, RCP_2PI); + code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARGB << 11); + code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + code->inst[counter].inst2 = 
R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); + code->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(cs, 0)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); + code->inst[counter].inst4 = R500_ALPHA_OP_FRC + | R500_ALPHA_ADDRD(get_temp(cs, 1)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; + code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC + | R500_ALU_RGBA_ADDRD(get_temp(cs, 1)); + counter++; + emit_sop(cs, counter, fpi, OPCODE_COS, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); + break; + case OPCODE_DP3: + src[0] = make_src(cs, fpi->SrcReg[0]); + src[1] = make_src(cs, fpi->SrcReg[1]); + emit_alu(cs, counter, fpi); + code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + code->inst[counter].inst4 |= R500_ALPHA_OP_DP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + code->inst[counter].inst5 = 
R500_ALU_RGBA_OP_DP3 + | R500_ALU_RGBA_ADDRD(dest); + break; + case OPCODE_DP4: + src[0] = make_src(cs, fpi->SrcReg[0]); + src[1] = make_src(cs, fpi->SrcReg[1]); + /* Based on DP3 */ + emit_alu(cs, counter, fpi); + code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + code->inst[counter].inst4 |= R500_ALPHA_OP_DP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + code->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 + | R500_ALU_RGBA_ADDRD(dest); + break; + case OPCODE_DPH: + src[0] = make_src(cs, fpi->SrcReg[0]); + src[1] = make_src(cs, fpi->SrcReg[1]); + /* Based on DP3 */ + emit_alu(cs, counter, fpi); + code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + code->inst[counter].inst4 |= R500_ALPHA_OP_DP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + code->inst[counter].inst5 = R500_ALU_RGBA_OP_DP4 + | R500_ALU_RGBA_ADDRD(dest); + break; + case OPCODE_DST: + src[0] = make_src(cs, fpi->SrcReg[0]); + src[1] = make_src(cs, fpi->SrcReg[1]); + /* [1, src0.y*src1.y, src0.z, src1.w] + * So basically MUL with lotsa swizzling. 
*/ + emit_alu(cs, counter, fpi); + code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | R500_ALU_RGB_SEL_B_SRC1; + /* Select [1, y, z, 1] */ + temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x7) | R500_SWIZZLE_ONE; + code->inst[counter].inst3 |= MAKE_SWIZ_RGB_A(temp_swiz); + /* Select [1, y, 1, w] */ + temp_swiz = (make_rgb_swizzle(fpi->SrcReg[0]) & ~0x1c7) | R500_SWIZZLE_ONE | (R500_SWIZZLE_ONE << 6); + code->inst[counter].inst3 |= MAKE_SWIZ_RGB_B(temp_swiz); + code->inst[counter].inst4 |= R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(dest) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + break; + case OPCODE_EX2: + src[0] = make_src(cs, fpi->SrcReg[0]); + emit_sop(cs, counter, fpi, OPCODE_EX2, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); + break; + case OPCODE_FLR: + src[0] = make_src(cs, fpi->SrcReg[0]); + code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); + code->inst[counter].inst4 |= R500_ALPHA_OP_FRC + | R500_ALPHA_ADDRD(get_temp(cs, 0)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC + | R500_ALU_RGBA_ADDRD(get_temp(cs, 0)); + counter++; + emit_alu(cs, counter, fpi); + code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(get_temp(cs, 0)); + code->inst[counter].inst2 = 
R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(get_temp(cs, 0)); + code->inst[counter].inst3 = MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) + | R500_ALU_RGB_SEL_B_SRC0 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); + code->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SWIZ_A_A + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); + code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC1 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC1 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGBA_MOD_C_NEG; + break; + case OPCODE_FRC: + src[0] = make_src(cs, fpi->SrcReg[0]); + emit_alu(cs, counter, fpi); + code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])); + code->inst[counter].inst4 |= R500_ALPHA_OP_FRC + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])); + code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC + | R500_ALU_RGBA_ADDRD(dest); + break; + case OPCODE_LG2: + src[0] = make_src(cs, fpi->SrcReg[0]); + emit_sop(cs, counter, fpi, OPCODE_LG2, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); + break; + case OPCODE_LIT: + src[0] = make_src(cs, fpi->SrcReg[0]); + src[1] = emit_const4fv(cs, LIT); + /* First inst: MAX temp, input, [0, 0, 0, -128] + * Write: RG, A */ + code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARG << 11); + code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); + 
code->inst[counter].inst4 = R500_ALPHA_OP_MAX + | R500_ALPHA_ADDRD(get_temp(cs, 0)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX + | R500_ALU_RGBA_ADDRD(get_temp(cs, 0)); + counter++; + /* Second inst: MIN temp, temp, [x, x, x, 128] + * Write: A */ + code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_A << 11); + code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)) | R500_RGB_ADDR1(src[1]); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)) | R500_ALPHA_ADDR1(src[1]); + /* code->inst[counter].inst3; */ + code->inst[counter].inst4 = R500_ALPHA_OP_MAX + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX + | R500_ALU_RGBA_ADDRD(dest); + counter++; + /* Third-fifth insts: POW temp, temp.y, temp.w + * Write: B */ + emit_sop(cs, counter, fpi, OPCODE_LG2, get_temp(cs, 0), SWIZZLE_Y, get_temp(cs, 1)); + code->inst[counter].inst0 |= (R500_WRITEMASK_ARGB << 11); + counter++; + code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 1)) + | R500_RGB_ADDR1(get_temp(cs, 0)); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 1)) + | R500_ALPHA_ADDR1(get_temp(cs, 0)); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); + code->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(cs, 1)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(cs, 1)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + emit_sop(cs, 
counter, fpi, OPCODE_EX2, get_temp(cs, 1), SWIZZLE_W, get_temp(cs, 0)); + code->inst[counter].inst0 |= (R500_WRITEMASK_B << 11); + counter++; + /* Sixth inst: CMP dest, temp.xxxx, temp.[1, x, z, 1], temp.[1, x, 0, 1]; + * Write: ARGB + * This inst's selects need to be swapped as follows: + * 0 -> C ; 1 -> B ; 2 -> A */ + emit_alu(cs, counter, fpi); + code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | R500_ALU_RGB_R_SWIZ_A_1 + | R500_ALU_RGB_G_SWIZ_A_R + | R500_ALU_RGB_B_SWIZ_A_B + | R500_ALU_RGB_SEL_B_SRC0 + | R500_ALU_RGB_R_SWIZ_B_1 + | R500_ALU_RGB_G_SWIZ_B_R + | R500_ALU_RGB_B_SWIZ_B_0; + code->inst[counter].inst4 |= R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_1 + | R500_ALPHA_SEL_B_SRC0 | R500_ALPHA_SWIZ_B_1; + code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC0 + | R500_ALU_RGBA_ALPHA_SEL_C_SRC0 + | R500_ALU_RGBA_R_SWIZ_R + | R500_ALU_RGBA_G_SWIZ_R + | R500_ALU_RGBA_B_SWIZ_R + | R500_ALU_RGBA_A_SWIZ_R; + break; + case OPCODE_LRP: + /* src0 * src1 + INV(src0) * src2 + * 1) MUL src0, src1, temp + * 2) PRE 1-src0; MAD srcp, src2, temp */ + src[0] = make_src(cs, fpi->SrcReg[0]); + src[1] = make_src(cs, fpi->SrcReg[1]); + src[2] = make_src(cs, fpi->SrcReg[2]); + code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | R500_INST_NOP | (R500_WRITEMASK_ARGB << 11); + code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + code->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(cs, 0)) + | 
R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + emit_alu(cs, counter, fpi); + code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[2]) + | R500_RGB_ADDR2(get_temp(cs, 0)) + | R500_RGB_SRCP_OP_1_MINUS_RGB0; + code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[2]) + | R500_ALPHA_ADDR2(get_temp(cs, 0)) + | R500_ALPHA_SRCP_OP_1_MINUS_A0; + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRCP + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); + code->inst[counter].inst4 |= R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRCP | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC2 | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[2])) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[2])); + break; + case OPCODE_MAD: + emit_mad(cs, counter, fpi, 0, 1, 2); + break; + case OPCODE_MAX: + src[0] = make_src(cs, fpi->SrcReg[0]); + src[1] = make_src(cs, fpi->SrcReg[1]); + emit_alu(cs, counter, fpi); + code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + code->inst[counter].inst4 |= R500_ALPHA_OP_MAX + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | 
MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAX + | R500_ALU_RGBA_ADDRD(dest); + break; + case OPCODE_MIN: + src[0] = make_src(cs, fpi->SrcReg[0]); + src[1] = make_src(cs, fpi->SrcReg[1]); + emit_alu(cs, counter, fpi); + code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) | R500_RGB_ADDR1(src[1]); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) | R500_ALPHA_ADDR1(src[1]); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + code->inst[counter].inst4 |= R500_ALPHA_OP_MIN + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + code->inst[counter].inst5 = R500_ALU_RGBA_OP_MIN + | R500_ALU_RGBA_ADDRD(dest); + break; + case OPCODE_MOV: + emit_mov(cs, counter, fpi, make_src(cs, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); + break; + case OPCODE_MUL: + /* Variation on MAD: src0*src1+0 */ + emit_mad(cs, counter, fpi, 0, 1, R500_SWIZZLE_ZERO); + break; + case OPCODE_POW: + /* POW(a,b) = EX2(LN2(a)*b) */ + src[0] = make_src(cs, fpi->SrcReg[0]); + src[1] = make_src(cs, fpi->SrcReg[1]); + emit_sop(cs, counter, fpi, OPCODE_LG2, src[0], make_sop_swizzle(fpi->SrcReg[0]), get_temp(cs, 0)); + code->inst[counter].inst0 |= (R500_WRITEMASK_ARGB << 11); + counter++; + code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)) + | R500_RGB_ADDR1(src[1]); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)) + | R500_ALPHA_ADDR1(src[1]); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(make_rgb_swizzle(fpi->SrcReg[0])) + | R500_ALU_RGB_SEL_B_SRC1 | 
MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[1])); + code->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(cs, 1)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(cs, 1)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + emit_sop(cs, counter, fpi, OPCODE_EX2, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); + break; + case OPCODE_RCP: + src[0] = make_src(cs, fpi->SrcReg[0]); + emit_sop(cs, counter, fpi, OPCODE_RCP, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); + break; + case OPCODE_RSQ: + src[0] = make_src(cs, fpi->SrcReg[0]); + emit_sop(cs, counter, fpi, OPCODE_RSQ, src[0], make_sop_swizzle(fpi->SrcReg[0]), dest); + break; + case OPCODE_SCS: + src[0] = make_src(cs, fpi->SrcReg[0]); + src[1] = emit_const4fv(cs, RCP_2PI); + code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARGB << 11); + code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); + code->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(cs, 0)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); + code->inst[counter].inst2 = 
R500_ALPHA_ADDR0(get_temp(cs, 0)); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); + code->inst[counter].inst4 = R500_ALPHA_OP_FRC + | R500_ALPHA_ADDRD(get_temp(cs, 1)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; + code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC + | R500_ALU_RGBA_ADDRD(get_temp(cs, 1)); + counter++; + /* Do a cosine, then a sine, masking out the channels we want to protect. */ + /* Cosine only goes in R (x) channel. */ + fpi->DstReg.WriteMask = 0x1; + emit_sop(cs, counter, fpi, OPCODE_COS, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); + counter++; + /* Sine only goes in G (y) channel. */ + fpi->DstReg.WriteMask = 0x2; + emit_sop(cs, counter, fpi, OPCODE_SIN, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); + break; + case OPCODE_SGE: + src[0] = make_src(cs, fpi->SrcReg[0]); + src[1] = make_src(cs, fpi->SrcReg[1]); + code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARGB << 11); + code->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) + | R500_RGB_ADDR2(src[1]); + code->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0]) + | R500_ALPHA_ADDR2(src[1]); + code->inst[counter].inst3 = /* 1 */ + MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); + code->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(cs, 0)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); + code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) + | R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) + | R500_ALU_RGBA_MOD_C_NEG + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])) + | R500_ALU_RGBA_ALPHA_MOD_C_NEG; + counter++; + /* This inst's selects need to be swapped as follows: + * 0 -> C ; 1 -> B 
; 2 -> A */ + emit_alu(cs, counter, fpi); + code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ZERO); + code->inst[counter].inst4 |= R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) + | R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ZERO); + code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC0 + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC0 + | R500_ALU_RGBA_A_SWIZ_A; + break; + case OPCODE_SIN: + src[0] = make_src(cs, fpi->SrcReg[0]); + src[1] = emit_const4fv(cs, RCP_2PI); + code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARGB << 11); + code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_RGB); + code->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(cs, 0)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A + | R500_ALPHA_SEL_B_SRC1 | R500_ALPHA_SWIZ_B_A; + code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + code->inst[counter].inst0 = R500_INST_TYPE_ALU | (R500_WRITEMASK_ARGB << 11); + code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_RGB); + code->inst[counter].inst4 = 
R500_ALPHA_OP_FRC + | R500_ALPHA_ADDRD(get_temp(cs, 1)) + | R500_ALPHA_SEL_A_SRC0 | R500_ALPHA_SWIZ_A_A; + code->inst[counter].inst5 = R500_ALU_RGBA_OP_FRC + | R500_ALU_RGBA_ADDRD(get_temp(cs, 1)); + counter++; + emit_sop(cs, counter, fpi, OPCODE_SIN, get_temp(cs, 1), make_sop_swizzle(fpi->SrcReg[0]), dest); + break; + case OPCODE_SLT: + src[0] = make_src(cs, fpi->SrcReg[0]); + src[1] = make_src(cs, fpi->SrcReg[1]); + code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_ARGB << 11); + code->inst[counter].inst1 = R500_RGB_ADDR1(src[0]) + | R500_RGB_ADDR2(src[1]); + code->inst[counter].inst2 = R500_ALPHA_ADDR1(src[0]) + | R500_ALPHA_ADDR2(src[1]); + code->inst[counter].inst3 = /* 1 */ + MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ONE) + | R500_ALU_RGB_SEL_B_SRC1 | MAKE_SWIZ_RGB_B(make_rgb_swizzle(fpi->SrcReg[0])); + code->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(cs, 0)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ONE) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[0])); + code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) + | R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(make_rgb_swizzle(fpi->SrcReg[1])) + | R500_ALU_RGBA_MOD_C_NEG + | R500_ALU_RGBA_ALPHA_SEL_C_SRC2 + | MAKE_SWIZ_ALPHA_C(make_alpha_swizzle(fpi->SrcReg[1])) + | R500_ALU_RGBA_ALPHA_MOD_C_NEG; + counter++; + /* This inst's selects need to be swapped as follows: + * 0 -> C ; 1 -> B ; 2 -> A */ + emit_alu(cs, counter, fpi); + code->inst[counter].inst1 = R500_RGB_ADDR0(get_temp(cs, 0)); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(get_temp(cs, 0)); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(R500_SWIZ_RGB_ZERO) + | R500_ALU_RGB_SEL_B_SRC0 + | MAKE_SWIZ_RGB_B(R500_SWIZ_RGB_ONE); + code->inst[counter].inst4 |= R500_ALPHA_OP_CMP + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(R500_SWIZZLE_ZERO) + | 
R500_ALPHA_SEL_B_SRC0 | MAKE_SWIZ_ALPHA_B(R500_SWIZZLE_ONE); + code->inst[counter].inst5 = R500_ALU_RGBA_OP_CMP + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC0 + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) + | R500_ALU_RGBA_ALPHA_SEL_C_SRC0 + | R500_ALU_RGBA_A_SWIZ_A; + break; + case OPCODE_SUB: + /* Variation on MAD: 1*src0-src1 */ + fpi->SrcReg[1].NegateBase = 0xF; /* NEG_XYZW */ + emit_mad(cs, counter, fpi, R500_SWIZZLE_ONE, 0, 1); + break; + case OPCODE_SWZ: + /* TODO: The rarer negation masks! */ + emit_mov(cs, counter, fpi, make_src(cs, fpi->SrcReg[0]), fpi->SrcReg[0].Swizzle, dest); + break; + case OPCODE_XPD: + /* src0 * src1 - src1 * src0 + * 1) MUL temp.xyz, src0.yzx, src1.zxy + * 2) MAD src0.zxy, src1.yzx, -temp.xyz */ + src[0] = make_src(cs, fpi->SrcReg[0]); + src[1] = make_src(cs, fpi->SrcReg[1]); + code->inst[counter].inst0 = R500_INST_TYPE_ALU | R500_INST_TEX_SEM_WAIT + | (R500_WRITEMASK_RGB << 11); + code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]); + /* Select [y, z, x] */ + temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]); + temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(temp_swiz); + /* Select [z, x, y] */ + temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]); + temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6); + code->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(temp_swiz); + code->inst[counter].inst4 = R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(get_temp(cs, 0)) + | R500_ALPHA_SEL_A_SRC0 | MAKE_SWIZ_ALPHA_A(make_alpha_swizzle(fpi->SrcReg[0])) + | R500_ALPHA_SEL_B_SRC1 | MAKE_SWIZ_ALPHA_B(make_alpha_swizzle(fpi->SrcReg[1])); + code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(get_temp(cs, 0)) + | 
MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_ZERO) + | MAKE_SWIZ_ALPHA_C(R500_SWIZZLE_ZERO); + counter++; + emit_alu(cs, counter, fpi); + code->inst[counter].inst1 = R500_RGB_ADDR0(src[0]) + | R500_RGB_ADDR1(src[1]) + | R500_RGB_ADDR2(get_temp(cs, 0)); + code->inst[counter].inst2 = R500_ALPHA_ADDR0(src[0]) + | R500_ALPHA_ADDR1(src[1]) + | R500_ALPHA_ADDR2(get_temp(cs, 0)); + /* Select [z, x, y] */ + temp_swiz = make_rgb_swizzle(fpi->SrcReg[0]); + temp_swiz = (GET_SWZ(temp_swiz, 2) << 0) | (GET_SWZ(temp_swiz, 0) << 3) | (GET_SWZ(temp_swiz, 1) << 6); + code->inst[counter].inst3 = R500_ALU_RGB_SEL_A_SRC0 + | MAKE_SWIZ_RGB_A(temp_swiz); + /* Select [y, z, x] */ + temp_swiz = make_rgb_swizzle(fpi->SrcReg[1]); + temp_swiz = (GET_SWZ(temp_swiz, 1) << 0) | (GET_SWZ(temp_swiz, 2) << 3) | (GET_SWZ(temp_swiz, 0) << 6); + code->inst[counter].inst3 |= R500_ALU_RGB_SEL_B_SRC1 + | MAKE_SWIZ_RGB_B(temp_swiz); + code->inst[counter].inst4 |= R500_ALPHA_OP_MAD + | R500_ALPHA_ADDRD(dest) + | R500_ALPHA_SWIZ_A_1 + | R500_ALPHA_SWIZ_B_1; + code->inst[counter].inst5 = R500_ALU_RGBA_OP_MAD + | R500_ALU_RGBA_ADDRD(dest) + | R500_ALU_RGBA_SEL_C_SRC2 + | MAKE_SWIZ_RGBA_C(R500_SWIZ_RGB_RGB) + | R500_ALU_RGBA_MOD_C_NEG + | R500_ALU_RGBA_A_SWIZ_0; + break; + case OPCODE_KIL: + case OPCODE_TEX: + case OPCODE_TXB: + case OPCODE_TXP: + emit_tex(cs, fpi, dest, counter); + if (fpi->DstReg.File == PROGRAM_OUTPUT) + counter++; + break; + default: + ERROR("unknown fpi->Opcode %s\n", _mesa_opcode_string(fpi->Opcode)); + break; + } + + /* Finishing touches */ + if (fpi->SaturateMode == SATURATE_ZERO_ONE) { + code->inst[counter].inst0 |= R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP; + } + + counter++; + + return counter; +} + +static GLboolean parse_program(struct r500_pfs_compile_state *cs) +{ + PROG_CODE; + int clauseidx, counter = 0; + + for (clauseidx = 0; clauseidx < cs->compiler->compiler.NumClauses; ++clauseidx) { + struct radeon_clause* clause = &cs->compiler->compiler.Clauses[clauseidx]; + int ip; + + for (ip = 
0; ip < clause->NumInstructions; ++ip) { + counter = do_inst(cs, clause->Instructions + ip, counter); + + if (cs->compiler->fp->error) + return GL_FALSE; + } + } + + /* Finish him! (If it's an ALU/OUT instruction...) */ + if ((code->inst[counter-1].inst0 & 0x3) == 1) { + code->inst[counter-1].inst0 |= R500_INST_LAST; + } else { + /* We still need to put an output inst, right? */ + WARN_ONCE("Final FP instruction is not an OUT.\n"); + } + + cs->nrslots = counter; + + code->max_temp_idx++; + + return GL_TRUE; +} + +static void init_program(struct r500_pfs_compile_state *cs) +{ + PROG_CODE; + struct gl_fragment_program *mp = &cs->compiler->fp->mesa_program; + struct prog_instruction *fpi; + GLuint InputsRead = mp->Base.InputsRead; + GLuint temps_used = 0; + int i, j; + + /* New compile, reset tracking data */ + cs->compiler->fp->optimization = + driQueryOptioni(&cs->compiler->r300->radeon.optionCache, "fp_optimization"); + cs->compiler->fp->translated = GL_FALSE; + cs->compiler->fp->error = GL_FALSE; + code->const_nr = 0; + /* Size of pixel stack, plus 1. */ + code->max_temp_idx = 1; + /* Temp register offset. */ + code->temp_reg_offset = 0; + /* Whether or not we perform any depth writing. */ + cs->compiler->fp->writes_depth = GL_FALSE; + + for (i = 0; i < PFS_MAX_ALU_INST; i++) { + for (j = 0; j < 3; j++) { + cs->slot[i].vsrc[j] = SRC_CONST; + cs->slot[i].ssrc[j] = SRC_CONST; + } + } + + /* Work out what temps the Mesa inputs correspond to, this must match + * what setup_rs_unit does, which shouldn't be a problem as rs_unit + * configures itself based on the fragprog's InputsRead + * + * NOTE: this depends on get_hw_temp() allocating registers in order, + * starting from register 0, so we're just going to do that instead. 
+ */ + + /* Texcoords come first */ + for (i = 0; i < cs->compiler->fp->ctx->Const.MaxTextureUnits; i++) { + if (InputsRead & (FRAG_BIT_TEX0 << i)) { + cs->inputs[FRAG_ATTRIB_TEX0 + i].refcount = 0; + cs->inputs[FRAG_ATTRIB_TEX0 + i].reg = + code->temp_reg_offset; + code->temp_reg_offset++; + } + } + InputsRead &= ~FRAG_BITS_TEX_ANY; + + /* fragment position treated as a texcoord */ + if (InputsRead & FRAG_BIT_WPOS) { + cs->inputs[FRAG_ATTRIB_WPOS].refcount = 0; + cs->inputs[FRAG_ATTRIB_WPOS].reg = + code->temp_reg_offset; + code->temp_reg_offset++; + } + InputsRead &= ~FRAG_BIT_WPOS; + + /* Then primary colour */ + if (InputsRead & FRAG_BIT_COL0) { + cs->inputs[FRAG_ATTRIB_COL0].refcount = 0; + cs->inputs[FRAG_ATTRIB_COL0].reg = + code->temp_reg_offset; + code->temp_reg_offset++; + } + InputsRead &= ~FRAG_BIT_COL0; + + /* Secondary color */ + if (InputsRead & FRAG_BIT_COL1) { + cs->inputs[FRAG_ATTRIB_COL1].refcount = 0; + cs->inputs[FRAG_ATTRIB_COL1].reg = + code->temp_reg_offset; + code->temp_reg_offset++; + } + InputsRead &= ~FRAG_BIT_COL1; + + /* Anything else */ + if (InputsRead) { + WARN_ONCE("Don't know how to handle inputs 0x%x\n", InputsRead); + /* force read from hwreg 0 for now */ + for (i = 0; i < 32; i++) + if (InputsRead & (1 << i)) + cs->inputs[i].reg = 0; + } + + if (!mp->Base.Instructions) { + ERROR("No instructions found in program, going to go die now.\n"); + return; + } + + for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { + for (i = 0; i < 3; i++) { + if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) { + if (fpi->SrcReg[i].Index >= temps_used) + temps_used = fpi->SrcReg[i].Index + 1; + } + } + } + + cs->temp_in_use = temps_used + 1; + + code->max_temp_idx = code->temp_reg_offset + cs->temp_in_use; + + if (RADEON_DEBUG & DEBUG_PIXEL) + fprintf(stderr, "FP temp indices: code->max_temp_idx: %d cs->temp_in_use: %d\n", code->max_temp_idx, cs->temp_in_use); +} + +static void dumb_shader(struct r500_pfs_compile_state *cs) +{ + 
PROG_CODE; + code->inst[0].inst0 = R500_INST_TYPE_TEX + | R500_INST_TEX_SEM_WAIT + | R500_INST_RGB_WMASK_R + | R500_INST_RGB_WMASK_G + | R500_INST_RGB_WMASK_B + | R500_INST_ALPHA_WMASK + | R500_INST_RGB_CLAMP + | R500_INST_ALPHA_CLAMP; + code->inst[0].inst1 = R500_TEX_ID(0) + | R500_TEX_INST_LD + | R500_TEX_SEM_ACQUIRE + | R500_TEX_IGNORE_UNCOVERED; + code->inst[0].inst2 = R500_TEX_SRC_ADDR(0) + | R500_TEX_SRC_S_SWIZ_R + | R500_TEX_SRC_T_SWIZ_G + | R500_TEX_DST_ADDR(0) + | R500_TEX_DST_R_SWIZ_R + | R500_TEX_DST_G_SWIZ_G + | R500_TEX_DST_B_SWIZ_B + | R500_TEX_DST_A_SWIZ_A; + code->inst[0].inst3 = R500_DX_ADDR(0) + | R500_DX_S_SWIZ_R + | R500_DX_T_SWIZ_R + | R500_DX_R_SWIZ_R + | R500_DX_Q_SWIZ_R + | R500_DY_ADDR(0) + | R500_DY_S_SWIZ_R + | R500_DY_T_SWIZ_R + | R500_DY_R_SWIZ_R + | R500_DY_Q_SWIZ_R; + code->inst[0].inst4 = 0x0; + code->inst[0].inst5 = 0x0; + + code->inst[1].inst0 = R500_INST_TYPE_OUT | + R500_INST_TEX_SEM_WAIT | + R500_INST_LAST | + R500_INST_RGB_OMASK_R | + R500_INST_RGB_OMASK_G | + R500_INST_RGB_OMASK_B | + R500_INST_ALPHA_OMASK; + code->inst[1].inst1 = R500_RGB_ADDR0(0) | + R500_RGB_ADDR1(0) | + R500_RGB_ADDR1_CONST | + R500_RGB_ADDR2(0) | + R500_RGB_ADDR2_CONST | + R500_RGB_SRCP_OP_1_MINUS_2RGB0; + code->inst[1].inst2 = R500_ALPHA_ADDR0(0) | + R500_ALPHA_ADDR1(0) | + R500_ALPHA_ADDR1_CONST | + R500_ALPHA_ADDR2(0) | + R500_ALPHA_ADDR2_CONST | + R500_ALPHA_SRCP_OP_1_MINUS_2A0; + code->inst[1].inst3 = R500_ALU_RGB_SEL_A_SRC0 | + R500_ALU_RGB_R_SWIZ_A_R | + R500_ALU_RGB_G_SWIZ_A_G | + R500_ALU_RGB_B_SWIZ_A_B | + R500_ALU_RGB_SEL_B_SRC0 | + R500_ALU_RGB_R_SWIZ_B_1 | + R500_ALU_RGB_B_SWIZ_B_1 | + R500_ALU_RGB_G_SWIZ_B_1; + code->inst[1].inst4 = R500_ALPHA_OP_MAD | + R500_ALPHA_SWIZ_A_A | + R500_ALPHA_SWIZ_B_1; + code->inst[1].inst5 = R500_ALU_RGBA_OP_MAD | + R500_ALU_RGBA_R_SWIZ_0 | + R500_ALU_RGBA_G_SWIZ_0 | + R500_ALU_RGBA_B_SWIZ_0 | + R500_ALU_RGBA_A_SWIZ_0; + + cs->nrslots = 2; +} + +GLboolean r500FragmentProgramEmit(struct 
r500_fragment_program_compiler *compiler) +{ + struct r500_pfs_compile_state cs; + struct r500_fragment_program_code *code = compiler->code; + + _mesa_memset(&cs, 0, sizeof(cs)); + cs.compiler = compiler; + init_program(&cs); + + if (!parse_program(&cs)) { +#if 0 + ERROR("Huh. Couldn't parse program. There should be additional errors explaining why.\nUsing dumb shader...\n"); + dumb_shader(fp); + code->inst_offset = 0; + code->inst_end = cs.nrslots - 1; +#endif + return GL_FALSE; + } + + code->inst_offset = 0; + code->inst_end = cs.nrslots - 1; + + return GL_TRUE; +} -- cgit v1.2.3 From b97e48ffe3561288b5ebbebf7d439d4714fbb6a1 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 14 Jun 2008 08:56:03 -0700 Subject: r5xx: More FP rewriting; fix texrect FP insts. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 77 +------------------------- src/mesa/drivers/dri/r300/r500_fragprog_emit.c | 35 +++++++----- 2 files changed, 23 insertions(+), 89 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 197036008a..7cf32acd4e 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -39,8 +39,6 @@ static void reset_srcreg(struct prog_src_register* reg) * - extract operand swizzles * - introduce a temporary register when write masks are needed * - * \todo If/when r5xx uses the radeon_program architecture, this can probably - * be reused. 
*/ static GLboolean transform_TEX( struct radeon_program_transform_context* context, @@ -58,6 +56,7 @@ static GLboolean transform_TEX( inst.Opcode != OPCODE_KIL) return GL_FALSE; + /* ARB_shadow & EXT_shadow_funcs */ if (inst.Opcode != OPCODE_KIL && compiler->fp->mesa_program.Base.ShadowSamplers & (1 << inst.TexSrcUnit)) { GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func; @@ -84,78 +83,6 @@ static GLboolean transform_TEX( inst.DstReg.WriteMask = WRITEMASK_XYZW; } - - /* Hardware uses [0..1]x[0..1] range for rectangle textures - * instead of [0..Width]x[0..Height]. - * Add a scaling instruction. - */ - if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) { - gl_state_index tokens[STATE_LENGTH] = { - STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0, - 0 - }; - - int tempreg = radeonCompilerAllocateTemporary(context->compiler); - int factor_index; - - tokens[2] = inst.TexSrcUnit; - factor_index = - _mesa_add_state_reference( - compiler->fp->mesa_program.Base.Parameters, tokens); - - tgt = radeonClauseInsertInstructions(context->compiler, context->dest, - context->dest->NumInstructions, 1); - - tgt->Opcode = OPCODE_MAD; - tgt->DstReg.File = PROGRAM_TEMPORARY; - tgt->DstReg.Index = tempreg; - tgt->SrcReg[0] = inst.SrcReg[0]; - tgt->SrcReg[1].File = PROGRAM_STATE_VAR; - tgt->SrcReg[1].Index = factor_index; - tgt->SrcReg[2].File = PROGRAM_BUILTIN; - tgt->SrcReg[2].Swizzle = SWIZZLE_0000; - - reset_srcreg(&inst.SrcReg[0]); - inst.SrcReg[0].File = PROGRAM_TEMPORARY; - inst.SrcReg[0].Index = tempreg; - } - - /* Texture operations do not support swizzles etc. in hardware, - * so emit an additional arithmetic operation if necessary. 
- */ - if (inst.SrcReg[0].Swizzle != SWIZZLE_NOOP || - inst.SrcReg[0].Abs || inst.SrcReg[0].NegateBase || inst.SrcReg[0].NegateAbs) { - int tempreg = radeonCompilerAllocateTemporary(context->compiler); - - tgt = radeonClauseInsertInstructions(context->compiler, context->dest, - context->dest->NumInstructions, 1); - - tgt->Opcode = OPCODE_MAD; - tgt->DstReg.File = PROGRAM_TEMPORARY; - tgt->DstReg.Index = tempreg; - tgt->SrcReg[0] = inst.SrcReg[0]; - tgt->SrcReg[1].File = PROGRAM_BUILTIN; - tgt->SrcReg[1].Swizzle = SWIZZLE_1111; - tgt->SrcReg[2].File = PROGRAM_BUILTIN; - tgt->SrcReg[2].Swizzle = SWIZZLE_0000; - - reset_srcreg(&inst.SrcReg[0]); - inst.SrcReg[0].File = PROGRAM_TEMPORARY; - inst.SrcReg[0].Index = tempreg; - } - - if (inst.Opcode != OPCODE_KIL) { - if (inst.DstReg.File != PROGRAM_TEMPORARY || - inst.DstReg.WriteMask != WRITEMASK_XYZW) { - int tempreg = radeonCompilerAllocateTemporary(context->compiler); - - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = tempreg; - inst.DstReg.WriteMask = WRITEMASK_XYZW; - destredirect = GL_TRUE; - } - } - tgt = radeonClauseInsertInstructions(context->compiler, context->dest, context->dest->NumInstructions, 1); _mesa_copy_instructions(tgt, &inst, 1); @@ -390,7 +317,7 @@ void r500TranslateFragmentShader(r300ContextPtr r300, radeonCompilerInit(&compiler.compiler, r300->radeon.glCtx, &fp->mesa_program.Base); - insert_WPOS_trailer(&compiler); + /* insert_WPOS_trailer(&compiler); */ struct radeon_program_transformation transformations[1] = { { &transform_TEX, &compiler } diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c index 24182f18ca..3b515515ef 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c @@ -315,7 +315,7 @@ static GLuint make_src(struct r500_pfs_compile_state *cs, struct prog_src_regist break; case PROGRAM_ENV_PARAM: reg = emit_const4fv(cs, - 
cs->compiler->fp->ctx->FragmentProgram.Parameters[src.Index]); + cs->compiler->compiler.Ctx->FragmentProgram.Parameters[src.Index]); break; case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: @@ -1286,14 +1286,17 @@ static GLboolean parse_program(struct r500_pfs_compile_state *cs) PROG_CODE; int clauseidx, counter = 0; - for (clauseidx = 0; clauseidx < cs->compiler->compiler.NumClauses; ++clauseidx) { + for (clauseidx = 0; clauseidx < cs->compiler->compiler.NumClauses; clauseidx++) { struct radeon_clause* clause = &cs->compiler->compiler.Clauses[clauseidx]; + struct prog_instruction* fpi; + int ip; - for (ip = 0; ip < clause->NumInstructions; ++ip) { - counter = do_inst(cs, clause->Instructions + ip, counter); + for (ip = 0; ip < clause->NumInstructions; ip++) { + fpi = clause->Instructions + ip; + counter = do_inst(cs, fpi, counter); - if (cs->compiler->fp->error) + if (cs->compiler->fp->error == GL_TRUE) return GL_FALSE; } } @@ -1397,20 +1400,24 @@ static void init_program(struct r500_pfs_compile_state *cs) cs->inputs[i].reg = 0; } - if (!mp->Base.Instructions) { - ERROR("No instructions found in program, going to go die now.\n"); - return; - } + int clauseidx; + + for (clauseidx = 0; clauseidx < cs->compiler->compiler.NumClauses; ++clauseidx) { + struct radeon_clause* clause = &cs->compiler->compiler.Clauses[clauseidx]; + int ip; - for (fpi = mp->Base.Instructions; fpi->Opcode != OPCODE_END; fpi++) { - for (i = 0; i < 3; i++) { - if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) { - if (fpi->SrcReg[i].Index >= temps_used) - temps_used = fpi->SrcReg[i].Index + 1; + for (ip = 0; ip < clause->NumInstructions; ip++) { + fpi = clause->Instructions + ip; + for (i = 0; i < 3; i++) { + if (fpi->SrcReg[i].File == PROGRAM_TEMPORARY) { + if (fpi->SrcReg[i].Index >= temps_used) + temps_used = fpi->SrcReg[i].Index + 1; + } } } } + cs->temp_in_use = temps_used + 1; code->max_temp_idx = code->temp_reg_offset + cs->temp_in_use; -- cgit v1.2.3 From 
825fdfd0c1fc9c82c8a880dea62a7fffc15d7373 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 14 Jun 2008 20:26:10 +0200 Subject: r300_fragprog: Use less complex instructions MOV, ADD and MUL do not fit the hardware as well as MAD, but they are less complex and thus leave more room for future optimizations. --- src/mesa/drivers/dri/r300/r300_fragprog.c | 38 ++++++++----------------------- 1 file changed, 10 insertions(+), 28 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index da2dedece8..f8cc3ae772 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -89,21 +89,15 @@ static GLboolean transform_TEX( tgt = radeonClauseInsertInstructions(context->compiler, context->dest, context->dest->NumInstructions, 1); - tgt->Opcode = OPCODE_MAD; + tgt->Opcode = OPCODE_MOV; tgt->DstReg = inst.DstReg; tgt->SrcReg[0].File = PROGRAM_BUILTIN; - tgt->SrcReg[0].Swizzle = SWIZZLE_0000; - tgt->SrcReg[1].File = PROGRAM_BUILTIN; - tgt->SrcReg[1].Swizzle = SWIZZLE_0000; - tgt->SrcReg[2].File = PROGRAM_BUILTIN; - tgt->SrcReg[2].Swizzle = comparefunc == GL_ALWAYS ? SWIZZLE_1111 : SWIZZLE_0000; + tgt->SrcReg[0].Swizzle = comparefunc == GL_ALWAYS ? 
SWIZZLE_1111 : SWIZZLE_0000; return GL_TRUE; } - int tempreg = radeonCompilerAllocateTemporary(context->compiler); - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = tempreg; + inst.DstReg.Index = radeonCompilerAllocateTemporary(context->compiler); inst.DstReg.WriteMask = WRITEMASK_XYZW; } @@ -129,14 +123,12 @@ static GLboolean transform_TEX( tgt = radeonClauseInsertInstructions(context->compiler, context->dest, context->dest->NumInstructions, 1); - tgt->Opcode = OPCODE_MAD; + tgt->Opcode = OPCODE_MUL; tgt->DstReg.File = PROGRAM_TEMPORARY; tgt->DstReg.Index = tempreg; tgt->SrcReg[0] = inst.SrcReg[0]; tgt->SrcReg[1].File = PROGRAM_STATE_VAR; tgt->SrcReg[1].Index = factor_index; - tgt->SrcReg[2].File = PROGRAM_BUILTIN; - tgt->SrcReg[2].Swizzle = SWIZZLE_0000; reset_srcreg(&inst.SrcReg[0]); inst.SrcReg[0].File = PROGRAM_TEMPORARY; @@ -153,14 +145,10 @@ static GLboolean transform_TEX( tgt = radeonClauseInsertInstructions(context->compiler, context->dest, context->dest->NumInstructions, 1); - tgt->Opcode = OPCODE_MAD; + tgt->Opcode = OPCODE_MOV; tgt->DstReg.File = PROGRAM_TEMPORARY; tgt->DstReg.Index = tempreg; tgt->SrcReg[0] = inst.SrcReg[0]; - tgt->SrcReg[1].File = PROGRAM_BUILTIN; - tgt->SrcReg[1].Swizzle = SWIZZLE_1111; - tgt->SrcReg[2].File = PROGRAM_BUILTIN; - tgt->SrcReg[2].Swizzle = SWIZZLE_0000; reset_srcreg(&inst.SrcReg[0]); inst.SrcReg[0].File = PROGRAM_TEMPORARY; @@ -191,7 +179,7 @@ static GLboolean transform_TEX( tgt = radeonClauseInsertInstructions(context->compiler, context->dest, context->dest->NumInstructions, 2); - tgt[0].Opcode = OPCODE_MAD; + tgt[0].Opcode = OPCODE_ADD; tgt[0].DstReg = inst.DstReg; tgt[0].DstReg.WriteMask = orig_inst->DstReg.WriteMask; tgt[0].SrcReg[0].File = PROGRAM_TEMPORARY; @@ -200,10 +188,8 @@ static GLboolean transform_TEX( tgt[0].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z); else if (depthmode == 2) /* GL_ALPHA */ tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW; - tgt[0].SrcReg[1].File = 
PROGRAM_BUILTIN; - tgt[0].SrcReg[1].Swizzle = SWIZZLE_1111; - tgt[0].SrcReg[2] = inst.SrcReg[0]; - tgt[0].SrcReg[2].Swizzle = SWIZZLE_ZZZZ; + tgt[0].SrcReg[1] = inst.SrcReg[0]; + tgt[0].SrcReg[1].Swizzle = SWIZZLE_ZZZZ; /* Recall that SrcReg[0] is tex, SrcReg[2] is r and: * r < tex <=> -tex+r < 0 @@ -211,7 +197,7 @@ static GLboolean transform_TEX( if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL) tgt[0].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW; else - tgt[0].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW; + tgt[0].SrcReg[1].NegateBase = tgt[0].SrcReg[1].NegateBase ^ NEGATE_XYZW; tgt[1].Opcode = OPCODE_CMP; tgt[1].DstReg = orig_inst->DstReg; @@ -231,14 +217,10 @@ static GLboolean transform_TEX( tgt = radeonClauseInsertInstructions(context->compiler, context->dest, context->dest->NumInstructions, 1); - tgt->Opcode = OPCODE_MAD; + tgt->Opcode = OPCODE_MOV; tgt->DstReg = orig_inst->DstReg; tgt->SrcReg[0].File = PROGRAM_TEMPORARY; tgt->SrcReg[0].Index = inst.DstReg.Index; - tgt->SrcReg[1].File = PROGRAM_BUILTIN; - tgt->SrcReg[1].Swizzle = SWIZZLE_1111; - tgt->SrcReg[2].File = PROGRAM_BUILTIN; - tgt->SrcReg[2].Swizzle = SWIZZLE_0000; } return GL_TRUE; -- cgit v1.2.3 From 43da1189610fcaa5ade69620734a7b1e5caf84c4 Mon Sep 17 00:00:00 2001 From: Nicolai Haehnle Date: Sat, 14 Jun 2008 23:09:15 +0200 Subject: r300: Add radeonTransformALU and fix a bug in r300_fragprog DPH This new generic transform replaces "special" instructions by more generic variants. Hopefully, we will be able to share this code between r300 and r500. 
--- src/mesa/drivers/dri/r300/Makefile | 1 + src/mesa/drivers/dri/r300/r300_fragprog.c | 9 +- src/mesa/drivers/dri/r300/r300_fragprog_emit.c | 101 +-------- src/mesa/drivers/dri/r300/radeon_program_alu.c | 284 +++++++++++++++++++++++++ src/mesa/drivers/dri/r300/radeon_program_alu.h | 38 ++++ 5 files changed, 334 insertions(+), 99 deletions(-) create mode 100644 src/mesa/drivers/dri/r300/radeon_program_alu.c create mode 100644 src/mesa/drivers/dri/r300/radeon_program_alu.h (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile index 7b8f5f1384..d52b2b4c36 100644 --- a/src/mesa/drivers/dri/r300/Makefile +++ b/src/mesa/drivers/dri/r300/Makefile @@ -37,6 +37,7 @@ DRIVER_SOURCES = \ r300_tex.c \ r300_texstate.c \ radeon_program.c \ + radeon_program_alu.c \ r300_vertprog.c \ r300_fragprog.c \ r300_fragprog_emit.c \ diff --git a/src/mesa/drivers/dri/r300/r300_fragprog.c b/src/mesa/drivers/dri/r300/r300_fragprog.c index f8cc3ae772..6d24d266fe 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog.c @@ -49,6 +49,8 @@ #include "r300_fragprog.h" #include "r300_state.h" +#include "radeon_program_alu.h" + static void reset_srcreg(struct prog_src_register* reg) { @@ -396,12 +398,13 @@ void r300TranslateFragmentShader(r300ContextPtr r300, insert_WPOS_trailer(&compiler); - struct radeon_program_transformation transformations[1] = { - { &transform_TEX, &compiler } + struct radeon_program_transformation transformations[] = { + { &transform_TEX, &compiler }, + { &radeonTransformALU, 0 } }; radeonClauseLocalTransform(&compiler.compiler, &compiler.compiler.Clauses[0], - 1, transformations); + 2, transformations); if (RADEON_DEBUG & DEBUG_PIXEL) { _mesa_printf("Compiler state after transformations:\n"); diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c index aec202a129..9ba29feb40 100644 --- 
a/src/mesa/drivers/dri/r300/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_emit.c @@ -838,6 +838,10 @@ static GLuint t_src(struct r300_pfs_compile_state *cs, /* no point swizzling ONE/ZERO/HALF constants... */ if (REG_GET_VSWZ(r) < SWIZZLE_111 || REG_GET_SSWZ(r) < SWIZZLE_ZERO) r = do_swizzle(cs, r, fpsrc.Swizzle, fpsrc.NegateBase); + if (fpsrc.Abs) + r = absolute(r); + if (fpsrc.NegateAbs) + r = negate(r); return r; } @@ -1309,7 +1313,7 @@ static int find_and_prepare_slot(struct r300_pfs_compile_state *cs, swz[i] = (s_swiz[REG_GET_SSWZ(src[i])].base + (srcpos[i] * s_swiz[REG_GET_SSWZ(src[i])]. - stride)) | ((src[i] & REG_NEGV_MASK) + stride)) | ((src[i] & REG_NEGS_MASK) ? ARG_NEG : 0) | ((src[i] & REG_ABS_MASK) @@ -1562,11 +1566,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst } switch (fpi->Opcode) { - case OPCODE_ABS: - src[0] = t_src(cs, fpi->SrcReg[0]); - emit_arith(cs, PFS_OP_MAD, dest, mask, - absolute(src[0]), pfs_one, pfs_zero, flags); - break; case OPCODE_ADD: src[0] = t_src(cs, fpi->SrcReg[0]); src[1] = t_src(cs, fpi->SrcReg[1]); @@ -1649,16 +1648,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst emit_arith(cs, PFS_OP_DP4, dest, mask, src[0], src[1], undef, flags); break; - case OPCODE_DPH: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - /* src0.xyz1 -> temp - * DP4 dest, temp, src1 - */ - emit_arith(cs, PFS_OP_DP4, dest, mask, - swizzle(src[0], X, Y, Z, ONE), src[1], - undef, flags); - break; case OPCODE_DST: src[0] = t_src(cs, fpi->SrcReg[0]); src[1] = t_src(cs, fpi->SrcReg[1]); @@ -1685,18 +1674,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst emit_arith(cs, PFS_OP_EX2, dest, mask, src[0], undef, undef, flags); break; - case OPCODE_FLR: - src[0] = t_src(cs, fpi->SrcReg[0]); - temp[0] = get_temp_reg(cs); - /* FRC temp, src0 - * MAD dest, src0, 1.0, -temp - */ - emit_arith(cs, PFS_OP_FRC, 
temp[0], mask, - keep(src[0]), undef, undef, 0); - emit_arith(cs, PFS_OP_MAD, dest, mask, - src[0], pfs_one, negate(temp[0]), flags); - free_temp(cs, temp[0]); - break; case OPCODE_FRC: src[0] = t_src(cs, fpi->SrcReg[0]); emit_arith(cs, PFS_OP_FRC, dest, mask, @@ -1751,7 +1728,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst src[0], src[1], undef, flags); break; case OPCODE_MOV: - case OPCODE_SWZ: src[0] = t_src(cs, fpi->SrcReg[0]); emit_arith(cs, PFS_OP_MAD, dest, mask, src[0], pfs_one, pfs_zero, flags); @@ -1762,18 +1738,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst emit_arith(cs, PFS_OP_MAD, dest, mask, src[0], src[1], pfs_zero, flags); break; - case OPCODE_POW: - src[0] = t_scalar_src(cs, fpi->SrcReg[0]); - src[1] = t_scalar_src(cs, fpi->SrcReg[1]); - temp[0] = get_temp_reg(cs); - emit_arith(cs, PFS_OP_LG2, temp[0], WRITEMASK_W, - src[0], undef, undef, 0); - emit_arith(cs, PFS_OP_MAD, temp[0], WRITEMASK_W, - temp[0], src[1], pfs_zero, 0); - emit_arith(cs, PFS_OP_EX2, dest, fpi->DstReg.WriteMask, - temp[0], undef, undef, 0); - free_temp(cs, temp[0]); - break; case OPCODE_RCP: src[0] = t_scalar_src(cs, fpi->SrcReg[0]); emit_arith(cs, PFS_OP_RCP, dest, mask, @@ -1852,19 +1816,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst free_temp(cs, temp[0]); free_temp(cs, temp[1]); break; - case OPCODE_SGE: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - temp[0] = get_temp_reg(cs); - /* temp = src0 - src1 - * dest.c = (temp.c < 0.0) ? 
0 : 1 - */ - emit_arith(cs, PFS_OP_MAD, temp[0], mask, - src[0], pfs_one, negate(src[1]), 0); - emit_arith(cs, PFS_OP_CMP, dest, mask, - pfs_one, pfs_zero, temp[0], 0); - free_temp(cs, temp[0]); - break; case OPCODE_SIN: /* * using a parabola: @@ -1918,25 +1869,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst free_temp(cs, temp[0]); break; - case OPCODE_SLT: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - temp[0] = get_temp_reg(cs); - /* temp = src0 - src1 - * dest.c = (temp.c < 0.0) ? 1 : 0 - */ - emit_arith(cs, PFS_OP_MAD, temp[0], mask, - src[0], pfs_one, negate(src[1]), 0); - emit_arith(cs, PFS_OP_CMP, dest, mask, - pfs_zero, pfs_one, temp[0], 0); - free_temp(cs, temp[0]); - break; - case OPCODE_SUB: - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - emit_arith(cs, PFS_OP_MAD, dest, mask, - src[0], pfs_one, negate(src[1]), flags); - break; case OPCODE_TEX: emit_tex(cs, fpi, R300_TEX_OP_LD); break; @@ -1946,29 +1878,6 @@ static void emit_instruction(struct r300_pfs_compile_state *cs, struct prog_inst case OPCODE_TXP: emit_tex(cs, fpi, R300_TEX_OP_TXP); break; - case OPCODE_XPD:{ - src[0] = t_src(cs, fpi->SrcReg[0]); - src[1] = t_src(cs, fpi->SrcReg[1]); - temp[0] = get_temp_reg(cs); - /* temp = src0.zxy * src1.yzx */ - emit_arith(cs, PFS_OP_MAD, temp[0], - WRITEMASK_XYZ, swizzle(keep(src[0]), - Z, X, Y, W), - swizzle(keep(src[1]), Y, Z, X, W), - pfs_zero, 0); - /* dest.xyz = src0.yzx * src1.zxy - temp - * dest.w = undefined - * */ - emit_arith(cs, PFS_OP_MAD, dest, - mask & WRITEMASK_XYZ, swizzle(src[0], - Y, Z, - X, W), - swizzle(src[1], Z, X, Y, W), - negate(temp[0]), flags); - /* cleanup */ - free_temp(cs, temp[0]); - break; - } default: ERROR("unknown fpi->Opcode %d\n", fpi->Opcode); break; diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.c b/src/mesa/drivers/dri/r300/radeon_program_alu.c new file mode 100644 index 0000000000..7fe940a7d7 --- /dev/null +++ 
b/src/mesa/drivers/dri/r300/radeon_program_alu.c @@ -0,0 +1,284 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/** + * @file + * + * Shareable transformations that transform "special" ALU instructions + * into ALU instructions that are supported by hardware. 
+ * + */ + +#include "radeon_program_alu.h" + + +static struct prog_instruction *emit1(struct radeon_program_transform_context* ctx, + gl_inst_opcode Opcode, struct prog_dst_register DstReg, + struct prog_src_register SrcReg) +{ + struct prog_instruction *fpi = + radeonClauseInsertInstructions(ctx->compiler, ctx->dest, + ctx->dest->NumInstructions, 1); + + fpi->Opcode = Opcode; + fpi->DstReg = DstReg; + fpi->SrcReg[0] = SrcReg; + return fpi; +} + +static struct prog_instruction *emit2(struct radeon_program_transform_context* ctx, + gl_inst_opcode Opcode, struct prog_dst_register DstReg, + struct prog_src_register SrcReg0, struct prog_src_register SrcReg1) +{ + struct prog_instruction *fpi = + radeonClauseInsertInstructions(ctx->compiler, ctx->dest, + ctx->dest->NumInstructions, 1); + + fpi->Opcode = Opcode; + fpi->DstReg = DstReg; + fpi->SrcReg[0] = SrcReg0; + fpi->SrcReg[1] = SrcReg1; + return fpi; +} + +static struct prog_instruction *emit3(struct radeon_program_transform_context* ctx, + gl_inst_opcode Opcode, struct prog_dst_register DstReg, + struct prog_src_register SrcReg0, struct prog_src_register SrcReg1, + struct prog_src_register SrcReg2) +{ + struct prog_instruction *fpi = + radeonClauseInsertInstructions(ctx->compiler, ctx->dest, + ctx->dest->NumInstructions, 1); + + fpi->Opcode = Opcode; + fpi->DstReg = DstReg; + fpi->SrcReg[0] = SrcReg0; + fpi->SrcReg[1] = SrcReg1; + fpi->SrcReg[2] = SrcReg2; + return fpi; +} + +static void set_swizzle(struct prog_src_register *SrcReg, int coordinate, int swz) +{ + SrcReg->Swizzle &= ~(7 << (3*coordinate)); + SrcReg->Swizzle |= swz << (3*coordinate); +} + +static void set_negate_base(struct prog_src_register *SrcReg, int coordinate, int negate) +{ + SrcReg->NegateBase &= ~(1 << coordinate); + SrcReg->NegateBase |= (negate << coordinate); +} + +static struct prog_dst_register dstreg(int file, int index) +{ + struct prog_dst_register dst; + dst.File = file; + dst.Index = index; + dst.WriteMask = WRITEMASK_XYZW; + 
dst.CondMask = COND_TR; + dst.CondSwizzle = SWIZZLE_NOOP; + dst.CondSrc = 0; + dst.pad = 0; + return dst; +} + +static const struct prog_src_register builtin_zero = { + .File = PROGRAM_BUILTIN, + .Index = 0, + .Swizzle = SWIZZLE_0000 +}; +static const struct prog_src_register builtin_one = { + .File = PROGRAM_BUILTIN, + .Index = 0, + .Swizzle = SWIZZLE_1111 +}; +static const struct prog_src_register srcreg_undefined = { + .File = PROGRAM_UNDEFINED, + .Index = 0, + .Swizzle = SWIZZLE_NOOP +}; + +static struct prog_src_register srcreg(int file, int index) +{ + struct prog_src_register src = srcreg_undefined; + src.File = file; + src.Index = index; + return src; +} + +static struct prog_src_register negate(struct prog_src_register reg) +{ + struct prog_src_register newreg = reg; + newreg.NegateAbs = !newreg.NegateAbs; + return newreg; +} + +static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w) +{ + struct prog_src_register swizzled = reg; + swizzled.Swizzle = MAKE_SWIZZLE4( + GET_SWZ(reg.Swizzle, x), + GET_SWZ(reg.Swizzle, y), + GET_SWZ(reg.Swizzle, z), + GET_SWZ(reg.Swizzle, w)); + return swizzled; +} + +static struct prog_src_register scalar(struct prog_src_register reg) +{ + return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); +} + +static void transform_ABS(struct radeon_program_transform_context* ctx, + struct prog_instruction* inst) +{ + struct prog_src_register src = inst->SrcReg[0]; + src.Abs = 1; + src.NegateBase = 0; + src.NegateAbs = 0; + emit1(ctx, OPCODE_MOV, inst->DstReg, src); +} + +static void transform_DPH(struct radeon_program_transform_context* ctx, + struct prog_instruction* inst) +{ + struct prog_src_register src0 = inst->SrcReg[0]; + if (src0.NegateAbs) { + if (src0.Abs) { + int tempreg = radeonCompilerAllocateTemporary(ctx->compiler); + emit1(ctx, OPCODE_MOV, dstreg(PROGRAM_TEMPORARY, tempreg), src0); + src0 = srcreg(src0.File, src0.Index); + } else { + src0.NegateAbs = 0; + 
src0.NegateBase ^= NEGATE_XYZW; + } + } + set_swizzle(&src0, 3, SWIZZLE_ONE); + set_negate_base(&src0, 3, 0); + emit2(ctx, OPCODE_DP4, inst->DstReg, src0, inst->SrcReg[1]); +} + +static void transform_FLR(struct radeon_program_transform_context* ctx, + struct prog_instruction* inst) +{ + int tempreg = radeonCompilerAllocateTemporary(ctx->compiler); + emit1(ctx, OPCODE_FRC, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]); + emit2(ctx, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg))); +} + +static void transform_POW(struct radeon_program_transform_context* ctx, + struct prog_instruction* inst) +{ + int tempreg = radeonCompilerAllocateTemporary(ctx->compiler); + struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg); + struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg); + tempdst.WriteMask = WRITEMASK_W; + tempsrc.Swizzle = SWIZZLE_WWWW; + + emit1(ctx, OPCODE_LG2, tempdst, scalar(inst->SrcReg[0])); + emit2(ctx, OPCODE_MUL, tempdst, tempsrc, scalar(inst->SrcReg[1])); + emit1(ctx, OPCODE_EX2, inst->DstReg, tempsrc); +} + +static void transform_SGE(struct radeon_program_transform_context* ctx, + struct prog_instruction* inst) +{ + int tempreg = radeonCompilerAllocateTemporary(ctx->compiler); + + emit2(ctx, OPCODE_ADD, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); + emit3(ctx, OPCODE_CMP, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one); +} + +static void transform_SLT(struct radeon_program_transform_context* ctx, + struct prog_instruction* inst) +{ + int tempreg = radeonCompilerAllocateTemporary(ctx->compiler); + + emit2(ctx, OPCODE_ADD, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1])); + emit3(ctx, OPCODE_CMP, inst->DstReg, srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero); +} + +static void transform_SUB(struct radeon_program_transform_context* ctx, + struct prog_instruction* inst) +{ + 
emit2(ctx, OPCODE_ADD, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1])); +} + +static void transform_SWZ(struct radeon_program_transform_context* ctx, + struct prog_instruction* inst) +{ + emit1(ctx, OPCODE_MOV, inst->DstReg, inst->SrcReg[0]); +} + +static void transform_XPD(struct radeon_program_transform_context* ctx, + struct prog_instruction* inst) +{ + int tempreg = radeonCompilerAllocateTemporary(ctx->compiler); + + emit2(ctx, OPCODE_MUL, dstreg(PROGRAM_TEMPORARY, tempreg), + swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W)); + emit3(ctx, OPCODE_MAD, inst->DstReg, + swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W), + swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W), + negate(srcreg(PROGRAM_TEMPORARY, tempreg))); +} + + +/** + * Can be used as a transformation for @ref radeonClauseLocalTransform, + * no userData necessary. + * + * Eliminates the following ALU instructions: + * ABS, DPH, FLR, POW, SGE, SLT, SUB, SWZ, XPD + * using: + * MOV, ADD, MUL, MAD, FRC, DP4, LG2, EX2, CMP + * + * @note should be applicable to R300 and R500 fragment programs. + * + * @todo add LIT here as well? 
+ */ +GLboolean radeonTransformALU( + struct radeon_program_transform_context* ctx, + struct prog_instruction* inst, + void* unused) +{ + switch(inst->Opcode) { + case OPCODE_ABS: transform_ABS(ctx, inst); return GL_TRUE; + case OPCODE_DPH: transform_DPH(ctx, inst); return GL_TRUE; + case OPCODE_FLR: transform_FLR(ctx, inst); return GL_TRUE; + case OPCODE_POW: transform_POW(ctx, inst); return GL_TRUE; + case OPCODE_SGE: transform_SGE(ctx, inst); return GL_TRUE; + case OPCODE_SLT: transform_SLT(ctx, inst); return GL_TRUE; + case OPCODE_SUB: transform_SUB(ctx, inst); return GL_TRUE; + case OPCODE_SWZ: transform_SWZ(ctx, inst); return GL_TRUE; + case OPCODE_XPD: transform_XPD(ctx, inst); return GL_TRUE; + default: + return GL_FALSE; + } +} diff --git a/src/mesa/drivers/dri/r300/radeon_program_alu.h b/src/mesa/drivers/dri/r300/radeon_program_alu.h new file mode 100644 index 0000000000..940459624f --- /dev/null +++ b/src/mesa/drivers/dri/r300/radeon_program_alu.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2008 Nicolai Haehnle. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __RADEON_PROGRAM_ALU_H_ +#define __RADEON_PROGRAM_ALU_H_ + +#include "radeon_program.h" + +GLboolean radeonTransformALU( + struct radeon_program_transform_context*, + struct prog_instruction*, + void*); + +#endif /* __RADEON_PROGRAM_ALU_H_ */ -- cgit v1.2.3 From 7413c55d93ccdf7ba58932d6186f1e6230df0e8a Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 14 Jun 2008 17:51:04 -0700 Subject: r5xx: Detangle FP fallback and translation switches. r5xx should fallback if it encounters a bad FP. TODO: Re-enable the dumb shader so we don't have to completely fallback. --- src/mesa/drivers/dri/r300/r500_fragprog.c | 7 +++---- src/mesa/drivers/dri/r300/r500_fragprog_emit.c | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 7cf32acd4e..0483c09ba0 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -331,20 +331,19 @@ void r500TranslateFragmentShader(r300ContextPtr r300, radeonCompilerDump(&compiler.compiler); } - if (!r500FragmentProgramEmit(&compiler)) - fp->error = GL_TRUE; + fp->translated = r500FragmentProgramEmit(&compiler); radeonCompilerCleanup(&compiler.compiler); r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM); - fp->translated = GL_TRUE; if (RADEON_DEBUG & DEBUG_PIXEL) { fprintf(stderr, "Mesa program:\n"); fprintf(stderr, "-------------\n"); _mesa_print_program(&fp->mesa_program.Base); fflush(stdout); - dump_program(&fp->code); + if (fp->translated) + dump_program(&fp->code); } } diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c 
b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c index 3b515515ef..59c9048653 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c @@ -1296,7 +1296,7 @@ static GLboolean parse_program(struct r500_pfs_compile_state *cs) fpi = clause->Instructions + ip; counter = do_inst(cs, fpi, counter); - if (cs->compiler->fp->error == GL_TRUE) + if (cs->compiler->fp->error) return GL_FALSE; } } -- cgit v1.2.3 From f1ae5d4b4471108049080b34c482a083ae74860a Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sat, 14 Jun 2008 19:32:33 -0700 Subject: r5xx: Enable fragment.position, partial ARB_shadow. I don't like PROGRAM_BUILTIN; could we either patch Mesa or just use a different constant? --- src/mesa/drivers/dri/r300/r500_fragprog.c | 18 ++++++---------- src/mesa/drivers/dri/r300/r500_fragprog_emit.c | 30 +++++++++++++++----------- 2 files changed, 25 insertions(+), 23 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r500_fragprog.c b/src/mesa/drivers/dri/r300/r500_fragprog.c index 0483c09ba0..5d72ec2784 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog.c @@ -65,21 +65,17 @@ static GLboolean transform_TEX( tgt = radeonClauseInsertInstructions(context->compiler, context->dest, context->dest->NumInstructions, 1); - tgt->Opcode = OPCODE_MAD; - tgt->DstReg = inst.DstReg; + tgt->Opcode = OPCODE_MOV; + tgt->DstReg.File = inst.DstReg.File; + tgt->DstReg.Index = inst.DstReg.Index; + tgt->DstReg.WriteMask = inst.DstReg.WriteMask; tgt->SrcReg[0].File = PROGRAM_BUILTIN; - tgt->SrcReg[0].Swizzle = SWIZZLE_0000; - tgt->SrcReg[1].File = PROGRAM_BUILTIN; - tgt->SrcReg[1].Swizzle = SWIZZLE_0000; - tgt->SrcReg[2].File = PROGRAM_BUILTIN; - tgt->SrcReg[2].Swizzle = comparefunc == GL_ALWAYS ? SWIZZLE_1111 : SWIZZLE_0000; + tgt->SrcReg[0].Swizzle = comparefunc == GL_ALWAYS ? 
SWIZZLE_1111 : SWIZZLE_0000; return GL_TRUE; } - int tempreg = radeonCompilerAllocateTemporary(context->compiler); - inst.DstReg.File = PROGRAM_TEMPORARY; - inst.DstReg.Index = tempreg; + inst.DstReg.Index = radeonCompilerAllocateTemporary(context->compiler); inst.DstReg.WriteMask = WRITEMASK_XYZW; } @@ -317,7 +313,7 @@ void r500TranslateFragmentShader(r300ContextPtr r300, radeonCompilerInit(&compiler.compiler, r300->radeon.glCtx, &fp->mesa_program.Base); - /* insert_WPOS_trailer(&compiler); */ + insert_WPOS_trailer(&compiler); struct radeon_program_transformation transformations[1] = { { &transform_TEX, &compiler } diff --git a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c index 59c9048653..e1ad342690 100644 --- a/src/mesa/drivers/dri/r300/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/r500_fragprog_emit.c @@ -323,6 +323,9 @@ static GLuint make_src(struct r500_pfs_compile_state *cs, struct prog_src_regist reg = emit_const4fv(cs, cs->compiler->fp->mesa_program.Base.Parameters->ParameterValues[src.Index]); break; + case PROGRAM_BUILTIN: + reg = 0x0; + break; default: ERROR("Can't handle src.File %x\n", src.File); reg = 0x0; @@ -335,18 +338,21 @@ static GLuint make_dest(struct r500_pfs_compile_state *cs, struct prog_dst_regis PROG_CODE; GLuint reg; switch (dest.File) { - case PROGRAM_TEMPORARY: - reg = dest.Index + code->temp_reg_offset; - break; - case PROGRAM_OUTPUT: - /* Eventually we may need to handle multiple - * rendering targets... */ - reg = dest.Index; - break; - default: - ERROR("Can't handle dest.File %x\n", dest.File); - reg = 0x0; - break; + case PROGRAM_TEMPORARY: + reg = dest.Index + code->temp_reg_offset; + break; + case PROGRAM_OUTPUT: + /* Eventually we may need to handle multiple + * rendering targets... 
*/ + reg = dest.Index; + break; + case PROGRAM_BUILTIN: + reg = 0x0; + break; + default: + ERROR("Can't handle dest.File %x\n", dest.File); + reg = 0x0; + break; } return reg; } -- cgit v1.2.3 From 4fc66be2964efb0fbce101264b898636f8f1c715 Mon Sep 17 00:00:00 2001 From: Dan Nicholson Date: Sun, 15 Jun 2008 09:06:27 -0700 Subject: No more drivers/xorg Don't try to clean in xorg since GLcore is gone. --- src/mesa/Makefile | 1 - 1 file changed, 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 5f84f72bb2..27eb186abe 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -129,7 +129,6 @@ clean: -rm -f depend depend.bak libmesa.a libglapi.a -rm -f drivers/*/*.o -@cd drivers/dri && $(MAKE) clean - -@cd drivers/xorg && $(MAKE) clean -@cd drivers/x11 && $(MAKE) clean -@cd drivers/osmesa && $(MAKE) clean -@cd x86 && $(MAKE) clean -- cgit v1.2.3 From 6e6ca40f29551b6f6cedea954874930e6470b0b9 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 15 Jun 2008 17:14:07 -0700 Subject: r300: Unbreak LOD biasing, a bit. Needs a bit more work on submission. --- src/mesa/drivers/dri/r300/r300_tex.c | 67 ++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 34 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 16321af7d4..4a989fcde9 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -968,6 +968,7 @@ r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level, static void r300TexEnv(GLcontext * ctx, GLenum target, GLenum pname, const GLfloat * param) { + r300ContextPtr rmesa = R300_CONTEXT(ctx); if (RADEON_DEBUG & DEBUG_STATE) { fprintf(stderr, "%s( %s )\n", __FUNCTION__, _mesa_lookup_enum_by_nr(pname)); @@ -978,41 +979,39 @@ static void r300TexEnv(GLcontext * ctx, GLenum target, * between them according to _ReallyEnabled. 
*/ switch (pname) { - case GL_TEXTURE_LOD_BIAS_EXT:{ -#if 0 /* Needs to be relocated in order to make sure we got the right tmu */ - GLfloat bias, min; - GLuint b; - - /* The R300's LOD bias is a signed 2's complement value with a - * range of -16.0 <= bias < 16.0. - * - * NOTE: Add a small bias to the bias for conform mipsel.c test. - */ - bias = *param + .01; - min = - driQueryOptionb(&rmesa->radeon.optionCache, - "no_neg_lod_bias") ? 0.0 : -16.0; - bias = CLAMP(bias, min, 16.0); - - /* 0.0 - 16.0 == 0x0 - 0x1000 */ - /* 0.0 - -16.0 == 0x1001 - 0x1fff */ - b = 0x1000 / 16.0 * bias; - b &= R300_LOD_BIAS_MASK; - - if (b != - (rmesa->hw.tex.unknown1. - cmd[R300_TEX_VALUE_0 + - unit] & R300_LOD_BIAS_MASK)) { - R300_STATECHANGE(rmesa, tex.unknown1); - rmesa->hw.tex.unknown1.cmd[R300_TEX_VALUE_0 + - unit] &= - ~R300_LOD_BIAS_MASK; - rmesa->hw.tex.unknown1.cmd[R300_TEX_VALUE_0 + - unit] |= b; - } -#endif - break; + case GL_TEXTURE_LOD_BIAS_EXT: { + fprintf(stderr, "LOD Bias: %f\n", *param); + /* Needs to be relocated in order to make sure we got the right tmu */ + GLfloat bias, min; + GLuint b; + + /* The R300's LOD bias is a signed 2's complement value with a + * range of -16.0 <= bias < 16.0. + * + * NOTE: Add a small bias to the bias for conform mipsel.c test. + */ + bias = *param + .01; + min = driQueryOptionb(&rmesa->radeon.optionCache, + "no_neg_lod_bias") ? 
0.0 : -16.0; + bias = CLAMP(bias, min, 16.0); + + b = (unsigned int)fabsf(ceilf(bias*31)); + if (signbit(bias)) { + b ^= 0x3ff; /* 10 bits */ } + fprintf(stderr, "LOD Bias (hex): 0x%x\n", b); + b <<= 3; + b &= R300_LOD_BIAS_MASK; + + int unit = 1; + + if (b != (rmesa->hw.tex.filter_1.cmd[R300_TEX_CMD_0 + unit] & R300_LOD_BIAS_MASK)) { + R300_STATECHANGE(rmesa, tex.filter_1); + rmesa->hw.tex.filter_1.cmd[R300_TEX_CMD_0 + unit] &= ~R300_LOD_BIAS_MASK; + rmesa->hw.tex.filter_1.cmd[R300_TEX_CMD_0 + unit] |= b; + } + break; + } default: return; -- cgit v1.2.3 From f7b8a13d65e97b8a9812f813cc670e6b712d583d Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Sun, 15 Jun 2008 17:30:07 -0700 Subject: Oops, misordered a few instructions. Not like it matters, though, since it's not taking effect yet. --- src/mesa/drivers/dri/r300/r300_tex.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 4a989fcde9..317b961898 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -185,6 +185,7 @@ static GLuint aniso_filter(GLfloat anisotropy) */ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy) { + fprintf(stderr, "Here I am!\n"); t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK); t->filter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY; @@ -1006,9 +1007,9 @@ static void r300TexEnv(GLcontext * ctx, GLenum target, int unit = 1; if (b != (rmesa->hw.tex.filter_1.cmd[R300_TEX_CMD_0 + unit] & R300_LOD_BIAS_MASK)) { - R300_STATECHANGE(rmesa, tex.filter_1); rmesa->hw.tex.filter_1.cmd[R300_TEX_CMD_0 + unit] &= ~R300_LOD_BIAS_MASK; rmesa->hw.tex.filter_1.cmd[R300_TEX_CMD_0 + unit] |= b; + R300_STATECHANGE(rmesa, tex.filter_1); } break; } -- cgit v1.2.3 From 776580a6afff9c6403140737ea50f36e9323528a Mon Sep 17 00:00:00 2001 From: Corbin 
Simpson Date: Sun, 15 Jun 2008 23:54:32 -0700 Subject: r300: Enable LOD bias state emission. Properly set t->filter_1 for r300_state to emit. Expect buggies as people see LOD bias enabled for the first time... --- src/mesa/drivers/dri/r300/r300_tex.c | 65 +++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 16 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 317b961898..85a6628e40 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -185,7 +185,6 @@ static GLuint aniso_filter(GLfloat anisotropy) */ static void r300SetTexFilter(r300TexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy) { - fprintf(stderr, "Here I am!\n"); t->filter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK); t->filter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY; @@ -243,6 +242,19 @@ static void r300SetTexBorderColor(r300TexObjPtr t, GLubyte c[4]) t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]); } +static void r300SetTexLodBias(r300TexObjPtr t, GLfloat bias) +{ + GLuint b; + b = (unsigned int)fabsf(ceilf(bias*31)); + if (signbit(bias)) { + b ^= 0x3ff; /* 10 bits */ + } + b <<= 3; + b &= R300_LOD_BIAS_MASK; + + t->filter_1 |= b; +} + /** * Allocate space for and load the mesa images into the texture memory block. 
* This will happen before drawing with a new texture, or drawing with a @@ -271,6 +283,7 @@ static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj) r300SetTexWrap(t, texObj->WrapS, texObj->WrapT, texObj->WrapR); r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy); r300SetTexBorderColor(t, texObj->_BorderChan); + r300SetTexLodBias(t, texObj->LodBias); } return t; @@ -966,6 +979,33 @@ r300TexSubImage3D(GLcontext * ctx, GLenum target, GLint level, t->dirty_images[0] |= (1 << level); } +/* This feels like a prime target for code reuse, so I'm putting it here + * instead of inlining it in TexEnv. */ +static GLenum r300TexUnitTarget(struct gl_texture_unit *unit) { + if (unit->_ReallyEnabled & (TEXTURE_RECT_BIT)) { + return GL_TEXTURE_RECTANGLE_NV; + } else if (unit->_ReallyEnabled & (TEXTURE_1D_BIT)) { + return GL_TEXTURE_1D; + } else if (unit->_ReallyEnabled & (TEXTURE_2D_BIT)) { + return GL_TEXTURE_2D; + } else if (unit->_ReallyEnabled & (TEXTURE_3D_BIT)) { + return GL_TEXTURE_3D; + } else if (unit->_ReallyEnabled & (TEXTURE_CUBE_BIT)) { + return GL_TEXTURE_CUBE_MAP; + } + if (unit->Enabled & (TEXTURE_RECT_BIT)) { + return GL_TEXTURE_RECTANGLE_NV; + } else if (unit->Enabled & (TEXTURE_1D_BIT)) { + return GL_TEXTURE_1D; + } else if (unit->Enabled & (TEXTURE_2D_BIT)) { + return GL_TEXTURE_2D; + } else if (unit->Enabled & (TEXTURE_3D_BIT)) { + return GL_TEXTURE_3D; + } else if (unit->Enabled & (TEXTURE_CUBE_BIT)) { + return GL_TEXTURE_CUBE_MAP; + } +} + static void r300TexEnv(GLcontext * ctx, GLenum target, GLenum pname, const GLfloat * param) { @@ -981,10 +1021,8 @@ static void r300TexEnv(GLcontext * ctx, GLenum target, */ switch (pname) { case GL_TEXTURE_LOD_BIAS_EXT: { - fprintf(stderr, "LOD Bias: %f\n", *param); /* Needs to be relocated in order to make sure we got the right tmu */ GLfloat bias, min; - GLuint b; /* The R300's LOD bias is a signed 2's complement value with a * range of -16.0 <= bias < 16.0. 
@@ -996,21 +1034,16 @@ static void r300TexEnv(GLcontext * ctx, GLenum target, "no_neg_lod_bias") ? 0.0 : -16.0; bias = CLAMP(bias, min, 16.0); - b = (unsigned int)fabsf(ceilf(bias*31)); - if (signbit(bias)) { - b ^= 0x3ff; /* 10 bits */ - } - fprintf(stderr, "LOD Bias (hex): 0x%x\n", b); - b <<= 3; - b &= R300_LOD_BIAS_MASK; + /* This next part feels quite hackish; + * is there a cleaner way? */ + struct gl_texture_object *texObj; + GLenum target = r300TexUnitTarget(&ctx->Texture.Unit[ctx->Texture.CurrentUnit]); + texObj = _mesa_select_tex_object(ctx, &ctx->Texture.Unit[ctx->Texture.CurrentUnit], target); + r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData; + texObj->LodBias = bias; - int unit = 1; + r300SetTexLodBias(t, texObj->LodBias); - if (b != (rmesa->hw.tex.filter_1.cmd[R300_TEX_CMD_0 + unit] & R300_LOD_BIAS_MASK)) { - rmesa->hw.tex.filter_1.cmd[R300_TEX_CMD_0 + unit] &= ~R300_LOD_BIAS_MASK; - rmesa->hw.tex.filter_1.cmd[R300_TEX_CMD_0 + unit] |= b; - R300_STATECHANGE(rmesa, tex.filter_1); - } break; } -- cgit v1.2.3 From 1738a9a535cbde3a4b9f1b57f2a47139b4affdb9 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 16 Jun 2008 01:02:16 -0700 Subject: r300: Forgot to clear old state before writing new state. Oooops. Hehe. --- src/mesa/drivers/dri/r300/r300_tex.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index 85a6628e40..b672baca1b 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -252,6 +252,7 @@ static void r300SetTexLodBias(r300TexObjPtr t, GLfloat bias) b <<= 3; b &= R300_LOD_BIAS_MASK; + t->filter_1 &= ~R300_LOD_BIAS_MASK; t->filter_1 |= b; } -- cgit v1.2.3 From 588abd66966a672a93e87cd577802255193bebcd Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 16 Jun 2008 01:21:42 -0700 Subject: r300: Fix new incarnation of bug 3195. 
tests/bug_3195 doesn't render right, but at least it doesn't segfault this way. --- progs/demos/lodbias.c | 2 +- src/mesa/drivers/dri/r300/r300_context.h | 1 + src/mesa/drivers/dri/r300/r300_tex.c | 19 ++++++++++++++----- 3 files changed, 16 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/progs/demos/lodbias.c b/progs/demos/lodbias.c index c5a2a1b457..28215d46c0 100644 --- a/progs/demos/lodbias.c +++ b/progs/demos/lodbias.c @@ -40,7 +40,7 @@ static GLfloat Xrot = 0, Yrot = -30, Zrot = 0; static GLboolean Anim = GL_TRUE; -static GLint Bias = 0, BiasStepSign = +1; /* ints avoid fp precision problem */ +static GLint Bias = 4, BiasStepSign = +1; /* ints avoid fp precision problem */ static GLint BiasMin = -400, BiasMax = 400; diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index 1a90f5cabb..6279a67ab1 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -925,6 +925,7 @@ struct r300_context { driTextureObject swapped; int texture_depth; float initialMaxAnisotropy; + float LODBias; /* Clientdata textures; */ diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index b672baca1b..a3d6f90ef6 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -1005,6 +1005,7 @@ static GLenum r300TexUnitTarget(struct gl_texture_unit *unit) { } else if (unit->Enabled & (TEXTURE_CUBE_BIT)) { return GL_TEXTURE_CUBE_MAP; } + return 0; } static void r300TexEnv(GLcontext * ctx, GLenum target, @@ -1037,13 +1038,17 @@ static void r300TexEnv(GLcontext * ctx, GLenum target, /* This next part feels quite hackish; * is there a cleaner way? 
*/ - struct gl_texture_object *texObj; GLenum target = r300TexUnitTarget(&ctx->Texture.Unit[ctx->Texture.CurrentUnit]); - texObj = _mesa_select_tex_object(ctx, &ctx->Texture.Unit[ctx->Texture.CurrentUnit], target); - r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData; - texObj->LodBias = bias; + if (target) { + struct gl_texture_object *texObj = + _mesa_select_tex_object(ctx, &ctx->Texture.Unit[ctx->Texture.CurrentUnit], target); + r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData; + texObj->LodBias = bias; - r300SetTexLodBias(t, texObj->LodBias); + r300SetTexLodBias(t, texObj->LodBias); + } + + rmesa->LODBias = bias; break; } @@ -1175,6 +1180,10 @@ static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx, return NULL; obj->MaxAnisotropy = rmesa->initialMaxAnisotropy; + /* Attempt to fill LOD bias, if previously set. + * Should start at 0.0, which won't affect the HW. */ + obj->LodBias = rmesa->LODBias; + r300AllocTexObj(obj); return obj; } -- cgit v1.2.3 From bbce58802d9bf94eadbb0d0c0211a0768f8302aa Mon Sep 17 00:00:00 2001 From: "H. Verbeet" Date: Mon, 16 Jun 2008 09:40:26 -0600 Subject: Don't compute vertex fog when fragment program is active. 
--- src/mesa/tnl/t_context.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c index 3b8dd18bbb..e772a06eda 100644 --- a/src/mesa/tnl/t_context.c +++ b/src/mesa/tnl/t_context.c @@ -105,10 +105,10 @@ _tnl_InvalidateState( GLcontext *ctx, GLuint new_state ) const struct gl_vertex_program *vp = ctx->VertexProgram._Current; const struct gl_fragment_program *fp = ctx->FragmentProgram._Current; - if (new_state & (_NEW_HINT)) { + if (new_state & (_NEW_HINT | _NEW_PROGRAM)) { ASSERT(tnl->AllowVertexFog || tnl->AllowPixelFog); - tnl->_DoVertexFog = (tnl->AllowVertexFog && (ctx->Hint.Fog != GL_NICEST)) - || !tnl->AllowPixelFog; + tnl->_DoVertexFog = ((tnl->AllowVertexFog && (ctx->Hint.Fog != GL_NICEST)) + || !tnl->AllowPixelFog) && !fp; } tnl->pipeline.new_state |= new_state; @@ -202,8 +202,8 @@ _tnl_allow_vertex_fog( GLcontext *ctx, GLboolean value ) { TNLcontext *tnl = TNL_CONTEXT(ctx); tnl->AllowVertexFog = value; - tnl->_DoVertexFog = (tnl->AllowVertexFog && (ctx->Hint.Fog != GL_NICEST)) - || !tnl->AllowPixelFog; + tnl->_DoVertexFog = ((tnl->AllowVertexFog && (ctx->Hint.Fog != GL_NICEST)) + || !tnl->AllowPixelFog) && !ctx->FragmentProgram._Current; } @@ -212,7 +212,7 @@ _tnl_allow_pixel_fog( GLcontext *ctx, GLboolean value ) { TNLcontext *tnl = TNL_CONTEXT(ctx); tnl->AllowPixelFog = value; - tnl->_DoVertexFog = (tnl->AllowVertexFog && (ctx->Hint.Fog != GL_NICEST)) - || !tnl->AllowPixelFog; + tnl->_DoVertexFog = ((tnl->AllowVertexFog && (ctx->Hint.Fog != GL_NICEST)) + || !tnl->AllowPixelFog) && !ctx->FragmentProgram._Current; } -- cgit v1.2.3 From ec2d0decbd739df99cac4baba57fe0005bf4894d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 16 Jun 2008 10:03:05 -0600 Subject: mesa: allocate pixel zoom arrays on heap, not stack Fixes stack overflow on Windows. 
--- src/mesa/swrast/s_context.c | 2 ++ src/mesa/swrast/s_context.h | 1 + src/mesa/swrast/s_zoom.c | 27 +++++++++++++++++---------- 3 files changed, 20 insertions(+), 10 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c index 8e6b28bb4c..eab9ff3a9e 100644 --- a/src/mesa/swrast/s_context.c +++ b/src/mesa/swrast/s_context.c @@ -832,6 +832,8 @@ _swrast_DestroyContext( GLcontext *ctx ) } FREE( swrast->SpanArrays ); + if (swrast->ZoomedArrays) + FREE( swrast->ZoomedArrays ); FREE( swrast->TexelBuffer ); FREE( swrast ); diff --git a/src/mesa/swrast/s_context.h b/src/mesa/swrast/s_context.h index 3dcc3ed16e..a511d1c9a1 100644 --- a/src/mesa/swrast/s_context.h +++ b/src/mesa/swrast/s_context.h @@ -206,6 +206,7 @@ typedef struct * on some systems. */ SWspanarrays *SpanArrays; + SWspanarrays *ZoomedArrays; /**< For pixel zooming */ /** * Used to buffer N GL_POINTS, instead of rendering one by one. diff --git a/src/mesa/swrast/s_zoom.c b/src/mesa/swrast/s_zoom.c index 9f1a4c6f0a..4473078b78 100644 --- a/src/mesa/swrast/s_zoom.c +++ b/src/mesa/swrast/s_zoom.c @@ -1,8 +1,8 @@ /* * Mesa 3-D graphics library - * Version: 6.5.2 + * Version: 7.1 * - * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -130,8 +130,8 @@ static void zoom_span( GLcontext *ctx, GLint imgX, GLint imgY, const SWspan *span, const GLvoid *src, GLenum format ) { + SWcontext *swrast = SWRAST_CONTEXT(ctx); SWspan zoomed; - SWspanarrays zoomed_arrays; /* this is big! 
*/ GLint x0, x1, y0, y1; GLint zoomedWidth; @@ -140,6 +140,13 @@ zoom_span( GLcontext *ctx, GLint imgX, GLint imgY, const SWspan *span, return; /* totally clipped */ } + if (!swrast->ZoomedArrays) { + /* allocate on demand */ + swrast->ZoomedArrays = (SWspanarrays *) CALLOC(sizeof(SWspanarrays)); + if (!swrast->ZoomedArrays) + return; + } + zoomedWidth = x1 - x0; ASSERT(zoomedWidth > 0); ASSERT(zoomedWidth <= MAX_WIDTH); @@ -151,14 +158,14 @@ zoom_span( GLcontext *ctx, GLint imgX, GLint imgY, const SWspan *span, INIT_SPAN(zoomed, GL_BITMAP); zoomed.x = x0; zoomed.end = zoomedWidth; - zoomed.array = &zoomed_arrays; - zoomed_arrays.ChanType = span->array->ChanType; - if (zoomed_arrays.ChanType == GL_UNSIGNED_BYTE) - zoomed_arrays.rgba = (GLchan (*)[4]) zoomed_arrays.rgba8; - else if (zoomed_arrays.ChanType == GL_UNSIGNED_SHORT) - zoomed_arrays.rgba = (GLchan (*)[4]) zoomed_arrays.rgba16; + zoomed.array = swrast->ZoomedArrays; + zoomed.array->ChanType = span->array->ChanType; + if (zoomed.array->ChanType == GL_UNSIGNED_BYTE) + zoomed.array->rgba = (GLchan (*)[4]) zoomed.array->rgba8; + else if (zoomed.array->ChanType == GL_UNSIGNED_SHORT) + zoomed.array->rgba = (GLchan (*)[4]) zoomed.array->rgba16; else - zoomed_arrays.rgba = (GLchan (*)[4]) zoomed_arrays.attribs[FRAG_ATTRIB_COL0]; + zoomed.array->rgba = (GLchan (*)[4]) zoomed.array->attribs[FRAG_ATTRIB_COL0]; COPY_4V(zoomed.attrStart[FRAG_ATTRIB_WPOS], span->attrStart[FRAG_ATTRIB_WPOS]); COPY_4V(zoomed.attrStepX[FRAG_ATTRIB_WPOS], span->attrStepX[FRAG_ATTRIB_WPOS]); -- cgit v1.2.3 From e9d1889891ce32ffb3d6425f1e70510ea2c3e24b Mon Sep 17 00:00:00 2001 From: "H. Verbeet" Date: Mon, 16 Jun 2008 10:40:32 -0600 Subject: Compute/setup fog coordinates even if vertex program is enabled. Only when fog's enabled, of course. In the usual case, this is just a pass-through operation. 
--- src/mesa/tnl/t_pipeline.c | 2 +- src/mesa/tnl/t_vb_fog.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/tnl/t_pipeline.c b/src/mesa/tnl/t_pipeline.c index 2a0ed8852a..357ef1e24b 100644 --- a/src/mesa/tnl/t_pipeline.c +++ b/src/mesa/tnl/t_pipeline.c @@ -199,11 +199,11 @@ const struct tnl_pipeline_stage *_tnl_default_pipeline[] = { &_tnl_vertex_transform_stage, &_tnl_normal_transform_stage, &_tnl_lighting_stage, - &_tnl_fog_coordinate_stage, &_tnl_texgen_stage, &_tnl_texture_transform_stage, &_tnl_point_attenuation_stage, &_tnl_vertex_program_stage, + &_tnl_fog_coordinate_stage, &_tnl_render_stage, NULL }; diff --git a/src/mesa/tnl/t_vb_fog.c b/src/mesa/tnl/t_vb_fog.c index d8bca3820b..99e5a09c75 100644 --- a/src/mesa/tnl/t_vb_fog.c +++ b/src/mesa/tnl/t_vb_fog.c @@ -148,11 +148,11 @@ run_fog_stage(GLcontext *ctx, struct tnl_pipeline_stage *stage) struct fog_stage_data *store = FOG_STAGE_DATA(stage); GLvector4f *input; - if (!ctx->Fog.Enabled || ctx->VertexProgram._Current) - return GL_TRUE; + if (!ctx->Fog.Enabled) + return GL_TRUE; - if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) { + if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT && !ctx->VertexProgram._Current) { GLuint i; GLfloat *coord; /* Fog is computed from vertex or fragment Z values */ -- cgit v1.2.3 From 27b7f896d4492021f8b8c35a69c28f5854ab1f90 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Mon, 16 Jun 2008 11:37:32 -0700 Subject: r300: Make LOD bias a persistent environment variable. Per airlied's suggestion. 
--- src/mesa/drivers/dri/r300/r300_state.c | 15 ++++++++++++++- src/mesa/drivers/dri/r300/r300_tex.c | 13 ------------- 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index a86e4bc344..55d3d55e90 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1381,6 +1381,18 @@ static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings) } } +static GLuint r300CalculateTexLodBias(GLfloat bias) +{ + GLuint b; + b = (unsigned int)fabsf(ceilf(bias*31)); + if (signbit(bias)) { + b ^= 0x3ff; /* 10 bits */ + } + b <<= 3; + b &= R300_LOD_BIAS_MASK; + return b; +} + static void r300SetupTextures(GLcontext * ctx) { int i, mtu; @@ -1444,7 +1456,8 @@ static void r300SetupTextures(GLcontext * ctx) r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 + hw_tmu] = gen_fixed_filter(t->filter) | (hw_tmu << 28); - r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->filter_1; + r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->filter_1 + | r300CalculateTexLodBias(r300->LODBias); r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] = t->size; r300->hw.tex.format.cmd[R300_TEX_VALUE_0 + diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index a3d6f90ef6..5f54bcad9a 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -284,7 +284,6 @@ static r300TexObjPtr r300AllocTexObj(struct gl_texture_object *texObj) r300SetTexWrap(t, texObj->WrapS, texObj->WrapT, texObj->WrapR); r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy); r300SetTexBorderColor(t, texObj->_BorderChan); - r300SetTexLodBias(t, texObj->LodBias); } return t; @@ -1036,18 +1035,6 @@ static void r300TexEnv(GLcontext * ctx, GLenum target, "no_neg_lod_bias") ? 
0.0 : -16.0; bias = CLAMP(bias, min, 16.0); - /* This next part feels quite hackish; - * is there a cleaner way? */ - GLenum target = r300TexUnitTarget(&ctx->Texture.Unit[ctx->Texture.CurrentUnit]); - if (target) { - struct gl_texture_object *texObj = - _mesa_select_tex_object(ctx, &ctx->Texture.Unit[ctx->Texture.CurrentUnit], target); - r300TexObjPtr t = (r300TexObjPtr) texObj->DriverData; - texObj->LodBias = bias; - - r300SetTexLodBias(t, texObj->LodBias); - } - rmesa->LODBias = bias; break; -- cgit v1.2.3 From a1523c61eaa47825f0e4ca9076b83dcca19e1bd1 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 17 Jun 2008 16:29:45 +1000 Subject: mesa: make mm.c use unsigned ints for offsets. If you have a GPU using this code and it has the offsets up in this space, this fails. --- src/mesa/main/mm.c | 8 ++++---- src/mesa/main/mm.h | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/mm.c b/src/mesa/main/mm.c index 846c329c70..fb7809ed22 100644 --- a/src/mesa/main/mm.c +++ b/src/mesa/main/mm.c @@ -53,7 +53,7 @@ mmDumpMemInfo(const struct mem_block *heap) } struct mem_block * -mmInit(int ofs, int size) +mmInit(unsigned int ofs, int size) { struct mem_block *heap, *block; @@ -91,7 +91,7 @@ mmInit(int ofs, int size) static struct mem_block * SliceBlock(struct mem_block *p, - int startofs, int size, + unsigned int startofs, int size, int reserved, int alignment) { struct mem_block *newblock; @@ -164,8 +164,8 @@ mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) { struct mem_block *p; const int mask = (1 << align2)-1; - int startofs = 0; - int endofs; + unsigned int startofs = 0; + unsigned int endofs; if (!heap || align2 < 0 || size <= 0) return NULL; diff --git a/src/mesa/main/mm.h b/src/mesa/main/mm.h index 26d59fff13..5ad3ffd6d1 100644 --- a/src/mesa/main/mm.h +++ b/src/mesa/main/mm.h @@ -39,7 +39,8 @@ struct mem_block { struct mem_block *next, *prev; struct mem_block *next_free, *prev_free; 
struct mem_block *heap; - int ofs,size; + unsigned int ofs; + int size; unsigned int free:1; unsigned int reserved:1; }; @@ -50,7 +51,7 @@ struct mem_block { * input: total size in bytes * return: a heap pointer if OK, NULL if error */ -extern struct mem_block *mmInit(int ofs, int size); +extern struct mem_block *mmInit(unsigned int ofs, int size); /** * Allocate 'size' bytes with 2^align2 bytes alignment, -- cgit v1.2.3 From 5b5bf2187497e8de53ca20db6c4fd4a211714b98 Mon Sep 17 00:00:00 2001 From: Wilfried Holzke Date: Tue, 17 Jun 2008 10:07:47 -0600 Subject: assorted glide driver fixes --- src/mesa/drivers/x11/xm_glide.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/x11/xm_glide.c b/src/mesa/drivers/x11/xm_glide.c index ae4f4282db..cbd69b011a 100644 --- a/src/mesa/drivers/x11/xm_glide.c +++ b/src/mesa/drivers/x11/xm_glide.c @@ -64,7 +64,8 @@ FXcreateContext(XMesaVisual v, XMesaWindow w, XMesaContext c, XMesaBuffer b) attribs[numAttribs++] = FXMESA_NONE; /* [dBorca] we should take an envvar for `fxMesaSelectCurrentBoard'!!! */ - hw = fxMesaSelectCurrentBoard(0); +/* hw = fxMesaSelectCurrentBoard(0); */ + hw = GR_SSTTYPE_Voodoo2; /* if these fail, there's a new bug somewhere */ ASSERT(b->mesa_buffer.Width > 0); -- cgit v1.2.3 From 30640695400b9b27656893753ae6b62f2082ce9b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 17 Jun 2008 16:44:04 -0600 Subject: mesa: fix inconsistent use of GL_UNSIGNED_INT vs. 
GL_UNSIGNED_INT_24_8_EXT for Z unpacking --- src/mesa/main/image.c | 2 +- src/mesa/main/texstore.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c index 347cec66ef..8034daecae 100644 --- a/src/mesa/main/image.c +++ b/src/mesa/main/image.c @@ -3963,7 +3963,7 @@ _mesa_unpack_depth_span( const GLcontext *ctx, GLuint n, DEPTH_VALUES(GLuint, UINT_TO_FLOAT); break; case GL_UNSIGNED_INT_24_8_EXT: /* GL_EXT_packed_depth_stencil */ - if (dstType == GL_UNSIGNED_INT && + if (dstType == GL_UNSIGNED_INT_24_8_EXT && depthMax == 0xffffff && ctx->Pixel.DepthScale == 1.0 && ctx->Pixel.DepthBias == 0.0) { diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 5363e9e080..89774291f2 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -2322,7 +2322,7 @@ _mesa_texstore_ycbcr(TEXSTORE_PARAMS) GLboolean _mesa_texstore_z24_s8(TEXSTORE_PARAMS) { - const GLuint depthScale = 0xffffff; + const GLfloat depthScale = (GLfloat) 0xffffff; ASSERT(dstFormat == &_mesa_texformat_z24_s8); ASSERT(srcFormat == GL_DEPTH_STENCIL_EXT); @@ -2360,7 +2360,7 @@ _mesa_texstore_z24_s8(TEXSTORE_PARAMS) GLint i; /* the 24 depth bits will be in the high position: */ _mesa_unpack_depth_span(ctx, srcWidth, - GL_UNSIGNED_INT, /* dst type */ + GL_UNSIGNED_INT_24_8_EXT, /* dst type */ dstRow, /* dst addr */ depthScale, srcType, src, srcPacking); -- cgit v1.2.3 From 3e8aadee8beffaabd4e0c60c289b98124e288dcd Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 18 Jun 2008 15:33:33 +0800 Subject: i965: add support for Intel 4 series chipsets --- src/mesa/drivers/dri/intel/intel_chipset.h | 12 ++++++++++-- src/mesa/drivers/dri/intel/intel_context.c | 7 +++++++ 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h index 3c8fd2384e..4a5166263a 100644 --- 
a/src/mesa/drivers/dri/intel/intel_chipset.h +++ b/src/mesa/drivers/dri/intel/intel_chipset.h @@ -53,7 +53,11 @@ #define PCI_CHIP_I965_GM 0x2A02 #define PCI_CHIP_I965_GME 0x2A12 -#define PCI_CHIP_IGD_GM 0x2A42 +#define PCI_CHIP_IGD_GM 0x2A42 + +#define PCI_CHIP_IGD_E_G 0x2E02 +#define PCI_CHIP_Q45_G 0x2E12 +#define PCI_CHIP_G45_G 0x2E22 #define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \ devid == PCI_CHIP_I915_GM || \ @@ -63,7 +67,11 @@ devid == PCI_CHIP_I965_GME || \ devid == PCI_CHIP_IGD_GM) -#define IS_IGD(devid) (devid == PCI_CHIP_IGD_GM) +#define IS_IGD_GM(devid) (devid == PCI_CHIP_IGD_GM) +#define IS_G4X(devid) (devid == PCI_CHIP_IGD_E_G || \ + devid == PCI_CHIP_Q45_G || \ + devid == PCI_CHIP_G45_G) +#define IS_IGD(devid) (IS_IGD_GM(devid) || IS_G4X(devid)) #define IS_915(devid) (devid == PCI_CHIP_I915_G || \ devid == PCI_CHIP_E7221_G || \ diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 47e7d1afc2..80e2111b83 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -167,8 +167,15 @@ intelGetString(GLcontext * ctx, GLenum name) chipset = "Intel(R) 965GME/GLE"; break; case PCI_CHIP_IGD_GM: + case PCI_CHIP_IGD_E_G: chipset = "Intel(R) Integrated Graphics Device"; break; + case PCI_CHIP_G45_G: + chipset = "Intel(R) G45/G43"; + break; + case PCI_CHIP_Q45_G: + chipset = "Intel(R) Q45/Q43"; + break; default: chipset = "Unknown Intel Chipset"; break; -- cgit v1.2.3 From cf29ab3ba075905cca786b52617d7dc993f58033 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 18 Jun 2008 13:50:13 -0700 Subject: i915: Bug #14313: Fix accelerated (PBO) ReadPixels. Refactoring of mine in 02d5ba849197e19843dad164239b51f18fb16faf broke it by failing to understand that the masking was about sign extension. 
--- src/mesa/drivers/dri/intel/intel_blit.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'src/mesa') diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index 4890826a19..414300c549 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -326,9 +326,6 @@ intelEmitCopyBlit(struct intel_context *intel, return; } - dst_pitch &= 0xffff; - src_pitch &= 0xffff; - /* Initial y values don't seem to work with negative pitches. If * we adjust the offsets manually (below), it seems to work fine. * @@ -359,7 +356,7 @@ intelEmitCopyBlit(struct intel_context *intel, BEGIN_BATCH(8, NO_LOOP_CLIPRECTS); OUT_BATCH(CMD); - OUT_BATCH(BR13 | dst_pitch); + OUT_BATCH(BR13 | ((uint16_t)dst_pitch)); OUT_BATCH((0 << 16) | dst_x); OUT_BATCH((h << 16) | dst_x2); OUT_RELOC(dst_buffer, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, -- cgit v1.2.3