diff options
Diffstat (limited to 'src')
787 files changed, 176673 insertions, 2695 deletions
diff --git a/src/SConscript b/src/SConscript new file mode 100644 index 0000000000..8e914af4f0 --- /dev/null +++ b/src/SConscript @@ -0,0 +1,8 @@ +Import('*') + +SConscript('gallium/SConscript') + +if 'mesa' in env['statetrackers']: + SConscript('mesa/SConscript') + +SConscript('gallium/winsys/SConscript') diff --git a/src/descrip.mms b/src/descrip.mms deleted file mode 100644 index 71b8ea16ac..0000000000 --- a/src/descrip.mms +++ /dev/null @@ -1,43 +0,0 @@ -# Makefile for Mesa for VMS -# contributed by Jouk Jansen joukj@hrem.stm.tudelft.nl - -.include [-]mms-config. - -all : - set default [.mesa] - $(MMS)$(MMSQUALIFIERS) - set default [-] -.ifdef SHARE - $(MMS)$(MMSQUALIFIERS) [-.lib]$(GL_SHAR) -.endif - set default [.glu] - $(MMS)$(MMSQUALIFIERS) - set default [-.glut.glx] - $(MMS)$(MMSQUALIFIERS) - set default [--] - -[-.lib]$(GL_SHAR) : [-.lib]$(GL_LIB) - @ WRITE_ SYS$OUTPUT " generating libmesa.opt" - @ library/extract=* [-.lib]$(GL_LIB) - @ OPEN_/WRITE FILE libmesa.opt - @ WRITE_ FILE "!" - @ WRITE_ FILE "! libmesa.opt generated by DESCRIP.$(MMS_EXT)" - @ WRITE_ FILE "!" - @ WRITE_ FILE "IDENTIFICATION=""mesa5.1""" - @ WRITE_ FILE "GSMATCH=LEQUAL,5,1 - @ WRITE_ FILE "libmesagl.obj" - @ write_ file "sys$share:decw$xextlibshr/share" - @ write_ file "sys$share:decw$xlibshr/share" - @ write_ file "sys$share:pthread$rtl/share" - @ CLOSE_ FILE - @ $(MMS)$(MMSQUALIFIERS)/ignore=warning mesa_vms - @ WRITE_ SYS$OUTPUT " linking ..." - @ LINK_/NODEB/SHARE=[-.lib]$(GL_SHAR)/MAP=libmesa.map/FULL libmesa.opt/opt,\ - mesa_vms.opt/opt - @ delete libmesagl.obj;* - -mesa_vms : - @ WRITE_ SYS$OUTPUT " generating libmesa.map ..." - @ LINK_/NODEB/NOSHARE/NOEXE/MAP=libmesa.map/FULL libmesa.opt/OPT - @ WRITE_ SYS$OUTPUT " analyzing libmesa.map ..." - @ @[-.vms]analyze_map.com libmesa.map mesa_vms.opt diff --git a/src/egl/Makefile b/src/egl/Makefile index fb4db8f9b9..024453f8bb 100644 --- a/src/egl/Makefile +++ b/src/egl/Makefile @@ -2,7 +2,7 @@ TOP = ../.. 
-SUBDIRS = main drivers/demo drivers/dri +SUBDIRS = main drivers default: subdirs diff --git a/src/egl/drivers/Makefile b/src/egl/drivers/Makefile new file mode 100644 index 0000000000..f0538f563f --- /dev/null +++ b/src/egl/drivers/Makefile @@ -0,0 +1,25 @@ +# src/egl/drivers/Makefile + +TOP = ../../.. +include $(TOP)/configs/current + +SUBDIRS = $(EGL_DRIVERS_DIRS) + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir ; $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir ; $(MAKE) clean) ; \ + fi \ + done diff --git a/src/egl/drivers/demo/demo.c b/src/egl/drivers/demo/demo.c index 45545755c0..1750e976b8 100644 --- a/src/egl/drivers/demo/demo.c +++ b/src/egl/drivers/demo/demo.c @@ -21,7 +21,7 @@ typedef struct demo_driver { _EGLDriver Base; /* base class/object */ - GLuint DemoStuff; + unsigned DemoStuff; } DemoDriver; #define DEMO_DRIVER(D) ((DemoDriver *) (D)) @@ -33,7 +33,7 @@ typedef struct demo_driver typedef struct demo_surface { _EGLSurface Base; /* base class/object */ - GLuint DemoStuff; + unsigned DemoStuff; } DemoSurface; @@ -43,7 +43,7 @@ typedef struct demo_surface typedef struct demo_context { _EGLContext Base; /* base class/object */ - GLuint DemoStuff; + unsigned DemoStuff; } DemoContext; @@ -67,22 +67,22 @@ demoInitialize(_EGLDriver *drv, EGLDisplay dpy, EGLint *major, EGLint *minor) /* Create the display's visual configs - silly example */ for (i = 0; i < 4; i++) { - _EGLConfig config; - _eglInitConfig(&config, i + 1); - _eglSetConfigAttrib(&config, EGL_RED_SIZE, 8); - _eglSetConfigAttrib(&config, EGL_GREEN_SIZE, 8); - _eglSetConfigAttrib(&config, EGL_BLUE_SIZE, 8); - _eglSetConfigAttrib(&config, EGL_ALPHA_SIZE, 8); - _eglSetConfigAttrib(&config, EGL_BUFFER_SIZE, 32); + _EGLConfig *config = calloc(1, sizeof(_EGLConfig)); + _eglInitConfig(config, i + 1); + _eglSetConfigAttrib(config, EGL_RED_SIZE, 8); + 
_eglSetConfigAttrib(config, EGL_GREEN_SIZE, 8); + _eglSetConfigAttrib(config, EGL_BLUE_SIZE, 8); + _eglSetConfigAttrib(config, EGL_ALPHA_SIZE, 8); + _eglSetConfigAttrib(config, EGL_BUFFER_SIZE, 32); if (i & 1) { - _eglSetConfigAttrib(&config, EGL_DEPTH_SIZE, 32); + _eglSetConfigAttrib(config, EGL_DEPTH_SIZE, 32); } if (i & 2) { - _eglSetConfigAttrib(&config, EGL_STENCIL_SIZE, 8); + _eglSetConfigAttrib(config, EGL_STENCIL_SIZE, 8); } - _eglSetConfigAttrib(&config, EGL_SURFACE_TYPE, + _eglSetConfigAttrib(config, EGL_SURFACE_TYPE, (EGL_WINDOW_BIT | EGL_PIXMAP_BIT | EGL_PBUFFER_BIT)); - _eglAddConfig(disp, &config); + _eglAddConfig(disp, config); } drv->Initialized = EGL_TRUE; @@ -152,9 +152,9 @@ demoCreateContext(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, EGLContext /* generate handle and insert into hash table */ _eglSaveContext(&c->Base); - assert(c->Base.Handle); + assert(_eglGetContextHandle(&c->Base)); - return c->Base.Handle; + return _eglGetContextHandle(&c->Base); } @@ -286,7 +286,7 @@ demoMakeCurrent(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw, EGLSurface rea * plug in API functions. */ _EGLDriver * -_eglMain(_EGLDisplay *dpy) +_eglMain(_EGLDisplay *dpy, const char *args) { DemoDriver *demo; diff --git a/src/egl/drivers/dri/Makefile b/src/egl/drivers/dri/Makefile index af0c40fe2b..d7eba653b2 100644 --- a/src/egl/drivers/dri/Makefile +++ b/src/egl/drivers/dri/Makefile @@ -8,7 +8,7 @@ include $(TOP)/configs/current INCLUDE_DIRS = \ -I. 
\ -I/usr/include \ - -I/usr/include/drm \ + $(shell pkg-config --cflags-only-I libdrm) \ -I$(TOP)/include \ -I$(TOP)/include/GL/internal \ -I$(TOP)/src/mesa \ @@ -29,6 +29,8 @@ SOURCES = egldri.c OBJECTS = $(SOURCES:.c=.o) +DRM_LIB = `pkg-config --libs libdrm` + .c.o: $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ @@ -44,7 +46,7 @@ library: $(TOP)/$(LIB_DIR)/libEGLdri.so $(TOP)/$(LIB_DIR)/libEGLdri.so: $(OBJECTS) $(MKLIB) -o EGLdri -linker '$(CC)' -ldflags '$(LDFLAGS)' \ -major 1 -minor 0 \ - -install $(TOP)/$(LIB_DIR) -ldl $(OBJECTS) + -install $(TOP)/$(LIB_DIR) -ldl $(OBJECTS) $(LIBS) clean: @@ -53,9 +55,10 @@ clean: depend: $(SOURCES) $(HEADERS) @ echo "running $(MKDEP)" + @ rm -f depend @ touch depend $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) \ - $(SOURCES) $(HEADERS) > /dev/null + $(SOURCES) $(HEADERS) >/dev/null 2>/dev/null include depend # DO NOT DELETE diff --git a/src/egl/drivers/dri/egldri.c b/src/egl/drivers/dri/egldri.c index cab0be2bd1..57661cc3ab 100644 --- a/src/egl/drivers/dri/egldri.c +++ b/src/egl/drivers/dri/egldri.c @@ -1,5 +1,6 @@ /** - * Generic EGL driver for DRI. + * Generic EGL driver for DRI. This is basically an "adaptor" driver + * that allows libEGL to load/use regular DRI drivers. * * This file contains all the code needed to interface DRI-based drivers * with libEGL. @@ -23,6 +24,7 @@ #include "egldisplay.h" #include "eglcontext.h" #include "eglconfig.h" +#include "eglconfigutil.h" #include "eglsurface.h" #include "eglscreen.h" #include "eglglobals.h" @@ -32,18 +34,72 @@ #include "egldri.h" const char *sysfs = "/sys/class"; -#define None 0 + static const int empty_attribute_list[1] = { None }; + +/** + * Given a card number, return the name of the DRI driver to use. 
+ * This generally means reading the contents of + * /sys/class/drm/cardX/dri_library_name, where X is the card number + */ +static EGLBoolean +driver_name_from_card_number(int card, char *driverName, int maxDriverName) +{ + char path[2000]; + FILE *f; + int length; + + snprintf(path, sizeof(path), "%s/drm/card%d/dri_library_name", sysfs, card); + + f = fopen(path, "r"); + if (!f) + return EGL_FALSE; + + fgets(driverName, maxDriverName, f); + fclose(f); + + if ((length = strlen(driverName)) > 1) { + /* remove the trailing newline from sysfs */ + driverName[length - 1] = '\0'; + strncat(driverName, "_dri", maxDriverName); + return EGL_TRUE; + } + else { + return EGL_FALSE; + } +} + + + /** * The bootstrap function. * Return a new driDriver object and plug in API functions. * This function, in turn, loads a specific DRI driver (ex: r200_dri.so). */ _EGLDriver * -_eglMain(_EGLDisplay *dpy) +_eglMain(_EGLDisplay *dpy, const char *args) { +#if 1 + const int card = args ? atoi(args) : 0; + _EGLDriver *driver = NULL; + char driverName[1000]; + + if (!driver_name_from_card_number(card, driverName, sizeof(driverName))) { + _eglLog(_EGL_WARNING, + "Unable to determine driver name for card %d\n", card); + return NULL; + } + + _eglLog(_EGL_DEBUG, "Driver name: %s\n", driverName); + + driver = _eglOpenDriver(dpy, driverName, args); + + return driver; + +#else + int length; char path[NAME_MAX]; struct dirent *dirent; @@ -58,14 +114,19 @@ _eglMain(_EGLDisplay *dpy) _eglLog(_EGL_WARNING, "%s DRM devices not found.", path); return EGL_FALSE; } + + /* loop over dir entries looking for cardX where "X" is in the + * dpy->DriverName ":X" string. 
+ */ while ((dirent = readdir(dir))) { if (strncmp(&dirent->d_name[0], "card", 4) != 0) continue; - if (strcmp(&dirent->d_name[4], &dpy->Name[1]) != 0) + if (strcmp(&dirent->d_name[4], &driverName[1]) != 0) continue; - snprintf(path, sizeof(path), "%s/drm/card%s/dri_library_name", sysfs, &dpy->Name[1]); + snprintf(path, sizeof(path), "%s/drm/card%s/dri_library_name", + sysfs, &driverName[1]); _eglLog(_EGL_INFO, "Opening %s", path); #if 1 file = fopen(path, "r"); @@ -89,6 +150,7 @@ _eglMain(_EGLDisplay *dpy) closedir(dir); return driver; +#endif } @@ -141,7 +203,7 @@ _eglDRICreateContext(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, /* generate handle and insert into hash table */ _eglSaveContext(&c->Base); - return c->Base.Handle; + return _eglGetContextHandle(&c->Base); } @@ -152,13 +214,15 @@ _eglDRIMakeCurrent(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw, driDisplay *disp = Lookup_driDisplay(dpy); driContext *ctx = Lookup_driContext(context); EGLBoolean b; + __DRIid drawBuf = (__DRIid) draw; + __DRIid readBuf = (__DRIid) read; b = _eglMakeCurrent(drv, dpy, draw, read, context); if (!b) return EGL_FALSE; if (ctx) { - ctx->driContext.bindContext(disp, 0, read, draw, &ctx->driContext); + ctx->driContext.bindContext(disp, 0, drawBuf, readBuf, &ctx->driContext); } else { /* what's this??? 
*/ @@ -190,7 +254,7 @@ _eglDRICreatePbufferSurface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, #if 0 GLcontext *ctx = NULL; /* this _should_ be OK */ #endif - GLvisual visMode; + __GLcontextModes visMode; _EGLConfig *conf = _eglLookupConfig(drv, dpy, config); assert(conf); /* bad config should be caught earlier */ _eglConfigToContextModesRec(conf, &visMode); @@ -267,7 +331,8 @@ _eglDRICreateScreenSurfaceMESA(_EGLDriver *drv, EGLDisplay dpy, EGLConfig cfg, _EGLConfig *config = _eglLookupConfig(drv, dpy, cfg); driDisplay *disp = Lookup_driDisplay(dpy); driSurface *surface; - GLvisual visMode; + __GLcontextModes visMode; + __DRIid drawBuf; surface = (driSurface *) calloc(1, sizeof(*surface)); if (!surface) { @@ -292,8 +357,10 @@ _eglDRICreateScreenSurfaceMESA(_EGLDriver *drv, EGLDisplay dpy, EGLConfig cfg, /* convert EGLConfig to GLvisual */ _eglConfigToContextModesRec(config, &visMode); + drawBuf = (__DRIid) _eglGetSurfaceHandle(&surface->Base); + /* Create a new DRI drawable */ - if (!disp->driScreen.createNewDrawable(disp, &visMode, surface->Base.Handle, + if (!disp->driScreen.createNewDrawable(disp, &visMode, drawBuf, &surface->drawable, GLX_WINDOW_BIT, empty_attribute_list)) { _eglRemoveSurface(&surface->Base); @@ -715,7 +782,7 @@ __eglGetDrawableInfo(__DRInativeDisplay * ndpy, int screen, __DRIid drawable, { __DRIscreen *pDRIScreen; __DRIscreenPrivate *psp; - driSurface *surf = Lookup_driSurface(drawable); + driSurface *surf = Lookup_driSurface((EGLSurface) drawable); pDRIScreen = __eglFindDRIScreen(ndpy, screen); @@ -1019,8 +1086,10 @@ _eglDRICreateDisplay(driDisplay *dpy, __DRIframebuffer *framebuffer) api_ver, & interface_methods, NULL); - if (!dpy->driScreen.private) + if (!dpy->driScreen.private) { + _eglLog(_EGL_WARNING, "egldri.c: DRI create new screen failed"); return EGL_FALSE; + } DRM_UNLOCK( dpy->drmFD, dpy->pSAREA, dpy->serverContext ); @@ -1080,6 +1149,7 @@ _eglDRIInitialize(_EGLDriver *drv, EGLDisplay dpy, { _EGLDisplay *disp = 
_eglLookupDisplay(dpy); driDisplay *display; + const char *driverName = (const char *) disp->NativeDisplay; assert(disp); @@ -1088,13 +1158,13 @@ _eglDRIInitialize(_EGLDriver *drv, EGLDisplay dpy, */ display = calloc(1, sizeof(*display)); display->Base = *disp; - _eglHashInsert(_eglGlobal.Displays, disp->Handle, display); + _eglSaveDisplay(&display->Base); free(disp); *major = 1; *minor = 0; - sscanf(&disp->Name[1], "%d", &display->minor); + sscanf(driverName + 1, "%d", &display->minor); drv->Initialized = EGL_TRUE; return EGL_TRUE; diff --git a/src/egl/drivers/dri/egldri.h b/src/egl/drivers/dri/egldri.h index 34b12d64fc..54a9a4ea26 100644 --- a/src/egl/drivers/dri/egldri.h +++ b/src/egl/drivers/dri/egldri.h @@ -1,11 +1,14 @@ #ifndef EGLDRI_INCLUDED #define EGLDRI_INCLUDED +#include <stdlib.h> +#include <string.h> +#include <stdint.h> #include "egldisplay.h" #include "eglscreen.h" #include "eglsurface.h" #include "eglcontext.h" -#include "mtypes.h" + #include "dri_util.h" #include "drm_sarea.h" @@ -14,7 +17,7 @@ */ typedef struct dri_display { - _EGLDisplay Base; /* base class/object */ + _EGLDisplay Base; /**< base class */ void *pFB; int drmFD; /**< \brief DRM device file descriptor */ int minor; @@ -32,7 +35,7 @@ typedef struct dri_display unsigned long FBStart; /**< \brief physical address of the framebuffer */ void *driverClientMsg; int driverClientMsgSize; - int chipset; + unsigned chipset; void *driverPrivate; drm_magic_t magic; diff --git a/src/egl/drivers/glx/Makefile b/src/egl/drivers/glx/Makefile new file mode 100644 index 0000000000..3cd5276513 --- /dev/null +++ b/src/egl/drivers/glx/Makefile @@ -0,0 +1,74 @@ +# src/egl/drivers/glx/Makefile + +# Build XEGL DRI driver loader library: egl_glx.so + + +TOP = ../../../.. +include $(TOP)/configs/current + + +EXTRA_DEFINES = -DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" + +DRIVER_NAME = egl_glx.so + + +INCLUDE_DIRS = \ + -I. 
\ + -I/usr/include \ + $(shell pkg-config --cflags-only-I libdrm) \ + -I$(TOP)/include \ + -I$(TOP)/include/GL/internal \ + -I$(TOP)/src/mesa/glapi \ + -I$(TOP)/src/mesa/drivers/dri/common \ + -I$(TOP)/src/mesa/main \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/egl/main \ + -I$(TOP)/src/glx/x11 + +SOURCES = egl_glx.c + +OBJECTS = $(SOURCES:.c=.o) + +DRM_LIB = `pkg-config --libs libdrm` + +MISC_LIBS = -ldl -lXext -lGL + + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $(EXTRA_DEFINES) $< -o $@ + + +.PHONY: library + + +default: depend library Makefile + + +library: $(TOP)/$(LIB_DIR)/$(DRIVER_NAME) + + +# Make the egl_glx.so library +$(TOP)/$(LIB_DIR)/$(DRIVER_NAME): $(OBJECTS) + $(TOP)/bin/mklib -o $(DRIVER_NAME) \ + -noprefix \ + -major 1 -minor 0 \ + -L$(TOP)/$(LIB_DIR) \ + -install $(TOP)/$(LIB_DIR) \ + $(OBJECTS) $(DRM_LIB) $(MISC_LIBS) + + +clean: + rm -f *.o + rm -f *.so + rm -f depend depend.bak + + +depend: $(SOURCES) $(HEADERS) + @ echo "running $(MKDEP)" + @ rm -f depend + @ touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) \ + $(SOURCES) $(HEADERS) >/dev/null 2>/dev/null + +include depend +# DO NOT DELETE diff --git a/src/egl/drivers/glx/egl_glx.c b/src/egl/drivers/glx/egl_glx.c new file mode 100644 index 0000000000..7e4967411e --- /dev/null +++ b/src/egl/drivers/glx/egl_glx.c @@ -0,0 +1,584 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * This is an EGL driver that wraps GLX. This gives the benefit of being + * completely agnostic of the direct rendering implementation. 
+ * + * Authors: Alan Hourihane <alanh@tungstengraphics.com> + */ + +/* + * TODO: + * + * Add GLXFBConfig support + * Pbuffer & Pixmap support + * test eglBind/ReleaseTexImage + */ + + +#include <assert.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "dlfcn.h" +#include <X11/Xlib.h> +#include <GL/gl.h> +#include "glxclient.h" + +#define _EGL_PLATFORM_X + +#include "eglconfig.h" +#include "eglcontext.h" +#include "egldisplay.h" +#include "egldriver.h" +#include "eglglobals.h" +#include "eglhash.h" +#include "egllog.h" +#include "eglsurface.h" + +#include <GL/gl.h> + +#define CALLOC_STRUCT(T) (struct T *) calloc(1, sizeof(struct T)) + +static const EGLint all_apis = (EGL_OPENGL_ES_BIT | + EGL_OPENGL_ES2_BIT | + EGL_OPENVG_BIT | + EGL_OPENGL_BIT); + +struct visual_attribs +{ + /* X visual attribs */ + int id; + int klass; + int depth; + int redMask, greenMask, blueMask; + int colormapSize; + int bitsPerRGB; + + /* GL visual attribs */ + int supportsGL; + int transparentType; + int transparentRedValue; + int transparentGreenValue; + int transparentBlueValue; + int transparentAlphaValue; + int transparentIndexValue; + int bufferSize; + int level; + int render_type; + int doubleBuffer; + int stereo; + int auxBuffers; + int redSize, greenSize, blueSize, alphaSize; + int depthSize; + int stencilSize; + int accumRedSize, accumGreenSize, accumBlueSize, accumAlphaSize; + int numSamples, numMultisample; + int visualCaveat; +}; + +/** subclass of _EGLDriver */ +struct GLX_egl_driver +{ + _EGLDriver Base; /**< base class */ + + XVisualInfo *visuals; + + /* GLXFBConfig *fbconfigs - todo */ +}; + + +/** subclass of _EGLContext */ +struct GLX_egl_context +{ + _EGLContext Base; /**< base class */ + + GLXContext context; +}; + + +/** subclass of _EGLSurface */ +struct GLX_egl_surface +{ + _EGLSurface Base; /**< base class */ + + GLXDrawable drawable; +}; + + +/** subclass of _EGLConfig */ +struct GLX_egl_config +{ + _EGLConfig Base; 
/**< base class */ +}; + +/** cast wrapper */ +static struct GLX_egl_driver * +GLX_egl_driver(_EGLDriver *drv) +{ + return (struct GLX_egl_driver *) drv; +} + +static struct GLX_egl_context * +GLX_egl_context(_EGLContext *ctx) +{ + return (struct GLX_egl_context *) ctx; +} + +static struct GLX_egl_surface * +GLX_egl_surface(_EGLSurface *surf) +{ + return (struct GLX_egl_surface *) surf; +} + +static GLboolean +get_visual_attribs(Display *dpy, XVisualInfo *vInfo, + struct visual_attribs *attribs) +{ + const char *ext = glXQueryExtensionsString(dpy, vInfo->screen); + int rgba; + + memset(attribs, 0, sizeof(struct visual_attribs)); + + attribs->id = vInfo->visualid; +#if defined(__cplusplus) || defined(c_plusplus) + attribs->klass = vInfo->c_class; +#else + attribs->klass = vInfo->class; +#endif + attribs->depth = vInfo->depth; + attribs->redMask = vInfo->red_mask; + attribs->greenMask = vInfo->green_mask; + attribs->blueMask = vInfo->blue_mask; + attribs->colormapSize = vInfo->colormap_size; + attribs->bitsPerRGB = vInfo->bits_per_rgb; + + if (glXGetConfig(dpy, vInfo, GLX_USE_GL, &attribs->supportsGL) != 0 || + !attribs->supportsGL) + return GL_FALSE; + glXGetConfig(dpy, vInfo, GLX_BUFFER_SIZE, &attribs->bufferSize); + glXGetConfig(dpy, vInfo, GLX_LEVEL, &attribs->level); + glXGetConfig(dpy, vInfo, GLX_RGBA, &rgba); + if (!rgba) + return GL_FALSE; + attribs->render_type = GLX_RGBA_BIT; + + glXGetConfig(dpy, vInfo, GLX_DOUBLEBUFFER, &attribs->doubleBuffer); + if (!attribs->doubleBuffer) + return GL_FALSE; + + glXGetConfig(dpy, vInfo, GLX_STEREO, &attribs->stereo); + glXGetConfig(dpy, vInfo, GLX_AUX_BUFFERS, &attribs->auxBuffers); + glXGetConfig(dpy, vInfo, GLX_RED_SIZE, &attribs->redSize); + glXGetConfig(dpy, vInfo, GLX_GREEN_SIZE, &attribs->greenSize); + glXGetConfig(dpy, vInfo, GLX_BLUE_SIZE, &attribs->blueSize); + glXGetConfig(dpy, vInfo, GLX_ALPHA_SIZE, &attribs->alphaSize); + glXGetConfig(dpy, vInfo, GLX_DEPTH_SIZE, &attribs->depthSize); + glXGetConfig(dpy, 
vInfo, GLX_STENCIL_SIZE, &attribs->stencilSize); + glXGetConfig(dpy, vInfo, GLX_ACCUM_RED_SIZE, &attribs->accumRedSize); + glXGetConfig(dpy, vInfo, GLX_ACCUM_GREEN_SIZE, &attribs->accumGreenSize); + glXGetConfig(dpy, vInfo, GLX_ACCUM_BLUE_SIZE, &attribs->accumBlueSize); + glXGetConfig(dpy, vInfo, GLX_ACCUM_ALPHA_SIZE, &attribs->accumAlphaSize); + + /* get transparent pixel stuff */ + glXGetConfig(dpy, vInfo,GLX_TRANSPARENT_TYPE, &attribs->transparentType); + if (attribs->transparentType == GLX_TRANSPARENT_RGB) { + glXGetConfig(dpy, vInfo, GLX_TRANSPARENT_RED_VALUE, &attribs->transparentRedValue); + glXGetConfig(dpy, vInfo, GLX_TRANSPARENT_GREEN_VALUE, &attribs->transparentGreenValue); + glXGetConfig(dpy, vInfo, GLX_TRANSPARENT_BLUE_VALUE, &attribs->transparentBlueValue); + glXGetConfig(dpy, vInfo, GLX_TRANSPARENT_ALPHA_VALUE, &attribs->transparentAlphaValue); + } + else if (attribs->transparentType == GLX_TRANSPARENT_INDEX) { + glXGetConfig(dpy, vInfo, GLX_TRANSPARENT_INDEX_VALUE, &attribs->transparentIndexValue); + } + + /* multisample attribs */ +#ifdef GLX_ARB_multisample + if (ext && strstr(ext, "GLX_ARB_multisample")) { + glXGetConfig(dpy, vInfo, GLX_SAMPLE_BUFFERS_ARB, &attribs->numMultisample); + glXGetConfig(dpy, vInfo, GLX_SAMPLES_ARB, &attribs->numSamples); + } +#endif + else { + attribs->numSamples = 0; + attribs->numMultisample = 0; + } + +#if defined(GLX_EXT_visual_rating) + if (ext && strstr(ext, "GLX_EXT_visual_rating")) { + glXGetConfig(dpy, vInfo, GLX_VISUAL_CAVEAT_EXT, &attribs->visualCaveat); + } + else { + attribs->visualCaveat = GLX_NONE_EXT; + } +#else + attribs->visualCaveat = 0; +#endif + + return GL_TRUE; +} + +static EGLBoolean +create_configs(_EGLDisplay *disp, struct GLX_egl_driver *GLX_drv) +{ + XVisualInfo theTemplate; + int numVisuals; + long mask; + int i; + struct visual_attribs attribs; + + /* get list of all visuals on this screen */ + theTemplate.screen = DefaultScreen(disp->Xdpy); + mask = VisualScreenMask; + GLX_drv->visuals = 
XGetVisualInfo(disp->Xdpy, mask, &theTemplate, &numVisuals); + + for (i = 0; i < numVisuals; i++) { + struct GLX_egl_config *config; + + if (!get_visual_attribs(disp->Xdpy, &GLX_drv->visuals[i], &attribs)) + continue; + + config = CALLOC_STRUCT(GLX_egl_config); + + _eglInitConfig(&config->Base, i+1); + SET_CONFIG_ATTRIB(&config->Base, EGL_NATIVE_VISUAL_ID, attribs.id); + SET_CONFIG_ATTRIB(&config->Base, EGL_BUFFER_SIZE, attribs.bufferSize); + SET_CONFIG_ATTRIB(&config->Base, EGL_RED_SIZE, attribs.redSize); + SET_CONFIG_ATTRIB(&config->Base, EGL_GREEN_SIZE, attribs.greenSize); + SET_CONFIG_ATTRIB(&config->Base, EGL_BLUE_SIZE, attribs.blueSize); + SET_CONFIG_ATTRIB(&config->Base, EGL_ALPHA_SIZE, attribs.alphaSize); + SET_CONFIG_ATTRIB(&config->Base, EGL_DEPTH_SIZE, attribs.depthSize); + SET_CONFIG_ATTRIB(&config->Base, EGL_STENCIL_SIZE, attribs.stencilSize); + SET_CONFIG_ATTRIB(&config->Base, EGL_SAMPLES, attribs.numSamples); + SET_CONFIG_ATTRIB(&config->Base, EGL_SAMPLE_BUFFERS, attribs.numMultisample); + SET_CONFIG_ATTRIB(&config->Base, EGL_CONFORMANT, all_apis); + SET_CONFIG_ATTRIB(&config->Base, EGL_RENDERABLE_TYPE, all_apis); + SET_CONFIG_ATTRIB(&config->Base, EGL_SURFACE_TYPE, + (EGL_WINDOW_BIT /*| EGL_PBUFFER_BIT | EGL_PIXMAP_BIT*/)); + + /* XXX possibly other things to init... */ + + _eglAddConfig(disp, &config->Base); + } + + return EGL_TRUE; +} + +/** + * Called via eglInitialize(), GLX_drv->API.Initialize(). 
+ */ +static EGLBoolean +GLX_eglInitialize(_EGLDriver *drv, EGLDisplay dpy, + EGLint *minor, EGLint *major) +{ + struct GLX_egl_driver *GLX_drv = GLX_egl_driver(drv); + _EGLDisplay *disp = _eglLookupDisplay(dpy); + + _eglLog(_EGL_DEBUG, "XDRI: eglInitialize"); + + if (!disp->Xdpy) { + disp->Xdpy = XOpenDisplay(NULL); + if (!disp->Xdpy) { + _eglLog(_EGL_WARNING, "XDRI: XOpenDisplay failed"); + return EGL_FALSE; + } + } + + GLX_drv->Base.Initialized = EGL_TRUE; + + GLX_drv->Base.Name = "GLX"; + + /* we're supporting EGL 1.4 */ + *minor = 1; + *major = 4; + + create_configs(disp, GLX_drv); + + return EGL_TRUE; +} + + +/** + * Called via eglTerminate(), drv->API.Terminate(). + */ +static EGLBoolean +GLX_eglTerminate(_EGLDriver *drv, EGLDisplay dpy) +{ + _EGLDisplay *disp = _eglLookupDisplay(dpy); + + _eglLog(_EGL_DEBUG, "XDRI: eglTerminate"); + +// XCloseDisplay(disp->Xdpy); + + return EGL_TRUE; +} + + +/** + * Called via eglCreateContext(), drv->API.CreateContext(). + */ +static EGLContext +GLX_eglCreateContext(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, + EGLContext share_list, const EGLint *attrib_list) +{ + _EGLDisplay *disp = _eglLookupDisplay(dpy); + struct GLX_egl_context *GLX_ctx = CALLOC_STRUCT(GLX_egl_context); + struct GLX_egl_driver *GLX_drv = GLX_egl_driver(drv); + struct GLX_egl_context *GLX_ctx_shared = NULL; + _EGLConfig *conf; + + if (!GLX_ctx) + return EGL_NO_CONTEXT; + + if (!_eglInitContext(drv, dpy, &GLX_ctx->Base, config, attrib_list)) { + free(GLX_ctx); + return EGL_NO_CONTEXT; + } + + if (share_list != EGL_NO_CONTEXT) { + _EGLContext *shareCtx = _eglLookupContext(share_list); + if (!shareCtx) { + _eglError(EGL_BAD_CONTEXT, "eglCreateContext(share_list)"); + return EGL_FALSE; + } + GLX_ctx_shared = GLX_egl_context(shareCtx); + } + + conf = _eglLookupConfig(drv, dpy, config); + assert(conf); + + GLX_ctx->context = glXCreateContext(disp->Xdpy, &GLX_drv->visuals[(int)config-1], GLX_ctx_shared ? 
GLX_ctx_shared->context : NULL, GL_TRUE); + if (!GLX_ctx->context) + return EGL_FALSE; + + /* need to have a direct rendering context */ + if (!glXIsDirect(disp->Xdpy, GLX_ctx->context)) + return EGL_FALSE; + + return _eglGetContextHandle(&GLX_ctx->Base); +} + + +/** + * Called via eglMakeCurrent(), drv->API.MakeCurrent(). + */ +static EGLBoolean +GLX_eglMakeCurrent(_EGLDriver *drv, EGLDisplay dpy, EGLSurface d, + EGLSurface r, EGLContext context) +{ + _EGLDisplay *disp = _eglLookupDisplay(dpy); + _EGLContext *ctx = _eglLookupContext(context); + _EGLSurface *dsurf = _eglLookupSurface(d); + _EGLSurface *rsurf = _eglLookupSurface(r); + struct GLX_egl_surface *GLX_dsurf = GLX_egl_surface(dsurf); + struct GLX_egl_surface *GLX_rsurf = GLX_egl_surface(rsurf); + struct GLX_egl_context *GLX_ctx = GLX_egl_context(ctx); + + if (!_eglMakeCurrent(drv, dpy, d, r, context)) + return EGL_FALSE; + +// if (!glXMakeContextCurrent(disp->Xdpy, GLX_dsurf->drawable, GLX_rsurf->drawable, GLX_ctx->context)) + if (!glXMakeCurrent(disp->Xdpy, GLX_dsurf ? GLX_dsurf->drawable : 0, GLX_ctx ? GLX_ctx->context : NULL)) + return EGL_FALSE; + + return EGL_TRUE; +} + +/** Get size of given window */ +static Status +get_drawable_size(Display *dpy, Drawable d, uint *width, uint *height) +{ + Window root; + Status stat; + int xpos, ypos; + unsigned int w, h, bw, depth; + stat = XGetGeometry(dpy, d, &root, &xpos, &ypos, &w, &h, &bw, &depth); + *width = w; + *height = h; + return stat; +} + +/** + * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface(). 
+ */ +static EGLSurface +GLX_eglCreateWindowSurface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, + NativeWindowType window, const EGLint *attrib_list) +{ + _EGLDisplay *disp = _eglLookupDisplay(dpy); + struct GLX_egl_surface *GLX_surf; + uint width, height; + + GLX_surf = CALLOC_STRUCT(GLX_egl_surface); + if (!GLX_surf) + return EGL_NO_SURFACE; + + if (!_eglInitSurface(drv, dpy, &GLX_surf->Base, EGL_WINDOW_BIT, + config, attrib_list)) { + free(GLX_surf); + return EGL_FALSE; + } + + _eglSaveSurface(&GLX_surf->Base); + + GLX_surf->drawable = window; + get_drawable_size(disp->Xdpy, window, &width, &height); + GLX_surf->Base.Width = width; + GLX_surf->Base.Height = height; + + return _eglGetSurfaceHandle(&GLX_surf->Base); +} + +static EGLBoolean +GLX_eglDestroySurface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface) +{ + _EGLDisplay *disp = _eglLookupDisplay(dpy); + _EGLSurface *surf = _eglLookupSurface(surface); + return EGL_TRUE; + if (surf) { + _eglHashRemove(_eglGlobal.Surfaces, (EGLuint) surface); + if (surf->IsBound) { + surf->DeletePending = EGL_TRUE; + } + else { + free(surf); + } + + return EGL_TRUE; + } + else { + _eglError(EGL_BAD_SURFACE, "eglDestroySurface"); + return EGL_FALSE; + } +} + + +static EGLBoolean +GLX_eglBindTexImage(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface, + EGLint buffer) +{ + _EGLDisplay *disp = _eglLookupDisplay(dpy); + _EGLSurface *surf = _eglLookupSurface(surface); + struct GLX_egl_surface *GLX_surf = GLX_egl_surface(surf); + + /* buffer ?? */ + glXBindTexImageEXT(disp->Xdpy, GLX_surf->drawable, GLX_FRONT_LEFT_EXT, NULL); + + return EGL_TRUE; +} + + +static EGLBoolean +GLX_eglReleaseTexImage(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface, + EGLint buffer) +{ + _EGLDisplay *disp = _eglLookupDisplay(dpy); + _EGLSurface *surf = _eglLookupSurface(surface); + struct GLX_egl_surface *GLX_surf = GLX_egl_surface(surf); + + /* buffer ?? 
*/ + glXReleaseTexImageEXT(disp->Xdpy, GLX_surf->drawable, GLX_FRONT_LEFT_EXT); + + return EGL_TRUE; +} + + +static EGLBoolean +GLX_eglSwapBuffers(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw) +{ + _EGLDisplay *disp = _eglLookupDisplay(dpy); + _EGLSurface *surf = _eglLookupSurface(draw); + struct GLX_egl_surface *GLX_surf = GLX_egl_surface(surf); + + _eglLog(_EGL_DEBUG, "XDRI: EGL SwapBuffers 0x%x",draw); + + /* error checking step: */ + if (!_eglSwapBuffers(drv, dpy, draw)) + return EGL_FALSE; + + glXSwapBuffers(disp->Xdpy, GLX_surf->drawable); + + return EGL_TRUE; +} + +/* + * Called from eglGetProcAddress() via drv->API.GetProcAddress(). + */ +static _EGLProc +GLX_eglGetProcAddress(const char *procname) +{ + return (_EGLProc)glXGetProcAddress((const GLubyte *)procname); +} + + +/** + * This is the main entrypoint into the driver, called by libEGL. + * Create a new _EGLDriver object and init its dispatch table. + */ +_EGLDriver * +_eglMain(_EGLDisplay *disp, const char *args) +{ + struct GLX_egl_driver *GLX_drv = CALLOC_STRUCT(GLX_egl_driver); + char *env; + + if (!GLX_drv) + return NULL; + + _eglInitDriverFallbacks(&GLX_drv->Base); + GLX_drv->Base.API.Initialize = GLX_eglInitialize; + GLX_drv->Base.API.Terminate = GLX_eglTerminate; + GLX_drv->Base.API.CreateContext = GLX_eglCreateContext; + GLX_drv->Base.API.MakeCurrent = GLX_eglMakeCurrent; + GLX_drv->Base.API.CreateWindowSurface = GLX_eglCreateWindowSurface; + GLX_drv->Base.API.DestroySurface = GLX_eglDestroySurface; + GLX_drv->Base.API.BindTexImage = GLX_eglBindTexImage; + GLX_drv->Base.API.ReleaseTexImage = GLX_eglReleaseTexImage; + GLX_drv->Base.API.SwapBuffers = GLX_eglSwapBuffers; + GLX_drv->Base.API.GetProcAddress = GLX_eglGetProcAddress; + + GLX_drv->Base.ClientAPIsMask = all_apis; + GLX_drv->Base.Name = "GLX"; + + _eglLog(_EGL_DEBUG, "GLX: main(%s)", args); + + /* set new DRI path to pick up EGL version (which doesn't contain any mesa + * code), but don't override if one is already set. 
+ */ + env = getenv("LIBGL_DRIVERS_PATH"); + if (env) { + if (!strstr(env, "egl")) { + sprintf(env, "%s/egl", env); + setenv("LIBGL_DRIVERS_PATH", env, 1); + } + } else + setenv("LIBGL_DRIVERS_PATH", DEFAULT_DRIVER_DIR"/egl", 0); + + return &GLX_drv->Base; +} diff --git a/src/egl/drivers/xdri/Makefile b/src/egl/drivers/xdri/Makefile new file mode 100644 index 0000000000..a721b997e6 --- /dev/null +++ b/src/egl/drivers/xdri/Makefile @@ -0,0 +1,70 @@ +# src/egl/drivers/xdri/Makefile + +# Build XEGL DRI driver loader library: egl_xdri.so + + +TOP = ../../../.. +include $(TOP)/configs/current + + +DRIVER_NAME = egl_xdri.so + + +INCLUDE_DIRS = \ + -I. \ + -I/usr/include \ + $(shell pkg-config --cflags-only-I libdrm) \ + -I$(TOP)/include \ + -I$(TOP)/include/GL/internal \ + -I$(TOP)/src/mesa/glapi \ + -I$(TOP)/src/mesa/drivers/dri/common \ + -I$(TOP)/src/egl/main \ + -I$(TOP)/src/glx/x11 + +SOURCES = egl_xdri.c + +OBJECTS = $(SOURCES:.c=.o) + +DRM_LIB = `pkg-config --libs libdrm` + +MISC_LIBS = -ldl -lXext -lGL + + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + + +.PHONY: library + + +default: depend library Makefile + + +library: $(TOP)/$(LIB_DIR)/$(DRIVER_NAME) + + +# Make the egl_xdri.so library +$(TOP)/$(LIB_DIR)/$(DRIVER_NAME): $(OBJECTS) + $(TOP)/bin/mklib -o $(DRIVER_NAME) \ + -noprefix \ + -major 1 -minor 0 \ + -L$(TOP)/$(LIB_DIR) \ + -install $(TOP)/$(LIB_DIR) \ + $(OBJECTS) $(DRM_LIB) $(MISC_LIBS) + + +clean: + rm -f *.o + rm -f *.so + rm -f depend depend.bak + + +depend: $(SOURCES) $(HEADERS) + @ echo "running $(MKDEP)" + @ rm -f depend + @ touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) \ + $(SOURCES) $(HEADERS) >/dev/null 2>/dev/null + +include depend +# DO NOT DELETE diff --git a/src/egl/drivers/xdri/egl_xdri.c b/src/egl/drivers/xdri/egl_xdri.c new file mode 100644 index 0000000000..3b3e312746 --- /dev/null +++ b/src/egl/drivers/xdri/egl_xdri.c @@ -0,0 +1,1177 @@ 
+/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Code to interface a DRI driver to libEGL. + * Note that unlike previous DRI/EGL interfaces, this one is meant to + * be used _with_ X. Applications will use eglCreateWindowSurface() + * to render into X-created windows. + * + * This is an EGL driver that, in turn, loads a regular DRI driver. + * There are some dependencies on code in libGL, but those could be + * removed with some effort. 
/**
 * Subclass of _EGLContext.
 *
 * The base object must stay the first member: the lookup helpers in this
 * file cast an _EGLContext pointer directly to this type.
 */
struct xdri_egl_context
{
   _EGLContext Base;   /**< base class */

   /** DRI-side context; its 'private' field is filled in by the DRI
    *  screen's createNewContext() in xdri_eglCreateContext(). */
   __DRIcontext driContext;

   /** GL_TEXTURE_BINDING_2D value recorded by the eglBindTexImage()
    *  fallback path; the eglReleaseTexImage() fallback copies into that
    *  texture and then resets this field to -1. */
   GLint bound_tex_object;
};
xdri_egl_surface object */ +static struct xdri_egl_surface * +lookup_surface(EGLSurface surf) +{ + _EGLSurface *surface = _eglLookupSurface(surf); + return (struct xdri_egl_surface *) surface; +} + + +/** Map EGLContext handle to xdri_egl_context object */ +static struct xdri_egl_context * +lookup_context(EGLContext c) +{ + _EGLContext *context = _eglLookupContext(c); + return (struct xdri_egl_context *) context; +} + +static struct xdri_egl_context * +current_context(void) +{ + return (struct xdri_egl_context *) _eglGetCurrentContext(); +} + +/** Map EGLConfig handle to xdri_egl_config object */ +static struct xdri_egl_config * +lookup_config(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config) +{ + _EGLConfig *conf = _eglLookupConfig(drv, dpy, config); + return (struct xdri_egl_config *) conf; +} + + + +/** Get size of given window */ +static Status +get_drawable_size(Display *dpy, Drawable d, uint *width, uint *height) +{ + Window root; + Status stat; + int xpos, ypos; + unsigned int w, h, bw, depth; + stat = XGetGeometry(dpy, d, &root, &xpos, &ypos, &w, &h, &bw, &depth); + *width = w; + *height = h; + return stat; +} + + +/** + * Produce a set of EGL configs. + * Note that we get the list of GLcontextModes from the GLX library. + * This dependency on GLX lib will be removed someday. 
+ */ +static void +create_configs(_EGLDisplay *disp, __GLXdisplayPrivate *glx_priv) +{ + static const EGLint all_apis = (EGL_OPENGL_ES_BIT | + EGL_OPENGL_ES2_BIT | + EGL_OPENVG_BIT | + EGL_OPENGL_BIT); + __GLXscreenConfigs *scrn = glx_priv->screenConfigs; + const __GLcontextModes *m; + int id = 1; + + for (m = scrn->configs; m; m = m->next) { + /* EGL requires double-buffered configs */ + if (m->doubleBufferMode) { + struct xdri_egl_config *config = CALLOC_STRUCT(xdri_egl_config); + + _eglInitConfig(&config->Base, id++); + + SET_CONFIG_ATTRIB(&config->Base, EGL_BUFFER_SIZE, m->rgbBits); + SET_CONFIG_ATTRIB(&config->Base, EGL_RED_SIZE, m->redBits); + SET_CONFIG_ATTRIB(&config->Base, EGL_GREEN_SIZE, m->greenBits); + SET_CONFIG_ATTRIB(&config->Base, EGL_BLUE_SIZE, m->blueBits); + SET_CONFIG_ATTRIB(&config->Base, EGL_ALPHA_SIZE, m->alphaBits); + SET_CONFIG_ATTRIB(&config->Base, EGL_DEPTH_SIZE, m->depthBits); + SET_CONFIG_ATTRIB(&config->Base, EGL_STENCIL_SIZE, m->stencilBits); + SET_CONFIG_ATTRIB(&config->Base, EGL_SAMPLES, m->samples); + SET_CONFIG_ATTRIB(&config->Base, EGL_SAMPLE_BUFFERS, m->sampleBuffers); + SET_CONFIG_ATTRIB(&config->Base, EGL_NATIVE_VISUAL_ID, m->visualID); + SET_CONFIG_ATTRIB(&config->Base, EGL_NATIVE_VISUAL_TYPE, m->visualType); + SET_CONFIG_ATTRIB(&config->Base, EGL_CONFORMANT, all_apis); + SET_CONFIG_ATTRIB(&config->Base, EGL_RENDERABLE_TYPE, all_apis); + /* XXX only window rendering allowed ATM */ + SET_CONFIG_ATTRIB(&config->Base, EGL_SURFACE_TYPE, + (EGL_WINDOW_BIT | EGL_PBUFFER_BIT)); + + /* XXX possibly other things to init... */ + + /* Ptr from EGL config to GLcontextMode. Used in CreateContext(). 
*/ + config->mode = m; + + _eglAddConfig(disp, &config->Base); + } + } +} + + +/** + * Called via __DRIinterfaceMethods object + */ +static __DRIfuncPtr +dri_get_proc_address(const char * proc_name) +{ + return NULL; +} + + +static void +dri_context_modes_destroy(__GLcontextModes *modes) +{ + _eglLog(_EGL_DEBUG, "%s", __FUNCTION__); + + while (modes) { + __GLcontextModes * const next = modes->next; + free(modes); + modes = next; + } +} + + +/** + * Create a linked list of 'count' GLcontextModes. + * These are used during the client/server visual negotiation phase, + * then discarded. + */ +static __GLcontextModes * +dri_context_modes_create(unsigned count, size_t minimum_size) +{ + /* This code copied from libGLX, and modified */ + const size_t size = (minimum_size > sizeof(__GLcontextModes)) + ? minimum_size : sizeof(__GLcontextModes); + __GLcontextModes * head = NULL; + __GLcontextModes ** next; + unsigned i; + + next = & head; + for (i = 0 ; i < count ; i++) { + *next = (__GLcontextModes *) calloc(1, size); + if (*next == NULL) { + dri_context_modes_destroy(head); + head = NULL; + break; + } + + (*next)->doubleBufferMode = 1; + (*next)->visualID = GLX_DONT_CARE; + (*next)->visualType = GLX_DONT_CARE; + (*next)->visualRating = GLX_NONE; + (*next)->transparentPixel = GLX_NONE; + (*next)->transparentRed = GLX_DONT_CARE; + (*next)->transparentGreen = GLX_DONT_CARE; + (*next)->transparentBlue = GLX_DONT_CARE; + (*next)->transparentAlpha = GLX_DONT_CARE; + (*next)->transparentIndex = GLX_DONT_CARE; + (*next)->xRenderable = GLX_DONT_CARE; + (*next)->fbconfigID = GLX_DONT_CARE; + (*next)->swapMethod = GLX_SWAP_UNDEFINED_OML; + (*next)->bindToTextureRgb = GLX_DONT_CARE; + (*next)->bindToTextureRgba = GLX_DONT_CARE; + (*next)->bindToMipmapTexture = GLX_DONT_CARE; + (*next)->bindToTextureTargets = 0; + (*next)->yInverted = GLX_DONT_CARE; + + next = & ((*next)->next); + } + + return head; +} + + +static __DRIscreen * +dri_find_dri_screen(__DRInativeDisplay *ndpy, int scrn) 
+{ + __GLXdisplayPrivate *priv = __glXInitialize(ndpy); + __GLXscreenConfigs *scrnConf = priv->screenConfigs; + return &scrnConf->driScreen; +} + + +static GLboolean +dri_window_exists(__DRInativeDisplay *ndpy, __DRIid draw) +{ + return EGL_TRUE; +} + + +static GLboolean +dri_create_context(__DRInativeDisplay *ndpy, int screenNum, int configID, + void * contextID, drm_context_t * hw_context) +{ + assert(configID >= 0); + return XF86DRICreateContextWithConfig(ndpy, screenNum, + configID, contextID, hw_context); +} + + +static GLboolean +dri_destroy_context(__DRInativeDisplay * ndpy, int screen, __DRIid context) +{ + return XF86DRIDestroyContext(ndpy, screen, context); +} + + +static GLboolean +dri_create_drawable(__DRInativeDisplay * ndpy, int screen, + __DRIid drawable, drm_drawable_t * hHWDrawable) +{ + _eglLog(_EGL_DEBUG, "XDRI: %s", __FUNCTION__); + + /* Create DRI drawable for given window ID (drawable) */ + if (!XF86DRICreateDrawable(ndpy, screen, drawable, hHWDrawable)) + return EGL_FALSE; + + return EGL_TRUE; +} + + +static GLboolean +dri_destroy_drawable(__DRInativeDisplay * ndpy, int screen, __DRIid drawable) +{ + _eglLog(_EGL_DEBUG, "XDRI: %s", __FUNCTION__); + return XF86DRIDestroyDrawable(ndpy, screen, drawable); +} + + +static GLboolean +dri_get_drawable_info(__DRInativeDisplay *ndpy, int scrn, + __DRIid draw, unsigned int * index, unsigned int * stamp, + int * x, int * y, int * width, int * height, + int * numClipRects, drm_clip_rect_t ** pClipRects, + int * backX, int * backY, + int * numBackClipRects, + drm_clip_rect_t ** pBackClipRects) +{ + _eglLog(_EGL_DEBUG, "XDRI: %s", __FUNCTION__); + + if (!XF86DRIGetDrawableInfo(ndpy, scrn, draw, index, stamp, + x, y, width, height, + numClipRects, pClipRects, + backX, backY, + numBackClipRects, pBackClipRects)) { + return EGL_FALSE; + } + + return EGL_TRUE; +} + + +/** + * Table of functions exported by the loader to the driver. 
+ */ +static const __DRIinterfaceMethods interface_methods = { + dri_get_proc_address, + + dri_context_modes_create, + dri_context_modes_destroy, + + dri_find_dri_screen, + dri_window_exists, + + dri_create_context, + dri_destroy_context, + + dri_create_drawable, + dri_destroy_drawable, + dri_get_drawable_info, + + NULL,/*__eglGetUST,*/ + NULL,/*__eglGetMSCRate,*/ +}; + + + +static EGLBoolean +init_drm(struct xdri_egl_driver *xdri_drv, _EGLDisplay *disp) +{ + __DRIversion ddx_version; + __DRIversion dri_version; + __DRIversion drm_version; + drmVersionPtr version; + drm_handle_t hFB; + int newlyopened; + int status; + int scrn = DefaultScreen(disp->Xdpy); + +#if 0 + createNewScreen = (PFNCREATENEWSCREENFUNC) + dlsym(xdri_drv->dri_driver_handle, createNewScreenName); + if (!createNewScreen) { + _eglLog(_EGL_WARNING, "XDRI: Couldn't find %s function in the driver.", + createNewScreenName); + return EGL_FALSE; + } + else { + _eglLog(_EGL_DEBUG, "XDRI: Found %s", createNewScreenName); + } +#endif + + /* + * Get the DRI X extension version. 
+ */ + dri_version.major = 4; + dri_version.minor = 0; + dri_version.patch = 0; + + if (!XF86DRIOpenConnection(disp->Xdpy, scrn, + &xdri_drv->hSAREA, &xdri_drv->busID)) { + _eglLog(_EGL_WARNING, "XF86DRIOpenConnection failed"); + } + + xdri_drv->drmFD = drmOpenOnce(NULL, xdri_drv->busID, &newlyopened); + if (xdri_drv->drmFD < 0) { + perror("drmOpenOnce failed: "); + return EGL_FALSE; + } + else { + _eglLog(_EGL_DEBUG, "XDRI: drmOpenOnce returned %d", xdri_drv->drmFD); + } + + + if (drmGetMagic(xdri_drv->drmFD, &xdri_drv->magic)) { + perror("drmGetMagic failed: "); + return EGL_FALSE; + } + + version = drmGetVersion(xdri_drv->drmFD); + if (version) { + drm_version.major = version->version_major; + drm_version.minor = version->version_minor; + drm_version.patch = version->version_patchlevel; + drmFreeVersion(version); + _eglLog(_EGL_DEBUG, "XDRI: Got DRM version %d.%d.%d", + drm_version.major, + drm_version.minor, + drm_version.patch); + } + else { + drm_version.major = -1; + drm_version.minor = -1; + drm_version.patch = -1; + _eglLog(_EGL_WARNING, "XDRI: drmGetVersion() failed"); + return EGL_FALSE; + } + + /* Authenticate w/ server. + */ + if (!XF86DRIAuthConnection(disp->Xdpy, scrn, xdri_drv->magic)) { + _eglLog(_EGL_WARNING, "XDRI: XF86DRIAuthConnection() failed"); + return EGL_FALSE; + } + else { + _eglLog(_EGL_DEBUG, "XDRI: XF86DRIAuthConnection() success"); + } + + /* Get ddx version. + */ + { + char *driverName; + + /* + * Get device name (like "tdfx") and the ddx version + * numbers. We'll check the version in each DRI driver's + * "createNewScreen" function. + */ + if (!XF86DRIGetClientDriverName(disp->Xdpy, scrn, + &ddx_version.major, + &ddx_version.minor, + &ddx_version.patch, + &driverName)) { + _eglLog(_EGL_WARNING, "XDRI: XF86DRIGetClientDriverName failed"); + return EGL_FALSE; + } + else { + _eglLog(_EGL_DEBUG, "XDRI: XF86DRIGetClientDriverName returned %s", driverName); + } + } + + /* Get framebuffer info. 
+ */ + { + int junk; + if (!XF86DRIGetDeviceInfo(disp->Xdpy, scrn, + &hFB, + &junk, + &xdri_drv->framebuffer.size, + &xdri_drv->framebuffer.stride, + &xdri_drv->framebuffer.dev_priv_size, + &xdri_drv->framebuffer.dev_priv)) { + _eglLog(_EGL_WARNING, "XDRI: XF86DRIGetDeviceInfo() failed"); + return EGL_FALSE; + } + else { + _eglLog(_EGL_DEBUG, "XDRI: XF86DRIGetDeviceInfo() success"); + } + xdri_drv->framebuffer.width = DisplayWidth(disp->Xdpy, scrn); + xdri_drv->framebuffer.height = DisplayHeight(disp->Xdpy, scrn); + } + + /* Map the framebuffer region. (this may not be needed) + */ + status = drmMap(xdri_drv->drmFD, hFB, xdri_drv->framebuffer.size, + (drmAddressPtr) &xdri_drv->framebuffer.base); + if (status != 0) { + _eglLog(_EGL_WARNING, "XDRI: drmMap(framebuffer) failed"); + return EGL_FALSE; + } + else { + _eglLog(_EGL_DEBUG, "XDRI: drmMap(framebuffer) success"); + } + + /* Map the SAREA region. + */ + status = drmMap(xdri_drv->drmFD, xdri_drv->hSAREA, SAREA_MAX, &xdri_drv->pSAREA); + if (status != 0) { + _eglLog(_EGL_WARNING, "XDRI: drmMap(sarea) failed"); + return EGL_FALSE; + } + else { + _eglLog(_EGL_DEBUG, "XDRI: drmMap(sarea) success"); + } + + return EGL_TRUE; +} + + +/** + * Load the DRI driver named by "xdri_drv->dri_driver_name". + * Basically, dlopen() the library to set "xdri_drv->dri_driver_handle". + * + * Later, we'll call dlsym(createNewScreenName) to get a pointer to + * the driver's createNewScreen() function which is the bootstrap function. 
+ * + * \return EGL_TRUE for success, EGL_FALSE for failure + */ +static EGLBoolean +load_dri_driver(struct xdri_egl_driver *xdri_drv) +{ + char filename[100]; + int flags = RTLD_NOW; + + /* try "egl_xxx_dri.so" first */ + snprintf(filename, sizeof(filename), "egl_%s.so", xdri_drv->dri_driver_name); + _eglLog(_EGL_DEBUG, "XDRI: dlopen(%s)", filename); + xdri_drv->dri_driver_handle = dlopen(filename, flags); + if (xdri_drv->dri_driver_handle) { + _eglLog(_EGL_DEBUG, "XDRI: dlopen(%s) OK", filename); + return EGL_TRUE; + } + else { + _eglLog(_EGL_DEBUG, "XDRI: dlopen(%s) fail (%s)", filename, dlerror()); + } + + /* try regular "xxx_dri.so" next */ + snprintf(filename, sizeof(filename), "%s.so", xdri_drv->dri_driver_name); + _eglLog(_EGL_DEBUG, "XDRI: dlopen(%s)", filename); + xdri_drv->dri_driver_handle = dlopen(filename, flags); + if (xdri_drv->dri_driver_handle) { + _eglLog(_EGL_DEBUG, "XDRI: dlopen(%s) OK", filename); + return EGL_TRUE; + } + + _eglLog(_EGL_WARNING, "XDRI Could not open %s (%s)", filename, dlerror()); + return EGL_FALSE; +} + + +/** + * Called via eglInitialize(), xdri_drv->API.Initialize(). + */ +static EGLBoolean +xdri_eglInitialize(_EGLDriver *drv, EGLDisplay dpy, + EGLint *minor, EGLint *major) +{ + struct xdri_egl_driver *xdri_drv = xdri_egl_driver(drv); + _EGLDisplay *disp = _eglLookupDisplay(dpy); + static char name[100]; + + _eglLog(_EGL_DEBUG, "XDRI: eglInitialize"); + + if (!disp->Xdpy) { + disp->Xdpy = XOpenDisplay(NULL); + if (!disp->Xdpy) { + _eglLog(_EGL_WARNING, "XDRI: XOpenDisplay failed"); + return EGL_FALSE; + } + } + +#if 0 + /* choose the DRI driver to load */ + xdri_drv->dri_driver_name = _eglChooseDRMDriver(0); + if (!load_dri_driver(xdri_drv)) + return EGL_FALSE; +#else + (void) load_dri_driver; +#endif + +#if 0 + if (!init_drm(xdri_drv, disp)) + return EGL_FALSE; +#else + (void) init_drm; +#endif + + /* + * NOTE: this call to __glXInitialize() bootstraps the whole GLX/DRI + * interface, loads the DRI driver, etc. 
+ * This replaces the load_dri_driver() and init_drm() code above. + */ + xdri_drv->glx_priv = __glXInitialize(disp->Xdpy); + + create_configs(disp, xdri_drv->glx_priv); + + xdri_drv->Base.Initialized = EGL_TRUE; + + if (xdri_drv->dri_driver_name) + snprintf(name, sizeof(name), "X/DRI:%s", xdri_drv->dri_driver_name); + else + snprintf(name, sizeof(name), "X/DRI"); + xdri_drv->Base.Name = name; + + /* we're supporting EGL 1.4 */ + *minor = 1; + *major = 4; + + return EGL_TRUE; +} + + +/* + * Do some clean-up that normally occurs in XCloseDisplay(). + * We do this here because we're about to unload a dynamic library + * that has added some per-display extension data and callbacks. + * If we don't do this here we'll crash in XCloseDisplay() because it'll + * try to call functions that went away when the driver library was unloaded. + */ +static void +FreeDisplayExt(Display *dpy) +{ + _XExtension *ext, *next; + + for (ext = dpy->ext_procs; ext; ext = next) { + next = ext->next; + if (ext->close_display) { + ext->close_display(dpy, &ext->codes); + ext->close_display = NULL; + } + if (ext->name) + Xfree(ext->name); + Xfree(ext); + } + dpy->ext_procs = NULL; + + _XFreeExtData (dpy->ext_data); + dpy->ext_data = NULL; +} + + +/** + * Called via eglTerminate(), drv->API.Terminate(). + */ +static EGLBoolean +xdri_eglTerminate(_EGLDriver *drv, EGLDisplay dpy) +{ + struct xdri_egl_driver *xdri_drv = xdri_egl_driver(drv); + _EGLDisplay *disp = _eglLookupDisplay(dpy); + + _eglLog(_EGL_DEBUG, "XDRI: eglTerminate"); + + _eglLog(_EGL_DEBUG, "XDRI: Closing %s", xdri_drv->dri_driver_name); + + FreeDisplayExt(disp->Xdpy); + +#if 0 + /* this causes a segfault for some reason */ + dlclose(xdri_drv->dri_driver_handle); +#endif + xdri_drv->dri_driver_handle = NULL; + + free((void*) xdri_drv->dri_driver_name); + + return EGL_TRUE; +} + + +/* + * Called from eglGetProcAddress() via drv->API.GetProcAddress(). 
+ */ +static _EGLProc +xdri_eglGetProcAddress(const char *procname) +{ +#if 0 + _EGLDriver *drv = NULL; + + struct xdri_egl_driver *xdri_drv = xdri_egl_driver(drv); + /*_EGLDisplay *disp = _eglLookupDisplay(dpy);*/ + _EGLProc *proc = xdri_drv->driScreen.getProcAddress(procname); + return proc; +#elif 1 + /* This is a bit of a hack to get at the gallium/Mesa state tracker + * function st_get_proc_address(). This will probably change at + * some point. + */ + _EGLProc (*st_get_proc_addr)(const char *procname); + st_get_proc_addr = dlsym(NULL, "st_get_proc_address"); + if (st_get_proc_addr) { + return st_get_proc_addr(procname); + } + return NULL; +#else + return NULL; +#endif +} + + +/** + * Called via eglCreateContext(), drv->API.CreateContext(). + */ +static EGLContext +xdri_eglCreateContext(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, + EGLContext share_list, const EGLint *attrib_list) +{ + _EGLDisplay *disp = _eglLookupDisplay(dpy); + struct xdri_egl_config *xdri_config = lookup_config(drv, dpy, config); + void *shared = NULL; + int renderType = GLX_RGBA_BIT; + + struct xdri_egl_context *xdri_ctx = CALLOC_STRUCT(xdri_egl_context); + if (!xdri_ctx) + return EGL_NO_CONTEXT; + + if (!_eglInitContext(drv, dpy, &xdri_ctx->Base, config, attrib_list)) { + free(xdri_ctx); + return EGL_NO_CONTEXT; + } + + assert(xdri_config); + + { + struct xdri_egl_driver *xdri_drv = xdri_egl_driver(drv); + __GLXscreenConfigs *scrnConf = xdri_drv->glx_priv->screenConfigs; + xdri_ctx->driContext.private = + scrnConf->driScreen.createNewContext(disp->Xdpy, + xdri_config->mode, renderType, + shared, &xdri_ctx->driContext); + } + + if (!xdri_ctx->driContext.private) { + _eglLog(_EGL_DEBUG, "driScreen.createNewContext failed"); + free(xdri_ctx); + return EGL_NO_CONTEXT; + } + + xdri_ctx->driContext.mode = xdri_config->mode; + + return _eglGetContextHandle(&xdri_ctx->Base); +} + + +/** + * Called via eglMakeCurrent(), drv->API.MakeCurrent(). 
+ */ +static EGLBoolean +xdri_eglMakeCurrent(_EGLDriver *drv, EGLDisplay dpy, EGLSurface d, + EGLSurface r, EGLContext context) +{ + _EGLDisplay *disp = _eglLookupDisplay(dpy); + struct xdri_egl_context *xdri_ctx = lookup_context(context); + struct xdri_egl_surface *xdri_draw = lookup_surface(d); + struct xdri_egl_surface *xdri_read = lookup_surface(r); + __DRIid draw = xdri_draw ? xdri_draw->driDrawable : 0; + __DRIid read = xdri_read ? xdri_read->driDrawable : 0; + int scrn = DefaultScreen(disp->Xdpy); + + if (!_eglMakeCurrent(drv, dpy, d, r, context)) + return EGL_FALSE; + + + if (xdri_ctx && + !xdri_ctx->driContext.bindContext(disp->Xdpy, scrn, draw, read, + &xdri_ctx->driContext)) { + return EGL_FALSE; + } + + return EGL_TRUE; +} + + +/** + * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface(). + */ +static EGLSurface +xdri_eglCreateWindowSurface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, + NativeWindowType window, const EGLint *attrib_list) +{ + _EGLDisplay *disp = _eglLookupDisplay(dpy); + struct xdri_egl_surface *xdri_surf; + int scrn = DefaultScreen(disp->Xdpy); + uint width, height; + + xdri_surf = CALLOC_STRUCT(xdri_egl_surface); + if (!xdri_surf) + return EGL_NO_SURFACE; + + if (!_eglInitSurface(drv, dpy, &xdri_surf->Base, EGL_WINDOW_BIT, + config, attrib_list)) { + free(xdri_surf); + return EGL_FALSE; + } + + if (!XF86DRICreateDrawable(disp->Xdpy, scrn, window, &xdri_surf->hDrawable)) { + free(xdri_surf); + return EGL_FALSE; + } + + xdri_surf->driDrawable = window; + + _eglSaveSurface(&xdri_surf->Base); + + get_drawable_size(disp->Xdpy, window, &width, &height); + xdri_surf->Base.Width = width; + xdri_surf->Base.Height = height; + + _eglLog(_EGL_DEBUG, + "XDRI: CreateWindowSurface win 0x%x handle %d hDrawable %d", + (int) window, _eglGetSurfaceHandle(&xdri_surf->Base), + (int) xdri_surf->hDrawable); + + return _eglGetSurfaceHandle(&xdri_surf->Base); +} + + +/** + * Called via eglCreatePbufferSurface(), 
drv->API.CreatePbufferSurface(). + */ +static EGLSurface +xdri_eglCreatePbufferSurface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, + const EGLint *attrib_list) +{ + _EGLDisplay *disp = _eglLookupDisplay(dpy); + struct xdri_egl_surface *xdri_surf; + struct xdri_egl_config *xdri_config = lookup_config(drv, dpy, config); + int scrn = DefaultScreen(disp->Xdpy); + Window window; + + xdri_surf = CALLOC_STRUCT(xdri_egl_surface); + if (!xdri_surf) + return EGL_NO_SURFACE; + + if (!_eglInitSurface(drv, dpy, &xdri_surf->Base, EGL_PBUFFER_BIT, + config, attrib_list)) { + free(xdri_surf); + return EGL_FALSE; + } + + /* Create a dummy X window */ + { + Window root = RootWindow(disp->Xdpy, scrn); + XSetWindowAttributes attr; + XVisualInfo *visInfo, visTemplate; + unsigned mask; + int nvis; + + visTemplate.visualid = xdri_config->mode->visualID; + visInfo = XGetVisualInfo(disp->Xdpy, VisualIDMask, &visTemplate, &nvis); + if (!visInfo) { + return EGL_NO_SURFACE; + } + + attr.background_pixel = 0; + attr.border_pixel = 0; + attr.colormap = XCreateColormap(disp->Xdpy, root, + visInfo->visual, AllocNone); + attr.event_mask = StructureNotifyMask | ExposureMask | KeyPressMask; + mask = CWBackPixel | CWBorderPixel | CWColormap | CWEventMask; + + window = XCreateWindow(disp->Xdpy, root, 0, 0, + xdri_surf->Base.Width, xdri_surf->Base.Height, + 0, visInfo->depth, InputOutput, + visInfo->visual, mask, &attr); + + /*XMapWindow(disp->Xdpy, window);*/ + XFree(visInfo); + + /* set hints and properties */ + /* + sizehints.width = xdri_surf->Base.Width; + sizehints.height = xdri_surf->Base.Height; + sizehints.flags = USPosition; + XSetNormalHints(disp->Xdpy, window, &sizehints); + */ + } + + if (!XF86DRICreateDrawable(disp->Xdpy, scrn, window, &xdri_surf->hDrawable)) { + free(xdri_surf); + return EGL_FALSE; + } + + xdri_surf->driDrawable = window; + + _eglSaveSurface(&xdri_surf->Base); + + _eglLog(_EGL_DEBUG, + "XDRI: CreatePbufferSurface handle %d hDrawable %d", + 
_eglGetSurfaceHandle(&xdri_surf->Base), + (int) xdri_surf->hDrawable); + + return _eglGetSurfaceHandle(&xdri_surf->Base); +} + + + +static EGLBoolean +xdri_eglDestroySurface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface) +{ + struct xdri_egl_surface *xdri_surf = lookup_surface(surface); + if (xdri_surf) { + _eglHashRemove(_eglGlobal.Surfaces, (EGLuint) surface); + if (xdri_surf->Base.IsBound) { + xdri_surf->Base.DeletePending = EGL_TRUE; + } + else { + /* + st_unreference_framebuffer(surf->Framebuffer); + */ + free(xdri_surf); + } + return EGL_TRUE; + } + else { + _eglError(EGL_BAD_SURFACE, "eglDestroySurface"); + return EGL_FALSE; + } +} + + +static EGLBoolean +xdri_eglBindTexImage(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface, + EGLint buffer) +{ + typedef int (*bind_teximage)(__DRInativeDisplay *dpy, + __DRIid surface, __DRIscreen *psc, + int buffer, int target, int format, + int level, int mipmap); + + bind_teximage egl_dri_bind_teximage; + + _EGLDisplay *disp = _eglLookupDisplay(dpy); + + struct xdri_egl_context *xdri_ctx = current_context(); + struct xdri_egl_driver *xdri_drv = xdri_egl_driver(drv); + struct xdri_egl_surface *xdri_surf = lookup_surface(surface); + + __DRIid dri_surf = xdri_surf ? 
xdri_surf->driDrawable : 0; + + __GLXscreenConfigs *scrnConf = xdri_drv->glx_priv->screenConfigs; + __DRIscreen *psc = &scrnConf->driScreen; + + /* this call just does error checking */ + if (!_eglBindTexImage(drv, dpy, surface, buffer)) { + return EGL_FALSE; + } + + egl_dri_bind_teximage = + (bind_teximage) dlsym(NULL, "egl_dri_bind_teximage"); + if (egl_dri_bind_teximage) { + return egl_dri_bind_teximage(disp->Xdpy, dri_surf, psc, + buffer, + xdri_surf->Base.TextureTarget, + xdri_surf->Base.TextureFormat, + xdri_surf->Base.MipmapLevel, + xdri_surf->Base.MipmapTexture); + } + else { + /* fallback path based on glCopyTexImage() */ + /* Get/save currently bound 2D texobj name */ + glGetIntegerv_t glGetIntegerv_func = + (glGetIntegerv_t) dlsym(NULL, "glGetIntegerv"); + GLint curTexObj = 0; + if (glGetIntegerv_func) { + (*glGetIntegerv_func)(GL_TEXTURE_BINDING_2D, &curTexObj); + } + xdri_ctx->bound_tex_object = curTexObj; + } + + return EGL_FALSE; +} + + +static EGLBoolean +xdri_eglReleaseTexImage(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface, + EGLint buffer) +{ + typedef int (*release_teximage)(__DRInativeDisplay *dpy, + __DRIid surface, __DRIscreen *psc, + int buffer, int target, int format, + int level, int mipmap); + release_teximage egl_dri_release_teximage; + + _EGLDisplay *disp = _eglLookupDisplay(dpy); + + struct xdri_egl_context *xdri_ctx = current_context(); + struct xdri_egl_driver *xdri_drv = xdri_egl_driver(drv); + struct xdri_egl_surface *xdri_surf = lookup_surface(surface); + + __DRIid dri_surf = xdri_surf ? 
xdri_surf->driDrawable : 0; + + __GLXscreenConfigs *scrnConf = xdri_drv->glx_priv->screenConfigs; + __DRIscreen *psc = &scrnConf->driScreen; + + /* this call just does error checking */ + if (!_eglReleaseTexImage(drv, dpy, surface, buffer)) { + return EGL_FALSE; + } + + egl_dri_release_teximage = + (release_teximage) dlsym(NULL, "egl_dri_release_teximage"); + if (egl_dri_release_teximage) { + return egl_dri_release_teximage(disp->Xdpy, dri_surf, psc, + buffer, + xdri_surf->Base.TextureTarget, + xdri_surf->Base.TextureFormat, + xdri_surf->Base.MipmapLevel, + xdri_surf->Base.MipmapTexture); + } + else { + /* fallback path based on glCopyTexImage() */ + glGetIntegerv_t glGetIntegerv_func = + (glGetIntegerv_t) dlsym(NULL, "glGetIntegerv"); + glBindTexture_t glBindTexture_func = + (glBindTexture_t) dlsym(NULL, "glBindTexture"); + glCopyTexImage2D_t glCopyTexImage2D_func = + (glCopyTexImage2D_t) dlsym(NULL, "glCopyTexImage2D"); + GLint curTexObj; + GLenum intFormat; + GLint level, width, height; + + if (xdri_surf->Base.TextureFormat == EGL_TEXTURE_RGBA) + intFormat = GL_RGBA; + else + intFormat = GL_RGB; + level = xdri_surf->Base.MipmapLevel; + width = xdri_surf->Base.Width >> level; + height = xdri_surf->Base.Height >> level; + + if (width > 0 && height > 0 && + glGetIntegerv_func && glBindTexture_func && glCopyTexImage2D_func) { + glGetIntegerv_func(GL_TEXTURE_BINDING_2D, &curTexObj); + /* restore texobj from time of eglBindTexImage() call */ + if (curTexObj != xdri_ctx->bound_tex_object) + glBindTexture_func(GL_TEXTURE_2D, xdri_ctx->bound_tex_object); + /* copy pbuffer image to texture */ + glCopyTexImage2D_func(GL_TEXTURE_2D, + level, + intFormat, + 0, 0, width, height, 0); + /* restore current texture */ + if (curTexObj != xdri_ctx->bound_tex_object) + glBindTexture_func(GL_TEXTURE_2D, curTexObj); + } + xdri_ctx->bound_tex_object = -1; + } + + return EGL_FALSE; +} + + +static EGLBoolean +xdri_eglSwapBuffers(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw) +{ + 
_EGLDisplay *disp = _eglLookupDisplay(dpy); + + _eglLog(_EGL_DEBUG, "XDRI: EGL SwapBuffers"); + + /* error checking step: */ + if (!_eglSwapBuffers(drv, dpy, draw)) + return EGL_FALSE; + + { + struct xdri_egl_surface *xdri_surf = lookup_surface(draw); + struct xdri_egl_driver *xdri_drv = xdri_egl_driver(drv); + __GLXscreenConfigs *scrnConf = xdri_drv->glx_priv->screenConfigs; + __DRIscreen *psc = &scrnConf->driScreen; + __DRIdrawable * const pdraw = psc->getDrawable(disp->Xdpy, + xdri_surf->driDrawable, + psc->private); + + if (pdraw) + pdraw->swapBuffers(disp->Xdpy, pdraw->private); + else + _eglLog(_EGL_WARNING, "pdraw is null in SwapBuffers"); + } + + return EGL_TRUE; +} + + +/** + * This is the main entrypoint into the driver, called by libEGL. + * Create a new _EGLDriver object and init its dispatch table. + */ +_EGLDriver * +_eglMain(_EGLDisplay *disp, const char *args) +{ + struct xdri_egl_driver *xdri_drv = CALLOC_STRUCT(xdri_egl_driver); + if (!xdri_drv) + return NULL; + + /* Tell libGL to prefer the EGL drivers over regular DRI drivers */ + __glXPreferEGL(1); + + _eglInitDriverFallbacks(&xdri_drv->Base); + xdri_drv->Base.API.Initialize = xdri_eglInitialize; + xdri_drv->Base.API.Terminate = xdri_eglTerminate; + + xdri_drv->Base.API.GetProcAddress = xdri_eglGetProcAddress; + + xdri_drv->Base.API.CreateContext = xdri_eglCreateContext; + xdri_drv->Base.API.MakeCurrent = xdri_eglMakeCurrent; + xdri_drv->Base.API.CreateWindowSurface = xdri_eglCreateWindowSurface; + xdri_drv->Base.API.CreatePbufferSurface = xdri_eglCreatePbufferSurface; + xdri_drv->Base.API.DestroySurface = xdri_eglDestroySurface; + xdri_drv->Base.API.BindTexImage = xdri_eglBindTexImage; + xdri_drv->Base.API.ReleaseTexImage = xdri_eglReleaseTexImage; + xdri_drv->Base.API.SwapBuffers = xdri_eglSwapBuffers; + + xdri_drv->Base.ClientAPIsMask = (EGL_OPENGL_BIT | + EGL_OPENGL_ES_BIT | + EGL_OPENGL_ES2_BIT | + EGL_OPENVG_BIT); + xdri_drv->Base.Name = "X/DRI"; + + _eglLog(_EGL_DEBUG, "XDRI: main(%s)", 
args); + + return &xdri_drv->Base; +} diff --git a/src/egl/main/Makefile b/src/egl/main/Makefile index 7ab78197c7..8ea60c0199 100644 --- a/src/egl/main/Makefile +++ b/src/egl/main/Makefile @@ -8,34 +8,49 @@ INCLUDE_DIRS = -I$(TOP)/include -I$(TOP)/src/mesa/glapi HEADERS = \ eglconfig.h \ + eglconfigutil.h \ eglcontext.h \ + egldefines.h \ egldisplay.h \ egldriver.h \ eglglobals.h \ egllog.h \ eglhash.h \ + eglmisc.h \ eglmode.h \ eglscreen.h \ - eglsurface.h + eglstring.h \ + eglsurface.h \ + eglx.h SOURCES = \ eglapi.c \ eglconfig.c \ + eglconfigutil.c \ eglcontext.c \ egldisplay.c \ egldriver.c \ eglglobals.c \ egllog.c \ eglhash.c \ + eglmisc.c \ eglmode.c \ eglscreen.c \ - eglsurface.c + eglstring.c \ + eglsurface.c \ + eglx.c OBJECTS = $(SOURCES:.c=.o) +# Undefined for now +LOCAL_CFLAGS = -D_EGL_PLATFORM_X=1 + +LIBS = -lX11 + + .c.o: - $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $(LOCAL_CFLAGS) $< -o $@ @@ -48,7 +63,7 @@ library: $(TOP)/$(LIB_DIR)/libEGL.so $(TOP)/$(LIB_DIR)/libEGL.so: $(OBJECTS) $(MKLIB) -o EGL -linker '$(CC)' -ldflags '$(LDFLAGS)' \ -major 1 -minor 0 \ - -install $(TOP)/$(LIB_DIR) -ldl $(OBJECTS) + -install $(TOP)/$(LIB_DIR) -ldl $(OBJECTS) $(LIBS) @@ -59,9 +74,11 @@ clean: depend: $(SOURCES) $(HEADERS) @ echo "running $(MKDEP)" + @ rm -f depend @ touch depend $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) \ - $(SOURCES) $(HEADERS) > /dev/null + $(SOURCES) $(HEADERS) > /dev/null 2>/dev/null + include depend # DO NOT DELETE diff --git a/src/egl/main/README.txt b/src/egl/main/README.txt new file mode 100644 index 0000000000..b3d253dd13 --- /dev/null +++ b/src/egl/main/README.txt @@ -0,0 +1,71 @@ + + +Notes about the EGL library: + + +The EGL code here basically consists of two things: + +1. An EGL API dispatcher. This directly routes all the eglFooBar() API + calls into driver-specific functions. + +2. Fallbacks for EGL API functions. A driver _could_ implement all the + EGL API calls from scratch. 
But in many cases, the fallbacks provided + in libEGL (such as eglChooseConfig()) will do the job. + + + +Bootstrapping: + +When the app calls eglOpenDisplay() a device driver is selected and loaded +(look for dlsym() or LoadLibrary() in egldriver.c). + +The driver's _eglMain() function is then called. This driver function +allocates, initializes and returns a new _EGLDriver object (usually a +subclass of that type). + +As part of initialization, the dispatch table in _EGLDriver->API must be +populated with all the EGL entrypoints. Typically, _eglInitDriverFallbacks() +can be used to plug in default/fallback functions. Some functions like +driver->API.Initialize and driver->API.Terminate _must_ be implemented +with driver-specific code (no default/fallback function is possible). + + +A bit later, the app will call eglInitialize(). This will get routed +to the driver->API.Initialize() function. Any additional driver +initialization that wasn't done in _eglMain() should be done at this +point. Typically, this will involve setting up visual configs, etc. + + + +Special Functions: + +Certain EGL functions _must_ be implemented by the driver. These include: + +eglCreateContext +eglCreateWindowSurface +eglCreatePixmapSurface +eglCreatePbufferSurface +eglMakeCurrent +eglSwapBuffers + +Most of the EGLConfig-related functions can be implemented with the +defaults/fallbacks. Same thing for the eglGet/Query functions. + + + + +Teardown: + +When eglTerminate() is called, the driver->API.Terminate() function is +called. The driver should clean up after itself. eglTerminate() will +then close/unload the driver (shared library). + + + + +Subclassing: + +The internal libEGL data structures such as _EGLDisplay, _EGLContext, +_EGLSurface, etc. should be considered base classes from which drivers +will derive subclasses. 
+ diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index bfa580e6c3..9df938e188 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -43,36 +43,63 @@ /** - * NOTE: displayName is treated as a string in _eglChooseDriver()!!! - * This will probably change! - * See _eglChooseDriver() for details! + * This is typically the first EGL function that an application calls. + * We initialize our global vars and create a private _EGLDisplay object. */ -EGLDisplay APIENTRY -eglGetDisplay(NativeDisplayType displayName) +EGLDisplay EGLAPIENTRY +eglGetDisplay(NativeDisplayType nativeDisplay) { _EGLDisplay *dpy; _eglInitGlobals(); - dpy = _eglNewDisplay(displayName); - if (dpy) - return dpy->Handle; - else - return EGL_NO_DISPLAY; + dpy = _eglNewDisplay(nativeDisplay); + return _eglGetDisplayHandle(dpy); } -EGLBoolean APIENTRY +/** + * This is typically the second EGL function that an application calls. + * Here we load/initialize the actual hardware driver. + */ +EGLBoolean EGLAPIENTRY eglInitialize(EGLDisplay dpy, EGLint *major, EGLint *minor) { + EGLint major_int, minor_int; + if (dpy) { - _EGLDriver *drv = _eglChooseDriver(dpy); - if (drv) - return drv->API.Initialize(drv, dpy, major, minor); + EGLBoolean retVal; + _EGLDisplay *dpyPriv = _eglLookupDisplay(dpy); + if (!dpyPriv) { + return EGL_FALSE; + } + dpyPriv->Driver = _eglOpenDriver(dpyPriv, + dpyPriv->DriverName, + dpyPriv->DriverArgs); + if (!dpyPriv->Driver) { + return EGL_FALSE; + } + /* Initialize the particular driver now */ + retVal = dpyPriv->Driver->API.Initialize(dpyPriv->Driver, dpy, + &major_int, &minor_int); + + dpyPriv->Driver->APImajor = major_int; + dpyPriv->Driver->APIminor = minor_int; + snprintf(dpyPriv->Driver->Version, sizeof(dpyPriv->Driver->Version), + "%d.%d (%s)", major_int, minor_int, dpyPriv->Driver->Name); + + /* Update applications version of major and minor if not NULL */ + if((major != NULL) && (minor != NULL)) + { + *major = major_int; + *minor = minor_int; + } + + 
return retVal; } return EGL_FALSE; } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglTerminate(EGLDisplay dpy) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -83,7 +110,7 @@ eglTerminate(EGLDisplay dpy) } -const char * APIENTRY +const char * EGLAPIENTRY eglQueryString(EGLDisplay dpy, EGLint name) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -94,7 +121,7 @@ eglQueryString(EGLDisplay dpy, EGLint name) } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglGetConfigs(EGLDisplay dpy, EGLConfig *configs, EGLint config_size, EGLint *num_config) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -103,7 +130,7 @@ eglGetConfigs(EGLDisplay dpy, EGLConfig *configs, EGLint config_size, EGLint *nu } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglChooseConfig(EGLDisplay dpy, const EGLint *attrib_list, EGLConfig *configs, EGLint config_size, EGLint *num_config) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -111,7 +138,7 @@ eglChooseConfig(EGLDisplay dpy, const EGLint *attrib_list, EGLConfig *configs, E } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglGetConfigAttrib(EGLDisplay dpy, EGLConfig config, EGLint attribute, EGLint *value) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -119,7 +146,7 @@ eglGetConfigAttrib(EGLDisplay dpy, EGLConfig config, EGLint attribute, EGLint *v } -EGLContext APIENTRY +EGLContext EGLAPIENTRY eglCreateContext(EGLDisplay dpy, EGLConfig config, EGLContext share_list, const EGLint *attrib_list) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -127,7 +154,7 @@ eglCreateContext(EGLDisplay dpy, EGLConfig config, EGLContext share_list, const } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglDestroyContext(EGLDisplay dpy, EGLContext ctx) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -135,7 +162,7 @@ eglDestroyContext(EGLDisplay dpy, EGLContext ctx) } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglMakeCurrent(EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext ctx) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -143,7 +170,7 @@ eglMakeCurrent(EGLDisplay dpy, 
EGLSurface draw, EGLSurface read, EGLContext ctx) } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglQueryContext(EGLDisplay dpy, EGLContext ctx, EGLint attribute, EGLint *value) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -151,7 +178,7 @@ eglQueryContext(EGLDisplay dpy, EGLContext ctx, EGLint attribute, EGLint *value) } -EGLSurface APIENTRY +EGLSurface EGLAPIENTRY eglCreateWindowSurface(EGLDisplay dpy, EGLConfig config, NativeWindowType window, const EGLint *attrib_list) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -159,7 +186,7 @@ eglCreateWindowSurface(EGLDisplay dpy, EGLConfig config, NativeWindowType window } -EGLSurface APIENTRY +EGLSurface EGLAPIENTRY eglCreatePixmapSurface(EGLDisplay dpy, EGLConfig config, NativePixmapType pixmap, const EGLint *attrib_list) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -167,7 +194,7 @@ eglCreatePixmapSurface(EGLDisplay dpy, EGLConfig config, NativePixmapType pixmap } -EGLSurface APIENTRY +EGLSurface EGLAPIENTRY eglCreatePbufferSurface(EGLDisplay dpy, EGLConfig config, const EGLint *attrib_list) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -175,7 +202,7 @@ eglCreatePbufferSurface(EGLDisplay dpy, EGLConfig config, const EGLint *attrib_l } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglDestroySurface(EGLDisplay dpy, EGLSurface surface) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -183,7 +210,7 @@ eglDestroySurface(EGLDisplay dpy, EGLSurface surface) } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglQuerySurface(EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint *value) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -191,7 +218,7 @@ eglQuerySurface(EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint *va } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglSurfaceAttrib(EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint value) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -199,7 +226,7 @@ eglSurfaceAttrib(EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint va } -EGLBoolean APIENTRY 
+EGLBoolean EGLAPIENTRY eglBindTexImage(EGLDisplay dpy, EGLSurface surface, EGLint buffer) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -207,7 +234,7 @@ eglBindTexImage(EGLDisplay dpy, EGLSurface surface, EGLint buffer) } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglReleaseTexImage(EGLDisplay dpy, EGLSurface surface, EGLint buffer) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -215,7 +242,7 @@ eglReleaseTexImage(EGLDisplay dpy, EGLSurface surface, EGLint buffer) } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglSwapInterval(EGLDisplay dpy, EGLint interval) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -223,7 +250,7 @@ eglSwapInterval(EGLDisplay dpy, EGLint interval) } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglSwapBuffers(EGLDisplay dpy, EGLSurface draw) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -231,7 +258,7 @@ eglSwapBuffers(EGLDisplay dpy, EGLSurface draw) } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglCopyBuffers(EGLDisplay dpy, EGLSurface surface, NativePixmapType target) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -239,7 +266,7 @@ eglCopyBuffers(EGLDisplay dpy, EGLSurface surface, NativePixmapType target) } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglWaitGL(void) { EGLDisplay dpy = eglGetCurrentDisplay(); @@ -252,7 +279,7 @@ eglWaitGL(void) } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglWaitNative(EGLint engine) { EGLDisplay dpy = eglGetCurrentDisplay(); @@ -265,40 +292,31 @@ eglWaitNative(EGLint engine) } -EGLDisplay APIENTRY +EGLDisplay EGLAPIENTRY eglGetCurrentDisplay(void) { _EGLDisplay *dpy = _eglGetCurrentDisplay(); - if (dpy) - return dpy->Handle; - else - return EGL_NO_DISPLAY; + return _eglGetDisplayHandle(dpy); } -EGLContext APIENTRY +EGLContext EGLAPIENTRY eglGetCurrentContext(void) { _EGLContext *ctx = _eglGetCurrentContext(); - if (ctx) - return ctx->Handle; - else - return EGL_NO_CONTEXT; + return _eglGetContextHandle(ctx); } -EGLSurface APIENTRY +EGLSurface EGLAPIENTRY eglGetCurrentSurface(EGLint readdraw) { _EGLSurface 
*s = _eglGetCurrentSurface(readdraw); - if (s) - return s->Handle; - else - return EGL_NO_SURFACE; + return _eglGetSurfaceHandle(s); } -EGLint APIENTRY +EGLint EGLAPIENTRY eglGetError(void) { _EGLThreadInfo *t = _eglGetCurrentThread(); @@ -308,7 +326,7 @@ eglGetError(void) } -void (* APIENTRY eglGetProcAddress(const char *procname))() +void (* EGLAPIENTRY eglGetProcAddress(const char *procname))() { typedef void (*genericFunc)(); struct name_function { @@ -376,12 +394,15 @@ void (* APIENTRY eglGetProcAddress(const char *procname))() return (genericFunc) egl_functions[i].function; } } -#if 0 - /* XXX enable this code someday */ - return (genericFunc) _glapi_get_proc_address(procname); -#else + + /* now loop over drivers to query their procs */ + for (i = 0; i < _eglGlobal.NumDrivers; i++) { + _EGLProc p = _eglGlobal.Drivers[i]->API.GetProcAddress(procname); + if (p) + return p; + } + return NULL; -#endif } @@ -389,7 +410,7 @@ void (* APIENTRY eglGetProcAddress(const char *procname))() * EGL_MESA_screen extension */ -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglChooseModeMESA(EGLDisplay dpy, EGLScreenMESA screen, const EGLint *attrib_list, EGLModeMESA *modes, EGLint modes_size, EGLint *num_modes) @@ -402,7 +423,7 @@ eglChooseModeMESA(EGLDisplay dpy, EGLScreenMESA screen, } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglGetModesMESA(EGLDisplay dpy, EGLScreenMESA screen, EGLModeMESA *modes, EGLint mode_size, EGLint *num_mode) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -413,7 +434,7 @@ eglGetModesMESA(EGLDisplay dpy, EGLScreenMESA screen, EGLModeMESA *modes, EGLint } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglGetModeAttribMESA(EGLDisplay dpy, EGLModeMESA mode, EGLint attribute, EGLint *value) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -424,7 +445,7 @@ eglGetModeAttribMESA(EGLDisplay dpy, EGLModeMESA mode, EGLint attribute, EGLint } -EGLBoolean APIENTRY +EGLBoolean EGLAPIENTRY eglCopyContextMESA(EGLDisplay dpy, EGLContext source, EGLContext dest, EGLint 
mask) { _EGLDriver *drv = _eglLookupDriver(dpy); @@ -508,21 +529,42 @@ eglQueryModeStringMESA(EGLDisplay dpy, EGLModeMESA mode) #ifdef EGL_VERSION_1_2 + +/** + * Specify the client API to use for subsequent calls including: + * eglCreateContext() + * eglGetCurrentContext() + * eglGetCurrentDisplay() + * eglGetCurrentSurface() + * eglMakeCurrent(when the ctx parameter is EGL NO CONTEXT) + * eglWaitClient() + * eglWaitNative() + * See section 3.7 "Rendering Context" in the EGL specification for details. + */ EGLBoolean eglBindAPI(EGLenum api) { _EGLThreadInfo *t = _eglGetCurrentThread(); switch (api) { +#ifdef EGL_VERSION_1_4 + case EGL_OPENGL_API: + if (_eglGlobal.ClientAPIsMask & EGL_OPENGL_BIT) { + t->CurrentAPI = api; + return EGL_TRUE; + } + _eglError(EGL_BAD_PARAMETER, "eglBindAPI"); + return EGL_FALSE; +#endif case EGL_OPENGL_ES_API: - if (_eglGlobal.OpenGLESAPISupported) { + if (_eglGlobal.ClientAPIsMask & (EGL_OPENGL_ES_BIT | EGL_OPENGL_ES2_BIT)) { t->CurrentAPI = api; return EGL_TRUE; } _eglError(EGL_BAD_PARAMETER, "eglBindAPI"); return EGL_FALSE; case EGL_OPENVG_API: - if (_eglGlobal.OpenVGAPISupported) { + if (_eglGlobal.ClientAPIsMask & EGL_OPENVG_BIT) { t->CurrentAPI = api; return EGL_TRUE; } @@ -535,6 +577,18 @@ eglBindAPI(EGLenum api) } +/** + * Return the last value set with eglBindAPI(). 
+ */ +EGLenum +eglQueryAPI(void) +{ + /* returns one of EGL_OPENGL_API, EGL_OPENGL_ES_API or EGL_OPENVG_API */ + _EGLThreadInfo *t = _eglGetCurrentThread(); + return t->CurrentAPI; +} + + EGLSurface eglCreatePbufferFromClientBuffer(EGLDisplay dpy, EGLenum buftype, EGLClientBuffer buffer, EGLConfig config, @@ -546,15 +600,6 @@ eglCreatePbufferFromClientBuffer(EGLDisplay dpy, EGLenum buftype, } -EGLenum -eglQueryAPI(void) -{ - /* returns one of EGL_OPENGL_ES_API or EGL_OPENVG_API */ - _EGLThreadInfo *t = _eglGetCurrentThread(); - return t->CurrentAPI; -} - - EGLBoolean eglReleaseThread(void) { diff --git a/src/egl/main/eglapi.h b/src/egl/main/eglapi.h index 555aa5dd9e..f6163a0c7a 100644 --- a/src/egl/main/eglapi.h +++ b/src/egl/main/eglapi.h @@ -2,10 +2,15 @@ #define EGLAPI_INCLUDED /** - * Typedefs for all EGL API entrypoint functions. + * A generic function ptr type */ +typedef void (*_EGLProc)(); +/** + * Typedefs for all EGL API entrypoint functions. + */ + /* driver funcs */ typedef EGLBoolean (*Initialize_t)(_EGLDriver *, EGLDisplay dpy, EGLint *major, EGLint *minor); typedef EGLBoolean (*Terminate_t)(_EGLDriver *, EGLDisplay dpy); @@ -39,6 +44,9 @@ typedef const char *(*QueryString_t)(_EGLDriver *drv, EGLDisplay dpy, EGLint nam typedef EGLBoolean (*WaitGL_t)(_EGLDriver *drv, EGLDisplay dpy); typedef EGLBoolean (*WaitNative_t)(_EGLDriver *drv, EGLDisplay dpy, EGLint engine); +typedef _EGLProc (*GetProcAddress_t)(const char *procname); + + #ifdef EGL_MESA_screen_surface typedef EGLBoolean (*ChooseModeMESA_t)(_EGLDriver *drv, EGLDisplay dpy, EGLScreenMESA screen, const EGLint *attrib_list, EGLModeMESA *modes, EGLint modes_size, EGLint *num_modes); @@ -95,6 +103,7 @@ struct _egl_api QueryString_t QueryString; WaitGL_t WaitGL; WaitNative_t WaitNative; + GetProcAddress_t GetProcAddress; /* EGL_MESA_screen extension */ ChooseModeMESA_t ChooseModeMESA; diff --git a/src/egl/main/eglconfig.c b/src/egl/main/eglconfig.c index 2a62f95b54..b19988f49a 100644 --- 
a/src/egl/main/eglconfig.c +++ b/src/egl/main/eglconfig.c @@ -17,48 +17,6 @@ #define MIN2(A, B) (((A) < (B)) ? (A) : (B)) -/** - * Convert an _EGLConfig to a __GLcontextModes object. - * NOTE: This routine may be incomplete - we're only making sure that - * the fields needed by Mesa (for _mesa_create_context/framebuffer) are - * set correctly. - */ -void -_eglConfigToContextModesRec(const _EGLConfig *config, __GLcontextModes *mode) -{ - memset(mode, 0, sizeof(*mode)); - - mode->rgbMode = GL_TRUE; /* no color index */ - mode->colorIndexMode = GL_FALSE; - mode->doubleBufferMode = GL_TRUE; /* always DB for now */ - mode->stereoMode = GL_FALSE; - - mode->redBits = GET_CONFIG_ATTRIB(config, EGL_RED_SIZE); - mode->greenBits = GET_CONFIG_ATTRIB(config, EGL_GREEN_SIZE); - mode->blueBits = GET_CONFIG_ATTRIB(config, EGL_BLUE_SIZE); - mode->alphaBits = GET_CONFIG_ATTRIB(config, EGL_ALPHA_SIZE); - mode->rgbBits = GET_CONFIG_ATTRIB(config, EGL_BUFFER_SIZE); - - /* no rgba masks - fix? */ - - mode->depthBits = GET_CONFIG_ATTRIB(config, EGL_DEPTH_SIZE); - mode->haveDepthBuffer = mode->depthBits > 0; - - mode->stencilBits = GET_CONFIG_ATTRIB(config, EGL_STENCIL_SIZE); - mode->haveStencilBuffer = mode->stencilBits > 0; - - /* no accum */ - - mode->level = GET_CONFIG_ATTRIB(config, EGL_LEVEL); - mode->samples = GET_CONFIG_ATTRIB(config, EGL_SAMPLES); - mode->sampleBuffers = GET_CONFIG_ATTRIB(config, EGL_SAMPLE_BUFFERS); - - /* surface type - not really needed */ - mode->visualType = GLX_TRUE_COLOR; - mode->renderType = GLX_RGBA_BIT; -} - - void _eglSetConfigAttrib(_EGLConfig *config, EGLint attr, EGLint val) { @@ -76,7 +34,7 @@ void _eglInitConfig(_EGLConfig *config, EGLint id) { memset(config, 0, sizeof(*config)); - config->Handle = id; + config->Handle = (EGLConfig) id; _eglSetConfigAttrib(config, EGL_CONFIG_ID, id); _eglSetConfigAttrib(config, EGL_BIND_TO_TEXTURE_RGB, EGL_DONT_CARE); _eglSetConfigAttrib(config, EGL_BIND_TO_TEXTURE_RGBA, EGL_DONT_CARE); @@ -85,9 +43,7 @@ 
_eglInitConfig(_EGLConfig *config, EGLint id) _eglSetConfigAttrib(config, EGL_NATIVE_VISUAL_TYPE, EGL_DONT_CARE); _eglSetConfigAttrib(config, EGL_MIN_SWAP_INTERVAL, EGL_DONT_CARE); _eglSetConfigAttrib(config, EGL_MAX_SWAP_INTERVAL, EGL_DONT_CARE); - _eglSetConfigAttrib(config, EGL_SURFACE_TYPE, - EGL_SCREEN_BIT_MESA | EGL_PBUFFER_BIT | - EGL_PIXMAP_BIT | EGL_WINDOW_BIT); + _eglSetConfigAttrib(config, EGL_SURFACE_TYPE, EGL_WINDOW_BIT); _eglSetConfigAttrib(config, EGL_TRANSPARENT_TYPE, EGL_NONE); _eglSetConfigAttrib(config, EGL_TRANSPARENT_RED_VALUE, EGL_DONT_CARE); _eglSetConfigAttrib(config, EGL_TRANSPARENT_GREEN_VALUE, EGL_DONT_CARE); @@ -100,7 +56,19 @@ _eglInitConfig(_EGLConfig *config, EGLint id) /** + * Return the public handle for an internal _EGLConfig. + * This is the inverse of _eglLookupConfig(). + */ +EGLConfig +_eglGetConfigHandle(_EGLConfig *config) +{ + return config ? config->Handle : 0; +} + + +/** * Given an EGLConfig handle, return the corresponding _EGLConfig object. + * This is the inverse of _eglGetConfigHandle(). */ _EGLConfig * _eglLookupConfig(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config) @@ -108,8 +76,8 @@ _eglLookupConfig(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config) EGLint i; _EGLDisplay *disp = _eglLookupDisplay(dpy); for (i = 0; i < disp->NumConfigs; i++) { - if (disp->Configs[i].Handle == config) { - return disp->Configs + i; + if (disp->Configs[i]->Handle == config) { + return disp->Configs[i]; } } return NULL; @@ -118,23 +86,32 @@ _eglLookupConfig(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config) /** * Add the given _EGLConfig to the given display. + * Note that we just save the ptr to the config (we don't copy the config). 
*/ _EGLConfig * -_eglAddConfig(_EGLDisplay *display, const _EGLConfig *config) +_eglAddConfig(_EGLDisplay *display, _EGLConfig *config) { - _EGLConfig *newConfigs; + _EGLConfig **newConfigs; EGLint n; + /* do some sanity checks on the config's attribs */ + assert(GET_CONFIG_ATTRIB(config, EGL_CONFIG_ID) > 0); + assert(GET_CONFIG_ATTRIB(config, EGL_RENDERABLE_TYPE) != 0x0); + assert(GET_CONFIG_ATTRIB(config, EGL_SURFACE_TYPE) != 0x0); + assert(GET_CONFIG_ATTRIB(config, EGL_RED_SIZE) > 0); + assert(GET_CONFIG_ATTRIB(config, EGL_GREEN_SIZE) > 0); + assert(GET_CONFIG_ATTRIB(config, EGL_BLUE_SIZE) > 0); + n = display->NumConfigs; - newConfigs = (_EGLConfig *) realloc(display->Configs, - (n + 1) * sizeof(_EGLConfig)); + /* realloc array of ptrs */ + newConfigs = (_EGLConfig **) realloc(display->Configs, + (n + 1) * sizeof(_EGLConfig *)); if (newConfigs) { display->Configs = newConfigs; - display->Configs[n] = *config; /* copy struct */ - display->Configs[n].Handle = n; + display->Configs[n] = config; display->NumConfigs++; - return display->Configs + n; + return config; } else { return NULL; @@ -176,7 +153,7 @@ _eglParseConfigAttribs(_EGLConfig *config, const EGLint *attrib_list) } else if (attr == EGL_RENDERABLE_TYPE) { EGLint renType = attrib_list[++i]; - if (renType & ~(EGL_OPENGL_ES_BIT | EGL_OPENVG_BIT)) { + if (renType & ~(EGL_OPENGL_ES_BIT | EGL_OPENGL_ES2_BIT | EGL_OPENVG_BIT)) { _eglError(EGL_BAD_ATTRIBUTE, "eglChooseConfig"); return EGL_FALSE; } @@ -360,8 +337,8 @@ _eglChooseConfig(_EGLDriver *drv, EGLDisplay dpy, const EGLint *attrib_list, /* make array of pointers to qualifying configs */ for (i = count = 0; i < disp->NumConfigs && count < config_size; i++) { - if (_eglConfigQualifies(disp->Configs + i, &criteria)) { - configList[count++] = disp->Configs + i; + if (_eglConfigQualifies(disp->Configs[i], &criteria)) { + configList[count++] = disp->Configs[i]; } } @@ -369,8 +346,10 @@ _eglChooseConfig(_EGLDriver *drv, EGLDisplay dpy, const EGLint *attrib_list, 
qsort(configList, count, sizeof(_EGLConfig *), _eglCompareConfigs); /* copy config handles to output array */ - for (i = 0; i < count; i++) { - configs[i] = configList[i]->Handle; + if (configs) { + for (i = 0; i < count; i++) { + configs[i] = configList[i]->Handle; + } } free(configList); @@ -419,7 +398,7 @@ _eglGetConfigs(_EGLDriver *drv, EGLDisplay dpy, EGLConfig *configs, EGLint i; *num_config = MIN2(disp->NumConfigs, config_size); for (i = 0; i < *num_config; i++) { - configs[i] = disp->Configs[i].Handle; + configs[i] = disp->Configs[i]->Handle; } } else { @@ -429,210 +408,3 @@ _eglGetConfigs(_EGLDriver *drv, EGLDisplay dpy, EGLConfig *configs, return EGL_TRUE; } - - -/** - * Creates a set of \c __GLcontextModes that a driver will expose. - * - * A set of \c __GLcontextModes will be created based on the supplied - * parameters. The number of modes processed will be 2 * - * \c num_depth_stencil_bits * \c num_db_modes. - * - * For the most part, data is just copied from \c depth_bits, \c stencil_bits, - * \c db_modes, and \c visType into each \c __GLcontextModes element. - * However, the meanings of \c fb_format and \c fb_type require further - * explanation. The \c fb_format specifies which color components are in - * each pixel and what the default order is. For example, \c GL_RGB specifies - * that red, green, blue are available and red is in the "most significant" - * position and blue is in the "least significant". The \c fb_type specifies - * the bit sizes of each component and the actual ordering. For example, if - * \c GL_UNSIGNED_SHORT_5_6_5_REV is specified with \c GL_RGB, bits [15:11] - * are the blue value, bits [10:5] are the green value, and bits [4:0] are - * the red value. - * - * One sublte issue is the combination of \c GL_RGB or \c GL_BGR and either - * of the \c GL_UNSIGNED_INT_8_8_8_8 modes. 
The resulting mask values in the - * \c __GLcontextModes structure is \b identical to the \c GL_RGBA or - * \c GL_BGRA case, except the \c alphaMask is zero. This means that, as - * far as this routine is concerned, \c GL_RGB with \c GL_UNSIGNED_INT_8_8_8_8 - * still uses 32-bits. - * - * If in doubt, look at the tables used in the function. - * - * \param ptr_to_modes Pointer to a pointer to a linked list of - * \c __GLcontextModes. Upon completion, a pointer to - * the next element to be process will be stored here. - * If the function fails and returns \c GL_FALSE, this - * value will be unmodified, but some elements in the - * linked list may be modified. - * \param fb_format Format of the framebuffer. Currently only \c GL_RGB, - * \c GL_RGBA, \c GL_BGR, and \c GL_BGRA are supported. - * \param fb_type Type of the pixels in the framebuffer. Currently only - * \c GL_UNSIGNED_SHORT_5_6_5, - * \c GL_UNSIGNED_SHORT_5_6_5_REV, - * \c GL_UNSIGNED_INT_8_8_8_8, and - * \c GL_UNSIGNED_INT_8_8_8_8_REV are supported. - * \param depth_bits Array of depth buffer sizes to be exposed. - * \param stencil_bits Array of stencil buffer sizes to be exposed. - * \param num_depth_stencil_bits Number of entries in both \c depth_bits and - * \c stencil_bits. - * \param db_modes Array of buffer swap modes. If an element has a - * value of \c GLX_NONE, then it represents a - * single-buffered mode. Other valid values are - * \c GLX_SWAP_EXCHANGE_OML, \c GLX_SWAP_COPY_OML, and - * \c GLX_SWAP_UNDEFINED_OML. See the - * GLX_OML_swap_method extension spec for more details. - * \param num_db_modes Number of entries in \c db_modes. - * \param visType GLX visual type. Usually either \c GLX_TRUE_COLOR or - * \c GLX_DIRECT_COLOR. - * - * \returns - * \c GL_TRUE on success or \c GL_FALSE on failure. Currently the only - * cause of failure is a bad parameter (i.e., unsupported \c fb_format or - * \c fb_type). 
- * - * \todo - * There is currently no way to support packed RGB modes (i.e., modes with - * exactly 3 bytes per pixel) or floating-point modes. This could probably - * be done by creating some new, private enums with clever names likes - * \c GL_UNSIGNED_3BYTE_8_8_8, \c GL_4FLOAT_32_32_32_32, - * \c GL_4HALF_16_16_16_16, etc. We can cross that bridge when we come to it. - */ -GLboolean -_eglFillInConfigs(_EGLConfig * configs, - GLenum fb_format, GLenum fb_type, - const uint8_t * depth_bits, const uint8_t * stencil_bits, - unsigned num_depth_stencil_bits, - const GLenum * db_modes, unsigned num_db_modes, - int visType) -{ - static const uint8_t bits_table[3][4] = { - /* R G B A */ - { 5, 6, 5, 0 }, /* Any GL_UNSIGNED_SHORT_5_6_5 */ - { 8, 8, 8, 0 }, /* Any RGB with any GL_UNSIGNED_INT_8_8_8_8 */ - { 8, 8, 8, 8 } /* Any RGBA with any GL_UNSIGNED_INT_8_8_8_8 */ - }; - - /* The following arrays are all indexed by the fb_type masked with 0x07. - * Given the four supported fb_type values, this results in valid array - * indices of 3, 4, 5, and 7. 
- */ - static const uint32_t masks_table_rgb[8][4] = { - {0x00000000, 0x00000000, 0x00000000, 0x00000000}, - {0x00000000, 0x00000000, 0x00000000, 0x00000000}, - {0x00000000, 0x00000000, 0x00000000, 0x00000000}, - {0x0000F800, 0x000007E0, 0x0000001F, 0x00000000}, /* 5_6_5 */ - {0x0000001F, 0x000007E0, 0x0000F800, 0x00000000}, /* 5_6_5_REV */ - {0xFF000000, 0x00FF0000, 0x0000FF00, 0x00000000}, /* 8_8_8_8 */ - {0x00000000, 0x00000000, 0x00000000, 0x00000000}, - {0x000000FF, 0x0000FF00, 0x00FF0000, 0x00000000} /* 8_8_8_8_REV */ - }; - - static const uint32_t masks_table_rgba[8][4] = { - {0x00000000, 0x00000000, 0x00000000, 0x00000000}, - {0x00000000, 0x00000000, 0x00000000, 0x00000000}, - {0x00000000, 0x00000000, 0x00000000, 0x00000000}, - {0x0000F800, 0x000007E0, 0x0000001F, 0x00000000}, /* 5_6_5 */ - {0x0000001F, 0x000007E0, 0x0000F800, 0x00000000}, /* 5_6_5_REV */ - {0xFF000000, 0x00FF0000, 0x0000FF00, 0x000000FF}, /* 8_8_8_8 */ - {0x00000000, 0x00000000, 0x00000000, 0x00000000}, - {0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000}, /* 8_8_8_8_REV */ - }; - - static const uint32_t masks_table_bgr[8][4] = { - {0x00000000, 0x00000000, 0x00000000, 0x00000000}, - {0x00000000, 0x00000000, 0x00000000, 0x00000000}, - {0x00000000, 0x00000000, 0x00000000, 0x00000000}, - {0x0000001F, 0x000007E0, 0x0000F800, 0x00000000}, /* 5_6_5 */ - {0x0000F800, 0x000007E0, 0x0000001F, 0x00000000}, /* 5_6_5_REV */ - {0x0000FF00, 0x00FF0000, 0xFF000000, 0x00000000}, /* 8_8_8_8 */ - {0x00000000, 0x00000000, 0x00000000, 0x00000000}, - {0x00FF0000, 0x0000FF00, 0x000000FF, 0x00000000}, /* 8_8_8_8_REV */ - }; - - static const uint32_t masks_table_bgra[8][4] = { - {0x00000000, 0x00000000, 0x00000000, 0x00000000}, - {0x00000000, 0x00000000, 0x00000000, 0x00000000}, - {0x00000000, 0x00000000, 0x00000000, 0x00000000}, - {0x0000001F, 0x000007E0, 0x0000F800, 0x00000000}, /* 5_6_5 */ - {0x0000F800, 0x000007E0, 0x0000001F, 0x00000000}, /* 5_6_5_REV */ - {0x0000FF00, 0x00FF0000, 0xFF000000, 0x000000FF}, /* 
8_8_8_8 */ - {0x00000000, 0x00000000, 0x00000000, 0x00000000}, - {0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000}, /* 8_8_8_8_REV */ - }; - - static const uint8_t bytes_per_pixel[8] = { - 0, 0, 0, 2, 2, 4, 0, 4 - }; - - const uint8_t * bits; - const uint32_t * masks; - const int index = fb_type & 0x07; - _EGLConfig *config; - unsigned i; - unsigned j; - unsigned k; - - if ( bytes_per_pixel[index] == 0 ) { - _eglLog(_EGL_INFO, - "[%s:%u] Framebuffer type 0x%04x has 0 bytes per pixel.", - __FUNCTION__, __LINE__, fb_type); - return GL_FALSE; - } - - /* Valid types are GL_UNSIGNED_SHORT_5_6_5 and GL_UNSIGNED_INT_8_8_8_8 and - * the _REV versions. - * - * Valid formats are GL_RGBA, GL_RGB, and GL_BGRA. - */ - switch ( fb_format ) { - case GL_RGB: - bits = (bytes_per_pixel[index] == 2) ? bits_table[0] : bits_table[1]; - masks = masks_table_rgb[index]; - break; - - case GL_RGBA: - bits = (bytes_per_pixel[index] == 2) ? bits_table[0] : bits_table[2]; - masks = masks_table_rgba[index]; - break; - - case GL_BGR: - bits = (bytes_per_pixel[index] == 2) ? bits_table[0] : bits_table[1]; - masks = masks_table_bgr[index]; - break; - - case GL_BGRA: - bits = (bytes_per_pixel[index] == 2) ? 
bits_table[0] : bits_table[2]; - masks = masks_table_bgra[index]; - break; - - default: - _eglLog(_EGL_WARNING, - "[%s:%u] Framebuffer format 0x%04x is not GL_RGB, GL_RGBA, GL_BGR, or GL_BGRA.", - __FUNCTION__, __LINE__, fb_format); - return GL_FALSE; - } - - config = configs; - for (k = 0; k < num_depth_stencil_bits; k++) { - for (i = 0; i < num_db_modes; i++) { - for (j = 0; j < 2; j++) { - _eglSetConfigAttrib(config, EGL_RED_SIZE, bits[0]); - _eglSetConfigAttrib(config, EGL_GREEN_SIZE, bits[1]); - _eglSetConfigAttrib(config, EGL_BLUE_SIZE, bits[2]); - _eglSetConfigAttrib(config, EGL_ALPHA_SIZE, bits[3]); - _eglSetConfigAttrib(config, EGL_BUFFER_SIZE, - bits[0] + bits[1] + bits[2] + bits[3]); - - _eglSetConfigAttrib(config, EGL_STENCIL_SIZE, stencil_bits[k]); - _eglSetConfigAttrib(config, EGL_DEPTH_SIZE, depth_bits[i]); - - _eglSetConfigAttrib(config, EGL_SURFACE_TYPE, EGL_SCREEN_BIT_MESA | - EGL_PBUFFER_BIT | EGL_PIXMAP_BIT | EGL_WINDOW_BIT); - - config++; - } - } - } - return GL_TRUE; -} diff --git a/src/egl/main/eglconfig.h b/src/egl/main/eglconfig.h index 441b3f4d20..db1c4c10e0 100644 --- a/src/egl/main/eglconfig.h +++ b/src/egl/main/eglconfig.h @@ -3,10 +3,10 @@ #include "egltypedefs.h" -#include "GL/internal/glcore.h" +#include <GLES/gl.h> -#define MAX_ATTRIBS 100 +#define MAX_ATTRIBS 128 #define FIRST_ATTRIB EGL_BUFFER_SIZE @@ -17,7 +17,11 @@ struct _egl_config }; -#define SET_CONFIG_ATTRIB(CONF, ATTR, VAL) ((CONF)->Attrib[(ATTR) - FIRST_ATTRIB] = VAL) +#define SET_CONFIG_ATTRIB(CONF, ATTR, VAL) \ + assert((ATTR) - FIRST_ATTRIB < MAX_ATTRIBS); \ + ((CONF)->Attrib[(ATTR) - FIRST_ATTRIB] = VAL) + + #define GET_CONFIG_ATTRIB(CONF, ATTR) ((CONF)->Attrib[(ATTR) - FIRST_ATTRIB]) @@ -25,12 +29,16 @@ extern void _eglInitConfig(_EGLConfig *config, EGLint id); +extern EGLConfig +_eglGetConfigHandle(_EGLConfig *config); + + extern _EGLConfig * _eglLookupConfig(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config); extern _EGLConfig * -_eglAddConfig(_EGLDisplay *display, 
const _EGLConfig *config); +_eglAddConfig(_EGLDisplay *display, _EGLConfig *config); extern EGLBoolean @@ -52,16 +60,5 @@ _eglGetConfigs(_EGLDriver *drv, EGLDisplay dpy, EGLConfig *configs, EGLint confi extern void _eglSetConfigAttrib(_EGLConfig *config, EGLint attr, EGLint val); -extern GLboolean -_eglFillInConfigs( _EGLConfig *configs, - GLenum fb_format, GLenum fb_type, - const uint8_t * depth_bits, const uint8_t * stencil_bits, - unsigned num_depth_stencil_bits, - const GLenum * db_modes, unsigned num_db_modes, - int visType ); - -extern void -_eglConfigToContextModesRec(const _EGLConfig *config, __GLcontextModes *mode); - #endif /* EGLCONFIG_INCLUDED */ diff --git a/src/egl/main/eglconfigutil.c b/src/egl/main/eglconfigutil.c new file mode 100644 index 0000000000..7061df691b --- /dev/null +++ b/src/egl/main/eglconfigutil.c @@ -0,0 +1,258 @@ +/** + * Extra utility functions related to EGL configs. + */ + + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "eglconfigutil.h" +#include "egllog.h" + + +/** + * Convert an _EGLConfig to a __GLcontextModes object. + * NOTE: This routine may be incomplete - we're only making sure that + * the fields needed by Mesa (for _mesa_create_context/framebuffer) are + * set correctly. + */ +void +_eglConfigToContextModesRec(const _EGLConfig *config, __GLcontextModes *mode) +{ + memset(mode, 0, sizeof(*mode)); + + mode->rgbMode = GL_TRUE; /* no color index */ + mode->colorIndexMode = GL_FALSE; + mode->doubleBufferMode = GL_TRUE; /* always DB for now */ + mode->stereoMode = GL_FALSE; + + mode->redBits = GET_CONFIG_ATTRIB(config, EGL_RED_SIZE); + mode->greenBits = GET_CONFIG_ATTRIB(config, EGL_GREEN_SIZE); + mode->blueBits = GET_CONFIG_ATTRIB(config, EGL_BLUE_SIZE); + mode->alphaBits = GET_CONFIG_ATTRIB(config, EGL_ALPHA_SIZE); + mode->rgbBits = GET_CONFIG_ATTRIB(config, EGL_BUFFER_SIZE); + + /* no rgba masks - fix? 
*/ + + mode->depthBits = GET_CONFIG_ATTRIB(config, EGL_DEPTH_SIZE); + mode->haveDepthBuffer = mode->depthBits > 0; + + mode->stencilBits = GET_CONFIG_ATTRIB(config, EGL_STENCIL_SIZE); + mode->haveStencilBuffer = mode->stencilBits > 0; + + /* no accum */ + + mode->level = GET_CONFIG_ATTRIB(config, EGL_LEVEL); + mode->samples = GET_CONFIG_ATTRIB(config, EGL_SAMPLES); + mode->sampleBuffers = GET_CONFIG_ATTRIB(config, EGL_SAMPLE_BUFFERS); + + /* surface type - not really needed */ + mode->visualType = GLX_TRUE_COLOR; + mode->renderType = GLX_RGBA_BIT; +} + + + +/** + * Creates a set of \c _EGLConfigs that a driver will expose. + * + * A set of \c __GLcontextModes will be created based on the supplied + * parameters. The number of modes processed will be 2 * + * \c num_depth_stencil_bits * \c num_db_modes. + * + * For the most part, data is just copied from \c depth_bits, \c stencil_bits, + * \c db_modes, and \c visType into each \c __GLcontextModes element. + * However, the meanings of \c fb_format and \c fb_type require further + * explanation. The \c fb_format specifies which color components are in + * each pixel and what the default order is. For example, \c GL_RGB specifies + * that red, green, blue are available and red is in the "most significant" + * position and blue is in the "least significant". The \c fb_type specifies + * the bit sizes of each component and the actual ordering. For example, if + * \c GL_UNSIGNED_SHORT_5_6_5_REV is specified with \c GL_RGB, bits [15:11] + * are the blue value, bits [10:5] are the green value, and bits [4:0] are + * the red value. + * + * One sublte issue is the combination of \c GL_RGB or \c GL_BGR and either + * of the \c GL_UNSIGNED_INT_8_8_8_8 modes. The resulting mask values in the + * \c __GLcontextModes structure is \b identical to the \c GL_RGBA or + * \c GL_BGRA case, except the \c alphaMask is zero. 
This means that, as + * far as this routine is concerned, \c GL_RGB with \c GL_UNSIGNED_INT_8_8_8_8 + * still uses 32-bits. + * + * If in doubt, look at the tables used in the function. + * + * \param configs the array of configs generated + * \param fb_format Format of the framebuffer. Currently only \c GL_RGB, + * \c GL_RGBA, \c GL_BGR, and \c GL_BGRA are supported. + * \param fb_type Type of the pixels in the framebuffer. Currently only + * \c GL_UNSIGNED_SHORT_5_6_5, + * \c GL_UNSIGNED_SHORT_5_6_5_REV, + * \c GL_UNSIGNED_INT_8_8_8_8, and + * \c GL_UNSIGNED_INT_8_8_8_8_REV are supported. + * \param depth_bits Array of depth buffer sizes to be exposed. + * \param stencil_bits Array of stencil buffer sizes to be exposed. + * \param num_depth_stencil_bits Number of entries in both \c depth_bits and + * \c stencil_bits. + * \param db_modes Array of buffer swap modes. If an element has a + * value of \c GLX_NONE, then it represents a + * single-buffered mode. Other valid values are + * \c GLX_SWAP_EXCHANGE_OML, \c GLX_SWAP_COPY_OML, and + * \c GLX_SWAP_UNDEFINED_OML. See the + * GLX_OML_swap_method extension spec for more details. + * \param num_db_modes Number of entries in \c db_modes. + * \param visType GLX visual type. Usually either \c GLX_TRUE_COLOR or + * \c GLX_DIRECT_COLOR. + * + * \returns + * \c GL_TRUE on success or \c GL_FALSE on failure. Currently the only + * cause of failure is a bad parameter (i.e., unsupported \c fb_format or + * \c fb_type). + * + * \todo + * There is currently no way to support packed RGB modes (i.e., modes with + * exactly 3 bytes per pixel) or floating-point modes. This could probably + * be done by creating some new, private enums with clever names likes + * \c GL_UNSIGNED_3BYTE_8_8_8, \c GL_4FLOAT_32_32_32_32, + * \c GL_4HALF_16_16_16_16, etc. We can cross that bridge when we come to it. 
+ */ +EGLBoolean +_eglFillInConfigs(_EGLConfig * configs, + GLenum fb_format, GLenum fb_type, + const uint8_t * depth_bits, const uint8_t * stencil_bits, + unsigned num_depth_stencil_bits, + const GLenum * db_modes, unsigned num_db_modes, + int visType) +{ + static const uint8_t bits_table[3][4] = { + /* R G B A */ + { 5, 6, 5, 0 }, /* Any GL_UNSIGNED_SHORT_5_6_5 */ + { 8, 8, 8, 0 }, /* Any RGB with any GL_UNSIGNED_INT_8_8_8_8 */ + { 8, 8, 8, 8 } /* Any RGBA with any GL_UNSIGNED_INT_8_8_8_8 */ + }; + + /* The following arrays are all indexed by the fb_type masked with 0x07. + * Given the four supported fb_type values, this results in valid array + * indices of 3, 4, 5, and 7. + */ + static const uint32_t masks_table_rgb[8][4] = { + {0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x0000F800, 0x000007E0, 0x0000001F, 0x00000000}, /* 5_6_5 */ + {0x0000001F, 0x000007E0, 0x0000F800, 0x00000000}, /* 5_6_5_REV */ + {0xFF000000, 0x00FF0000, 0x0000FF00, 0x00000000}, /* 8_8_8_8 */ + {0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x000000FF, 0x0000FF00, 0x00FF0000, 0x00000000} /* 8_8_8_8_REV */ + }; + + static const uint32_t masks_table_rgba[8][4] = { + {0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x0000F800, 0x000007E0, 0x0000001F, 0x00000000}, /* 5_6_5 */ + {0x0000001F, 0x000007E0, 0x0000F800, 0x00000000}, /* 5_6_5_REV */ + {0xFF000000, 0x00FF0000, 0x0000FF00, 0x000000FF}, /* 8_8_8_8 */ + {0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x000000FF, 0x0000FF00, 0x00FF0000, 0xFF000000}, /* 8_8_8_8_REV */ + }; + + static const uint32_t masks_table_bgr[8][4] = { + {0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x0000001F, 0x000007E0, 
0x0000F800, 0x00000000}, /* 5_6_5 */ + {0x0000F800, 0x000007E0, 0x0000001F, 0x00000000}, /* 5_6_5_REV */ + {0x0000FF00, 0x00FF0000, 0xFF000000, 0x00000000}, /* 8_8_8_8 */ + {0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x00FF0000, 0x0000FF00, 0x000000FF, 0x00000000}, /* 8_8_8_8_REV */ + }; + + static const uint32_t masks_table_bgra[8][4] = { + {0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x0000001F, 0x000007E0, 0x0000F800, 0x00000000}, /* 5_6_5 */ + {0x0000F800, 0x000007E0, 0x0000001F, 0x00000000}, /* 5_6_5_REV */ + {0x0000FF00, 0x00FF0000, 0xFF000000, 0x000000FF}, /* 8_8_8_8 */ + {0x00000000, 0x00000000, 0x00000000, 0x00000000}, + {0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000}, /* 8_8_8_8_REV */ + }; + + static const uint8_t bytes_per_pixel[8] = { + 0, 0, 0, 2, 2, 4, 0, 4 + }; + + const uint8_t * bits; + const uint32_t * masks; + const int index = fb_type & 0x07; + _EGLConfig *config; + unsigned i; + unsigned j; + unsigned k; + + if ( bytes_per_pixel[index] == 0 ) { + _eglLog(_EGL_INFO, + "[%s:%u] Framebuffer type 0x%04x has 0 bytes per pixel.", + __FUNCTION__, __LINE__, fb_type); + return GL_FALSE; + } + + /* Valid types are GL_UNSIGNED_SHORT_5_6_5 and GL_UNSIGNED_INT_8_8_8_8 and + * the _REV versions. + * + * Valid formats are GL_RGBA, GL_RGB, and GL_BGRA. + */ + switch ( fb_format ) { + case GL_RGB: + bits = (bytes_per_pixel[index] == 2) ? bits_table[0] : bits_table[1]; + masks = masks_table_rgb[index]; + break; + + case GL_RGBA: + bits = (bytes_per_pixel[index] == 2) ? bits_table[0] : bits_table[2]; + masks = masks_table_rgba[index]; + break; + +#if 0 + case GL_BGR: + bits = (bytes_per_pixel[index] == 2) ? bits_table[0] : bits_table[1]; + masks = masks_table_bgr[index]; + break; + + case GL_BGRA: + bits = (bytes_per_pixel[index] == 2) ? 
bits_table[0] : bits_table[2]; + masks = masks_table_bgra[index]; + break; +#endif + + default: + _eglLog(_EGL_WARNING, + "[%s:%u] Framebuffer format 0x%04x is not GL_RGB, GL_RGBA, GL_BGR, or GL_BGRA.", + __FUNCTION__, __LINE__, fb_format); + return GL_FALSE; + } + + config = configs; + for (k = 0; k < num_depth_stencil_bits; k++) { + for (i = 0; i < num_db_modes; i++) { + for (j = 0; j < 2; j++) { + _eglSetConfigAttrib(config, EGL_RED_SIZE, bits[0]); + _eglSetConfigAttrib(config, EGL_GREEN_SIZE, bits[1]); + _eglSetConfigAttrib(config, EGL_BLUE_SIZE, bits[2]); + _eglSetConfigAttrib(config, EGL_ALPHA_SIZE, bits[3]); + _eglSetConfigAttrib(config, EGL_BUFFER_SIZE, + bits[0] + bits[1] + bits[2] + bits[3]); + + _eglSetConfigAttrib(config, EGL_STENCIL_SIZE, stencil_bits[k]); + _eglSetConfigAttrib(config, EGL_DEPTH_SIZE, depth_bits[i]); + + _eglSetConfigAttrib(config, EGL_SURFACE_TYPE, EGL_SCREEN_BIT_MESA | + EGL_PBUFFER_BIT | EGL_PIXMAP_BIT | EGL_WINDOW_BIT); + + config++; + } + } + } + return GL_TRUE; +} + diff --git a/src/egl/main/eglconfigutil.h b/src/egl/main/eglconfigutil.h new file mode 100644 index 0000000000..c477b94737 --- /dev/null +++ b/src/egl/main/eglconfigutil.h @@ -0,0 +1,26 @@ + +#ifndef EGLCONFIGUTIL_INCLUDED +#define EGLCONFIGUTIL_INCLUDED + +#include "eglconfig.h" +#include "GL/internal/glcore.h" +#if (!defined(WIN32) && !defined(_WIN32_WCE)) +#include "stdint.h" +#endif + + +extern void +_eglConfigToContextModesRec(const _EGLConfig *config, __GLcontextModes *mode); + + +extern EGLBoolean +_eglFillInConfigs( _EGLConfig *configs, + EGLenum fb_format, EGLenum fb_type, + const uint8_t * depth_bits, const uint8_t * stencil_bits, + unsigned num_depth_stencil_bits, + const EGLenum * db_modes, unsigned num_db_modes, + int visType ); + + + +#endif /* EGLCONFIGUTIL_INCLUDED */ diff --git a/src/egl/main/eglcontext.c b/src/egl/main/eglcontext.c index 374c006dae..461679db09 100644 --- a/src/egl/main/eglcontext.c +++ b/src/egl/main/eglcontext.c @@ -6,12 +6,12 @@ 
#include "egldisplay.h" #include "egldriver.h" #include "eglglobals.h" -#include "eglhash.h" #include "eglsurface.h" /** - * Initialize the given _EGLContext object to defaults. + * Initialize the given _EGLContext object to defaults and/or the values + * in the attrib_list. */ EGLBoolean _eglInitContext(_EGLDriver *drv, EGLDisplay dpy, _EGLContext *ctx, @@ -20,42 +20,54 @@ _eglInitContext(_EGLDriver *drv, EGLDisplay dpy, _EGLContext *ctx, _EGLConfig *conf; _EGLDisplay *display = _eglLookupDisplay(dpy); EGLint i; + const EGLenum api = eglQueryAPI(); + + if (api == EGL_NONE) { + _eglError(EGL_BAD_MATCH, "eglCreateContext(no client API)"); + return EGL_FALSE; + } conf = _eglLookupConfig(drv, dpy, config); if (!conf) { - _eglError(EGL_BAD_CONFIG, "eglCreateContext"); + _eglError(EGL_BAD_CONFIG, "_eglInitContext"); return EGL_FALSE; } + memset(ctx, 0, sizeof(_EGLContext)); + + ctx->ClientVersion = 1; /* the default, per EGL spec */ + for (i = 0; attrib_list && attrib_list[i] != EGL_NONE; i++) { switch (attrib_list[i]) { - /* no attribs defined for now */ + case EGL_CONTEXT_CLIENT_VERSION: + i++; + ctx->ClientVersion = attrib_list[i]; + break; default: - _eglError(EGL_BAD_ATTRIBUTE, "eglCreateContext"); - return EGL_NO_CONTEXT; + _eglError(EGL_BAD_ATTRIBUTE, "_eglInitContext"); + return EGL_FALSE; } } - memset(ctx, 0, sizeof(_EGLContext)); ctx->Display = display; ctx->Config = conf; ctx->DrawSurface = EGL_NO_SURFACE; ctx->ReadSurface = EGL_NO_SURFACE; + ctx->ClientAPI = api; return EGL_TRUE; } -/* - * Assign an EGLContext handle to the _EGLContext object then put it into - * the hash table. +/** + * Save a new _EGLContext into the hash table. */ void _eglSaveContext(_EGLContext *ctx) { - assert(ctx); - ctx->Handle = _eglHashGenKey(_eglGlobal.Contexts); - _eglHashInsert(_eglGlobal.Contexts, ctx->Handle, ctx); + /* no-op. + * Public EGLContext handle and private _EGLContext are the same. 
+ */ } @@ -65,19 +77,34 @@ _eglSaveContext(_EGLContext *ctx) void _eglRemoveContext(_EGLContext *ctx) { - _eglHashRemove(_eglGlobal.Contexts, ctx->Handle); + /* no-op. + * Public EGLContext handle and private _EGLContext are the same. + */ +} + + +/** + * Return the public handle for the given private context ptr. + * This is the inverse of _eglLookupContext(). + */ +EGLContext +_eglGetContextHandle(_EGLContext *ctx) +{ + /* just a cast! */ + return (EGLContext) ctx; } /** * Return the _EGLContext object that corresponds to the given * EGLContext handle. + * This is the inverse of _eglGetContextHandle(). */ _EGLContext * _eglLookupContext(EGLContext ctx) { - _EGLContext *c = (_EGLContext *) _eglHashLookup(_eglGlobal.Contexts, ctx); - return c; + /* just a cast since EGLContext is just a void ptr */ + return (_EGLContext *) ctx; } @@ -112,7 +139,7 @@ _eglCreateContext(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, } _eglSaveContext(context); - return context->Handle; + return (EGLContext) context; #endif return EGL_NO_CONTEXT; } @@ -126,7 +153,6 @@ _eglDestroyContext(_EGLDriver *drv, EGLDisplay dpy, EGLContext ctx) { _EGLContext *context = _eglLookupContext(ctx); if (context) { - _eglHashRemove(_eglGlobal.Contexts, ctx); if (context->IsBound) { context->DeletePending = EGL_TRUE; } @@ -163,8 +189,11 @@ _eglQueryContext(_EGLDriver *drv, EGLDisplay dpy, EGLContext ctx, #ifdef EGL_VERSION_1_2 case EGL_CONTEXT_CLIENT_TYPE: *value = c->ClientAPI; - return EGL_FALSE; + return EGL_TRUE; #endif /* EGL_VERSION_1_2 */ + case EGL_CONTEXT_CLIENT_VERSION: + *value = c->ClientVersion; + return EGL_TRUE; default: _eglError(EGL_BAD_ATTRIBUTE, "eglQueryContext"); return EGL_FALSE; @@ -239,7 +268,7 @@ _eglMakeCurrent(_EGLDriver *drv, EGLDisplay dpy, EGLSurface d, ctx = NULL; } /* really delete context now */ - drv->API.DestroyContext(drv, dpy, oldContext->Handle); + drv->API.DestroyContext(drv, dpy, _eglGetContextHandle(oldContext)); } } diff --git a/src/egl/main/eglcontext.h 
b/src/egl/main/eglcontext.h index 82bfde151f..34fee9c637 100644 --- a/src/egl/main/eglcontext.h +++ b/src/egl/main/eglcontext.h @@ -11,8 +11,6 @@ */ struct _egl_context { - EGLContext Handle; /* The public/opaque handle which names this object */ - _EGLDisplay *Display; /* who do I belong to? */ _EGLConfig *Config; @@ -22,9 +20,9 @@ struct _egl_context EGLBoolean IsBound; EGLBoolean DeletePending; -#ifdef EGL_VERSION_1_2 - EGLint ClientAPI; /* Either EGL_OPENGL_ES_API or EGL_OPENVG_API */ -#endif /* EGL_VERSION_1_2 */ + + EGLint ClientAPI; /**< EGL_OPENGL_ES_API, EGL_OPENGL_API, EGL_OPENVG_API */ + EGLint ClientVersion; /**< 1 = OpenGLES 1.x, 2 = OpenGLES 2.x */ }; @@ -41,6 +39,10 @@ extern void _eglRemoveContext(_EGLContext *ctx); +extern EGLContext +_eglGetContextHandle(_EGLContext *ctx); + + extern _EGLContext * _eglLookupContext(EGLContext ctx); diff --git a/src/egl/main/egldefines.h b/src/egl/main/egldefines.h new file mode 100644 index 0000000000..8fc2301b79 --- /dev/null +++ b/src/egl/main/egldefines.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +/** + * Internal EGL defines + */ + + +#ifndef EGLDEFINES_INCLUDED +#define EGLDEFINES_INCLUDED + + +#define _EGL_MAX_EXTENSIONS_LEN 1000 + +#define _EGL_VENDOR_STRING "Mesa Project" + + + +#endif /* EGLDEFINES_INCLUDED */ diff --git a/src/egl/main/egldisplay.c b/src/egl/main/egldisplay.c index 074a85bf26..47a2323eaf 100644 --- a/src/egl/main/egldisplay.c +++ b/src/egl/main/egldisplay.c @@ -1,52 +1,88 @@ + +/** + * Functions related to EGLDisplay. + */ + +#include <assert.h> #include <stdlib.h> #include <string.h> #include "eglcontext.h" #include "egldisplay.h" +#include "egldriver.h" #include "eglglobals.h" #include "eglhash.h" - - -static char * -my_strdup(const char *s) -{ - int l = strlen(s); - char *s2 = malloc(l + 1); - strcpy(s2, s); - return s2; -} +#include "eglstring.h" /** - * We're assuming that the NativeDisplayType parameter is actually - * a string. - * Return a new _EGLDisplay object for the given displayName + * Allocate a new _EGLDisplay object for the given nativeDisplay handle. + * We'll also try to determine the device driver name at this time. + * + * Note that nativeDisplay may be an X Display ptr, or a string. 
*/ _EGLDisplay * -_eglNewDisplay(NativeDisplayType displayName) +_eglNewDisplay(NativeDisplayType nativeDisplay) { _EGLDisplay *dpy = (_EGLDisplay *) calloc(1, sizeof(_EGLDisplay)); if (dpy) { - dpy->Handle = _eglHashGenKey(_eglGlobal.Displays); - _eglHashInsert(_eglGlobal.Displays, dpy->Handle, dpy); - if (displayName) - dpy->Name = my_strdup(displayName); - else - dpy->Name = NULL; - dpy->Driver = NULL; /* this gets set later */ + EGLuint key = _eglHashGenKey(_eglGlobal.Displays); + + dpy->Handle = (EGLDisplay) key; + _eglHashInsert(_eglGlobal.Displays, key, dpy); + + dpy->NativeDisplay = nativeDisplay; +#if defined(_EGL_PLATFORM_X) + dpy->Xdpy = (Display *) nativeDisplay; +#endif + + dpy->DriverName = _eglChooseDriver(dpy); + if (!dpy->DriverName) { + free(dpy); + return NULL; + } } return dpy; } /** + * Return the public handle for an internal _EGLDisplay. + * This is the inverse of _eglLookupDisplay(). + */ +EGLDisplay +_eglGetDisplayHandle(_EGLDisplay *display) +{ + if (display) + return display->Handle; + else + return EGL_NO_DISPLAY; +} + + +/** * Return the _EGLDisplay object that corresponds to the given public/ * opaque display handle. + * This is the inverse of _eglGetDisplayHandle(). */ _EGLDisplay * _eglLookupDisplay(EGLDisplay dpy) { - _EGLDisplay *d = (_EGLDisplay *) _eglHashLookup(_eglGlobal.Displays, dpy); - return d; + EGLuint key = (EGLuint) dpy; + if (!_eglGlobal.Displays) + return NULL; + return (_EGLDisplay *) _eglHashLookup(_eglGlobal.Displays, key); +} + + +void +_eglSaveDisplay(_EGLDisplay *dpy) +{ + EGLuint key = _eglHashGenKey(_eglGlobal.Displays); + assert(dpy); + assert(!dpy->Handle); + dpy->Handle = (EGLDisplay) key; + assert(dpy->Handle); + _eglHashInsert(_eglGlobal.Displays, key, dpy); } @@ -61,11 +97,25 @@ _eglGetCurrentDisplay(void) } +/** + * Free all the data hanging of an _EGLDisplay object, but not + * the object itself. 
+ */ void _eglCleanupDisplay(_EGLDisplay *disp) { - /* XXX incomplete */ + EGLint i; + + for (i = 0; i < disp->NumConfigs; i++) { + free(disp->Configs[i]); + } free(disp->Configs); - free(disp->Name); - /* driver deletes _EGLDisplay */ + disp->Configs = NULL; + + /* XXX incomplete */ + + free((void *) disp->DriverName); + disp->DriverName = NULL; + + /* driver deletes the _EGLDisplay object */ } diff --git a/src/egl/main/egldisplay.h b/src/egl/main/egldisplay.h index 1a03fdd4ad..ff623ee1c6 100644 --- a/src/egl/main/egldisplay.h +++ b/src/egl/main/egldisplay.h @@ -1,22 +1,31 @@ #ifndef EGLDISPLAY_INCLUDED #define EGLDISPLAY_INCLUDED +#ifdef _EGL_PLATFORM_X +#include <X11/Xlib.h> +#endif #include "egltypedefs.h" struct _egl_display { + EGLNativeDisplayType NativeDisplay; EGLDisplay Handle; - char *Name; + const char *DriverName; + const char *DriverArgs; _EGLDriver *Driver; EGLint NumScreens; _EGLScreen **Screens; /* array [NumScreens] */ EGLint NumConfigs; - _EGLConfig *Configs; /* array [NumConfigs] */ + _EGLConfig **Configs; /* array [NumConfigs] of ptr to _EGLConfig */ + +#ifdef _EGL_PLATFORM_X + Display *Xdpy; +#endif }; @@ -24,10 +33,18 @@ extern _EGLDisplay * _eglNewDisplay(NativeDisplayType displayName); +EGLDisplay +_eglGetDisplayHandle(_EGLDisplay *display); + + extern _EGLDisplay * _eglLookupDisplay(EGLDisplay dpy); +extern void +_eglSaveDisplay(_EGLDisplay *dpy); + + extern _EGLDisplay * _eglGetCurrentDisplay(void); diff --git a/src/egl/main/egldriver.c b/src/egl/main/egldriver.c index bda06dd827..43b1f51903 100644 --- a/src/egl/main/egldriver.c +++ b/src/egl/main/egldriver.c @@ -1,112 +1,277 @@ +/** + * Functions for choosing and opening/loading device drivers. 
+ */ + + #include <assert.h> -#include <dlfcn.h> -#include <stdio.h> #include <string.h> +#include <stdio.h> +#include <stdlib.h> #include "eglconfig.h" #include "eglcontext.h" +#include "egldefines.h" #include "egldisplay.h" #include "egldriver.h" #include "eglglobals.h" #include "egllog.h" +#include "eglmisc.h" #include "eglmode.h" #include "eglscreen.h" +#include "eglstring.h" #include "eglsurface.h" +#if defined(_EGL_PLATFORM_X) +#include <dlfcn.h> +#include "eglx.h" +#elif defined(_EGL_PLATFORM_WINDOWS) +/* Use static linking on Windows for now */ +#define WINDOWS_STATIC_LINK +#endif + +/** + * Wrappers for dlopen/dlclose() + */ +#if defined(_EGL_PLATFORM_WINDOWS) +#ifdef WINDOWS_STATIC_LINK + static const char *DefaultDriverName = "Windows EGL Static Library"; +#else + /* XXX Need to decide how to do dynamic name lookup on Windows */ + static const char *DefaultDriverName = "TBD"; +#endif + static const char *SysFS = NULL; + typedef HMODULE lib_handle; + + static HMODULE + open_library(const char *filename) + { +#ifdef WINDOWS_STATIC_LINK + return 0; +#else + return LoadLibrary(filename); +#endif + } + + static void + close_library(HMODULE lib) + { +#ifdef WINDOWS_STATIC_LINK +#else + FreeLibrary(lib); +#endif + } + +#elif defined(_EGL_PLATFORM_X) + static const char *DefaultDriverName = ":0"; + static const char *SysFS = "/sys/class"; + + typedef void * lib_handle; + + static void * + open_library(const char *filename) + { + return dlopen(filename, RTLD_LAZY); + } + + static void + close_library(void *lib) + { + dlclose(lib); + } + +#endif + +/** + * Given a card number, use sysfs to determine the DRI driver name. 
+ */ +const char * +_eglChooseDRMDriver(int card) +{ +#if 0 + return _eglstrdup("libEGLdri"); +#else + char path[2000], driverName[2000]; + FILE *f; + int length; + + snprintf(path, sizeof(path), "%s/drm/card%d/dri_library_name", SysFS, card); + + f = fopen(path, "r"); + if (!f) + return NULL; -const char *DefaultDriverName = "demodriver"; + fgets(driverName, sizeof(driverName), f); + fclose(f); + if ((length = strlen(driverName)) > 1) { + /* remove the trailing newline from sysfs */ + driverName[length - 1] = '\0'; + strncat(driverName, "_dri", sizeof(driverName)); + return _eglstrdup(driverName); + } + else { + return NULL; + } +#endif +} /** - * Choose and open/init the hardware driver for the given EGLDisplay. - * Previously, the EGLDisplay was created with _eglNewDisplay() where - * we recorded the user's NativeDisplayType parameter. + * XXX this function is totally subject change!!! + * + * + * Determine/return the name of the driver to use for the given _EGLDisplay. * - * Now we'll use the NativeDisplayType value. + * Try to be clever and determine if nativeDisplay is an Xlib Display + * ptr or a string (naming a driver or screen number, etc). * - * Currently, the native display value is treated as a string. * If the first character is ':' we interpret it as a screen or card index * number (i.e. ":0" or ":1", etc) * Else if the first character is '!' we interpret it as specific driver name * (i.e. "!r200" or "!i830". + * + * Whatever follows ':' is copied and put into dpy->DriverArgs. + * + * The caller may free() the returned string. */ -_EGLDriver * -_eglChooseDriver(EGLDisplay display) +const char * +_eglChooseDriver(_EGLDisplay *dpy) { - _EGLDisplay *dpy = _eglLookupDisplay(display); - _EGLDriver *drv; - const char *driverName = DefaultDriverName; - const char *name; + /* Under Windows, the NativeDisplay is an HDC handle, therefore */ + /* it can't be interpreted as a string or a pointer. 
*/ +#if defined(_EGL_PLATFORM_WINDOWS) + const char *displayString = NULL; +#else + const char *displayString = (const char *) dpy->NativeDisplay; +#endif + const char *driverName = NULL; - assert(dpy); + (void) DefaultDriverName; - name = dpy->Name; - if (!name) { - /* use default */ +#if defined(_EGL_PLATFORM_X) + /* First, if the EGL_DRIVER env var is set, use that */ + driverName = getenv("EGL_DRIVER"); + if (driverName) + return _eglstrdup(driverName); +#endif + +#if 0 + if (!displayString) { + /* choose a default */ + displayString = DefaultDriverName; } - else if (name[0] == ':' && (name[1] >= '0' && name[1] <= '9') && !name[2]) { - /* XXX probe hardware here to determine which driver to open */ - driverName = "libEGLdri"; +#endif + /* extract default DriverArgs = whatever follows ':' */ + if (displayString && + (displayString[0] == '!' || + displayString[0] == ':')) { + const char *args = strchr(displayString, ':'); + if (args) + dpy->DriverArgs = _eglstrdup(args + 1); } - else if (name[0] == '!') { - /* use specified driver name */ - driverName = name + 1; + + /* determine driver name now */ + if (displayString && displayString[0] == ':' && + (displayString[1] >= '0' && displayString[1] <= '9') && + !displayString[2]) { + int card = atoi(displayString + 1); + driverName = _eglChooseDRMDriver(card); } - else { - /* Maybe display was returned by XOpenDisplay? */ - _eglLog(_EGL_FATAL, "eglChooseDriver() bad name"); + else if (displayString && displayString[0] == '!') { + /* use user-specified driver name */ + driverName = _eglstrdup(displayString + 1); + /* truncate driverName at ':' if present */ + { + char *args = strchr(driverName, ':'); + if (args) { + *args = 0; + } + } + } + else + { + /* NativeDisplay is not a string! 
*/ +#if defined(_EGL_PLATFORM_X) + driverName = _xeglChooseDriver(dpy); +#else + driverName = DefaultDriverName; +#endif } - _eglLog(_EGL_INFO, "eglChooseDriver() choosing %s", driverName); - - drv = _eglOpenDriver(dpy, driverName); - dpy->Driver = drv; - - return drv; + return driverName; } /** * Open/load the named driver and call its bootstrap function: _eglMain(). + * By the time this function is called, the dpy->DriverName should have + * been determined. + * * \return new _EGLDriver object. */ _EGLDriver * -_eglOpenDriver(_EGLDisplay *dpy, const char *driverName) +_eglOpenDriver(_EGLDisplay *dpy, const char *driverName, const char *args) { _EGLDriver *drv; _EGLMain_t mainFunc; - void *lib; + lib_handle lib; char driverFilename[1000]; + assert(driverName); + +#if defined(_EGL_PLATFORM_WINDOWS) +/* Use static linking on Windows for now */ +#ifdef WINDOWS_STATIC_LINK + lib = 0; + mainFunc = (_EGLMain_t)_eglMain; +#else + /* XXX untested */ + sprintf(driverFilename, "%s.dll", driverName); + _eglLog(_EGL_DEBUG, "dlopen(%s)", driverFilename); + lib = open_library(driverFilename); + if (!lib) { + _eglLog(_EGL_WARNING, "Could not open %s", + driverFilename); + return NULL; + } + mainFunc = (_EGLMain_t) GetProcAddress(lib, "_eglMain"); +#endif +#elif defined(_EGL_PLATFORM_X) /* XXX also prepend a directory path??? 
*/ sprintf(driverFilename, "%s.so", driverName); - _eglLog(_EGL_DEBUG, "dlopen(%s)", driverFilename); - lib = dlopen(driverFilename, RTLD_NOW); + lib = open_library(driverFilename); if (!lib) { _eglLog(_EGL_WARNING, "Could not open %s (%s)", driverFilename, dlerror()); return NULL; } - mainFunc = (_EGLMain_t) dlsym(lib, "_eglMain"); +#endif + if (!mainFunc) { _eglLog(_EGL_WARNING, "_eglMain not found in %s", driverFilename); - dlclose(lib); + close_library(lib); return NULL; } - drv = mainFunc(dpy); + drv = mainFunc(dpy, args); if (!drv) { - dlclose(lib); + close_library(lib); return NULL; } + /* with a recurvise open you want the inner most handle */ - if (!drv->LibHandle) + if (!drv->LibHandle) { drv->LibHandle = lib; - else - dlclose(lib); + } + else { + close_library(lib); + } + + /* update the global notion of supported APIs */ + _eglGlobal.ClientAPIsMask |= drv->ClientAPIsMask; + + _eglSaveDriver(drv); - drv->Display = dpy; return drv; } @@ -117,19 +282,31 @@ _eglCloseDriver(_EGLDriver *drv, EGLDisplay dpy) void *handle = drv->LibHandle; EGLBoolean b; - _eglLog(_EGL_INFO, "Closing driver"); + _eglLog(_EGL_DEBUG, "Closing %s", drv->Name); /* * XXX check for currently bound context/surfaces and delete them? */ b = drv->API.Terminate(drv, dpy); - dlclose(handle); + + close_library(handle); + return b; } /** + * Save the given driver pointer in the list of all known drivers. + */ +void +_eglSaveDriver(_EGLDriver *drv) +{ + _eglGlobal.Drivers[ _eglGlobal.NumDrivers++ ] = drv; +} + + +/** * Given a display handle, return the _EGLDriver for that display. */ _EGLDriver * @@ -201,72 +378,48 @@ _eglInitDriverFallbacks(_EGLDriver *drv) } + /** - * Examine the individual extension enable/disable flags and recompute - * the driver's Extensions string. + * Try to determine which EGL APIs (OpenGL, OpenGL ES, OpenVG, etc) + * are supported on the system by looking for standard library names. 
*/ -static void -_eglUpdateExtensionsString(_EGLDriver *drv) -{ - drv->Extensions.String[0] = 0; - - if (drv->Extensions.MESA_screen_surface) - strcat(drv->Extensions.String, "EGL_MESA_screen_surface "); - if (drv->Extensions.MESA_copy_context) - strcat(drv->Extensions.String, "EGL_MESA_copy_context "); - assert(strlen(drv->Extensions.String) < MAX_EXTENSIONS_LEN); -} - - - -const char * -_eglQueryString(_EGLDriver *drv, EGLDisplay dpy, EGLint name) +EGLint +_eglFindAPIs(void) { - (void) drv; - (void) dpy; - switch (name) { - case EGL_VENDOR: - return "Mesa Project"; - case EGL_VERSION: - return "1.0"; - case EGL_EXTENSIONS: - _eglUpdateExtensionsString(drv); - return drv->Extensions.String; -#ifdef EGL_VERSION_1_2 - case EGL_CLIENT_APIS: - /* XXX need to initialize somewhere */ - return drv->ClientAPIs; + EGLint mask = 0x0; + lib_handle lib; +#if defined(_EGL_PLATFORM_WINDOWS) + /* XXX not sure about these names */ + const char *es1_libname = "libGLESv1_CM.dll"; + const char *es2_libname = "libGLESv2.dll"; + const char *gl_libname = "OpenGL32.dll"; + const char *vg_libname = "libOpenVG.dll"; +#elif defined(_EGL_PLATFORM_X) + const char *es1_libname = "libGLESv1_CM.so"; + const char *es2_libname = "libGLESv2.so"; + const char *gl_libname = "libGL.so"; + const char *vg_libname = "libOpenVG.so"; #endif - default: - _eglError(EGL_BAD_PARAMETER, "eglQueryString"); - return NULL; - } -} + if ((lib = open_library(es1_libname))) { + close_library(lib); + mask |= EGL_OPENGL_ES_BIT; + } -EGLBoolean -_eglWaitGL(_EGLDriver *drv, EGLDisplay dpy) -{ - /* just a placeholder */ - (void) drv; - (void) dpy; - return EGL_TRUE; -} + if ((lib = open_library(es2_libname))) { + close_library(lib); + mask |= EGL_OPENGL_ES2_BIT; + } + if ((lib = open_library(gl_libname))) { + close_library(lib); + mask |= EGL_OPENGL_BIT; + } -EGLBoolean -_eglWaitNative(_EGLDriver *drv, EGLDisplay dpy, EGLint engine) -{ - /* just a placeholder */ - (void) drv; - (void) dpy; - switch (engine) { - case 
EGL_CORE_NATIVE_ENGINE: - break; - default: - _eglError(EGL_BAD_PARAMETER, "eglWaitNative(engine)"); - return EGL_FALSE; + if ((lib = open_library(vg_libname))) { + close_library(lib); + mask |= EGL_OPENVG_BIT; } - return EGL_TRUE; + return mask; } diff --git a/src/egl/main/egldriver.h b/src/egl/main/egldriver.h index 88526e973d..4066c6ec1d 100644 --- a/src/egl/main/egldriver.h +++ b/src/egl/main/egldriver.h @@ -4,9 +4,7 @@ #include "egltypedefs.h" #include "eglapi.h" - -/* should probably use a dynamic-length string, but this will do */ -#define MAX_EXTENSIONS_LEN 1000 +#include "egldefines.h" /** @@ -17,7 +15,7 @@ struct _egl_extensions EGLBoolean MESA_screen_surface; EGLBoolean MESA_copy_context; - char String[MAX_EXTENSIONS_LEN]; + char String[_EGL_MAX_EXTENSIONS_LEN]; }; @@ -26,37 +24,48 @@ struct _egl_extensions */ struct _egl_driver { - EGLBoolean Initialized; /* set by driver after initialized */ + EGLBoolean Initialized; /**< set by driver after initialized */ + + void *LibHandle; /**< dlopen handle */ - void *LibHandle; /* dlopen handle */ + const char *Name; /**< name of this driver */ - _EGLDisplay *Display; + int APImajor, APIminor; /**< as returned by eglInitialize() */ + char Version[1000]; /**< initialized from APImajor/minor, Name */ - int ABIversion; - int APImajor, APIminor; /* returned through eglInitialize */ - const char *ClientAPIs; + /** Bitmask of supported APIs (EGL_xx_BIT) set by the driver during init */ + EGLint ClientAPIsMask; - _EGLAPI API; + _EGLAPI API; /**< EGL API dispatch table */ _EGLExtensions Extensions; + + int LargestPbuffer; }; -extern _EGLDriver *_eglMain(_EGLDisplay *dpy); +extern _EGLDriver *_eglMain(_EGLDisplay *dpy, const char *args); -extern _EGLDriver * -_eglChooseDriver(EGLDisplay dpy); +extern const char * +_eglChooseDRMDriver(int card); + +extern const char * +_eglChooseDriver(_EGLDisplay *dpy); extern _EGLDriver * -_eglOpenDriver(_EGLDisplay *dpy, const char *driverName); +_eglOpenDriver(_EGLDisplay *dpy, const 
char *driverName, const char *args); extern EGLBoolean _eglCloseDriver(_EGLDriver *drv, EGLDisplay dpy); +extern void +_eglSaveDriver(_EGLDriver *drv); + + extern _EGLDriver * _eglLookupDriver(EGLDisplay d); @@ -65,17 +74,8 @@ extern void _eglInitDriverFallbacks(_EGLDriver *drv); -extern const char * -_eglQueryString(_EGLDriver *drv, EGLDisplay dpy, EGLint name); - - -extern EGLBoolean -_eglWaitGL(_EGLDriver *drv, EGLDisplay dpy); - - -extern EGLBoolean -_eglWaitNative(_EGLDriver *drv, EGLDisplay dpy, EGLint engine); - +extern EGLint +_eglFindAPIs(void); #endif /* EGLDRIVER_INCLUDED */ diff --git a/src/egl/main/eglglobals.c b/src/egl/main/eglglobals.c index 608311d749..b770e55dbd 100644 --- a/src/egl/main/eglglobals.c +++ b/src/egl/main/eglglobals.c @@ -2,9 +2,10 @@ #include <stdlib.h> #include "eglglobals.h" - -struct _egl_global _eglGlobal = { .Initialized = EGL_FALSE }; - +struct _egl_global _eglGlobal = +{ + EGL_FALSE +}; /** * Init the fields in the _eglGlobal struct @@ -15,13 +16,11 @@ _eglInitGlobals(void) { if (!_eglGlobal.Initialized) { _eglGlobal.Displays = _eglNewHashTable(); - _eglGlobal.Contexts = _eglNewHashTable(); _eglGlobal.Surfaces = _eglNewHashTable(); _eglGlobal.FreeScreenHandle = 1; _eglGlobal.Initialized = EGL_TRUE; - _eglGlobal.OpenGLESAPISupported = EGL_TRUE; - _eglGlobal.OpenVGAPISupported = EGL_FALSE; + _eglGlobal.ClientAPIsMask = 0x0; /* XXX temporary */ _eglGlobal.ThreadInfo = _eglNewThreadInfo(); @@ -37,7 +36,6 @@ _eglDestroyGlobals(void) { /* XXX TODO walk over table entries, deleting each */ _eglDeleteHashTable(_eglGlobal.Displays); - _eglDeleteHashTable(_eglGlobal.Contexts); _eglDeleteHashTable(_eglGlobal.Surfaces); } @@ -52,7 +50,7 @@ _eglNewThreadInfo(void) if (t) { t->CurrentContext = EGL_NO_CONTEXT; t->LastError = EGL_SUCCESS; - t->CurrentAPI = EGL_NONE; + t->CurrentAPI = EGL_OPENGL_ES_API; /* default, per EGL spec */ } return t; } diff --git a/src/egl/main/eglglobals.h b/src/egl/main/eglglobals.h index c16baa2d6b..14d8ea487a 
100644 --- a/src/egl/main/eglglobals.h +++ b/src/egl/main/eglglobals.h @@ -24,17 +24,20 @@ struct _egl_global EGLBoolean Initialized; _EGLHashtable *Displays; - _EGLHashtable *Contexts; _EGLHashtable *Surfaces; EGLScreenMESA FreeScreenHandle; - /* XXX these may be temporary */ - EGLBoolean OpenGLESAPISupported; - EGLBoolean OpenVGAPISupported; + /* bitmaks of supported APIs (supported by _some_ driver) */ + EGLint ClientAPIsMask; + + char ClientAPIs[1000]; /**< updated by eglQueryString */ /* XXX temporary - should be thread-specific data (TSD) */ _EGLThreadInfo *ThreadInfo; + + EGLint NumDrivers; + _EGLDriver *Drivers[10]; }; diff --git a/src/egl/main/egllog.c b/src/egl/main/egllog.c index dc1daaa996..1d7a0a388c 100644 --- a/src/egl/main/egllog.c +++ b/src/egl/main/egllog.c @@ -1,5 +1,7 @@ /** * Logging facility for debug/info messages. + * _EGL_FATAL messages are printed to stderr + * The EGL_LOG_LEVEL var controls the output of other warning/info/debug msgs. */ @@ -10,37 +12,41 @@ #include "egllog.h" #define MAXSTRING 1000 -#define FALLBACK_LOG_LEVEL _EGL_DEBUG -#define FALLBACK_LOG_LEVEL_STR "debug" +#define FALLBACK_LOG_LEVEL _EGL_WARNING +#define FALLBACK_LOG_LEVEL_STR "warning" static EGLint ReportingLevel = -1; static void -log_level_initialize (void) +log_level_initialize(void) { - char *log_env = getenv ("EGL_LOG_LEVEL"); +#if defined(_EGL_PLATFORM_X) + char *log_env = getenv("EGL_LOG_LEVEL"); +#else + char *log_env = NULL; +#endif if (log_env == NULL) { ReportingLevel = FALLBACK_LOG_LEVEL; } - else if (strcasecmp (log_env, "fatal") == 0) { + else if (strcasecmp(log_env, "fatal") == 0) { ReportingLevel = _EGL_FATAL; } - else if (strcasecmp (log_env, "warning") == 0) { + else if (strcasecmp(log_env, "warning") == 0) { ReportingLevel = _EGL_WARNING; } - else if (strcasecmp (log_env, "info") == 0) { + else if (strcasecmp(log_env, "info") == 0) { ReportingLevel = _EGL_INFO; } - else if (strcasecmp (log_env, "debug") == 0) { + else if (strcasecmp(log_env, 
"debug") == 0) { ReportingLevel = _EGL_DEBUG; } else { - fprintf (stderr, "Unrecognized EGL_LOG_LEVEL environment variable value. " - "Expected one of \"fatal\", \"warning\", \"info\", \"debug\". " - "Got \"%s\". Falling back to \"%s\".\n", - log_env, FALLBACK_LOG_LEVEL_STR); + fprintf(stderr, "Unrecognized EGL_LOG_LEVEL environment variable value. " + "Expected one of \"fatal\", \"warning\", \"info\", \"debug\". " + "Got \"%s\". Falling back to \"%s\".\n", + log_env, FALLBACK_LOG_LEVEL_STR); ReportingLevel = FALLBACK_LOG_LEVEL; } } @@ -59,7 +65,7 @@ _eglLog(EGLint level, const char *fmtStr, ...) static int log_level_initialized = 0; if (!log_level_initialized) { - log_level_initialize (); + log_level_initialize(); log_level_initialized = 1; } @@ -85,7 +91,7 @@ _eglLog(EGLint level, const char *fmtStr, ...) vsnprintf(msg, MAXSTRING, fmtStr, args); va_end(args); - fprintf(stderr, "EGL %s: %s\n", levelStr, msg); + fprintf(stderr, "libEGL %s: %s\n", levelStr, msg); if (level == _EGL_FATAL) { exit(1); /* or abort()? */ diff --git a/src/egl/main/eglmisc.c b/src/egl/main/eglmisc.c new file mode 100644 index 0000000000..b5bdc3ea4b --- /dev/null +++ b/src/egl/main/eglmisc.c @@ -0,0 +1,129 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Small/misc EGL functions + */ + + +#include <assert.h> +#include <string.h> +#include "eglglobals.h" +#include "eglmisc.h" + + +/** + * Examine the individual extension enable/disable flags and recompute + * the driver's Extensions string. 
+ */ +static void +_eglUpdateExtensionsString(_EGLDriver *drv) +{ + drv->Extensions.String[0] = 0; + + if (drv->Extensions.MESA_screen_surface) + strcat(drv->Extensions.String, "EGL_MESA_screen_surface "); + if (drv->Extensions.MESA_copy_context) + strcat(drv->Extensions.String, "EGL_MESA_copy_context "); + assert(strlen(drv->Extensions.String) < _EGL_MAX_EXTENSIONS_LEN); +} + + +static void +_eglUpdateAPIsString(_EGLDriver *drv) +{ + _eglGlobal.ClientAPIs[0] = 0; + + if (_eglGlobal.ClientAPIsMask & EGL_OPENGL_BIT) + strcat(_eglGlobal.ClientAPIs, "OpenGL "); + + if (_eglGlobal.ClientAPIsMask & EGL_OPENGL_ES_BIT) + strcat(_eglGlobal.ClientAPIs, "OpenGL_ES "); + + if (_eglGlobal.ClientAPIsMask & EGL_OPENGL_ES2_BIT) + strcat(_eglGlobal.ClientAPIs, "OpenGL_ES2 "); + + if (_eglGlobal.ClientAPIsMask & EGL_OPENVG_BIT) + strcat(_eglGlobal.ClientAPIs, "OpenVG "); + + assert(strlen(_eglGlobal.ClientAPIs) < sizeof(_eglGlobal.ClientAPIs)); +} + + + +const char * +_eglQueryString(_EGLDriver *drv, EGLDisplay dpy, EGLint name) +{ + (void) drv; + (void) dpy; + switch (name) { + case EGL_VENDOR: + return _EGL_VENDOR_STRING; + case EGL_VERSION: + return drv->Version; + case EGL_EXTENSIONS: + _eglUpdateExtensionsString(drv); + return drv->Extensions.String; +#ifdef EGL_VERSION_1_2 + case EGL_CLIENT_APIS: + _eglUpdateAPIsString(drv); + return _eglGlobal.ClientAPIs; +#endif + default: + _eglError(EGL_BAD_PARAMETER, "eglQueryString"); + return NULL; + } +} + + +EGLBoolean +_eglWaitGL(_EGLDriver *drv, EGLDisplay dpy) +{ + /* just a placeholder */ + (void) drv; + (void) dpy; + return EGL_TRUE; +} + + +EGLBoolean +_eglWaitNative(_EGLDriver *drv, EGLDisplay dpy, EGLint engine) +{ + /* just a placeholder */ + (void) drv; + (void) dpy; + switch (engine) { + case EGL_CORE_NATIVE_ENGINE: + break; + default: + _eglError(EGL_BAD_PARAMETER, "eglWaitNative(engine)"); + return EGL_FALSE; + } + + return EGL_TRUE; +} diff --git a/src/egl/main/eglmisc.h b/src/egl/main/eglmisc.h new file mode 100644 
index 0000000000..4e2a40ea99 --- /dev/null +++ b/src/egl/main/eglmisc.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef EGLMISC_INCLUDED +#define EGLMISC_INCLUDED + +#include "egldriver.h" + + +extern const char * +_eglQueryString(_EGLDriver *drv, EGLDisplay dpy, EGLint name); + + +extern EGLBoolean +_eglWaitGL(_EGLDriver *drv, EGLDisplay dpy); + + +extern EGLBoolean +_eglWaitNative(_EGLDriver *drv, EGLDisplay dpy, EGLint engine); + + +#endif /* EGLMISC_INCLUDED */ diff --git a/src/egl/main/eglmode.h b/src/egl/main/eglmode.h index e70da85759..52d4875676 100644 --- a/src/egl/main/eglmode.h +++ b/src/egl/main/eglmode.h @@ -4,6 +4,9 @@ #include "egltypedefs.h" +#define EGL_NO_MODE_MESA 0 + + /** * Data structure which corresponds to an EGLModeMESA. */ diff --git a/src/egl/main/eglstring.c b/src/egl/main/eglstring.c new file mode 100644 index 0000000000..ba7406158c --- /dev/null +++ b/src/egl/main/eglstring.c @@ -0,0 +1,24 @@ +/** + * String utils. + */ + +#include <stdlib.h> +#include <string.h> +#include "eglstring.h" + + +char * +_eglstrdup(const char *s) +{ + if (s) { + int l = strlen(s); + char *s2 = malloc(l + 1); + if (s2) + strcpy(s2, s); + return s2; + } + return NULL; +} + + + diff --git a/src/egl/main/eglstring.h b/src/egl/main/eglstring.h new file mode 100644 index 0000000000..10468636e8 --- /dev/null +++ b/src/egl/main/eglstring.h @@ -0,0 +1,9 @@ +#ifndef EGLSTRING_INCLUDED +#define EGLSTRING_INCLUDED + + +extern char * +_eglstrdup(const char *s); + + +#endif /* EGLSTRING_INCLUDED */ diff --git a/src/egl/main/eglsurface.c b/src/egl/main/eglsurface.c index 874f318e96..6905acac50 100644 --- a/src/egl/main/eglsurface.c +++ b/src/egl/main/eglsurface.c @@ -8,6 +8,7 @@ #include <string.h> #include "eglcontext.h" #include "eglconfig.h" +#include "egldriver.h" #include "eglglobals.h" #include "eglhash.h" #include "egllog.h" @@ -60,6 +61,12 @@ _eglInitSurface(_EGLDriver *drv, EGLDisplay dpy, return EGL_FALSE; } + if ((GET_CONFIG_ATTRIB(conf, EGL_SURFACE_TYPE) & type) == 0) { + /* The 
config can't be used to create a surface of this type */ + _eglError(EGL_BAD_CONFIG, func); + return EGL_FALSE; + } + /* * Parse attribute list. Different kinds of surfaces support different * attributes. @@ -175,7 +182,7 @@ _eglInitSurface(_EGLDriver *drv, EGLDisplay dpy, } } - if (width <= 0 || height <= 0) { + if (width < 0 || height < 0) { _eglError(EGL_BAD_ATTRIBUTE, func); return EGL_FALSE; } @@ -207,25 +214,47 @@ _eglInitSurface(_EGLDriver *drv, EGLDisplay dpy, void _eglSaveSurface(_EGLSurface *surf) { + EGLuint key = _eglHashGenKey(_eglGlobal.Surfaces); assert(surf); assert(!surf->Handle); - surf->Handle = _eglHashGenKey(_eglGlobal.Contexts); + surf->Handle = (EGLSurface) key; assert(surf->Handle); - _eglHashInsert(_eglGlobal.Surfaces, surf->Handle, surf); + _eglHashInsert(_eglGlobal.Surfaces, key, surf); } void _eglRemoveSurface(_EGLSurface *surf) { - _eglHashRemove(_eglGlobal.Surfaces, surf->Handle); + _eglHashRemove(_eglGlobal.Surfaces, (EGLuint) surf->Handle); +} + + + +/** + * Return the public handle for an internal _EGLSurface. + * This is the inverse of _eglLookupSurface(). + */ +EGLSurface +_eglGetSurfaceHandle(_EGLSurface *surface) +{ + if (surface) + return surface->Handle; + else + return EGL_NO_SURFACE; } +/** + * Return the private _EGLSurface which corresponds to a public EGLSurface + * handle. + * This is the inverse of _eglGetSurfaceHandle(). + */ _EGLSurface * _eglLookupSurface(EGLSurface surf) { - _EGLSurface *c = (_EGLSurface *) _eglHashLookup(_eglGlobal.Surfaces, surf); + _EGLSurface *c = (_EGLSurface *) _eglHashLookup(_eglGlobal.Surfaces, + (EGLuint) surf); return c; } @@ -254,12 +283,7 @@ _eglSwapBuffers(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw) /* Basically just do error checking here. Drivers have to do the * actual buffer swap. 
*/ - _EGLContext *context = _eglGetCurrentContext(); _EGLSurface *surface = _eglLookupSurface(draw); - if (context && context->DrawSurface != surface) { - _eglError(EGL_BAD_SURFACE, "eglSwapBuffers"); - return EGL_FALSE; - } if (surface == NULL) { _eglError(EGL_BAD_SURFACE, "eglSwapBuffers"); return EGL_FALSE; @@ -297,7 +321,9 @@ _eglQuerySurface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surf, case EGL_CONFIG_ID: *value = GET_CONFIG_ATTRIB(surface->Config, EGL_CONFIG_ID); return EGL_TRUE; - /*XXX case EGL_LARGEST_PBUFFER:*/ + case EGL_LARGEST_PBUFFER: + *value = drv->LargestPbuffer; + return EGL_TRUE; case EGL_SURFACE_TYPE: *value = surface->Type; return EGL_TRUE; @@ -439,7 +465,7 @@ _eglDestroySurface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface) { _EGLSurface *surf = _eglLookupSurface(surface); if (surf) { - _eglHashRemove(_eglGlobal.Surfaces, surface); + _eglHashRemove(_eglGlobal.Surfaces, (EGLuint) surface); if (surf->IsBound) { surf->DeletePending = EGL_TRUE; } @@ -459,7 +485,8 @@ _eglDestroySurface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface) * Default fallback routine - drivers might override this. */ EGLBoolean -_eglSurfaceAttrib(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surf, EGLint attribute, EGLint value) +_eglSurfaceAttrib(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surf, + EGLint attribute, EGLint value) { _EGLSurface *surface = _eglLookupSurface(surf); @@ -481,18 +508,67 @@ _eglSurfaceAttrib(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surf, EGLint attri EGLBoolean -_eglBindTexImage(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface, EGLint buffer) +_eglBindTexImage(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surf, + EGLint buffer) { - /* XXX unfinished */ - return EGL_FALSE; + /* Just do basic error checking and return success/fail. + * Drivers must implement the real stuff. 
+ */ + _EGLSurface *surface = _eglLookupSurface(surf); + + if (!surface || surface->Type != EGL_PBUFFER_BIT) { + _eglError(EGL_BAD_SURFACE, "eglBindTexImage"); + return EGL_FALSE; + } + + if (surface->TextureFormat == EGL_NO_TEXTURE) { + _eglError(EGL_BAD_MATCH, "eglBindTexImage"); + return EGL_FALSE; + } + + if (buffer != EGL_BACK_BUFFER) { + _eglError(EGL_BAD_PARAMETER, "eglBindTexImage"); + return EGL_FALSE; + } + + surface->BoundToTexture = EGL_TRUE; + + return EGL_TRUE; } EGLBoolean -_eglReleaseTexImage(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface, EGLint buffer) +_eglReleaseTexImage(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surf, + EGLint buffer) { - /* XXX unfinished */ - return EGL_FALSE; + /* Just do basic error checking and return success/fail. + * Drivers must implement the real stuff. + */ + _EGLSurface *surface = _eglLookupSurface(surf); + + if (!surface || surface->Type != EGL_PBUFFER_BIT) { + _eglError(EGL_BAD_SURFACE, "eglBindTexImage"); + return EGL_FALSE; + } + + if (surface->TextureFormat == EGL_NO_TEXTURE) { + _eglError(EGL_BAD_MATCH, "eglBindTexImage"); + return EGL_FALSE; + } + + if (buffer != EGL_BACK_BUFFER) { + _eglError(EGL_BAD_PARAMETER, "eglReleaseTexImage"); + return EGL_FALSE; + } + + if (!surface->BoundToTexture) { + _eglError(EGL_BAD_SURFACE, "eglReleaseTexImage"); + return EGL_FALSE; + } + + surface->BoundToTexture = EGL_FALSE; + + return EGL_TRUE; } diff --git a/src/egl/main/eglsurface.h b/src/egl/main/eglsurface.h index 79abeca0b2..50f965b5cb 100644 --- a/src/egl/main/eglsurface.h +++ b/src/egl/main/eglsurface.h @@ -16,6 +16,7 @@ struct _egl_surface /* May need reference counting here */ EGLBoolean IsBound; EGLBoolean DeletePending; + EGLBoolean BoundToTexture; EGLint Type; /* one of EGL_WINDOW_BIT, EGL_PIXMAP_BIT or EGL_PBUFFER_BIT */ EGLint Width, Height; @@ -51,6 +52,10 @@ extern void _eglRemoveSurface(_EGLSurface *surf); +extern EGLSurface +_eglGetSurfaceHandle(_EGLSurface *surface); + + extern _EGLSurface * 
_eglLookupSurface(EGLSurface surf); diff --git a/src/egl/main/egltypedefs.h b/src/egl/main/egltypedefs.h index fa8cb496f8..9fbc55352c 100644 --- a/src/egl/main/egltypedefs.h +++ b/src/egl/main/egltypedefs.h @@ -1,8 +1,10 @@ #ifndef EGLTYPEDEFS_INCLUDED #define EGLTYPEDEFS_INCLUDED +#define EGL_EGLEXT_PROTOTYPES -#include <GLES/egl.h> +#include <EGL/egl.h> +#include <EGL/eglext.h> typedef struct _egl_api _EGLAPI; @@ -26,9 +28,7 @@ typedef struct _egl_surface _EGLSurface; typedef struct _egl_thread_info _EGLThreadInfo; -typedef void (*_EGLProc)(); - -typedef _EGLDriver *(*_EGLMain_t)(_EGLDisplay *dpy); +typedef _EGLDriver *(*_EGLMain_t)(_EGLDisplay *dpy, const char *args); #endif /* EGLTYPEDEFS_INCLUDED */ diff --git a/src/egl/main/eglx.c b/src/egl/main/eglx.c new file mode 100644 index 0000000000..50acc3a24f --- /dev/null +++ b/src/egl/main/eglx.c @@ -0,0 +1,100 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * X-specific EGL code. + * + * Any glue code needed to make EGL work with X is placed in this file. + */ + + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <X11/Xlib.h> + +#include "egldriver.h" +#include "egllog.h" +#include "eglstring.h" +#include "eglx.h" + + +static const char *DefaultGLXDriver = "egl_glx"; +static const char *DefaultSoftDriver = "egl_softpipe"; + + +/** + * Given an X Display ptr (at dpy->Xdpy) try to determine the appropriate + * device driver. Return its name. + * + * This boils down to whether to use the egl_glx.so driver which will + * load a DRI driver or the egl_softpipe.so driver that'll do software + * rendering on Xlib. 
+ */ +const char * +_xeglChooseDriver(_EGLDisplay *dpy) +{ +#ifdef _EGL_PLATFORM_X + _XPrivDisplay xdpy; + int screen; + const char *driverName; + + assert(dpy); + + if (!dpy->Xdpy) { + dpy->Xdpy = XOpenDisplay(NULL); + if (!dpy->Xdpy) { + /* can't open X display -> can't use X-based driver */ + return NULL; + } + } + xdpy = (_XPrivDisplay) dpy->Xdpy; + + assert(dpy->Xdpy); + + screen = DefaultScreen(dpy->Xdpy); + + /* See if we can choose a DRI/DRM driver */ + driverName = _eglChooseDRMDriver(screen); + if (driverName) { + free((void *) driverName); + driverName = _eglstrdup(DefaultGLXDriver); + } + else { + driverName = _eglstrdup(DefaultSoftDriver); + } + + _eglLog(_EGL_DEBUG, "_xeglChooseDriver: %s", driverName); + + return driverName; +#else + return NULL; +#endif +} + + diff --git a/src/egl/main/eglx.h b/src/egl/main/eglx.h new file mode 100644 index 0000000000..4323d55838 --- /dev/null +++ b/src/egl/main/eglx.h @@ -0,0 +1,12 @@ +#ifndef EGLX_INCLUDED +#define EGLX_INCLUDED + + +#include "egldisplay.h" + + +extern const char * +_xeglChooseDriver(_EGLDisplay *dpy); + + +#endif /* EGLX_INCLUDED */ diff --git a/src/gallium/Makefile b/src/gallium/Makefile new file mode 100644 index 0000000000..36bd3623e7 --- /dev/null +++ b/src/gallium/Makefile @@ -0,0 +1,26 @@ +TOP = ../.. +include $(TOP)/configs/current + + +SUBDIRS = auxiliary drivers +# Note winsys/ needs to be built after src/mesa + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` + rm -f `find . -name depend` + + +# Dummy install target +install: diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template new file mode 100644 index 0000000000..4e462b5c97 --- /dev/null +++ b/src/gallium/Makefile.template @@ -0,0 +1,64 @@ +# -*-makefile-*- + + +# We still have a dependency on the "dri" buffer manager. 
Most likely +# the interface can be reused in non-dri environments, and also as a +# frontend to simpler memory managers. +# +COMMON_SOURCES = + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(CPP_SOURCES:.cpp=.o) \ + $(ASM_SOURCES:.S=.o) + + +### Include directories +INCLUDES = \ + -I. \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/auxiliary \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/include \ + $(DRIVER_INCLUDES) + + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + + +##### TARGETS ##### + +default: depend symlinks $(LIBNAME) + + +$(LIBNAME): $(OBJECTS) Makefile $(TOP)/src/gallium/Makefile.template + $(TOP)/bin/mklib -o $@ -static $(OBJECTS) $(DRIVER_LIBS) + + +depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \ + $(ASM_SOURCES) 2> /dev/null + + +# Emacs tags +tags: + etags `find . -name \*.[ch]` `find ../include` + + +# Remove .o and backup files +clean:: + -rm -f *.o */*.o *~ *.so *~ server/*.o $(SYMLINKS) + -rm -f depend depend.bak + + +include depend diff --git a/src/gallium/README.portability b/src/gallium/README.portability new file mode 100644 index 0000000000..adecf4bb79 --- /dev/null +++ b/src/gallium/README.portability @@ -0,0 +1,109 @@ + CROSS-PLATFORM PORTABILITY GUIDELINES FOR GALLIUM3D + + += General Considerations = + +The state tracker and winsys driver support a rather limited number of +platforms. However, the pipe drivers are meant to run in a wide number of +platforms. Hence the pipe drivers, the auxiliary modules, and all public +headers in general, should strictly follow these guidelines to ensure portability. + + += Compiler Support = + +* Include the p_compiler.h.
+ +* Don't use the 'inline' keyword, use the INLINE macro in p_compiler.h instead. + +* Cast explicitly when converting to integer types of smaller sizes. + +* Cast explicitly when converting between float, double and integral types. + +* Don't use named struct initializers. + +* Don't use variable number of macro arguments. Use static inline functions +instead. + +* Don't use C99 features. + += Standard Library = + +* Avoid including standard library headers. Most standard library functions are +not available in Windows Kernel Mode. Use the appropriate p_*.h include. + +== Memory Allocation == + +* Use MALLOC, CALLOC, FREE instead of the malloc, calloc, free functions. + +* Use align_pointer() function defined in u_memory.h for aligning pointers + in a portable way. + +== Debugging == + +* Use the functions/macros in p_debug.h. + +* Don't include assert.h, call abort, printf, etc. + + += Code Style = + +== Inheritance in C == + +The main thing we do is mimic inheritance by structure containment. + +Here's a silly made-up example: + +/* base class */ +struct buffer +{ + int size; + void (*validate)(struct buffer *buf); +}; + +/* sub-class of buffer */ +struct texture_buffer +{ + struct buffer base; /* the base class, MUST COME FIRST!
*/ + int format; + int width, height; +}; + + +Then, we'll typically have cast-wrapper functions to convert base-class +pointers to sub-class pointers where needed: + +static inline struct texture_buffer *texture_buffer(struct buffer *buf) +{ + return (struct texture_buffer *) buf; +} + + +To create/init a sub-classed object: + +struct buffer *create_texture_buffer(int w, int h, int format) +{ + struct texture_buffer *t = malloc(sizeof(*t)); + t->format = format; + t->width = w; + t->height = h; + t->base.size = w * h; + t->base.validate = tex_validate; + return &t->base; +} + +Example sub-class method: + +void tex_validate(struct buffer *buf) +{ + struct texture_buffer *tb = texture_buffer(buf); + assert(tb->format); + assert(tb->width); + assert(tb->height); +} + + +Note that we typically do not use typedefs to make "class names"; we use +'struct whatever' everywhere. + +Gallium's pipe_context and the subclassed psb_context, etc are prime examples +of this. There are also many examples in Mesa and the Mesa state tracker. diff --git a/src/gallium/SConscript b/src/gallium/SConscript new file mode 100644 index 0000000000..6a3e7e77ed --- /dev/null +++ b/src/gallium/SConscript @@ -0,0 +1,29 @@ +import os + +Import('*') + +env = env.Clone() + +auxiliaries = [] + +Export('auxiliaries') + + +if llvm: + SConscript(['auxiliary/gallivm/SConscript']) + +SConscript([ + # NOTE: order matters! + 'auxiliary/util/SConscript', + 'auxiliary/rtasm/SConscript', + 'auxiliary/tgsi/SConscript', + 'auxiliary/cso_cache/SConscript', + 'auxiliary/translate/SConscript', + 'auxiliary/draw/SConscript', + 'auxiliary/pipebuffer/SConscript', +]) + +for driver in env['drivers']: + SConscript(os.path.join('drivers', driver, 'SConscript')) + +SConscript('state_trackers/python/SConscript') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile new file mode 100644 index 0000000000..eaa0f2fe4e --- /dev/null +++ b/src/gallium/auxiliary/Makefile @@ -0,0 +1,20 @@ +TOP = ../../..
+include $(TOP)/configs/current + + +SUBDIRS = $(GALLIUM_AUXILIARY_DIRS) + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile new file mode 100644 index 0000000000..6bd6602088 --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/Makefile @@ -0,0 +1,14 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = cso_cache + +C_SOURCES = \ + cso_context.c \ + cso_cache.c \ + cso_hash.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/cso_cache/SConscript b/src/gallium/auxiliary/cso_cache/SConscript new file mode 100644 index 0000000000..651e68a191 --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/SConscript @@ -0,0 +1,11 @@ +Import('*') + +cso_cache = env.ConvenienceLibrary( + target = 'cso_cache', + source = [ + 'cso_context.c', + 'cso_cache.c', + 'cso_hash.c', + ]) + +auxiliaries.insert(0, cso_cache) diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c new file mode 100644 index 0000000000..6b1754ea00 --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -0,0 +1,406 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: Zack Rusin <zack@tungstengraphics.com> + */ + +#include "pipe/p_debug.h" + +#include "util/u_memory.h" + +#include "cso_cache.h" +#include "cso_hash.h" + + +struct cso_cache { + struct cso_hash *blend_hash; + struct cso_hash *depth_stencil_hash; + struct cso_hash *fs_hash; + struct cso_hash *vs_hash; + struct cso_hash *rasterizer_hash; + struct cso_hash *sampler_hash; + int max_size; + + cso_sanitize_callback sanitize_cb; + void *sanitize_data; +}; + +#if 1 +static unsigned hash_key(const void *key, unsigned key_size) +{ + unsigned *ikey = (unsigned *)key; + unsigned hash = 0, i; + + assert(key_size % 4 == 0); + + /* I'm sure this can be improved on: + */ + for (i = 0; i < key_size/4; i++) + hash ^= ikey[i]; + + return hash; +} +#else +static unsigned hash_key(const unsigned char *p, int n) +{ + unsigned h = 0; + unsigned g; + + while (n--) { + h = (h << 4) + *p++; + if ((g = (h & 0xf0000000)) != 0) + h ^= g >> 23; + h &= ~g; + } + return h; +} +#endif + +unsigned cso_construct_key(void *item, int item_size) +{ + return hash_key((item), item_size); +} + +static struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_type type) +{ + struct cso_hash *hash = 0; + + switch(type) { + case CSO_BLEND: + hash = sc->blend_hash; + break; + case CSO_SAMPLER: + hash = sc->sampler_hash; + break; + case CSO_DEPTH_STENCIL_ALPHA: + hash = sc->depth_stencil_hash; + break; + case CSO_RASTERIZER: + hash = sc->rasterizer_hash; + break; + case CSO_FRAGMENT_SHADER: + hash = sc->fs_hash; + break; + case CSO_VERTEX_SHADER: + hash = sc->vs_hash; + break; + } + + return hash; +} + +static int _cso_size_for_type(enum cso_cache_type type) +{ + switch(type) { + case CSO_BLEND: + return sizeof(struct pipe_blend_state); + case CSO_SAMPLER: + return sizeof(struct pipe_sampler_state); + case CSO_DEPTH_STENCIL_ALPHA: + return sizeof(struct pipe_depth_stencil_alpha_state); + case 
CSO_RASTERIZER: + return sizeof(struct pipe_rasterizer_state); + case CSO_FRAGMENT_SHADER: + return sizeof(struct pipe_shader_state); + case CSO_VERTEX_SHADER: + return sizeof(struct pipe_shader_state); + } + return 0; +} + + +static void delete_blend_state(void *state, void *data) +{ + struct cso_blend *cso = (struct cso_blend *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); +} + +static void delete_depth_stencil_state(void *state, void *data) +{ + struct cso_depth_stencil_alpha *cso = (struct cso_depth_stencil_alpha *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); +} + +static void delete_sampler_state(void *state, void *data) +{ + struct cso_sampler *cso = (struct cso_sampler *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); +} + +static void delete_rasterizer_state(void *state, void *data) +{ + struct cso_rasterizer *cso = (struct cso_rasterizer *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); +} + +static void delete_fs_state(void *state, void *data) +{ + struct cso_fragment_shader *cso = (struct cso_fragment_shader *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); +} + +static void delete_vs_state(void *state, void *data) +{ + struct cso_vertex_shader *cso = (struct cso_vertex_shader *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); +} + + +static INLINE void delete_cso(void *state, enum cso_cache_type type) +{ + switch (type) { + case CSO_BLEND: + delete_blend_state(state, 0); + break; + case CSO_SAMPLER: + delete_sampler_state(state, 0); + break; + case CSO_DEPTH_STENCIL_ALPHA: + delete_depth_stencil_state(state, 0); + break; + case CSO_RASTERIZER: + delete_rasterizer_state(state, 0); + break; + case CSO_FRAGMENT_SHADER: + delete_fs_state(state, 0); + break; + case CSO_VERTEX_SHADER: + 
delete_vs_state(state, 0); + break; + default: + assert(0); + FREE(state); + } +} + + +static INLINE void sanitize_hash(struct cso_cache *sc, + struct cso_hash *hash, + enum cso_cache_type type, + int max_size) +{ + if (sc->sanitize_cb) + sc->sanitize_cb(hash, type, max_size, sc->sanitize_data); +} + + +static INLINE void sanitize_cb(struct cso_hash *hash, enum cso_cache_type type, + int max_size, void *user_data) +{ + /* if we're approach the maximum size, remove fourth of the entries + * otherwise every subsequent call will go through the same */ + int hash_size = cso_hash_size(hash); + int max_entries = (max_size > hash_size) ? max_size : hash_size; + int to_remove = (max_size < max_entries) * max_entries/4; + if (hash_size > max_size) + to_remove += hash_size - max_size; + while (to_remove) { + /*remove elements until we're good */ + /*fixme: currently we pick the nodes to remove at random*/ + struct cso_hash_iter iter = cso_hash_first_node(hash); + void *cso = cso_hash_take(hash, cso_hash_iter_key(iter)); + delete_cso(cso, type); + --to_remove; + } +} + +struct cso_hash_iter +cso_insert_state(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type, + void *state) +{ + struct cso_hash *hash = _cso_hash_for_type(sc, type); + sanitize_hash(sc, hash, type, sc->max_size); + + return cso_hash_insert(hash, hash_key, state); +} + +struct cso_hash_iter +cso_find_state(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type) +{ + struct cso_hash *hash = _cso_hash_for_type(sc, type); + + return cso_hash_find(hash, hash_key); +} + + +void *cso_hash_find_data_from_template( struct cso_hash *hash, + unsigned hash_key, + void *templ, + int size ) +{ + struct cso_hash_iter iter = cso_hash_find(hash, hash_key); + while (!cso_hash_iter_is_null(iter)) { + void *iter_data = cso_hash_iter_data(iter); + if (!memcmp(iter_data, templ, size)) { + /* We found a match + */ + return iter_data; + } + iter = cso_hash_iter_next(iter); + } + return NULL; +} + + 
+struct cso_hash_iter cso_find_state_template(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type, + void *templ) +{ + struct cso_hash_iter iter = cso_find_state(sc, hash_key, type); + int size = _cso_size_for_type(type); + while (!cso_hash_iter_is_null(iter)) { + void *iter_data = cso_hash_iter_data(iter); + if (!memcmp(iter_data, templ, size)) + return iter; + iter = cso_hash_iter_next(iter); + } + return iter; +} + +void * cso_take_state(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type) +{ + struct cso_hash *hash = _cso_hash_for_type(sc, type); + return cso_hash_take(hash, hash_key); +} + +struct cso_cache *cso_cache_create(void) +{ + struct cso_cache *sc = MALLOC_STRUCT(cso_cache); + if (sc == NULL) + return NULL; + + sc->max_size = 4096; + sc->blend_hash = cso_hash_create(); + sc->sampler_hash = cso_hash_create(); + sc->depth_stencil_hash = cso_hash_create(); + sc->rasterizer_hash = cso_hash_create(); + sc->fs_hash = cso_hash_create(); + sc->vs_hash = cso_hash_create(); + sc->sanitize_cb = sanitize_cb; + sc->sanitize_data = 0; + + return sc; +} + +void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, + cso_state_callback func, void *user_data) +{ + struct cso_hash *hash = 0; + struct cso_hash_iter iter; + + switch (type) { + case CSO_BLEND: + hash = sc->blend_hash; + break; + case CSO_SAMPLER: + hash = sc->sampler_hash; + break; + case CSO_DEPTH_STENCIL_ALPHA: + hash = sc->depth_stencil_hash; + break; + case CSO_RASTERIZER: + hash = sc->rasterizer_hash; + break; + case CSO_FRAGMENT_SHADER: + hash = sc->fs_hash; + break; + case CSO_VERTEX_SHADER: + hash = sc->vs_hash; + break; + } + + iter = cso_hash_first_node(hash); + while (!cso_hash_iter_is_null(iter)) { + void *state = cso_hash_iter_data(iter); + iter = cso_hash_iter_next(iter); + if (state) { + func(state, user_data); + } + } +} + +void cso_cache_delete(struct cso_cache *sc) +{ + assert(sc); + /* delete driver data */ + cso_for_each_state(sc, CSO_BLEND, 
delete_blend_state, 0); + cso_for_each_state(sc, CSO_DEPTH_STENCIL_ALPHA, delete_depth_stencil_state, 0); + cso_for_each_state(sc, CSO_FRAGMENT_SHADER, delete_fs_state, 0); + cso_for_each_state(sc, CSO_VERTEX_SHADER, delete_vs_state, 0); + cso_for_each_state(sc, CSO_RASTERIZER, delete_rasterizer_state, 0); + cso_for_each_state(sc, CSO_SAMPLER, delete_sampler_state, 0); + + cso_hash_delete(sc->blend_hash); + cso_hash_delete(sc->sampler_hash); + cso_hash_delete(sc->depth_stencil_hash); + cso_hash_delete(sc->rasterizer_hash); + cso_hash_delete(sc->fs_hash); + cso_hash_delete(sc->vs_hash); + FREE(sc); +} + +void cso_set_maximum_cache_size(struct cso_cache *sc, int number) +{ + sc->max_size = number; + + sanitize_hash(sc, sc->blend_hash, CSO_BLEND, sc->max_size); + sanitize_hash(sc, sc->depth_stencil_hash, CSO_DEPTH_STENCIL_ALPHA, + sc->max_size); + sanitize_hash(sc, sc->fs_hash, CSO_FRAGMENT_SHADER, sc->max_size); + sanitize_hash(sc, sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size); + sanitize_hash(sc, sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size); + sanitize_hash(sc, sc->sampler_hash, CSO_SAMPLER, sc->max_size); +} + +int cso_maximum_cache_size(const struct cso_cache *sc) +{ + return sc->max_size; +} + +void cso_cache_set_sanitize_callback(struct cso_cache *sc, + cso_sanitize_callback cb, + void *user_data) +{ + sc->sanitize_cb = cb; + sc->sanitize_data = user_data; +} + diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h new file mode 100644 index 0000000000..6b5c230e8f --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -0,0 +1,176 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /** + * @file + * Constant State Object (CSO) cache. + * + * The basic idea is that the states are created via the + * create_state/bind_state/delete_state semantics. The driver is expected to + * perform as much of the Gallium state translation to whatever its internal + * representation is during the create call. Gallium then has a caching + * mechanism where it stores the created states. When the pipeline needs an + * actual state change, a bind call is issued. In the bind call the driver + * gets its already translated representation. + * + * Those semantics mean that the driver doesn't do the repeated translations + * of states on every frame, but only once, when a new state is actually + * created. 
 *
 * Even on hardware that doesn't do any kind of state cache, it makes the
 * driver look a lot neater, plus it avoids all the redundant state
 * translations on every frame.
 *
 * Currently our constant state objects are:
 * - alpha test
 * - blend
 * - depth stencil
 * - fragment shader
 * - rasterizer (old setup)
 * - sampler
 * - vertex shader
 *
 * Things that are not constant state objects include:
 * - blend_color
 * - clip_state
 * - clear_color_state
 * - constant_buffer
 * - feedback_state
 * - framebuffer_state
 * - polygon_stipple
 * - scissor_state
 * - texture_state
 * - viewport_state
 *
 * @author Zack Rusin <zack@tungstengraphics.com>
 */

#ifndef CSO_CACHE_H
#define CSO_CACHE_H

#include "pipe/p_context.h"
#include "pipe/p_state.h"

/* cso_hash.h is necessary for cso_hash_iter, as MSVC requires structures
 * returned by value to be fully defined */
#include "cso_hash.h"


#ifdef	__cplusplus
extern "C" {
#endif

/** Selects which of the cache's per-type hash tables an operation uses. */
enum cso_cache_type {
   CSO_BLEND,
   CSO_SAMPLER,
   CSO_DEPTH_STENCIL_ALPHA,
   CSO_RASTERIZER,
   CSO_FRAGMENT_SHADER,
   CSO_VERTEX_SHADER
};

/**
 * Two-pointer callback.  It is used both as the per-object delete hook
 * (called with the object's context and driver data) and as the visitor
 * passed to cso_for_each_state() (called with the stored object and the
 * caller's user_data).
 */
typedef void (*cso_state_callback)(void *ctx, void *obj);

/**
 * Eviction hook, invoked with the table about to be inserted into (or
 * whose limit just changed), the type of the objects it stores, the
 * entry limit, and the user pointer registered alongside the hook.
 */
typedef void (*cso_sanitize_callback)(struct cso_hash *hash,
                                      enum cso_cache_type type,
                                      int max_size,
                                      void *user_data);

struct cso_cache;

/*
 * Cached objects, one struct per type.  All share the same layout:
 *   state         copy of the pipe state template; it is the first member
 *                 because template lookup memcmp()s the start of the
 *                 stored object against the template
 *   data          pointer obtained from the driver's create call
 *   delete_state  hook used to destroy `data` (may be NULL)
 *   context       first argument passed to delete_state
 */

struct cso_blend {
   struct pipe_blend_state state;
   void *data;
   cso_state_callback delete_state;
   struct pipe_context *context;
};

struct cso_depth_stencil_alpha {
   struct pipe_depth_stencil_alpha_state state;
   void *data;
   cso_state_callback delete_state;
   struct pipe_context *context;
};

struct cso_rasterizer {
   struct pipe_rasterizer_state state;
   void *data;
   cso_state_callback delete_state;
   struct pipe_context *context;
};

struct cso_fragment_shader {
   struct pipe_shader_state state;
   void *data;
   cso_state_callback delete_state;
   struct pipe_context *context;
};

struct cso_vertex_shader {
   struct pipe_shader_state state;
   void *data;
   cso_state_callback delete_state;
   struct pipe_context *context;
};

struct cso_sampler {
   struct pipe_sampler_state state;
   void *data;
   cso_state_callback delete_state;
   struct pipe_context *context;
};

/* Hash item_size bytes starting at item into a table key. */
unsigned cso_construct_key(void *item, int item_size);

/* Cache lifetime. */
struct cso_cache *cso_cache_create(void);
void cso_cache_delete(struct cso_cache *sc);

/* Install (or, with cb == NULL, disable) the eviction hook. */
void cso_cache_set_sanitize_callback(struct cso_cache *sc,
                                     cso_sanitize_callback cb,
                                     void *user_data);

/* Insert / look up / visit / remove objects in the table for `type`. */
struct cso_hash_iter cso_insert_state(struct cso_cache *sc,
                                      unsigned hash_key, enum cso_cache_type type,
                                      void *state);
struct cso_hash_iter cso_find_state(struct cso_cache *sc,
                                    unsigned hash_key, enum cso_cache_type type);
struct cso_hash_iter cso_find_state_template(struct cso_cache *sc,
                                             unsigned hash_key, enum cso_cache_type type,
                                             void *templ);
void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type,
                        cso_state_callback func, void *user_data);
void * cso_take_state(struct cso_cache *sc, unsigned hash_key,
                      enum cso_cache_type type);

/* Per-table entry limit. */
void cso_set_maximum_cache_size(struct cso_cache *sc, int number);
int cso_maximum_cache_size(const struct cso_cache *sc);

#ifdef	__cplusplus
}
#endif

#endif
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
new file mode 100644
index 0000000000..68508f24de
--- /dev/null
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -0,0 +1,866 @@
/**************************************************************************
 *
 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /** + * @file + * + * Wrap the cso cache & hash mechanisms in a simplified + * pipe-driver-specific interface. 
+ * + * @author Zack Rusin <zack@tungstengraphics.com> + * @author Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/p_state.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "tgsi/tgsi_parse.h" + +#include "cso_cache/cso_context.h" +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_hash.h" + +struct cso_context { + struct pipe_context *pipe; + struct cso_cache *cache; + + struct { + void *samplers[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + } hw; + + void *samplers[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + + void *samplers_saved[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers_saved; + + struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; + uint nr_textures; + + struct pipe_texture *textures_saved[PIPE_MAX_SAMPLERS]; + uint nr_textures_saved; + + /** Current and saved state. + * The saved state is used as a 1-deep stack. + */ + void *blend, *blend_saved; + void *depth_stencil, *depth_stencil_saved; + void *rasterizer, *rasterizer_saved; + void *fragment_shader, *fragment_shader_saved; + void *vertex_shader, *vertex_shader_saved; + + struct pipe_framebuffer_state fb, fb_saved; + struct pipe_viewport_state vp, vp_saved; + struct pipe_blend_color blend_color; +}; + + +static void +free_framebuffer_state(struct pipe_framebuffer_state *fb); + + +static boolean delete_blend_state(struct cso_context *ctx, void *state) +{ + struct cso_blend *cso = (struct cso_blend *)state; + + if (ctx->blend == cso->data) + return FALSE; + + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + return TRUE; +} + +static boolean delete_depth_stencil_state(struct cso_context *ctx, void *state) +{ + struct cso_depth_stencil_alpha *cso = (struct cso_depth_stencil_alpha *)state; + + if (ctx->depth_stencil == cso->data) + return FALSE; + + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + + return TRUE; +} + +static boolean delete_sampler_state(struct cso_context *ctx, void *state) +{ 
+ struct cso_sampler *cso = (struct cso_sampler *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + return TRUE; +} + +static boolean delete_rasterizer_state(struct cso_context *ctx, void *state) +{ + struct cso_rasterizer *cso = (struct cso_rasterizer *)state; + + if (ctx->rasterizer == cso->data) + return FALSE; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + return TRUE; +} + +static boolean delete_fs_state(struct cso_context *ctx, void *state) +{ + struct cso_fragment_shader *cso = (struct cso_fragment_shader *)state; + if (ctx->fragment_shader == cso->data) + return FALSE; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + return TRUE; +} + +static boolean delete_vs_state(struct cso_context *ctx, void *state) +{ + struct cso_vertex_shader *cso = (struct cso_vertex_shader *)state; + if (ctx->vertex_shader == cso->data) + return TRUE; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + return FALSE; +} + + +static INLINE boolean delete_cso(struct cso_context *ctx, + void *state, enum cso_cache_type type) +{ + switch (type) { + case CSO_BLEND: + return delete_blend_state(ctx, state); + break; + case CSO_SAMPLER: + return delete_sampler_state(ctx, state); + break; + case CSO_DEPTH_STENCIL_ALPHA: + return delete_depth_stencil_state(ctx, state); + break; + case CSO_RASTERIZER: + return delete_rasterizer_state(ctx, state); + break; + case CSO_FRAGMENT_SHADER: + return delete_fs_state(ctx, state); + break; + case CSO_VERTEX_SHADER: + return delete_vs_state(ctx, state); + break; + default: + assert(0); + FREE(state); + } + return FALSE; +} + +static INLINE void sanitize_hash(struct cso_hash *hash, enum cso_cache_type type, + int max_size, void *user_data) +{ + struct cso_context *ctx = (struct cso_context *)user_data; + /* if we're approach the maximum size, remove fourth of the entries + * otherwise every 
subsequent call will go through the same */ + int hash_size = cso_hash_size(hash); + int max_entries = (max_size > hash_size) ? max_size : hash_size; + int to_remove = (max_size < max_entries) * max_entries/4; + struct cso_hash_iter iter = cso_hash_first_node(hash); + if (hash_size > max_size) + to_remove += hash_size - max_size; + while (to_remove) { + /*remove elements until we're good */ + /*fixme: currently we pick the nodes to remove at random*/ + void *cso = cso_hash_iter_data(iter); + if (delete_cso(ctx, cso, type)) { + iter = cso_hash_erase(hash, iter); + --to_remove; + } else + iter = cso_hash_iter_next(iter); + } +} + + +struct cso_context *cso_create_context( struct pipe_context *pipe ) +{ + struct cso_context *ctx = CALLOC_STRUCT(cso_context); + if (ctx == NULL) + goto out; + + ctx->cache = cso_cache_create(); + if (ctx->cache == NULL) + goto out; + cso_cache_set_sanitize_callback(ctx->cache, + sanitize_hash, + ctx); + + ctx->pipe = pipe; + + /* Enable for testing: */ + if (0) cso_set_maximum_cache_size( ctx->cache, 4 ); + + return ctx; + +out: + cso_destroy_context( ctx ); + return NULL; +} + + +/** + * Prior to context destruction, this function unbinds all state objects. 
+ */ +void cso_release_all( struct cso_context *ctx ) +{ + unsigned i; + + if (ctx->pipe) { + ctx->pipe->bind_blend_state( ctx->pipe, NULL ); + ctx->pipe->bind_rasterizer_state( ctx->pipe, NULL ); + ctx->pipe->bind_sampler_states( ctx->pipe, 0, NULL ); + ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL ); + ctx->pipe->bind_fs_state( ctx->pipe, NULL ); + ctx->pipe->bind_vs_state( ctx->pipe, NULL ); + } + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + pipe_texture_reference(&ctx->textures[i], NULL); + pipe_texture_reference(&ctx->textures_saved[i], NULL); + } + + free_framebuffer_state(&ctx->fb); + free_framebuffer_state(&ctx->fb_saved); + + if (ctx->cache) { + cso_cache_delete( ctx->cache ); + ctx->cache = NULL; + } +} + + +void cso_destroy_context( struct cso_context *ctx ) +{ + if (ctx) { + //cso_release_all( ctx ); + FREE( ctx ); + } +} + + +/* Those function will either find the state of the given template + * in the cache or they will create a new state from the given + * template, insert it in the cache and return it. + */ + +/* + * If the driver returns 0 from the create method then they will assign + * the data member of the cso to be the template itself. 
+ */ + +enum pipe_error cso_set_blend(struct cso_context *ctx, + const struct pipe_blend_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_blend_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_BLEND, + (void*)templ); + void *handle; + + if (cso_hash_iter_is_null(iter)) { + struct cso_blend *cso = MALLOC(sizeof(struct cso_blend)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + + memcpy(&cso->state, templ, sizeof(*templ)); + cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_BLEND, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + handle = cso->data; + } + else { + handle = ((struct cso_blend *)cso_hash_iter_data(iter))->data; + } + + if (ctx->blend != handle) { + ctx->blend = handle; + ctx->pipe->bind_blend_state(ctx->pipe, handle); + } + return PIPE_OK; +} + +void cso_save_blend(struct cso_context *ctx) +{ + assert(!ctx->blend_saved); + ctx->blend_saved = ctx->blend; +} + +void cso_restore_blend(struct cso_context *ctx) +{ + if (ctx->blend != ctx->blend_saved) { + ctx->blend = ctx->blend_saved; + ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend_saved); + } + ctx->blend_saved = NULL; +} + + + +enum pipe_error cso_single_sampler(struct cso_context *ctx, + unsigned idx, + const struct pipe_sampler_state *templ) +{ + void *handle = NULL; + + if (templ != NULL) { + unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_sampler_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_SAMPLER, + (void*)templ); + + if (cso_hash_iter_is_null(iter)) { + struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + + memcpy(&cso->state, templ, sizeof(*templ)); + cso->data = 
ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + handle = cso->data; + } + else { + handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data; + } + } + + ctx->samplers[idx] = handle; + return PIPE_OK; +} + +void cso_single_sampler_done( struct cso_context *ctx ) +{ + unsigned i; + + /* find highest non-null sampler */ + for (i = PIPE_MAX_SAMPLERS; i > 0; i--) { + if (ctx->samplers[i - 1] != NULL) + break; + } + + ctx->nr_samplers = i; + + if (ctx->hw.nr_samplers != ctx->nr_samplers || + memcmp(ctx->hw.samplers, + ctx->samplers, + ctx->nr_samplers * sizeof(void *)) != 0) + { + memcpy(ctx->hw.samplers, ctx->samplers, ctx->nr_samplers * sizeof(void *)); + ctx->hw.nr_samplers = ctx->nr_samplers; + + ctx->pipe->bind_sampler_states(ctx->pipe, ctx->nr_samplers, ctx->samplers); + } +} + +/* + * If the function encouters any errors it will return the + * last one. Done to always try to set as many samplers + * as possible. 
+ */ +enum pipe_error cso_set_samplers( struct cso_context *ctx, + unsigned nr, + const struct pipe_sampler_state **templates ) +{ + unsigned i; + enum pipe_error temp, error = PIPE_OK; + + /* TODO: fastpath + */ + + for (i = 0; i < nr; i++) { + temp = cso_single_sampler( ctx, i, templates[i] ); + if (temp != PIPE_OK) + error = temp; + } + + for ( ; i < ctx->nr_samplers; i++) { + temp = cso_single_sampler( ctx, i, NULL ); + if (temp != PIPE_OK) + error = temp; + } + + cso_single_sampler_done( ctx ); + + return error; +} + +void cso_save_samplers(struct cso_context *ctx) +{ + ctx->nr_samplers_saved = ctx->nr_samplers; + memcpy(ctx->samplers_saved, ctx->samplers, sizeof(ctx->samplers)); +} + +void cso_restore_samplers(struct cso_context *ctx) +{ + ctx->nr_samplers = ctx->nr_samplers_saved; + memcpy(ctx->samplers, ctx->samplers_saved, sizeof(ctx->samplers)); + cso_single_sampler_done( ctx ); +} + + +enum pipe_error cso_set_sampler_textures( struct cso_context *ctx, + uint count, + struct pipe_texture **textures ) +{ + uint i; + + ctx->nr_textures = count; + + for (i = 0; i < count; i++) + pipe_texture_reference(&ctx->textures[i], textures[i]); + for ( ; i < PIPE_MAX_SAMPLERS; i++) + pipe_texture_reference(&ctx->textures[i], NULL); + + ctx->pipe->set_sampler_textures(ctx->pipe, count, textures); + + return PIPE_OK; +} + +void cso_save_sampler_textures( struct cso_context *ctx ) +{ + uint i; + + ctx->nr_textures_saved = ctx->nr_textures; + for (i = 0; i < ctx->nr_textures; i++) { + assert(!ctx->textures_saved[i]); + pipe_texture_reference(&ctx->textures_saved[i], ctx->textures[i]); + } +} + +void cso_restore_sampler_textures( struct cso_context *ctx ) +{ + uint i; + + ctx->nr_textures = ctx->nr_textures_saved; + + for (i = 0; i < ctx->nr_textures; i++) { + pipe_texture_reference(&ctx->textures[i], NULL); + ctx->textures[i] = ctx->textures_saved[i]; + ctx->textures_saved[i] = NULL; + } + for ( ; i < PIPE_MAX_SAMPLERS; i++) + pipe_texture_reference(&ctx->textures[i], 
NULL); + + ctx->pipe->set_sampler_textures(ctx->pipe, ctx->nr_textures, ctx->textures); + + ctx->nr_textures_saved = 0; +} + + + +enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx, + const struct pipe_depth_stencil_alpha_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + sizeof(struct pipe_depth_stencil_alpha_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, + CSO_DEPTH_STENCIL_ALPHA, + (void*)templ); + void *handle; + + if (cso_hash_iter_is_null(iter)) { + struct cso_depth_stencil_alpha *cso = MALLOC(sizeof(struct cso_depth_stencil_alpha)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + + memcpy(&cso->state, templ, sizeof(*templ)); + cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_depth_stencil_alpha_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_DEPTH_STENCIL_ALPHA, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + handle = cso->data; + } + else { + handle = ((struct cso_depth_stencil_alpha *)cso_hash_iter_data(iter))->data; + } + + if (ctx->depth_stencil != handle) { + ctx->depth_stencil = handle; + ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, handle); + } + return PIPE_OK; +} + +void cso_save_depth_stencil_alpha(struct cso_context *ctx) +{ + assert(!ctx->depth_stencil_saved); + ctx->depth_stencil_saved = ctx->depth_stencil; +} + +void cso_restore_depth_stencil_alpha(struct cso_context *ctx) +{ + if (ctx->depth_stencil != ctx->depth_stencil_saved) { + ctx->depth_stencil = ctx->depth_stencil_saved; + ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, ctx->depth_stencil_saved); + } + ctx->depth_stencil_saved = NULL; +} + + + +enum pipe_error cso_set_rasterizer(struct cso_context *ctx, + const struct pipe_rasterizer_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + 
sizeof(struct pipe_rasterizer_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_RASTERIZER, + (void*)templ); + void *handle = NULL; + + if (cso_hash_iter_is_null(iter)) { + struct cso_rasterizer *cso = MALLOC(sizeof(struct cso_rasterizer)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + + memcpy(&cso->state, templ, sizeof(*templ)); + cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_rasterizer_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_RASTERIZER, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + handle = cso->data; + } + else { + handle = ((struct cso_rasterizer *)cso_hash_iter_data(iter))->data; + } + + if (ctx->rasterizer != handle) { + ctx->rasterizer = handle; + ctx->pipe->bind_rasterizer_state(ctx->pipe, handle); + } + return PIPE_OK; +} + +void cso_save_rasterizer(struct cso_context *ctx) +{ + assert(!ctx->rasterizer_saved); + ctx->rasterizer_saved = ctx->rasterizer; +} + +void cso_restore_rasterizer(struct cso_context *ctx) +{ + if (ctx->rasterizer != ctx->rasterizer_saved) { + ctx->rasterizer = ctx->rasterizer_saved; + ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rasterizer_saved); + } + ctx->rasterizer_saved = NULL; +} + + + +enum pipe_error cso_set_fragment_shader_handle(struct cso_context *ctx, + void *handle ) +{ + if (ctx->fragment_shader != handle) { + ctx->fragment_shader = handle; + ctx->pipe->bind_fs_state(ctx->pipe, handle); + } + return PIPE_OK; +} + +void cso_delete_fragment_shader(struct cso_context *ctx, void *handle ) +{ + if (handle == ctx->fragment_shader) { + /* unbind before deleting */ + ctx->pipe->bind_fs_state(ctx->pipe, NULL); + ctx->fragment_shader = NULL; + } + ctx->pipe->delete_fs_state(ctx->pipe, handle); +} + +/* Not really working: + */ +#if 0 +enum pipe_error cso_set_fragment_shader(struct cso_context *ctx, 
+ const struct pipe_shader_state *templ) +{ + const struct tgsi_token *tokens = templ->tokens; + unsigned num_tokens = tgsi_num_tokens(tokens); + size_t tokens_size = num_tokens*sizeof(struct tgsi_token); + unsigned hash_key = cso_construct_key((void*)tokens, tokens_size); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, + CSO_FRAGMENT_SHADER, + (void*)tokens); + void *handle = NULL; + + if (cso_hash_iter_is_null(iter)) { + struct cso_fragment_shader *cso = MALLOC(sizeof(struct cso_fragment_shader) + tokens_size); + struct tgsi_token *cso_tokens = (struct tgsi_token *)((char *)cso + sizeof(*cso)); + + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + + memcpy(cso_tokens, tokens, tokens_size); + cso->state.tokens = cso_tokens; + cso->data = ctx->pipe->create_fs_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_fs_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_FRAGMENT_SHADER, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + handle = cso->data; + } + else { + handle = ((struct cso_fragment_shader *)cso_hash_iter_data(iter))->data; + } + + return cso_set_fragment_shader_handle( ctx, handle ); +} +#endif + +void cso_save_fragment_shader(struct cso_context *ctx) +{ + assert(!ctx->fragment_shader_saved); + ctx->fragment_shader_saved = ctx->fragment_shader; +} + +void cso_restore_fragment_shader(struct cso_context *ctx) +{ + if (ctx->fragment_shader_saved != ctx->fragment_shader) { + ctx->pipe->bind_fs_state(ctx->pipe, ctx->fragment_shader_saved); + ctx->fragment_shader = ctx->fragment_shader_saved; + } + ctx->fragment_shader_saved = NULL; +} + + +enum pipe_error cso_set_vertex_shader_handle(struct cso_context *ctx, + void *handle ) +{ + if (ctx->vertex_shader != handle) { + ctx->vertex_shader = handle; + ctx->pipe->bind_vs_state(ctx->pipe, handle); + } + return PIPE_OK; +} + +void cso_delete_vertex_shader(struct 
cso_context *ctx, void *handle ) +{ + if (handle == ctx->vertex_shader) { + /* unbind before deleting */ + ctx->pipe->bind_vs_state(ctx->pipe, NULL); + ctx->vertex_shader = NULL; + } + ctx->pipe->delete_vs_state(ctx->pipe, handle); +} + + +/* Not really working: + */ +#if 0 +enum pipe_error cso_set_vertex_shader(struct cso_context *ctx, + const struct pipe_shader_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + sizeof(struct pipe_shader_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_VERTEX_SHADER, + (void*)templ); + void *handle = NULL; + + if (cso_hash_iter_is_null(iter)) { + struct cso_vertex_shader *cso = MALLOC(sizeof(struct cso_vertex_shader)); + + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + + memcpy(cso->state, templ, sizeof(*templ)); + cso->data = ctx->pipe->create_vs_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_vs_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_VERTEX_SHADER, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + handle = cso->data; + } + else { + handle = ((struct cso_vertex_shader *)cso_hash_iter_data(iter))->data; + } + + return cso_set_vertex_shader_handle( ctx, handle ); +} +#endif + + + +void cso_save_vertex_shader(struct cso_context *ctx) +{ + assert(!ctx->vertex_shader_saved); + ctx->vertex_shader_saved = ctx->vertex_shader; +} + +void cso_restore_vertex_shader(struct cso_context *ctx) +{ + if (ctx->vertex_shader_saved != ctx->vertex_shader) { + ctx->pipe->bind_vs_state(ctx->pipe, ctx->vertex_shader_saved); + ctx->vertex_shader = ctx->vertex_shader_saved; + } + ctx->vertex_shader_saved = NULL; +} + + +/** + * Copy framebuffer state from src to dst with refcounting of surfaces. 
+ */ +static void +copy_framebuffer_state(struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src) +{ + uint i; + + dst->width = src->width; + dst->height = src->height; + dst->num_cbufs = src->num_cbufs; + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); + } + pipe_surface_reference(&dst->zsbuf, src->zsbuf); +} + + +static void +free_framebuffer_state(struct pipe_framebuffer_state *fb) +{ + uint i; + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&fb->cbufs[i], NULL); + } + pipe_surface_reference(&fb->zsbuf, NULL); +} + + +enum pipe_error cso_set_framebuffer(struct cso_context *ctx, + const struct pipe_framebuffer_state *fb) +{ + if (memcmp(&ctx->fb, fb, sizeof(*fb)) != 0) { + copy_framebuffer_state(&ctx->fb, fb); + ctx->pipe->set_framebuffer_state(ctx->pipe, fb); + } + return PIPE_OK; +} + +void cso_save_framebuffer(struct cso_context *ctx) +{ + copy_framebuffer_state(&ctx->fb_saved, &ctx->fb); +} + +void cso_restore_framebuffer(struct cso_context *ctx) +{ + if (memcmp(&ctx->fb, &ctx->fb_saved, sizeof(ctx->fb))) { + copy_framebuffer_state(&ctx->fb, &ctx->fb_saved); + ctx->pipe->set_framebuffer_state(ctx->pipe, &ctx->fb); + free_framebuffer_state(&ctx->fb_saved); + } +} + + +enum pipe_error cso_set_viewport(struct cso_context *ctx, + const struct pipe_viewport_state *vp) +{ + if (memcmp(&ctx->vp, vp, sizeof(*vp))) { + ctx->vp = *vp; + ctx->pipe->set_viewport_state(ctx->pipe, vp); + } + return PIPE_OK; +} + +void cso_save_viewport(struct cso_context *ctx) +{ + ctx->vp_saved = ctx->vp; +} + + +void cso_restore_viewport(struct cso_context *ctx) +{ + if (memcmp(&ctx->vp, &ctx->vp_saved, sizeof(ctx->vp))) { + ctx->vp = ctx->vp_saved; + ctx->pipe->set_viewport_state(ctx->pipe, &ctx->vp); + } +} + + + + +enum pipe_error cso_set_blend_color(struct cso_context *ctx, + const struct pipe_blend_color *bc) +{ + if (memcmp(&ctx->blend_color, bc, sizeof(ctx->blend_color))) { + 
ctx->blend_color = *bc; + ctx->pipe->set_blend_color(ctx->pipe, bc); + } + return PIPE_OK; +} diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h new file mode 100644 index 0000000000..b04e98bfa1 --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -0,0 +1,145 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef CSO_CONTEXT_H +#define CSO_CONTEXT_H + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "pipe/p_error.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +struct cso_context; + +struct cso_context *cso_create_context( struct pipe_context *pipe ); + +void cso_release_all( struct cso_context *ctx ); + +void cso_destroy_context( struct cso_context *cso ); + + + +enum pipe_error cso_set_blend( struct cso_context *cso, + const struct pipe_blend_state *blend ); +void cso_save_blend(struct cso_context *cso); +void cso_restore_blend(struct cso_context *cso); + + + +enum pipe_error cso_set_depth_stencil_alpha( struct cso_context *cso, + const struct pipe_depth_stencil_alpha_state *dsa ); +void cso_save_depth_stencil_alpha(struct cso_context *cso); +void cso_restore_depth_stencil_alpha(struct cso_context *cso); + + + +enum pipe_error cso_set_rasterizer( struct cso_context *cso, + const struct pipe_rasterizer_state *rasterizer ); +void cso_save_rasterizer(struct cso_context *cso); +void cso_restore_rasterizer(struct cso_context *cso); + + + +enum pipe_error cso_set_samplers( struct cso_context *cso, + unsigned count, + const struct pipe_sampler_state **states ); +void cso_save_samplers(struct cso_context *cso); +void cso_restore_samplers(struct cso_context *cso); + +/* Alternate interface to support state trackers that like to modify + * samplers one at a time: + */ +enum pipe_error cso_single_sampler( struct cso_context *cso, + unsigned nr, + const struct pipe_sampler_state *states ); + +void cso_single_sampler_done( struct cso_context *cso ); + + + +enum pipe_error cso_set_sampler_textures( struct cso_context *cso, + uint count, + struct pipe_texture **textures ); +void cso_save_sampler_textures( struct cso_context *cso ); +void cso_restore_sampler_textures( struct cso_context *cso ); + + + +/* These aren't really sensible -- most of the time the api provides + * 
object semantics for shaders anyway, and the cases where it doesn't + * (eg mesa's internally-generated texenv programs), it will be up to + * the state tracker to implement their own specialized caching. + */ +enum pipe_error cso_set_fragment_shader_handle(struct cso_context *ctx, + void *handle ); +void cso_delete_fragment_shader(struct cso_context *ctx, void *handle ); +/* +enum pipe_error cso_set_fragment_shader( struct cso_context *cso, + const struct pipe_shader_state *shader ); +*/ +void cso_save_fragment_shader(struct cso_context *cso); +void cso_restore_fragment_shader(struct cso_context *cso); + + +enum pipe_error cso_set_vertex_shader_handle(struct cso_context *ctx, + void *handle ); +void cso_delete_vertex_shader(struct cso_context *ctx, void *handle ); +/* +enum pipe_error cso_set_vertex_shader( struct cso_context *cso, + const struct pipe_shader_state *shader ); +*/ +void cso_save_vertex_shader(struct cso_context *cso); +void cso_restore_vertex_shader(struct cso_context *cso); + + + +enum pipe_error cso_set_framebuffer(struct cso_context *cso, + const struct pipe_framebuffer_state *fb); +void cso_save_framebuffer(struct cso_context *cso); +void cso_restore_framebuffer(struct cso_context *cso); + + +enum pipe_error cso_set_viewport(struct cso_context *cso, + const struct pipe_viewport_state *vp); +void cso_save_viewport(struct cso_context *cso); +void cso_restore_viewport(struct cso_context *cso); + + +enum pipe_error cso_set_blend_color(struct cso_context *cso, + const struct pipe_blend_color *bc); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c new file mode 100644 index 0000000000..4e7664f9bf --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -0,0 +1,439 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
/**
 * Evaluate to the larger of two values.
 *
 * Each argument is parenthesized wherever it is expanded, so arguments
 * containing operators that bind more loosely than '>' (for example '&' or
 * '?:') are compared as a whole.
 *
 * Each argument is expanded twice; do not pass expressions with side
 * effects.
 */
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+*/ +static int countBits(int hint) +{ + int numBits = 0; + int bits = hint; + + while (bits > 1) { + bits >>= 1; + numBits++; + } + + if (numBits >= (int)sizeof(prime_deltas)) { + numBits = sizeof(prime_deltas) - 1; + } else if (primeForNumBits(numBits) < hint) { + ++numBits; + } + return numBits; +} + +struct cso_node { + struct cso_node *next; + unsigned key; + void *value; +}; + +struct cso_hash_data { + struct cso_node *fakeNext; + struct cso_node **buckets; + int size; + int nodeSize; + short userNumBits; + short numBits; + int numBuckets; +}; + +struct cso_hash { + union { + struct cso_hash_data *d; + struct cso_node *e; + } data; +}; + +static void *cso_data_allocate_node(struct cso_hash_data *hash) +{ + return MALLOC(hash->nodeSize); +} + +static void cso_free_node(struct cso_node *node) +{ + FREE(node); +} + +static struct cso_node * +cso_hash_create_node(struct cso_hash *hash, + unsigned akey, void *avalue, + struct cso_node **anextNode) +{ + struct cso_node *node = cso_data_allocate_node(hash->data.d); + + if (!node) + return NULL; + + node->key = akey; + node->value = avalue; + + node->next = (struct cso_node*)(*anextNode); + *anextNode = node; + ++hash->data.d->size; + return node; +} + +static void cso_data_rehash(struct cso_hash_data *hash, int hint) +{ + if (hint < 0) { + hint = countBits(-hint); + if (hint < MinNumBits) + hint = MinNumBits; + hash->userNumBits = (short)hint; + while (primeForNumBits(hint) < (hash->size >> 1)) + ++hint; + } else if (hint < MinNumBits) { + hint = MinNumBits; + } + + if (hash->numBits != hint) { + struct cso_node *e = (struct cso_node *)(hash); + struct cso_node **oldBuckets = hash->buckets; + int oldNumBuckets = hash->numBuckets; + int i = 0; + + hash->numBits = (short)hint; + hash->numBuckets = primeForNumBits(hint); + hash->buckets = MALLOC(sizeof(struct cso_node*) * hash->numBuckets); + for (i = 0; i < hash->numBuckets; ++i) + hash->buckets[i] = e; + + for (i = 0; i < oldNumBuckets; ++i) { + struct cso_node 
*firstNode = oldBuckets[i]; + while (firstNode != e) { + unsigned h = firstNode->key; + struct cso_node *lastNode = firstNode; + struct cso_node *afterLastNode; + struct cso_node **beforeFirstNode; + + while (lastNode->next != e && lastNode->next->key == h) + lastNode = lastNode->next; + + afterLastNode = lastNode->next; + beforeFirstNode = &hash->buckets[h % hash->numBuckets]; + while (*beforeFirstNode != e) + beforeFirstNode = &(*beforeFirstNode)->next; + lastNode->next = *beforeFirstNode; + *beforeFirstNode = firstNode; + firstNode = afterLastNode; + } + } + FREE(oldBuckets); + } +} + +static void cso_data_might_grow(struct cso_hash_data *hash) +{ + if (hash->size >= hash->numBuckets) + cso_data_rehash(hash, hash->numBits + 1); +} + +static void cso_data_has_shrunk(struct cso_hash_data *hash) +{ + if (hash->size <= (hash->numBuckets >> 3) && + hash->numBits > hash->userNumBits) { + int max = MAX(hash->numBits-2, hash->userNumBits); + cso_data_rehash(hash, max); + } +} + +static struct cso_node *cso_data_first_node(struct cso_hash_data *hash) +{ + struct cso_node *e = (struct cso_node *)(hash); + struct cso_node **bucket = hash->buckets; + int n = hash->numBuckets; + while (n--) { + if (*bucket != e) + return *bucket; + ++bucket; + } + return e; +} + +static struct cso_node **cso_hash_find_node(struct cso_hash *hash, unsigned akey) +{ + struct cso_node **node; + + if (hash->data.d->numBuckets) { + node = (struct cso_node **)(&hash->data.d->buckets[akey % hash->data.d->numBuckets]); + assert(*node == hash->data.e || (*node)->next); + while (*node != hash->data.e && (*node)->key != akey) + node = &(*node)->next; + } else { + node = (struct cso_node **)((const struct cso_node * const *)(&hash->data.e)); + } + return node; +} + +struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, + unsigned key, void *data) +{ + cso_data_might_grow(hash->data.d); + + { + struct cso_node **nextNode = cso_hash_find_node(hash, key); + struct cso_node *node = 
cso_hash_create_node(hash, key, data, nextNode); + if (!node) { + struct cso_hash_iter null_iter = {hash, 0}; + return null_iter; + } + + { + struct cso_hash_iter iter = {hash, node}; + return iter; + } + } +} + +struct cso_hash * cso_hash_create(void) +{ + struct cso_hash *hash = MALLOC_STRUCT(cso_hash); + if (!hash) + return NULL; + + hash->data.d = MALLOC_STRUCT(cso_hash_data); + if (!hash->data.d) { + FREE(hash); + return NULL; + } + + hash->data.d->fakeNext = 0; + hash->data.d->buckets = 0; + hash->data.d->size = 0; + hash->data.d->nodeSize = sizeof(struct cso_node); + hash->data.d->userNumBits = (short)MinNumBits; + hash->data.d->numBits = 0; + hash->data.d->numBuckets = 0; + + return hash; +} + +void cso_hash_delete(struct cso_hash *hash) +{ + struct cso_node *e_for_x = (struct cso_node *)(hash->data.d); + struct cso_node **bucket = (struct cso_node **)(hash->data.d->buckets); + int n = hash->data.d->numBuckets; + while (n--) { + struct cso_node *cur = *bucket++; + while (cur != e_for_x) { + struct cso_node *next = cur->next; + cso_free_node(cur); + cur = next; + } + } + FREE(hash->data.d->buckets); + FREE(hash->data.d); + FREE(hash); +} + +struct cso_hash_iter cso_hash_find(struct cso_hash *hash, + unsigned key) +{ + struct cso_node **nextNode = cso_hash_find_node(hash, key); + struct cso_hash_iter iter = {hash, *nextNode}; + return iter; +} + +unsigned cso_hash_iter_key(struct cso_hash_iter iter) +{ + if (!iter.node || iter.hash->data.e == iter.node) + return 0; + return iter.node->key; +} + +void * cso_hash_iter_data(struct cso_hash_iter iter) +{ + if (!iter.node || iter.hash->data.e == iter.node) + return 0; + return iter.node->value; +} + +static struct cso_node *cso_hash_data_next(struct cso_node *node) +{ + union { + struct cso_node *next; + struct cso_node *e; + struct cso_hash_data *d; + } a; + int start; + struct cso_node **bucket; + int n; + + a.next = node->next; + if (!a.next) { + debug_printf("iterating beyond the last element\n"); + return 0; 
+ } + if (a.next->next) + return a.next; + + start = (node->key % a.d->numBuckets) + 1; + bucket = a.d->buckets + start; + n = a.d->numBuckets - start; + while (n--) { + if (*bucket != a.e) + return *bucket; + ++bucket; + } + return a.e; +} + + +static struct cso_node *cso_hash_data_prev(struct cso_node *node) +{ + union { + struct cso_node *e; + struct cso_hash_data *d; + } a; + int start; + struct cso_node *sentinel; + struct cso_node **bucket; + + a.e = node; + while (a.e->next) + a.e = a.e->next; + + if (node == a.e) + start = a.d->numBuckets - 1; + else + start = node->key % a.d->numBuckets; + + sentinel = node; + bucket = a.d->buckets + start; + while (start >= 0) { + if (*bucket != sentinel) { + struct cso_node *prev = *bucket; + while (prev->next != sentinel) + prev = prev->next; + return prev; + } + + sentinel = a.e; + --bucket; + --start; + } + debug_printf("iterating backward beyond first element\n"); + return a.e; +} + +struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter) +{ + struct cso_hash_iter next = {iter.hash, cso_hash_data_next(iter.node)}; + return next; +} + +int cso_hash_iter_is_null(struct cso_hash_iter iter) +{ + if (!iter.node || iter.node == iter.hash->data.e) + return 1; + return 0; +} + +void * cso_hash_take(struct cso_hash *hash, + unsigned akey) +{ + struct cso_node **node = cso_hash_find_node(hash, akey); + if (*node != hash->data.e) { + void *t = (*node)->value; + struct cso_node *next = (*node)->next; + cso_free_node(*node); + *node = next; + --hash->data.d->size; + cso_data_has_shrunk(hash->data.d); + return t; + } + return 0; +} + +struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter) +{ + struct cso_hash_iter prev = {iter.hash, + cso_hash_data_prev(iter.node)}; + return prev; +} + +struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash) +{ + struct cso_hash_iter iter = {hash, cso_data_first_node(hash->data.d)}; + return iter; +} + +int cso_hash_size(struct cso_hash *hash) +{ + return 
hash->data.d->size; +} + +struct cso_hash_iter cso_hash_erase(struct cso_hash *hash, struct cso_hash_iter iter) +{ + struct cso_hash_iter ret = iter; + struct cso_node *node = iter.node; + struct cso_node **node_ptr; + + if (node == hash->data.e) + return iter; + + ret = cso_hash_iter_next(ret); + node_ptr = (struct cso_node**)(&hash->data.d->buckets[node->key % hash->data.d->numBuckets]); + while (*node_ptr != node) + node_ptr = &(*node_ptr)->next; + *node_ptr = node->next; + cso_free_node(node); + --hash->data.d->size; + return ret; +} + +boolean cso_hash_contains(struct cso_hash *hash, unsigned key) +{ + struct cso_node **node = cso_hash_find_node(hash, key); + return (*node != hash->data.e); +} diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h new file mode 100644 index 0000000000..5891c325fa --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/cso_hash.h @@ -0,0 +1,129 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Hash table implementation. + * + * This file provides a hash implementation that is capable of dealing + * with collisions. It stores colliding entries in a linked list. All + * functions operating on the hash return an iterator. The iterator + * itself points to the collision list. If there wasn't any collision + * the list will have just one entry, otherwise client code should + * iterate over the entries to find the exact entry among ones that + * had the same key (e.g. memcmp could be used on the data to check + * that) + * + * @author Zack Rusin <zack@tungstengraphics.com> + */ + +#ifndef CSO_HASH_H +#define CSO_HASH_H + +#include "pipe/p_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +struct cso_hash; +struct cso_node; + + +struct cso_hash_iter { + struct cso_hash *hash; + struct cso_node *node; +}; + + +struct cso_hash *cso_hash_create(void); +void cso_hash_delete(struct cso_hash *hash); + + +int cso_hash_size(struct cso_hash *hash); + + +/** + * Adds a data with the given key to the hash. If entry with the given + * key is already in the hash, this current entry is inserted before it + * in the collision list. + * Function returns iterator pointing to the inserted item in the hash. + */ +struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, unsigned key, + void *data); +/** + * Removes the item pointed to by the current iterator from the hash. + * Note that the data itself is not erased and if it was a malloc'ed pointer + * it will have to be freed after calling this function by the caller. 
+ * Function returns iterator pointing to the item after the removed one in + * the hash. + */ +struct cso_hash_iter cso_hash_erase(struct cso_hash *hash, struct cso_hash_iter iter); + +void *cso_hash_take(struct cso_hash *hash, unsigned key); + + + +struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash); + +/** + * Return an iterator pointing to the first entry in the collision list. + */ +struct cso_hash_iter cso_hash_find(struct cso_hash *hash, unsigned key); + +/** + * Returns true if a value with the given key exists in the hash + */ +boolean cso_hash_contains(struct cso_hash *hash, unsigned key); + + +int cso_hash_iter_is_null(struct cso_hash_iter iter); +unsigned cso_hash_iter_key(struct cso_hash_iter iter); +void *cso_hash_iter_data(struct cso_hash_iter iter); + + +struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter); +struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter); + + +/** + * Convenience routine to iterate over the collision list while doing a memory + * comparison to see which entry in the list is a direct copy of our template + * and returns that entry. + */ +void *cso_hash_find_data_from_template( struct cso_hash *hash, + unsigned hash_key, + void *templ, + int size ); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile new file mode 100644 index 0000000000..bdbf5a08ed --- /dev/null +++ b/src/gallium/auxiliary/draw/Makefile @@ -0,0 +1,50 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = draw + +C_SOURCES = \ + draw_context.c \ + draw_pipe.c \ + draw_pipe_aaline.c \ + draw_pipe_aapoint.c \ + draw_pipe_clip.c \ + draw_pipe_cull.c \ + draw_pipe_flatshade.c \ + draw_pipe_offset.c \ + draw_pipe_pstipple.c \ + draw_pipe_stipple.c \ + draw_pipe_twoside.c \ + draw_pipe_unfilled.c \ + draw_pipe_util.c \ + draw_pipe_validate.c \ + draw_pipe_vbuf.c \ + draw_pipe_wide_line.c \ + draw_pipe_wide_point.c \ + draw_pt.c \ + draw_pt_elts.c \ + draw_pt_emit.c \ + draw_pt_fetch.c \ + draw_pt_fetch_emit.c \ + draw_pt_fetch_shade_emit.c \ + draw_pt_fetch_shade_pipeline.c \ + draw_pt_post_vs.c \ + draw_pt_util.c \ + draw_pt_varray.c \ + draw_pt_vcache.c \ + draw_vertex.c \ + draw_vs.c \ + draw_vs_varient.c \ + draw_vs_aos.c \ + draw_vs_aos_io.c \ + draw_vs_aos_machine.c \ + draw_vs_exec.c \ + draw_vs_llvm.c \ + draw_vs_ppc.c \ + draw_vs_sse.c + + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript new file mode 100644 index 0000000000..5f05aa324a --- /dev/null +++ b/src/gallium/auxiliary/draw/SConscript @@ -0,0 +1,46 @@ +Import('*') + +draw = env.ConvenienceLibrary( + target = 'draw', + source = [ + 'draw_context.c', + 'draw_pipe.c', + 'draw_pipe_aaline.c', + 'draw_pipe_aapoint.c', + 'draw_pipe_clip.c', + 'draw_pipe_cull.c', + 'draw_pipe_flatshade.c', + 'draw_pipe_offset.c', + 'draw_pipe_pstipple.c', + 'draw_pipe_stipple.c', + 'draw_pipe_twoside.c', + 'draw_pipe_unfilled.c', + 'draw_pipe_util.c', + 'draw_pipe_validate.c', + 'draw_pipe_vbuf.c', + 'draw_pipe_wide_line.c', + 'draw_pipe_wide_point.c', + 'draw_pt.c', + 'draw_pt_elts.c', + 'draw_pt_emit.c', + 'draw_pt_fetch.c', + 'draw_pt_fetch_emit.c', + 'draw_pt_fetch_shade_emit.c', + 'draw_pt_fetch_shade_pipeline.c', + 'draw_pt_post_vs.c', + 'draw_pt_util.c', + 'draw_pt_varray.c', + 'draw_pt_vcache.c', + 'draw_vertex.c', + 'draw_vs.c', + 'draw_vs_aos.c', + 'draw_vs_aos_io.c', + 
'draw_vs_aos_machine.c', + 'draw_vs_exec.c', + 'draw_vs_llvm.c', + 'draw_vs_ppc.c', + 'draw_vs_sse.c', + 'draw_vs_varient.c' + ]) + +auxiliaries.insert(0, draw) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c new file mode 100644 index 0000000000..41a4cba1dd --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -0,0 +1,402 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "draw_context.h" +#include "draw_vbuf.h" +#include "draw_vs.h" +#include "draw_pt.h" +#include "draw_pipe.h" + + +struct draw_context *draw_create( void ) +{ + struct draw_context *draw = CALLOC_STRUCT( draw_context ); + if (draw == NULL) + goto fail; + + ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 ); + ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 ); + ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 ); + ASSIGN_4V( draw->plane[3], 0, 1, 0, 1 ); + ASSIGN_4V( draw->plane[4], 0, 0, 1, 1 ); /* yes these are correct */ + ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */ + draw->nr_planes = 6; + + + draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ + + + if (!draw_pipeline_init( draw )) + goto fail; + + if (!draw_pt_init( draw )) + goto fail; + + if (!draw_vs_init( draw )) + goto fail; + + return draw; + +fail: + draw_destroy( draw ); + return NULL; +} + + +void draw_destroy( struct draw_context *draw ) +{ + if (!draw) + return; + + + + /* Not so fast -- we're just borrowing this at the moment. + * + if (draw->render) + draw->render->destroy( draw->render ); + */ + + draw_pipeline_destroy( draw ); + draw_pt_destroy( draw ); + draw_vs_destroy( draw ); + + FREE( draw ); +} + + + +void draw_flush( struct draw_context *draw ) +{ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); +} + + + +/** + * Register new primitive rasterization/rendering state. + * This causes the drawing pipeline to be rebuilt. 
+ */ +void draw_set_rasterizer_state( struct draw_context *draw, + const struct pipe_rasterizer_state *raster ) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + + draw->rasterizer = raster; + draw->bypass_clipping = + ((draw->rasterizer && draw->rasterizer->bypass_clipping) || + draw->driver.bypass_clipping); +} + + +void draw_set_driver_clipping( struct draw_context *draw, + boolean bypass_clipping ) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + + draw->driver.bypass_clipping = bypass_clipping; + draw->bypass_clipping = (draw->rasterizer->bypass_clipping || + draw->driver.bypass_clipping); +} + + +/** + * Plug in the primitive rendering/rasterization stage (which is the last + * stage in the drawing pipeline). + * This is provided by the device driver. + */ +void draw_set_rasterize_stage( struct draw_context *draw, + struct draw_stage *stage ) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + + draw->pipeline.rasterize = stage; +} + + +/** + * Set the draw module's clipping state. + */ +void draw_set_clip_state( struct draw_context *draw, + const struct pipe_clip_state *clip ) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + + assert(clip->nr <= PIPE_MAX_CLIP_PLANES); + memcpy(&draw->plane[6], clip->ucp, clip->nr * sizeof(clip->ucp[0])); + draw->nr_planes = 6 + clip->nr; +} + + +/** + * Set the draw module's viewport state. 
+ */ +void draw_set_viewport_state( struct draw_context *draw, + const struct pipe_viewport_state *viewport ) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->viewport = *viewport; /* struct copy */ + draw->identity_viewport = (viewport->scale[0] == 1.0f && + viewport->scale[1] == 1.0f && + viewport->scale[2] == 1.0f && + viewport->scale[3] == 1.0f && + viewport->translate[0] == 0.0f && + viewport->translate[1] == 0.0f && + viewport->translate[2] == 0.0f && + viewport->translate[3] == 0.0f); + + draw_vs_set_viewport( draw, viewport ); +} + + + +void +draw_set_vertex_buffers(struct draw_context *draw, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + assert(count <= PIPE_MAX_ATTRIBS); + + memcpy(draw->pt.vertex_buffer, buffers, count * sizeof(buffers[0])); + draw->pt.nr_vertex_buffers = count; +} + + +void +draw_set_vertex_elements(struct draw_context *draw, + unsigned count, + const struct pipe_vertex_element *elements) +{ + assert(count <= PIPE_MAX_ATTRIBS); + + memcpy(draw->pt.vertex_element, elements, count * sizeof(elements[0])); + draw->pt.nr_vertex_elements = count; +} + + +/** + * Tell drawing context where to find mapped vertex buffers. + */ +void +draw_set_mapped_vertex_buffer(struct draw_context *draw, + unsigned attr, const void *buffer) +{ + draw->pt.user.vbuffer[attr] = buffer; +} + + +void +draw_set_mapped_constant_buffer(struct draw_context *draw, + const void *buffer, + unsigned size ) +{ + draw->pt.user.constants = buffer; + draw_vs_set_constants( draw, (const float (*)[4])buffer, size ); +} + + +/** + * Tells the draw module to draw points with triangles if their size + * is greater than this threshold. + */ +void +draw_wide_point_threshold(struct draw_context *draw, float threshold) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->pipeline.wide_point_threshold = threshold; +} + + +/** + * Tells the draw module to draw lines with triangles if their width + * is greater than this threshold. 
+ */ +void +draw_wide_line_threshold(struct draw_context *draw, float threshold) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->pipeline.wide_line_threshold = threshold; +} + + +/** + * Tells the draw module whether or not to implement line stipple. + */ +void +draw_enable_line_stipple(struct draw_context *draw, boolean enable) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->pipeline.line_stipple = enable; +} + + +/** + * Tells draw module whether to convert points to quads for sprite mode. + */ +void +draw_enable_point_sprites(struct draw_context *draw, boolean enable) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->pipeline.point_sprite = enable; +} + + +void +draw_set_force_passthrough( struct draw_context *draw, boolean enable ) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->force_passthrough = enable; +} + + +/** + * Ask the draw module for the location/slot of the given vertex attribute in + * a post-transformed vertex. + * + * With this function, drivers that use the draw module should have no reason + * to track the current vertex shader. + * + * Note that the draw module may sometimes generate vertices with extra + * attributes (such as texcoords for AA lines). The driver can call this + * function to find those attributes. + * + * Zero is returned if the attribute is not found since this is + * a don't care / undefined situtation. Returning -1 would be a bit more + * work for the drivers. + */ +int +draw_find_vs_output(const struct draw_context *draw, + uint semantic_name, uint semantic_index) +{ + const struct draw_vertex_shader *vs = draw->vs.vertex_shader; + uint i; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == semantic_name && + vs->info.output_semantic_index[i] == semantic_index) + return i; + } + + /* XXX there may be more than one extra vertex attrib. + * For example, simulated gl_FragCoord and gl_PointCoord. 
+ */ + if (draw->extra_vp_outputs.semantic_name == semantic_name && + draw->extra_vp_outputs.semantic_index == semantic_index) { + return draw->extra_vp_outputs.slot; + } + return 0; +} + + +/** + * Return number of vertex shader outputs. + */ +uint +draw_num_vs_outputs(const struct draw_context *draw) +{ + uint count = draw->vs.vertex_shader->info.num_outputs; + if (draw->extra_vp_outputs.slot > 0) + count++; + return count; +} + + + +void draw_set_render( struct draw_context *draw, + struct vbuf_render *render ) +{ + draw->render = render; +} + +void draw_set_edgeflags( struct draw_context *draw, + const unsigned *edgeflag ) +{ + draw->pt.user.edgeflag = edgeflag; +} + + + + +/** + * Tell the drawing context about the index/element buffer to use + * (ala glDrawElements) + * If no element buffer is to be used (i.e. glDrawArrays) then this + * should be called with eltSize=0 and elements=NULL. + * + * \param draw the drawing context + * \param eltSize size of each element (1, 2 or 4 bytes) + * \param elements the element buffer ptr + */ +void +draw_set_mapped_element_buffer_range( struct draw_context *draw, + unsigned eltSize, + unsigned min_index, + unsigned max_index, + void *elements ) +{ + draw->pt.user.elts = elements; + draw->pt.user.eltSize = eltSize; + draw->pt.user.min_index = min_index; + draw->pt.user.max_index = max_index; +} + + +void +draw_set_mapped_element_buffer( struct draw_context *draw, + unsigned eltSize, + void *elements ) +{ + draw->pt.user.elts = elements; + draw->pt.user.eltSize = eltSize; + draw->pt.user.min_index = 0; + draw->pt.user.max_index = 0xffffffff; +} + + +/* Revamp me please: + */ +void draw_do_flush( struct draw_context *draw, unsigned flags ) +{ + if (!draw->suspend_flushing) + { + assert(!draw->flushing); /* catch inadvertant recursion */ + + draw->flushing = TRUE; + + draw_pipeline_flush( draw, flags ); + + draw->reduced_prim = ~0; /* is reduced_prim needed any more? 
*/ + + draw->flushing = FALSE; + } +} diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h new file mode 100644 index 0000000000..3eeb453531 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -0,0 +1,175 @@ + +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Public interface into the drawing module. 
+ */ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef DRAW_CONTEXT_H +#define DRAW_CONTEXT_H + + +#include "pipe/p_state.h" + + +struct pipe_context; +struct draw_context; +struct draw_stage; +struct draw_vertex_shader; + + + +struct draw_context *draw_create( void ); + +void draw_destroy( struct draw_context *draw ); + +void draw_set_viewport_state( struct draw_context *draw, + const struct pipe_viewport_state *viewport ); + +void draw_set_clip_state( struct draw_context *pipe, + const struct pipe_clip_state *clip ); + +void draw_set_rasterizer_state( struct draw_context *draw, + const struct pipe_rasterizer_state *raster ); + +void draw_set_rasterize_stage( struct draw_context *draw, + struct draw_stage *stage ); + +void draw_wide_point_threshold(struct draw_context *draw, float threshold); + +void draw_wide_line_threshold(struct draw_context *draw, float threshold); + +void draw_enable_line_stipple(struct draw_context *draw, boolean enable); + +void draw_enable_point_sprites(struct draw_context *draw, boolean enable); + + +boolean +draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); + +boolean +draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe); + +boolean +draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe); + + +int +draw_find_vs_output(const struct draw_context *draw, + uint semantic_name, uint semantic_index); + +uint +draw_num_vs_outputs(const struct draw_context *draw); + + + +/* + * Vertex shader functions + */ + +struct draw_vertex_shader * +draw_create_vertex_shader(struct draw_context *draw, + const struct pipe_shader_state *shader); +void draw_bind_vertex_shader(struct draw_context *draw, + struct draw_vertex_shader *dvs); +void draw_delete_vertex_shader(struct draw_context *draw, + struct draw_vertex_shader *dvs); + + + +/* + * Vertex data functions + */ + +void draw_set_vertex_buffers(struct draw_context *draw, + unsigned count, + 
const struct pipe_vertex_buffer *buffers); + +void draw_set_vertex_elements(struct draw_context *draw, + unsigned count, + const struct pipe_vertex_element *elements); + +void +draw_set_mapped_element_buffer_range( struct draw_context *draw, + unsigned eltSize, + unsigned min_index, + unsigned max_index, + void *elements ); + +void draw_set_mapped_element_buffer( struct draw_context *draw, + unsigned eltSize, + void *elements ); + +void draw_set_mapped_vertex_buffer(struct draw_context *draw, + unsigned attr, const void *buffer); + +void draw_set_mapped_constant_buffer(struct draw_context *draw, + const void *buffer, + unsigned size ); + +void draw_set_edgeflags( struct draw_context *draw, + const unsigned *edgeflag ); + + +/*********************************************************************** + * draw_prim.c + */ + +void draw_arrays(struct draw_context *draw, unsigned prim, + unsigned start, unsigned count); + +void draw_flush(struct draw_context *draw); + + +/******************************************************************************* + * Driver backend interface + */ +struct vbuf_render; +void draw_set_render( struct draw_context *draw, + struct vbuf_render *render ); + +void draw_set_driver_clipping( struct draw_context *draw, + boolean bypass_clipping ); + +void draw_set_force_passthrough( struct draw_context *draw, + boolean enable ); + +/******************************************************************************* + * Draw pipeline + */ +boolean draw_need_pipeline(const struct draw_context *draw, + const struct pipe_rasterizer_state *rasterizer, + unsigned prim ); + + + +#endif /* DRAW_CONTEXT_H */ diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c new file mode 100644 index 0000000000..3cde9d36d3 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -0,0 +1,287 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, 
Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "draw/draw_private.h" +#include "draw/draw_pipe.h" + + + +boolean draw_pipeline_init( struct draw_context *draw ) +{ + /* create pipeline stages */ + draw->pipeline.wide_line = draw_wide_line_stage( draw ); + draw->pipeline.wide_point = draw_wide_point_stage( draw ); + draw->pipeline.stipple = draw_stipple_stage( draw ); + draw->pipeline.unfilled = draw_unfilled_stage( draw ); + draw->pipeline.twoside = draw_twoside_stage( draw ); + draw->pipeline.offset = draw_offset_stage( draw ); + draw->pipeline.clip = draw_clip_stage( draw ); + draw->pipeline.flatshade = draw_flatshade_stage( draw ); + draw->pipeline.cull = draw_cull_stage( draw ); + draw->pipeline.validate = draw_validate_stage( draw ); + draw->pipeline.first = draw->pipeline.validate; + + if (!draw->pipeline.wide_line || + !draw->pipeline.wide_point || + !draw->pipeline.stipple || + !draw->pipeline.unfilled || + !draw->pipeline.twoside || + !draw->pipeline.offset || + !draw->pipeline.clip || + !draw->pipeline.flatshade || + !draw->pipeline.cull || + !draw->pipeline.validate) + return FALSE; + + /* these defaults are oriented toward the needs of softpipe */ + draw->pipeline.wide_point_threshold = 1000000.0; /* infinity */ + draw->pipeline.wide_line_threshold = 1.0; + draw->pipeline.line_stipple = TRUE; + draw->pipeline.point_sprite = TRUE; + + return TRUE; +} + + +void draw_pipeline_destroy( struct draw_context *draw ) +{ + if (draw->pipeline.wide_line) + draw->pipeline.wide_line->destroy( draw->pipeline.wide_line ); + if (draw->pipeline.wide_point) + draw->pipeline.wide_point->destroy( draw->pipeline.wide_point ); + if (draw->pipeline.stipple) + draw->pipeline.stipple->destroy( draw->pipeline.stipple ); + if (draw->pipeline.unfilled) + draw->pipeline.unfilled->destroy( draw->pipeline.unfilled ); + if (draw->pipeline.twoside) + 
draw->pipeline.twoside->destroy( draw->pipeline.twoside ); + if (draw->pipeline.offset) + draw->pipeline.offset->destroy( draw->pipeline.offset ); + if (draw->pipeline.clip) + draw->pipeline.clip->destroy( draw->pipeline.clip ); + if (draw->pipeline.flatshade) + draw->pipeline.flatshade->destroy( draw->pipeline.flatshade ); + if (draw->pipeline.cull) + draw->pipeline.cull->destroy( draw->pipeline.cull ); + if (draw->pipeline.validate) + draw->pipeline.validate->destroy( draw->pipeline.validate ); + if (draw->pipeline.aaline) + draw->pipeline.aaline->destroy( draw->pipeline.aaline ); + if (draw->pipeline.aapoint) + draw->pipeline.aapoint->destroy( draw->pipeline.aapoint ); + if (draw->pipeline.pstipple) + draw->pipeline.pstipple->destroy( draw->pipeline.pstipple ); + if (draw->pipeline.rasterize) + draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); +} + + + + + + + +static void do_point( struct draw_context *draw, + const char *v0 ) +{ + struct prim_header prim; + + prim.flags = 0; + prim.pad = 0; + prim.v[0] = (struct vertex_header *)v0; + + draw->pipeline.first->point( draw->pipeline.first, &prim ); +} + + +static void do_line( struct draw_context *draw, + ushort flags, + const char *v0, + const char *v1 ) +{ + struct prim_header prim; + + prim.flags = flags; + prim.pad = 0; + prim.v[0] = (struct vertex_header *)v0; + prim.v[1] = (struct vertex_header *)v1; + + draw->pipeline.first->line( draw->pipeline.first, &prim ); +} + + +static void do_triangle( struct draw_context *draw, + ushort flags, + char *v0, + char *v1, + char *v2 ) +{ + struct prim_header prim; + + prim.v[0] = (struct vertex_header *)v0; + prim.v[1] = (struct vertex_header *)v1; + prim.v[2] = (struct vertex_header *)v2; + prim.flags = flags; + prim.pad = 0; + + draw->pipeline.first->tri( draw->pipeline.first, &prim ); +} + + + + +/* Code to run the pipeline on a fairly arbitary collection of vertices. 
+ * + * Vertex headers must be pre-initialized with the + * UNDEFINED_VERTEX_ID, this code will cause that id to become + * overwritten, so it may have to be reset if there is the intention + * to reuse the vertices. + * + * This code provides a callback to reset the vertex id's which the + * draw_vbuf.c code uses when it has to perform a flush. + */ +void draw_pipeline_run( struct draw_context *draw, + unsigned prim, + struct vertex_header *vertices, + unsigned vertex_count, + unsigned stride, + const ushort *elts, + unsigned count ) +{ + char *verts = (char *)vertices; + unsigned i; + + draw->pipeline.verts = verts; + draw->pipeline.vertex_stride = stride; + draw->pipeline.vertex_count = vertex_count; + + switch (prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i++) + do_point( draw, + verts + stride * elts[i] ); + break; + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) + do_line( draw, + elts[i+0], /* flags */ + verts + stride * (elts[i+0] & ~DRAW_PIPE_FLAG_MASK), + verts + stride * elts[i+1]); + break; + case PIPE_PRIM_TRIANGLES: + for (i = 0; i+2 < count; i += 3) + do_triangle( draw, + elts[i+0], /* flags */ + verts + stride * (elts[i+0] & ~DRAW_PIPE_FLAG_MASK), + verts + stride * elts[i+1], + verts + stride * elts[i+2]); + break; + } + + draw->pipeline.verts = NULL; + draw->pipeline.vertex_count = 0; +} + +#define QUAD(i0,i1,i2,i3) \ + do_triangle( draw, \ + ( DRAW_PIPE_RESET_STIPPLE | \ + DRAW_PIPE_EDGE_FLAG_0 | \ + DRAW_PIPE_EDGE_FLAG_2 ), \ + verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i1), \ + verts + stride * (i3)); \ + do_triangle( draw, \ + ( DRAW_PIPE_EDGE_FLAG_0 | \ + DRAW_PIPE_EDGE_FLAG_1 ), \ + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i2), \ + verts + stride * (i3)) + +#define TRIANGLE(flags,i0,i1,i2) \ + do_triangle( draw, \ + flags, /* flags */ \ + verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i1), \ + verts + stride * (i2)) + +#define 
LINE(flags,i0,i1) \ + do_line( draw, \ + flags, \ + verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i1)) + +#define POINT(i0) \ + do_point( draw, \ + verts + stride * i0 ) + +#define FUNC pipe_run_linear +#define ARGS \ + struct draw_context *draw, \ + unsigned prim, \ + struct vertex_header *vertices, \ + unsigned stride + +#define LOCAL_VARS \ + char *verts = (char *)vertices; \ + boolean flatfirst = (draw->rasterizer->flatshade && \ + draw->rasterizer->flatshade_first); \ + unsigned i; \ + ushort flags + +#define FLUSH + +#include "draw_pt_decompose.h" + +void draw_pipeline_run_linear( struct draw_context *draw, + unsigned prim, + struct vertex_header *vertices, + unsigned count, + unsigned stride ) +{ + char *verts = (char *)vertices; + draw->pipeline.verts = verts; + draw->pipeline.vertex_stride = stride; + draw->pipeline.vertex_count = count; + + pipe_run_linear(draw, prim, vertices, stride, count); + + draw->pipeline.verts = NULL; + draw->pipeline.vertex_count = 0; +} + + +void draw_pipeline_flush( struct draw_context *draw, + unsigned flags ) +{ + draw->pipeline.first->flush( draw->pipeline.first, flags ); + draw->pipeline.first = draw->pipeline.validate; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h new file mode 100644 index 0000000000..dbad8f98ac --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe.h @@ -0,0 +1,125 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef DRAW_PIPE_H +#define DRAW_PIPE_H + +#include "pipe/p_compiler.h" +#include "draw_private.h" /* for sizeof(vertex_header) */ + + +/** + * Basic info for a point/line/triangle primitive. + */ +struct prim_header { + float det; /**< front/back face determinant */ + ushort flags; + ushort pad; + struct vertex_header *v[3]; /**< 1 to 3 vertex pointers */ +}; + + + +/** + * Base class for all primitive drawing stages. 
+ */ +struct draw_stage +{ + struct draw_context *draw; /**< parent context */ + + struct draw_stage *next; /**< next stage in pipeline */ + + struct vertex_header **tmp; /**< temp vert storage, such as for clipping */ + unsigned nr_tmps; + + void (*point)( struct draw_stage *, + struct prim_header * ); + + void (*line)( struct draw_stage *, + struct prim_header * ); + + void (*tri)( struct draw_stage *, + struct prim_header * ); + + void (*flush)( struct draw_stage *, + unsigned flags ); + + void (*reset_stipple_counter)( struct draw_stage * ); + + void (*destroy)( struct draw_stage * ); +}; + + +extern struct draw_stage *draw_unfilled_stage( struct draw_context *context ); +extern struct draw_stage *draw_twoside_stage( struct draw_context *context ); +extern struct draw_stage *draw_offset_stage( struct draw_context *context ); +extern struct draw_stage *draw_clip_stage( struct draw_context *context ); +extern struct draw_stage *draw_flatshade_stage( struct draw_context *context ); +extern struct draw_stage *draw_cull_stage( struct draw_context *context ); +extern struct draw_stage *draw_stipple_stage( struct draw_context *context ); +extern struct draw_stage *draw_wide_line_stage( struct draw_context *context ); +extern struct draw_stage *draw_wide_point_stage( struct draw_context *context ); +extern struct draw_stage *draw_validate_stage( struct draw_context *context ); + + +extern void draw_free_temp_verts( struct draw_stage *stage ); +extern boolean draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ); + +extern void draw_reset_vertex_ids( struct draw_context *draw ); + +void draw_pipe_passthrough_tri(struct draw_stage *stage, struct prim_header *header); +void draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header *header); +void draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header *header); + + + +/** + * Get a writeable copy of a vertex. 
+ * \param stage drawing stage info + * \param vert the vertex to copy (source) + * \param idx index into stage's tmp[] array to put the copy (dest) + * \return pointer to the copied vertex + */ +static INLINE struct vertex_header * +dup_vert( struct draw_stage *stage, + const struct vertex_header *vert, + unsigned idx ) +{ + struct vertex_header *tmp = stage->tmp[idx]; + const uint vsize = sizeof(struct vertex_header) + + stage->draw->vs.num_vs_outputs * 4 * sizeof(float); + memcpy(tmp, vert, vsize); + tmp->vertex_id = UNDEFINED_VERTEX_ID; + return tmp; +} + +#endif diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c new file mode 100644 index 0000000000..20841bb5d6 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -0,0 +1,914 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * AA line stage: AA lines are converted to texture mapped triangles. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "tgsi/tgsi_transform.h" +#include "tgsi/tgsi_dump.h" + +#include "draw_context.h" +#include "draw_private.h" +#include "draw_pipe.h" + + +/** + * Max texture level for the alpha texture used for antialiasing + */ +#define MAX_TEXTURE_LEVEL 5 /* 32 x 32 */ + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. 
+ */ +struct aaline_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; + void *aaline_fs; + void *aapoint_fs; /* not yet */ + void *sprite_fs; /* not yet */ + uint sampler_unit; + int generic_attrib; /**< texcoord/generic used for texture */ +}; + + +/** + * Subclass of draw_stage + */ +struct aaline_stage +{ + struct draw_stage stage; + + float half_line_width; + + /** For AA lines, this is the vertex attrib slot for the new texcoords */ + uint tex_slot; + /** position, not necessarily output zero */ + uint pos_slot; + + void *sampler_cso; + struct pipe_texture *texture; + uint num_samplers; + uint num_textures; + + + /* + * Currently bound state + */ + struct aaline_fragment_shader *fs; + struct { + void *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + } state; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, + void **); + void (*driver_set_sampler_textures)(struct pipe_context *, unsigned, + struct pipe_texture **); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. + */ +struct aa_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int colorOutput; /**< which output is the primary color */ + uint samplersUsed; /**< bitfield of samplers used */ + int freeSampler; /** an available sampler for the pstipple */ + int maxInput, maxGeneric; /**< max input index found */ + int colorTemp, texTemp; /**< temp registers */ + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. 
+ * Look for a free sampler, a free input attrib, and two free temp regs. + */ +static void +aa_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_OUTPUT && + decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && + decl->Semantic.SemanticIndex == 0) { + aactx->colorOutput = decl->DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { + uint i; + for (i = decl->DeclarationRange.First; + i <= decl->DeclarationRange.Last; i++) { + aactx->samplersUsed |= 1 << i; + } + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + if ((int) decl->DeclarationRange.Last > aactx->maxInput) + aactx->maxInput = decl->DeclarationRange.Last; + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && + (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { + aactx->maxGeneric = decl->Semantic.SemanticIndex; + } + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->DeclarationRange.First; + i <= decl->DeclarationRange.Last; i++) { + aactx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +/** + * Find the lowest zero bit in the given word, or -1 if bitfield is all ones. + */ +static int +free_bit(uint bitfield) +{ + int i; + for (i = 0; i < 32; i++) { + if ((bitfield & (1 << i)) == 0) + return i; + } + return -1; +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. 
+ */ +static void +aa_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + + if (aactx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + uint i; + + /* find free sampler */ + aactx->freeSampler = free_bit(aactx->samplersUsed); + if (aactx->freeSampler >= PIPE_MAX_SAMPLERS) + aactx->freeSampler = PIPE_MAX_SAMPLERS - 1; + + /* find two free temp regs */ + for (i = 0; i < 32; i++) { + if ((aactx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (aactx->colorTemp < 0) + aactx->colorTemp = i; + else if (aactx->texTemp < 0) + aactx->texTemp = i; + else + break; + } + } + assert(aactx->colorTemp >= 0); + assert(aactx->texTemp >= 0); + + /* declare new generic input/texcoord */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + /* XXX this could be linear... */ + decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = aactx->maxInput + 1; + ctx->emit_declaration(ctx, &decl); + + /* declare new sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = aactx->freeSampler; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = aactx->texTemp; + ctx->emit_declaration(ctx, &decl); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = aactx->colorTemp; + 
ctx->emit_declaration(ctx, &decl); + + aactx->firstInstruction = FALSE; + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_END && + aactx->colorOutput != -1) { + struct tgsi_full_instruction newInst; + + /* TEX */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_TEX; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->maxInput + 1; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->freeSampler; + + ctx->emit_instruction(ctx, &newInst); + + /* MOV rgb */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MOV; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + ctx->emit_instruction(ctx, &newInst); + + /* MUL alpha */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = 
aactx->colorTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->texTemp; + ctx->emit_instruction(ctx, &newInst); + + /* END */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_END; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 0; + ctx->emit_instruction(ctx, &newInst); + } + else { + /* Not an END instruction. + * Look for writes to result.color and replace with colorTemp reg. + */ + uint i; + + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + if (dst->DstRegister.File == TGSI_FILE_OUTPUT && + dst->DstRegister.Index == aactx->colorOutput) { + dst->DstRegister.File = TGSI_FILE_TEMPORARY; + dst->DstRegister.Index = aactx->colorTemp; + } + } + + ctx->emit_instruction(ctx, inst); + } +} + + +/** + * Generate the frag shader we'll use for drawing AA lines. + * This will be the user's shader plus some texture/modulate instructions. 
+ */ +static boolean +generate_aaline_fs(struct aaline_stage *aaline) +{ + const struct pipe_shader_state *orig_fs = &aaline->fs->state; + struct pipe_shader_state aaline_fs; + struct aa_transform_context transform; + +#define MAX 1000 + + aaline_fs = *orig_fs; /* copy to init */ + aaline_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + if (aaline_fs.tokens == NULL) + return FALSE; + + memset(&transform, 0, sizeof(transform)); + transform.colorOutput = -1; + transform.maxInput = -1; + transform.maxGeneric = -1; + transform.colorTemp = -1; + transform.texTemp = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = aa_transform_inst; + transform.base.transform_declaration = aa_transform_decl; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) aaline_fs.tokens, + MAX, &transform.base); + +#if 0 /* DEBUG */ + tgsi_dump(orig_fs->tokens, 0); + tgsi_dump(aaline_fs.tokens, 0); +#endif + + aaline->fs->sampler_unit = transform.freeSampler; + + aaline->fs->aaline_fs + = aaline->driver_create_fs_state(aaline->pipe, &aaline_fs); + if (aaline->fs->aaline_fs == NULL) + return FALSE; + + aaline->fs->generic_attrib = transform.maxGeneric + 1; + return TRUE; +} + + +/** + * Create the texture map we'll use for antialiasing the lines. + */ +static boolean +aaline_create_texture(struct aaline_stage *aaline) +{ + struct pipe_context *pipe = aaline->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_texture texTemp; + uint level; + + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */ + texTemp.last_level = MAX_TEXTURE_LEVEL; + texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL; + texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL; + texTemp.depth[0] = 1; + pf_get_block(texTemp.format, &texTemp.block); + + aaline->texture = screen->texture_create(screen, &texTemp); + if (!aaline->texture) + return FALSE; + + /* Fill in mipmap images. 
+ * Basically each level is solid opaque, except for the outermost + * texels which are zero. Special case the 1x1 and 2x2 levels. + */ + for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) { + struct pipe_surface *surface; + const uint size = aaline->texture->width[level]; + ubyte *data; + uint i, j; + + assert(aaline->texture->width[level] == aaline->texture->height[level]); + + /* This texture is new, no need to flush. + */ + surface = screen->get_tex_surface(screen, aaline->texture, 0, level, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); + data = screen->surface_map(screen, surface, PIPE_BUFFER_USAGE_CPU_WRITE); + if (data == NULL) + return FALSE; + + for (i = 0; i < size; i++) { + for (j = 0; j < size; j++) { + ubyte d; + if (size == 1) { + d = 255; + } + else if (size == 2) { + d = 200; /* tuneable */ + } + else if (i == 0 || j == 0 || i == size - 1 || j == size - 1) { + d = 0; + } + else { + d = 255; + } + data[i * surface->stride + j] = d; + } + } + + /* unmap */ + screen->surface_unmap(screen, surface); + screen->tex_surface_release(screen, &surface); + } + return TRUE; +} + + +/** + * Create the sampler CSO that'll be used for antialiasing. + * By using a mipmapped texture, we don't have to generate a different + * texture image for each line size. 
+ */ +static boolean +aaline_create_sampler(struct aaline_stage *aaline) +{ + struct pipe_sampler_state sampler; + struct pipe_context *pipe = aaline->pipe; + + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.normalized_coords = 1; + sampler.min_lod = 0.0f; + sampler.max_lod = MAX_TEXTURE_LEVEL; + + aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler); + if (aaline->sampler_cso == NULL) + return FALSE; + + return TRUE; +} + + +/** + * When we're about to draw our first AA line in a batch, this function is + * called to tell the driver to bind our modified fragment shader. + */ +static boolean +bind_aaline_fragment_shader(struct aaline_stage *aaline) +{ + struct draw_context *draw = aaline->stage.draw; + + if (!aaline->fs->aaline_fs && + !generate_aaline_fs(aaline)) + return FALSE; + + draw->suspend_flushing = TRUE; + aaline->driver_bind_fs_state(aaline->pipe, aaline->fs->aaline_fs); + draw->suspend_flushing = FALSE; + + return TRUE; +} + + + +static INLINE struct aaline_stage * +aaline_stage( struct draw_stage *stage ) +{ + return (struct aaline_stage *) stage; +} + + +/** + * Draw a wide line by drawing a quad, using geometry which will + * fullfill GL's antialiased line requirements. 
+ */ +static void +aaline_line(struct draw_stage *stage, struct prim_header *header) +{ + const struct aaline_stage *aaline = aaline_stage(stage); + const float half_width = aaline->half_line_width; + struct prim_header tri; + struct vertex_header *v[8]; + uint texPos = aaline->tex_slot; + uint posPos = aaline->pos_slot; + float *pos, *tex; + float dx = header->v[1]->data[posPos][0] - header->v[0]->data[posPos][0]; + float dy = header->v[1]->data[posPos][1] - header->v[0]->data[posPos][1]; + double a = atan2(dy, dx); + float c_a = (float) cos(a), s_a = (float) sin(a); + uint i; + + /* XXX the ends of lines aren't quite perfect yet, but probably passable */ + dx = 0.5F * half_width; + dy = half_width; + + /* allocate/dup new verts */ + for (i = 0; i < 8; i++) { + v[i] = dup_vert(stage, header->v[i/4], i); + } + + /* + * Quad strip for line from v0 to v1 (*=endpoints): + * + * 1 3 5 7 + * +---+---------------------+---+ + * | | + * | *v0 v1* | + * | | + * +---+---------------------+---+ + * 0 2 4 6 + */ + + /* new verts */ + pos = v[0]->data[posPos]; + pos[0] += (-dx * c_a - dy * s_a); + pos[1] += (-dx * s_a + dy * c_a); + + pos = v[1]->data[posPos]; + pos[0] += (-dx * c_a - -dy * s_a); + pos[1] += (-dx * s_a + -dy * c_a); + + pos = v[2]->data[posPos]; + pos[0] += ( dx * c_a - dy * s_a); + pos[1] += ( dx * s_a + dy * c_a); + + pos = v[3]->data[posPos]; + pos[0] += ( dx * c_a - -dy * s_a); + pos[1] += ( dx * s_a + -dy * c_a); + + pos = v[4]->data[posPos]; + pos[0] += (-dx * c_a - dy * s_a); + pos[1] += (-dx * s_a + dy * c_a); + + pos = v[5]->data[posPos]; + pos[0] += (-dx * c_a - -dy * s_a); + pos[1] += (-dx * s_a + -dy * c_a); + + pos = v[6]->data[posPos]; + pos[0] += ( dx * c_a - dy * s_a); + pos[1] += ( dx * s_a + dy * c_a); + + pos = v[7]->data[posPos]; + pos[0] += ( dx * c_a - -dy * s_a); + pos[1] += ( dx * s_a + -dy * c_a); + + /* new texcoords */ + tex = v[0]->data[texPos]; + ASSIGN_4V(tex, 0, 0, 0, 1); + + tex = v[1]->data[texPos]; + ASSIGN_4V(tex, 0, 1, 0, 
1); + + tex = v[2]->data[texPos]; + ASSIGN_4V(tex, .5, 0, 0, 1); + + tex = v[3]->data[texPos]; + ASSIGN_4V(tex, .5, 1, 0, 1); + + tex = v[4]->data[texPos]; + ASSIGN_4V(tex, .5, 0, 0, 1); + + tex = v[5]->data[texPos]; + ASSIGN_4V(tex, .5, 1, 0, 1); + + tex = v[6]->data[texPos]; + ASSIGN_4V(tex, 1, 0, 0, 1); + + tex = v[7]->data[texPos]; + ASSIGN_4V(tex, 1, 1, 0, 1); + + /* emit 6 tris for the quad strip */ + tri.v[0] = v[2]; tri.v[1] = v[1]; tri.v[2] = v[0]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[3]; tri.v[1] = v[1]; tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[4]; tri.v[1] = v[3]; tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[5]; tri.v[1] = v[3]; tri.v[2] = v[4]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[6]; tri.v[1] = v[5]; tri.v[2] = v[4]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[7]; tri.v[1] = v[5]; tri.v[2] = v[6]; + stage->next->tri( stage->next, &tri ); +} + + +static void +aaline_first_line(struct draw_stage *stage, struct prim_header *header) +{ + auto struct aaline_stage *aaline = aaline_stage(stage); + struct draw_context *draw = stage->draw; + struct pipe_context *pipe = aaline->pipe; + uint num_samplers; + + assert(draw->rasterizer->line_smooth); + + if (draw->rasterizer->line_width <= 3.0) + aaline->half_line_width = 1.5f; + else + aaline->half_line_width = 0.5f * draw->rasterizer->line_width; + + /* + * Bind (generate) our fragprog, sampler and texture + */ + if (!bind_aaline_fragment_shader(aaline)) { + stage->line = draw_pipe_passthrough_line; + stage->line(stage, header); + return; + } + + /* update vertex attrib info */ + aaline->tex_slot = draw->vs.num_vs_outputs; + aaline->pos_slot = draw->vs.position_output; + + /* advertise the extra post-transformed vertex attribute */ + draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_vp_outputs.semantic_index = aaline->fs->generic_attrib; + draw->extra_vp_outputs.slot = 
aaline->tex_slot; + + /* how many samplers? */ + /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ + num_samplers = MAX2(aaline->num_textures, aaline->num_samplers); + num_samplers = MAX2(num_samplers, aaline->fs->sampler_unit + 1); + + aaline->state.sampler[aaline->fs->sampler_unit] = aaline->sampler_cso; + pipe_texture_reference(&aaline->state.texture[aaline->fs->sampler_unit], + aaline->texture); + + draw->suspend_flushing = TRUE; + aaline->driver_bind_sampler_states(pipe, num_samplers, aaline->state.sampler); + aaline->driver_set_sampler_textures(pipe, num_samplers, aaline->state.texture); + draw->suspend_flushing = FALSE; + + /* now really draw first line */ + stage->line = aaline_line; + stage->line(stage, header); +} + + +static void +aaline_flush(struct draw_stage *stage, unsigned flags) +{ + struct draw_context *draw = stage->draw; + struct aaline_stage *aaline = aaline_stage(stage); + struct pipe_context *pipe = aaline->pipe; + + stage->line = aaline_first_line; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader, texture, sampler state */ + draw->suspend_flushing = TRUE; + aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs); + aaline->driver_bind_sampler_states(pipe, aaline->num_samplers, + aaline->state.sampler); + aaline->driver_set_sampler_textures(pipe, aaline->num_textures, + aaline->state.texture); + draw->suspend_flushing = FALSE; + + draw->extra_vp_outputs.slot = 0; +} + + +static void +aaline_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +aaline_destroy(struct draw_stage *stage) +{ + struct aaline_stage *aaline = aaline_stage(stage); + uint i; + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + pipe_texture_reference(&aaline->state.texture[i], NULL); + } + + if (aaline->sampler_cso) + aaline->pipe->delete_sampler_state(aaline->pipe, aaline->sampler_cso); + + if (aaline->texture) + pipe_texture_release(&aaline->texture); 
+ + draw_free_temp_verts( stage ); + + FREE( stage ); +} + + +static struct aaline_stage * +draw_aaline_stage(struct draw_context *draw) +{ + struct aaline_stage *aaline = CALLOC_STRUCT(aaline_stage); + if (aaline == NULL) + return NULL; + + if (!draw_alloc_temp_verts( &aaline->stage, 8 )) + goto fail; + + aaline->stage.draw = draw; + aaline->stage.next = NULL; + aaline->stage.point = draw_pipe_passthrough_point; + aaline->stage.line = aaline_first_line; + aaline->stage.tri = draw_pipe_passthrough_tri; + aaline->stage.flush = aaline_flush; + aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter; + aaline->stage.destroy = aaline_destroy; + + return aaline; + + fail: + if (aaline) + aaline_destroy(&aaline->stage); + + return NULL; +} + + +static struct aaline_stage * +aaline_stage_from_pipe(struct pipe_context *pipe) +{ + struct draw_context *draw = (struct draw_context *) pipe->draw; + return aaline_stage(draw->pipeline.aaline); +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +aaline_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct aaline_fragment_shader *aafs = CALLOC_STRUCT(aaline_fragment_shader); + if (aafs == NULL) + return NULL; + + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs); + + return aafs; +} + + +static void +aaline_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs; + + /* save current */ + aaline->fs = aafs; + /* pass-through */ + aaline->driver_bind_fs_state(aaline->pipe, + (aafs ? 
aafs->driver_fs : NULL)); +} + + +static void +aaline_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs; + /* pass-through */ + aaline->driver_delete_fs_state(aaline->pipe, aafs->driver_fs); + FREE(aafs); +} + + +static void +aaline_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + + /* save current */ + memcpy(aaline->state.sampler, sampler, num * sizeof(void *)); + aaline->num_samplers = num; + + /* pass-through */ + aaline->driver_bind_sampler_states(aaline->pipe, num, sampler); +} + + +static void +aaline_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + uint i; + + /* save current */ + for (i = 0; i < num; i++) { + pipe_texture_reference(&aaline->state.texture[i], texture[i]); + } + for ( ; i < PIPE_MAX_SAMPLERS; i++) { + pipe_texture_reference(&aaline->state.texture[i], NULL); + } + aaline->num_textures = num; + + /* pass-through */ + aaline->driver_set_sampler_textures(aaline->pipe, num, texture); +} + + +/** + * Called by drivers that want to install this AA line prim stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for AA lines. 
+ */ +boolean +draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) +{ + struct aaline_stage *aaline; + + pipe->draw = (void *) draw; + + /* + * Create / install AA line drawing / prim stage + */ + aaline = draw_aaline_stage( draw ); + if (!aaline) + goto fail; + + aaline->pipe = pipe; + + /* create special texture, sampler state */ + if (!aaline_create_texture(aaline)) + goto fail; + + if (!aaline_create_sampler(aaline)) + goto fail; + + /* save original driver functions */ + aaline->driver_create_fs_state = pipe->create_fs_state; + aaline->driver_bind_fs_state = pipe->bind_fs_state; + aaline->driver_delete_fs_state = pipe->delete_fs_state; + + aaline->driver_bind_sampler_states = pipe->bind_sampler_states; + aaline->driver_set_sampler_textures = pipe->set_sampler_textures; + + /* override the driver's functions */ + pipe->create_fs_state = aaline_create_fs_state; + pipe->bind_fs_state = aaline_bind_fs_state; + pipe->delete_fs_state = aaline_delete_fs_state; + + pipe->bind_sampler_states = aaline_bind_sampler_states; + pipe->set_sampler_textures = aaline_set_sampler_textures; + + /* Install once everything is known to be OK: + */ + draw->pipeline.aaline = &aaline->stage; + + return TRUE; + + fail: + if (aaline) + aaline->stage.destroy( &aaline->stage ); + + return FALSE; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c new file mode 100644 index 0000000000..2c1cacbdb4 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -0,0 +1,875 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * AA point stage: AA points are converted to quads and rendered with a + * special fragment shader. Another approach would be to use a texture + * map image of a point, but experiments indicate the quality isn't nearly + * as good as this approach. + * + * Note: this looks a lot like draw_aaline.c but there's actually little + * if any code that can be shared. 
+ * + * Authors: Brian Paul + */ + + +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/tgsi_transform.h" +#include "tgsi/tgsi_dump.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "draw_context.h" +#include "draw_vs.h" +#include "draw_pipe.h" + + +/* + * Enabling NORMALIZE might give _slightly_ better results. + * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or + * d=x*x+y*y. Since we're working with a unit circle, the later seems + * close enough and saves some costly instructions. + */ +#define NORMALIZE 0 + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. + */ +struct aapoint_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; /**< the regular shader */ + void *aapoint_fs; /**< the aa point-augmented shader */ + int generic_attrib; /**< The generic input attrib/texcoord we'll use */ +}; + + +/** + * Subclass of draw_stage + */ +struct aapoint_stage +{ + struct draw_stage stage; + + int psize_slot; + float radius; + + /** this is the vertex attrib slot for the new texcoords */ + uint tex_slot; + uint pos_slot; + + /* + * Currently bound state + */ + struct aapoint_fragment_shader *fs; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. 
+ */ +struct aa_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int colorOutput; /**< which output is the primary color */ + int maxInput, maxGeneric; /**< max input index found */ + int tmp0, colorTemp; /**< temp registers */ + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for two free temp regs and available input reg for new texcoords. + */ +static void +aa_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_OUTPUT && + decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && + decl->Semantic.SemanticIndex == 0) { + aactx->colorOutput = decl->DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + if ((int) decl->DeclarationRange.Last > aactx->maxInput) + aactx->maxInput = decl->DeclarationRange.Last; + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && + (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { + aactx->maxGeneric = decl->Semantic.SemanticIndex; + } + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->DeclarationRange.First; + i <= decl->DeclarationRange.Last; i++) { + aactx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. 
+ */ +static void +aa_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + struct tgsi_full_instruction newInst; + + if (aactx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + const int texInput = aactx->maxInput + 1; + int tmp0; + uint i; + + /* find two free temp regs */ + for (i = 0; i < 32; i++) { + if ((aactx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (aactx->tmp0 < 0) + aactx->tmp0 = i; + else if (aactx->colorTemp < 0) + aactx->colorTemp = i; + else + break; + } + } + + assert(aactx->colorTemp != aactx->tmp0); + + tmp0 = aactx->tmp0; + + /* declare new generic input/texcoord */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + /* XXX this could be linear... */ + decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = texInput; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = tmp0; + ctx->emit_declaration(ctx, &decl); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = aactx->colorTemp; + ctx->emit_declaration(ctx, &decl); + + aactx->firstInstruction = FALSE; + + + /* + * Emit code to compute fragment coverage, kill if outside point radius + * + * Temp reg0 usage: + * t0.x = distance of fragment from center point + * t0.y = boolean, is t0.x > 1.0, also misc temp usage + * t0.z = temporary for computing 1/(1-k) value + * t0.w = final coverage 
value + */ + + /* MUL t0.xy, tex, tex; # compute x^2, y^2 */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + ctx->emit_instruction(ctx, &newInst); + + /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_ADD; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + ctx->emit_instruction(ctx, &newInst); + +#if NORMALIZE /* OPTIONAL normalization of length */ + /* RSQ t0.x, t0.x; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RSQ; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + 
newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + ctx->emit_instruction(ctx, &newInst); + + /* RCP t0.x, t0.x; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RCP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + ctx->emit_instruction(ctx, &newInst); +#endif + + /* SGT t0.y, t0.xxxx, t0.wwww; # bool b = d > 1 (NOTE t0.w == 1) */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SGT; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + ctx->emit_instruction(ctx, &newInst); + + /* KIL -tmp0.yyyy; # if -tmp0.y < 0, KILL */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_KIL; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + 
newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + ctx->emit_instruction(ctx, &newInst); + + + /* compute coverage factor = (1-d)/(1-k) */ + + /* SUB t0.z, tex.w, tex.z; # m = 1 - k */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SUB; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* RCP t0.z, t0.z; # t0.z = 1 / m */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RCP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* SUB t0.y, 1, t0.x; # d = 1 - d */ + newInst = tgsi_default_full_instruction(); + 
newInst.Instruction.Opcode = TGSI_OPCODE_SUB; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + ctx->emit_instruction(ctx, &newInst); + + /* MUL t0.w, t0.y, t0.z; # coverage = d * m */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SLE; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + 
newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* CMP t0.w, -t0.y, tex.w, t0.w; + * # if -t0.y < 0 then + * t0.w = 1 + * else + * t0.w = t0.w + */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_CMP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 3; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[2].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + 
newInst.FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + ctx->emit_instruction(ctx, &newInst); + + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_END) { + /* add alpha modulation code at tail of program */ + + /* MOV result.color.xyz, colorTemp; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MOV; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + ctx->emit_instruction(ctx, &newInst); + + /* MUL result.color.w, colorTemp, tmp0.w; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->tmp0; + ctx->emit_instruction(ctx, &newInst); + } + else { + /* Not an END instruction. + * Look for writes to result.color and replace with colorTemp reg. 
+ */ + uint i; + + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + if (dst->DstRegister.File == TGSI_FILE_OUTPUT && + dst->DstRegister.Index == aactx->colorOutput) { + dst->DstRegister.File = TGSI_FILE_TEMPORARY; + dst->DstRegister.Index = aactx->colorTemp; + } + } + } + + ctx->emit_instruction(ctx, inst); +} + + +/** + * Generate the frag shader we'll use for drawing AA points. + * This will be the user's shader plus some texture/modulate instructions. + */ +static boolean +generate_aapoint_fs(struct aapoint_stage *aapoint) +{ + const struct pipe_shader_state *orig_fs = &aapoint->fs->state; + struct pipe_shader_state aapoint_fs; + struct aa_transform_context transform; + +#define MAX 1000 + + aapoint_fs = *orig_fs; /* copy to init */ + aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + if (aapoint_fs.tokens == NULL) + return FALSE; + + memset(&transform, 0, sizeof(transform)); + transform.colorOutput = -1; + transform.maxInput = -1; + transform.maxGeneric = -1; + transform.colorTemp = -1; + transform.tmp0 = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = aa_transform_inst; + transform.base.transform_declaration = aa_transform_decl; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) aapoint_fs.tokens, + MAX, &transform.base); + +#if 0 /* DEBUG */ + printf("draw_aapoint, orig shader:\n"); + tgsi_dump(orig_fs->tokens, 0); + printf("draw_aapoint, new shader:\n"); + tgsi_dump(aapoint_fs.tokens, 0); +#endif + + aapoint->fs->aapoint_fs + = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs); + if (aapoint->fs->aapoint_fs == NULL) + return FALSE; + + aapoint->fs->generic_attrib = transform.maxGeneric + 1; + + return TRUE; +} + + +/** + * When we're about to draw our first AA point in a batch, this function is + * called to tell the driver to bind our modified fragment shader. 
+ */ +static boolean +bind_aapoint_fragment_shader(struct aapoint_stage *aapoint) +{ + struct draw_context *draw = aapoint->stage.draw; + + if (!aapoint->fs->aapoint_fs && + !generate_aapoint_fs(aapoint)) + return FALSE; + + draw->suspend_flushing = TRUE; + aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs); + draw->suspend_flushing = FALSE; + + return TRUE; +} + + + +static INLINE struct aapoint_stage * +aapoint_stage( struct draw_stage *stage ) +{ + return (struct aapoint_stage *) stage; +} + + + + +/** + * Draw an AA point by drawing a quad. + */ +static void +aapoint_point(struct draw_stage *stage, struct prim_header *header) +{ + const struct aapoint_stage *aapoint = aapoint_stage(stage); + struct prim_header tri; + struct vertex_header *v[4]; + uint texPos = aapoint->tex_slot; + uint pos_slot = aapoint->pos_slot; + float radius, *pos, *tex; + uint i; + float k; + + if (aapoint->psize_slot >= 0) { + radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0]; + } + else { + radius = aapoint->radius; + } + + /* + * Note: the texcoords (generic attrib, really) we use are special: + * The S and T components simply vary from -1 to +1. + * The R component is k, below. + * The Q component is 1.0 and will used as a handy constant in the + * fragment shader. + */ + + /* + * k is the threshold distance from the point's center at which + * we begin alpha attenuation (the coverage value). + * Operating within a unit circle, we'll compute the fragment's + * distance 'd' from the center point using the texcoords. + * IF d > 1.0 THEN + * KILL fragment + * ELSE IF d > k THEN + * compute coverage in [0,1] proportional to d in [k, 1]. + * ELSE + * coverage = 1.0; // full coverage + * ENDIF + * + * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to + * avoid using IF/ELSE/ENDIF TGSI opcodes. 
+ */ + +#if !NORMALIZE + k = 1.0f / radius; + k = 1.0f - 2.0f * k + k * k; +#else + k = 1.0f - 1.0f / radius; +#endif + + /* allocate/dup new verts */ + for (i = 0; i < 4; i++) { + v[i] = dup_vert(stage, header->v[0], i); + } + + /* new verts */ + pos = v[0]->data[pos_slot]; + pos[0] -= radius; + pos[1] -= radius; + + pos = v[1]->data[pos_slot]; + pos[0] += radius; + pos[1] -= radius; + + pos = v[2]->data[pos_slot]; + pos[0] += radius; + pos[1] += radius; + + pos = v[3]->data[pos_slot]; + pos[0] -= radius; + pos[1] += radius; + + /* new texcoords */ + tex = v[0]->data[texPos]; + ASSIGN_4V(tex, -1, -1, k, 1); + + tex = v[1]->data[texPos]; + ASSIGN_4V(tex, 1, -1, k, 1); + + tex = v[2]->data[texPos]; + ASSIGN_4V(tex, 1, 1, k, 1); + + tex = v[3]->data[texPos]; + ASSIGN_4V(tex, -1, 1, k, 1); + + /* emit 2 tris for the quad strip */ + tri.v[0] = v[0]; + tri.v[1] = v[1]; + tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[0]; + tri.v[1] = v[2]; + tri.v[2] = v[3]; + stage->next->tri( stage->next, &tri ); +} + + +static void +aapoint_first_point(struct draw_stage *stage, struct prim_header *header) +{ + auto struct aapoint_stage *aapoint = aapoint_stage(stage); + struct draw_context *draw = stage->draw; + + assert(draw->rasterizer->point_smooth); + + if (draw->rasterizer->point_size <= 2.0) + aapoint->radius = 1.0; + else + aapoint->radius = 0.5f * draw->rasterizer->point_size; + + /* + * Bind (generate) our fragprog. 
+ */ + bind_aapoint_fragment_shader(aapoint); + + /* update vertex attrib info */ + aapoint->tex_slot = draw->vs.num_vs_outputs; + assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ + + aapoint->pos_slot = draw->vs.position_output; + + draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_vp_outputs.semantic_index = aapoint->fs->generic_attrib; + draw->extra_vp_outputs.slot = aapoint->tex_slot; + + /* find psize slot in post-transform vertex */ + aapoint->psize_slot = -1; + if (draw->rasterizer->point_size_per_vertex) { + /* find PSIZ vertex output */ + const struct draw_vertex_shader *vs = draw->vs.vertex_shader; + uint i; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { + aapoint->psize_slot = i; + break; + } + } + } + + /* now really draw first point */ + stage->point = aapoint_point; + stage->point(stage, header); +} + + +static void +aapoint_flush(struct draw_stage *stage, unsigned flags) +{ + struct draw_context *draw = stage->draw; + struct aapoint_stage *aapoint = aapoint_stage(stage); + struct pipe_context *pipe = aapoint->pipe; + + stage->point = aapoint_first_point; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader */ + draw->suspend_flushing = TRUE; + aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs); + draw->suspend_flushing = FALSE; + + draw->extra_vp_outputs.slot = 0; +} + + +static void +aapoint_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +aapoint_destroy(struct draw_stage *stage) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +static struct aapoint_stage * +draw_aapoint_stage(struct draw_context *draw) +{ + struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage); + if (aapoint == NULL) + goto fail; + + if (!draw_alloc_temp_verts( &aapoint->stage, 4 )) + goto fail; + + aapoint->stage.draw = draw; + aapoint->stage.next = 
NULL; + aapoint->stage.point = aapoint_first_point; + aapoint->stage.line = draw_pipe_passthrough_line; + aapoint->stage.tri = draw_pipe_passthrough_tri; + aapoint->stage.flush = aapoint_flush; + aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter; + aapoint->stage.destroy = aapoint_destroy; + + return aapoint; + + fail: + if (aapoint) + aapoint_destroy(&aapoint->stage); + + return NULL; + +} + + +static struct aapoint_stage * +aapoint_stage_from_pipe(struct pipe_context *pipe) +{ + struct draw_context *draw = (struct draw_context *) pipe->draw; + return aapoint_stage(draw->pipeline.aapoint); +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +aapoint_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); + struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader); + if (aafs == NULL) + return NULL; + + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs); + + return aafs; +} + + +static void +aapoint_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); + struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs; + /* save current */ + aapoint->fs = aafs; + /* pass-through */ + aapoint->driver_bind_fs_state(aapoint->pipe, + (aafs ? 
aafs->driver_fs : NULL)); +} + + +static void +aapoint_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); + struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs; + /* pass-through */ + aapoint->driver_delete_fs_state(aapoint->pipe, aafs->driver_fs); + FREE(aafs); +} + + +/** + * Called by drivers that want to install this AA point prim stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for AA points. + */ +boolean +draw_install_aapoint_stage(struct draw_context *draw, + struct pipe_context *pipe) +{ + struct aapoint_stage *aapoint; + + pipe->draw = (void *) draw; + + /* + * Create / install AA point drawing / prim stage + */ + aapoint = draw_aapoint_stage( draw ); + if (aapoint == NULL) + goto fail; + + aapoint->pipe = pipe; + + /* save original driver functions */ + aapoint->driver_create_fs_state = pipe->create_fs_state; + aapoint->driver_bind_fs_state = pipe->bind_fs_state; + aapoint->driver_delete_fs_state = pipe->delete_fs_state; + + /* override the driver's functions */ + pipe->create_fs_state = aapoint_create_fs_state; + pipe->bind_fs_state = aapoint_bind_fs_state; + pipe->delete_fs_state = aapoint_delete_fs_state; + + draw->pipeline.aapoint = &aapoint->stage; + + return TRUE; + + fail: + if (aapoint) + aapoint->stage.destroy( &aapoint->stage ); + + return FALSE; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c new file mode 100644 index 0000000000..3265dcd154 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -0,0 +1,515 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * \brief Clipping stage + * + * \author Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "pipe/p_shader_tokens.h" + +#include "draw_vs.h" +#include "draw_pipe.h" + + +#ifndef IS_NEGATIVE +#define IS_NEGATIVE(X) ((X) < 0.0) +#endif + +#ifndef DIFFERENT_SIGNS +#define DIFFERENT_SIGNS(x, y) ((x) * (y) <= 0.0F && (x) - (y) != 0.0F) +#endif + +#ifndef MAX_CLIPPED_VERTICES +#define MAX_CLIPPED_VERTICES ((2 * (6 + PIPE_MAX_CLIP_PLANES))+1) +#endif + + + +struct clipper { + struct draw_stage stage; /**< base class */ + + /* Basically duplicate some of the flatshading logic here: + */ + boolean flat; + uint num_color_attribs; + uint color_attribs[4]; /* front/back primary/secondary colors */ + + float (*plane)[4]; +}; + + +/* This is a bit confusing: + */ +static INLINE struct clipper *clipper_stage( struct draw_stage *stage ) +{ + return (struct clipper *)stage; +} + + +#define LINTERP(T, OUT, IN) ((OUT) + (T) * ((IN) - (OUT))) + + +/* All attributes are float[4], so this is easy: + */ +static void interp_attr( float *fdst, + float t, + const float *fin, + const float *fout ) +{ + fdst[0] = LINTERP( t, fout[0], fin[0] ); + fdst[1] = LINTERP( t, fout[1], fin[1] ); + fdst[2] = LINTERP( t, fout[2], fin[2] ); + fdst[3] = LINTERP( t, fout[3], fin[3] ); +} + +static void copy_colors( struct draw_stage *stage, + struct vertex_header *dst, + const struct vertex_header *src ) +{ + const struct clipper *clipper = clipper_stage(stage); + uint i; + for (i = 0; i < clipper->num_color_attribs; i++) { + const uint attr = clipper->color_attribs[i]; + COPY_4FV(dst->data[attr], src->data[attr]); + } +} + + + +/* Interpolate between two vertices to produce a third. 
+ */ +static void interp( const struct clipper *clip, + struct vertex_header *dst, + float t, + const struct vertex_header *out, + const struct vertex_header *in ) +{ + const unsigned nr_attrs = clip->stage.draw->vs.num_vs_outputs; + const unsigned pos_attr = clip->stage.draw->vs.position_output; + unsigned j; + + /* Vertex header. + */ + { + dst->clipmask = 0; + dst->edgeflag = 0; /* will get overwritten later */ + dst->pad = 0; + dst->vertex_id = UNDEFINED_VERTEX_ID; + } + + /* Clip coordinates: interpolate normally + */ + { + interp_attr(dst->clip, t, in->clip, out->clip); + } + + /* Do the projective divide and insert window coordinates: + */ + { + const float *pos = dst->clip; + const float *scale = clip->stage.draw->viewport.scale; + const float *trans = clip->stage.draw->viewport.translate; + const float oow = 1.0f / pos[3]; + + dst->data[pos_attr][0] = pos[0] * oow * scale[0] + trans[0]; + dst->data[pos_attr][1] = pos[1] * oow * scale[1] + trans[1]; + dst->data[pos_attr][2] = pos[2] * oow * scale[2] + trans[2]; + dst->data[pos_attr][3] = oow; + } + + /* Other attributes + */ + for (j = 0; j < nr_attrs; j++) { + if (j != pos_attr) + interp_attr(dst->data[j], t, in->data[j], out->data[j]); + } +} + + +static void emit_poly( struct draw_stage *stage, + struct vertex_header **inlist, + unsigned n, + const struct prim_header *origPrim) +{ + struct prim_header header; + unsigned i; + + const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2; + const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0; + const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1; + + /* later stages may need the determinant, but only the sign matters */ + header.det = origPrim->det; + header.flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; + header.pad = 0; + + for (i = 2; i < n; i++, header.flags = edge_middle) { + header.v[0] = inlist[i-1]; + header.v[1] = inlist[i]; + header.v[2] = inlist[0]; /* keep in v[2] for flatshading */ + + if (i == n-1) + header.flags |= edge_last; + + if (0) { + const 
struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; + uint j, k; + debug_printf("Clipped tri:\n"); + for (j = 0; j < 3; j++) { + for (k = 0; k < vs->info.num_outputs; k++) { + debug_printf(" Vert %d: Attr %d: %f %f %f %f\n", j, k, + header.v[j]->data[k][0], + header.v[j]->data[k][1], + header.v[j]->data[k][2], + header.v[j]->data[k][3]); + } + } + } + + stage->next->tri( stage->next, &header ); + } +} + +static INLINE float +dot4(const float *a, const float *b) +{ + return (a[0]*b[0] + + a[1]*b[1] + + a[2]*b[2] + + a[3]*b[3]); +} + + +/* Clip a triangle against the viewport and user clip planes. + */ +static void +do_clip_tri( struct draw_stage *stage, + struct prim_header *header, + unsigned clipmask ) +{ + struct clipper *clipper = clipper_stage( stage ); + struct vertex_header *a[MAX_CLIPPED_VERTICES]; + struct vertex_header *b[MAX_CLIPPED_VERTICES]; + struct vertex_header **inlist = a; + struct vertex_header **outlist = b; + unsigned tmpnr = 0; + unsigned n = 3; + unsigned i; + + inlist[0] = header->v[0]; + inlist[1] = header->v[1]; + inlist[2] = header->v[2]; + + while (clipmask && n >= 3) { + const unsigned plane_idx = ffs(clipmask)-1; + const float *plane = clipper->plane[plane_idx]; + struct vertex_header *vert_prev = inlist[0]; + float dp_prev = dot4( vert_prev->clip, plane ); + unsigned outcount = 0; + + clipmask &= ~(1<<plane_idx); + + inlist[n] = inlist[0]; /* prevent rotation of vertices */ + + for (i = 1; i <= n; i++) { + struct vertex_header *vert = inlist[i]; + + float dp = dot4( vert->clip, plane ); + + if (!IS_NEGATIVE(dp_prev)) { + outlist[outcount++] = vert_prev; + } + + if (DIFFERENT_SIGNS(dp, dp_prev)) { + struct vertex_header *new_vert = clipper->stage.tmp[tmpnr++]; + outlist[outcount++] = new_vert; + + if (IS_NEGATIVE(dp)) { + /* Going out of bounds. Avoid division by zero as we + * know dp != dp_prev from DIFFERENT_SIGNS, above. 
+ */ + float t = dp / (dp - dp_prev); + interp( clipper, new_vert, t, vert, vert_prev ); + + /* Force edgeflag true in this case: + */ + new_vert->edgeflag = 1; + } else { + /* Coming back in. + */ + float t = dp_prev / (dp_prev - dp); + interp( clipper, new_vert, t, vert_prev, vert ); + + /* Copy starting vert's edgeflag: + */ + new_vert->edgeflag = vert_prev->edgeflag; + } + } + + vert_prev = vert; + dp_prev = dp; + } + + { + struct vertex_header **tmp = inlist; + inlist = outlist; + outlist = tmp; + n = outcount; + } + } + + /* If flat-shading, copy color to new provoking vertex. + */ + if (clipper->flat && inlist[0] != header->v[2]) { + if (1) { + inlist[0] = dup_vert(stage, inlist[0], tmpnr++); + } + + copy_colors(stage, inlist[0], header->v[2]); + } + + + + /* Emit the polygon as triangles to the setup stage: + */ + if (n >= 3) + emit_poly( stage, inlist, n, header ); +} + + +/* Clip a line against the viewport and user clip planes. + */ +static void +do_clip_line( struct draw_stage *stage, + struct prim_header *header, + unsigned clipmask ) +{ + const struct clipper *clipper = clipper_stage( stage ); + struct vertex_header *v0 = header->v[0]; + struct vertex_header *v1 = header->v[1]; + const float *pos0 = v0->clip; + const float *pos1 = v1->clip; + float t0 = 0.0F; + float t1 = 0.0F; + struct prim_header newprim; + + while (clipmask) { + const unsigned plane_idx = ffs(clipmask)-1; + const float *plane = clipper->plane[plane_idx]; + const float dp0 = dot4( pos0, plane ); + const float dp1 = dot4( pos1, plane ); + + if (dp1 < 0.0F) { + float t = dp1 / (dp1 - dp0); + t1 = MAX2(t1, t); + } + + if (dp0 < 0.0F) { + float t = dp0 / (dp0 - dp1); + t0 = MAX2(t0, t); + } + + if (t0 + t1 >= 1.0F) + return; /* discard */ + + clipmask &= ~(1 << plane_idx); /* turn off this plane's bit */ + } + + if (v0->clipmask) { + interp( clipper, stage->tmp[0], t0, v0, v1 ); + + if (clipper->flat) + copy_colors(stage, stage->tmp[0], v0); + + newprim.v[0] = stage->tmp[0]; + } + else 
{ + newprim.v[0] = v0; + } + + if (v1->clipmask) { + interp( clipper, stage->tmp[1], t1, v1, v0 ); + newprim.v[1] = stage->tmp[1]; + } + else { + newprim.v[1] = v1; + } + + stage->next->line( stage->next, &newprim ); +} + + +static void +clip_point( struct draw_stage *stage, + struct prim_header *header ) +{ + if (header->v[0]->clipmask == 0) + stage->next->point( stage->next, header ); +} + + +static void +clip_line( struct draw_stage *stage, + struct prim_header *header ) +{ + unsigned clipmask = (header->v[0]->clipmask | + header->v[1]->clipmask); + + if (clipmask == 0) { + /* no clipping needed */ + stage->next->line( stage->next, header ); + } + else if ((header->v[0]->clipmask & + header->v[1]->clipmask) == 0) { + do_clip_line(stage, header, clipmask); + } + /* else, totally clipped */ +} + + +static void +clip_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + unsigned clipmask = (header->v[0]->clipmask | + header->v[1]->clipmask | + header->v[2]->clipmask); + + if (clipmask == 0) { + /* no clipping needed */ + stage->next->tri( stage->next, header ); + } + else if ((header->v[0]->clipmask & + header->v[1]->clipmask & + header->v[2]->clipmask) == 0) { + do_clip_tri(stage, header, clipmask); + } +} + +/* Update state. Could further delay this until we hit the first + * primitive that really requires clipping. + */ +static void +clip_init_state( struct draw_stage *stage ) +{ + struct clipper *clipper = clipper_stage( stage ); + + clipper->flat = stage->draw->rasterizer->flatshade ? 
TRUE : FALSE; + + if (clipper->flat) { + const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; + uint i; + + clipper->num_color_attribs = 0; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR || + vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { + clipper->color_attribs[clipper->num_color_attribs++] = i; + } + } + } + + stage->tri = clip_tri; + stage->line = clip_line; +} + + + +static void clip_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + clip_init_state( stage ); + stage->tri( stage, header ); +} + +static void clip_first_line( struct draw_stage *stage, + struct prim_header *header ) +{ + clip_init_state( stage ); + stage->line( stage, header ); +} + + +static void clip_flush( struct draw_stage *stage, + unsigned flags ) +{ + stage->tri = clip_first_tri; + stage->line = clip_first_line; + stage->next->flush( stage->next, flags ); +} + + +static void clip_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void clip_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Allocate a new clipper stage. 
+ * \return pointer to new stage object + */ +struct draw_stage *draw_clip_stage( struct draw_context *draw ) +{ + struct clipper *clipper = CALLOC_STRUCT(clipper); + if (clipper == NULL) + goto fail; + + if (!draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 )) + goto fail; + + clipper->stage.draw = draw; + clipper->stage.point = clip_point; + clipper->stage.line = clip_first_line; + clipper->stage.tri = clip_first_tri; + clipper->stage.flush = clip_flush; + clipper->stage.reset_stipple_counter = clip_reset_stipple_counter; + clipper->stage.destroy = clip_destroy; + + clipper->plane = draw->plane; + + return &clipper->stage; + + fail: + if (clipper) + clipper->stage.destroy( &clipper->stage ); + + return NULL; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c new file mode 100644 index 0000000000..053be5f050 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -0,0 +1,147 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Drawing stage for polygon culling + */ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "util/u_memory.h" +#include "pipe/p_defines.h" +#include "draw_pipe.h" + + +struct cull_stage { + struct draw_stage stage; + unsigned winding; /**< which winding(s) to cull (one of PIPE_WINDING_x) */ +}; + + +static INLINE struct cull_stage *cull_stage( struct draw_stage *stage ) +{ + return (struct cull_stage *)stage; +} + + + + +static void cull_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + const unsigned pos = stage->draw->vs.position_output; + + /* Window coords: */ + const float *v0 = header->v[0]->data[pos]; + const float *v1 = header->v[1]->data[pos]; + const float *v2 = header->v[2]->data[pos]; + + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0] - v2[0]; + const float ey = v0[1] - v2[1]; + const float fx = v1[0] - v2[0]; + const float fy = v1[1] - v2[1]; + + /* det = cross(e,f).z */ + header->det = ex * fy - ey * fx; + + if (header->det != 0) { + /* if (det < 0 then Z points toward camera and triangle is + * counter-clockwise winding. + */ + unsigned winding = (header->det < 0) ? 
PIPE_WINDING_CCW : PIPE_WINDING_CW; + + if ((winding & cull_stage(stage)->winding) == 0) { + /* triangle is not culled, pass to next stage */ + stage->next->tri( stage->next, header ); + } + } +} + +static void cull_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct cull_stage *cull = cull_stage(stage); + + cull->winding = stage->draw->rasterizer->cull_mode; + + stage->tri = cull_tri; + stage->tri( stage, header ); +} + + + +static void cull_flush( struct draw_stage *stage, unsigned flags ) +{ + stage->tri = cull_first_tri; + stage->next->flush( stage->next, flags ); +} + +static void cull_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void cull_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Create a new polygon culling stage. + */ +struct draw_stage *draw_cull_stage( struct draw_context *draw ) +{ + struct cull_stage *cull = CALLOC_STRUCT(cull_stage); + if (cull == NULL) + goto fail; + + if (!draw_alloc_temp_verts( &cull->stage, 0 )) + goto fail; + + cull->stage.draw = draw; + cull->stage.next = NULL; + cull->stage.point = draw_pipe_passthrough_point; + cull->stage.line = draw_pipe_passthrough_line; + cull->stage.tri = cull_first_tri; + cull->stage.flush = cull_flush; + cull->stage.reset_stipple_counter = cull_reset_stipple_counter; + cull->stage.destroy = cull_destroy; + + return &cull->stage; + + fail: + if (cull) + cull->stage.destroy( &cull->stage ); + + return NULL; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c new file mode 100644 index 0000000000..43d1fecc4d --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -0,0 +1,281 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "pipe/p_shader_tokens.h" +#include "draw_vs.h" +#include "draw_pipe.h" + + +/** subclass of draw_stage */ +struct flat_stage +{ + struct draw_stage stage; + + uint num_color_attribs; + uint color_attribs[2]; /* front/back primary colors */ + + uint num_spec_attribs; + uint spec_attribs[2]; /* front/back secondary colors */ +}; + +#define COPY_3FV( DST, SRC ) \ +do { \ + (DST)[0] = (SRC)[0]; \ + (DST)[1] = (SRC)[1]; \ + (DST)[2] = (SRC)[2]; \ +} while (0) + + +static INLINE struct flat_stage * +flat_stage(struct draw_stage *stage) +{ + return (struct flat_stage *) stage; +} + + +/** Copy all the color attributes from 'src' vertex to 'dst' vertex */ +static INLINE void copy_colors( struct draw_stage *stage, + struct vertex_header *dst, + const struct vertex_header *src ) +{ + const struct flat_stage *flat = flat_stage(stage); + uint i; + + for (i = 0; i < flat->num_color_attribs; i++) { + const uint attr = flat->color_attribs[i]; + COPY_4FV(dst->data[attr], src->data[attr]); + } + + for (i = 0; i < flat->num_spec_attribs; i++) { + const uint attr = flat->spec_attribs[i]; + COPY_3FV(dst->data[attr], src->data[attr]); + } +} + + +/** Copy all the color attributes from src vertex to dst0 & dst1 vertices */ +static INLINE void copy_colors2( struct draw_stage *stage, + struct vertex_header *dst0, + struct vertex_header *dst1, + const struct vertex_header *src ) +{ + const struct flat_stage *flat = flat_stage(stage); + uint i; + for (i = 0; i < flat->num_color_attribs; i++) { + const uint attr = flat->color_attribs[i]; + COPY_4FV(dst0->data[attr], src->data[attr]); + COPY_4FV(dst1->data[attr], src->data[attr]); + } + + for (i = 0; i < flat->num_spec_attribs; i++) { + const uint attr = flat->spec_attribs[i]; + COPY_3FV(dst0->data[attr], src->data[attr]); + 
COPY_3FV(dst1->data[attr], src->data[attr]); + } +} + + +/** + * Flatshade tri. Required for clipping and when unfilled tris are + * active, otherwise handled by hardware. + */ +static void flatshade_tri_0( struct draw_stage *stage, + struct prim_header *header ) +{ + struct prim_header tmp; + + tmp.det = header->det; + tmp.flags = header->flags; + tmp.pad = header->pad; + tmp.v[0] = header->v[0]; + tmp.v[1] = dup_vert(stage, header->v[1], 0); + tmp.v[2] = dup_vert(stage, header->v[2], 1); + + copy_colors2(stage, tmp.v[1], tmp.v[2], tmp.v[0]); + + stage->next->tri( stage->next, &tmp ); +} + + +static void flatshade_tri_2( struct draw_stage *stage, + struct prim_header *header ) +{ + struct prim_header tmp; + + tmp.det = header->det; + tmp.flags = header->flags; + tmp.pad = header->pad; + tmp.v[0] = dup_vert(stage, header->v[0], 0); + tmp.v[1] = dup_vert(stage, header->v[1], 1); + tmp.v[2] = header->v[2]; + + copy_colors2(stage, tmp.v[0], tmp.v[1], tmp.v[2]); + + stage->next->tri( stage->next, &tmp ); +} + + + + + +/** + * Flatshade line. Required for clipping. 
+ */ +static void flatshade_line_0( struct draw_stage *stage, + struct prim_header *header ) +{ + struct prim_header tmp; + + tmp.v[0] = header->v[0]; + tmp.v[1] = dup_vert(stage, header->v[1], 0); + + copy_colors(stage, tmp.v[1], tmp.v[0]); + + stage->next->line( stage->next, &tmp ); +} + +static void flatshade_line_1( struct draw_stage *stage, + struct prim_header *header ) +{ + struct prim_header tmp; + + tmp.v[0] = dup_vert(stage, header->v[0], 0); + tmp.v[1] = header->v[1]; + + copy_colors(stage, tmp.v[0], tmp.v[1]); + + stage->next->line( stage->next, &tmp ); +} + + + + +static void flatshade_init_state( struct draw_stage *stage ) +{ + struct flat_stage *flat = flat_stage(stage); + const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; + uint i; + + /* Find which vertex shader outputs are colors, make a list */ + flat->num_color_attribs = 0; + flat->num_spec_attribs = 0; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR || + vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { + if (vs->info.output_semantic_index[i] == 0) + flat->color_attribs[flat->num_color_attribs++] = i; + else + flat->spec_attribs[flat->num_spec_attribs++] = i; + } + } + + /* Choose flatshade routine according to provoking vertex: + */ + if (stage->draw->rasterizer->flatshade_first) { + stage->line = flatshade_line_0; + stage->tri = flatshade_tri_0; + } + else { + stage->line = flatshade_line_1; + stage->tri = flatshade_tri_2; + } +} + +static void flatshade_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + flatshade_init_state( stage ); + stage->tri( stage, header ); +} + +static void flatshade_first_line( struct draw_stage *stage, + struct prim_header *header ) +{ + flatshade_init_state( stage ); + stage->line( stage, header ); +} + + +static void flatshade_flush( struct draw_stage *stage, + unsigned flags ) +{ + stage->tri = flatshade_first_tri; + stage->line = flatshade_first_line; 
+ stage->next->flush( stage->next, flags ); +} + + +static void flatshade_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void flatshade_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Create flatshading drawing stage. + */ +struct draw_stage *draw_flatshade_stage( struct draw_context *draw ) +{ + struct flat_stage *flatshade = CALLOC_STRUCT(flat_stage); + if (flatshade == NULL) + goto fail; + + if (!draw_alloc_temp_verts( &flatshade->stage, 2 )) + goto fail; + + flatshade->stage.draw = draw; + flatshade->stage.next = NULL; + flatshade->stage.point = draw_pipe_passthrough_point; + flatshade->stage.line = flatshade_first_line; + flatshade->stage.tri = flatshade_first_tri; + flatshade->stage.flush = flatshade_flush; + flatshade->stage.reset_stipple_counter = flatshade_reset_stipple_counter; + flatshade->stage.destroy = flatshade_destroy; + + return &flatshade->stage; + + fail: + if (flatshade) + flatshade->stage.destroy( &flatshade->stage ); + + return NULL; +} + + diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c new file mode 100644 index 0000000000..1fea5e6dcb --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -0,0 +1,185 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief polygon offset state + * + * \author Keith Whitwell <keith@tungstengraphics.com> + * \author Brian Paul + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "draw_pipe.h" + + + +struct offset_stage { + struct draw_stage stage; + + float scale; + float units; +}; + + + +static INLINE struct offset_stage *offset_stage( struct draw_stage *stage ) +{ + return (struct offset_stage *) stage; +} + + + + + +/** + * Offset tri Z. Some hardware can handle this, but not usually when + * doing unfilled rendering. 
+ */ +static void do_offset_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + const unsigned pos = stage->draw->vs.position_output; + struct offset_stage *offset = offset_stage(stage); + float inv_det = 1.0f / header->det; + + /* Window coords: + */ + float *v0 = header->v[0]->data[pos]; + float *v1 = header->v[1]->data[pos]; + float *v2 = header->v[2]->data[pos]; + + /* edge vectors e = v0 - v2, f = v1 - v2 */ + float ex = v0[0] - v2[0]; + float ey = v0[1] - v2[1]; + float ez = v0[2] - v2[2]; + float fx = v1[0] - v2[0]; + float fy = v1[1] - v2[1]; + float fz = v1[2] - v2[2]; + + /* (a,b) = cross(e,f).xy */ + float a = ey*fz - ez*fy; + float b = ez*fx - ex*fz; + + float dzdx = fabsf(a * inv_det); + float dzdy = fabsf(b * inv_det); + + float zoffset = offset->units + MAX2(dzdx, dzdy) * offset->scale; + + /* + * Note: we're applying the offset and clamping per-vertex. + * Ideally, the offset is applied per-fragment prior to fragment shading. + */ + v0[2] = CLAMP(v0[2] + zoffset, 0.0f, 1.0f); + v1[2] = CLAMP(v1[2] + zoffset, 0.0f, 1.0f); + v2[2] = CLAMP(v2[2] + zoffset, 0.0f, 1.0f); + + stage->next->tri( stage->next, header ); +} + + +static void offset_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct prim_header tmp; + + tmp.det = header->det; + tmp.flags = header->flags; + tmp.pad = header->pad; + tmp.v[0] = dup_vert(stage, header->v[0], 0); + tmp.v[1] = dup_vert(stage, header->v[1], 1); + tmp.v[2] = dup_vert(stage, header->v[2], 2); + + do_offset_tri( stage, &tmp ); +} + + +static void offset_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct offset_stage *offset = offset_stage(stage); + float mrd = 1.0f / 65535.0f; /* XXX this depends on depthbuffer bits! 
*/ + + offset->units = stage->draw->rasterizer->offset_units * mrd; + offset->scale = stage->draw->rasterizer->offset_scale; + + stage->tri = offset_tri; + stage->tri( stage, header ); +} + + + + +static void offset_flush( struct draw_stage *stage, + unsigned flags ) +{ + stage->tri = offset_first_tri; + stage->next->flush( stage->next, flags ); +} + + +static void offset_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void offset_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Create polygon offset drawing stage. + */ +struct draw_stage *draw_offset_stage( struct draw_context *draw ) +{ + struct offset_stage *offset = CALLOC_STRUCT(offset_stage); + if (offset == NULL) + goto fail; + + draw_alloc_temp_verts( &offset->stage, 3 ); + + offset->stage.draw = draw; + offset->stage.next = NULL; + offset->stage.point = draw_pipe_passthrough_point; + offset->stage.line = draw_pipe_passthrough_line; + offset->stage.tri = offset_first_tri; + offset->stage.flush = offset_flush; + offset->stage.reset_stipple_counter = offset_reset_stipple_counter; + offset->stage.destroy = offset_destroy; + + return &offset->stage; + + fail: + if (offset) + offset->stage.destroy( &offset->stage ); + + return NULL; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c new file mode 100644 index 0000000000..b764d9c518 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -0,0 +1,770 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Polygon stipple stage: implement polygon stipple with texture map and + * fragment program. The fragment program samples the texture and does + * a fragment kill for the stipple-failing fragments. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "tgsi/tgsi_transform.h" +#include "tgsi/tgsi_dump.h" + +#include "draw_context.h" +#include "draw_pipe.h" + + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. 
+ */ +struct pstip_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; + void *pstip_fs; + uint sampler_unit; +}; + + +/** + * Subclass of draw_stage + */ +struct pstip_stage +{ + struct draw_stage stage; + + void *sampler_cso; + struct pipe_texture *texture; + uint num_samplers; + uint num_textures; + + /* + * Currently bound state + */ + struct pstip_fragment_shader *fs; + struct { + void *samplers[PIPE_MAX_SAMPLERS]; + struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; + const struct pipe_poly_stipple *stipple; + } state; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, void **); + + void (*driver_set_sampler_textures)(struct pipe_context *, unsigned, + struct pipe_texture **); + + void (*driver_set_polygon_stipple)(struct pipe_context *, + const struct pipe_poly_stipple *); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. + */ +struct pstip_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int wincoordInput; + int maxInput; + uint samplersUsed; /**< bitfield of samplers used */ + int freeSampler; /** an available sampler for the pstipple */ + int texTemp; /**< temp registers */ + int numImmed; + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for a free sampler, a free input attrib, and two free temp regs. 
+ */ +static void +pstip_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_SAMPLER) { + uint i; + for (i = decl->DeclarationRange.First; + i <= decl->DeclarationRange.Last; i++) { + pctx->samplersUsed |= 1 << i; + } + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + pctx->maxInput = MAX2(pctx->maxInput, (int) decl->DeclarationRange.Last); + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) + pctx->wincoordInput = (int) decl->DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->DeclarationRange.First; + i <= decl->DeclarationRange.Last; i++) { + pctx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +static void +pstip_transform_immed(struct tgsi_transform_context *ctx, + struct tgsi_full_immediate *immed) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + pctx->numImmed++; +} + + +/** + * Find the lowest zero bit in the given word, or -1 if bitfield is all ones. + */ +static int +free_bit(uint bitfield) +{ + int i; + for (i = 0; i < 32; i++) { + if ((bitfield & (1 << i)) == 0) + return i; + } + return -1; +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. 
+ */ +static void +pstip_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + + if (pctx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction newInst; + uint i; + int wincoordInput; + + /* find free sampler */ + pctx->freeSampler = free_bit(pctx->samplersUsed); + if (pctx->freeSampler >= PIPE_MAX_SAMPLERS) + pctx->freeSampler = PIPE_MAX_SAMPLERS - 1; + + if (pctx->wincoordInput < 0) + wincoordInput = pctx->maxInput + 1; + else + wincoordInput = pctx->wincoordInput; + + /* find one free temp reg */ + for (i = 0; i < 32; i++) { + if ((pctx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (pctx->texTemp < 0) + pctx->texTemp = i; + else + break; + } + } + assert(pctx->texTemp >= 0); + + if (pctx->wincoordInput < 0) { + /* declare new position input reg */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? 
*/ + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; + decl.Semantic.SemanticIndex = 0; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = wincoordInput; + ctx->emit_declaration(ctx, &decl); + } + + /* declare new sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = pctx->freeSampler; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = pctx->texTemp; + ctx->emit_declaration(ctx, &decl); + + /* emit immediate = {1/32, 1/32, 1, 1} + * The index/position of this immediate will be pctx->numImmed + */ + { + static const float value[4] = { 1.0/32, 1.0/32, 1.0, 1.0 }; + struct tgsi_full_immediate immed; + uint size = 4; + immed = tgsi_default_full_immediate(); + immed.Immediate.Size = 1 + size; /* one for the token itself */ + immed.u.Pointer = (void *) value; + ctx->emit_immediate(ctx, &immed); + } + + pctx->firstInstruction = FALSE; + + + /* + * Insert new MUL/TEX/KILP instructions at start of program + * Take gl_FragCoord, divide by 32 (stipple size), sample the + * texture and kill fragment if needed. + * + * We'd like to use non-normalized texcoords to index into a RECT + * texture, but we can only use GL_REPEAT wrap mode with normalized + * texcoords. Darn. 
+ */ + + /* MUL texTemp, INPUT[wincoord], 1/32; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = wincoordInput; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_IMMEDIATE; + newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->numImmed; + ctx->emit_instruction(ctx, &newInst); + + /* TEX texTemp, texTemp, sampler; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_TEX; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->freeSampler; + ctx->emit_instruction(ctx, &newInst); + + /* KIL -texTemp; # if -texTemp < 0, KILL fragment */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_KIL; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; + newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + ctx->emit_instruction(ctx, &newInst); + } + + /* emit this instruction */ + ctx->emit_instruction(ctx, inst); +} + + +/** + * Generate the frag shader we'll use for doing polygon stipple. 
+ * This will be the user's shader prefixed with a TEX and KIL instruction. + */ +static boolean +generate_pstip_fs(struct pstip_stage *pstip) +{ + const struct pipe_shader_state *orig_fs = &pstip->fs->state; + /*struct draw_context *draw = pstip->stage.draw;*/ + struct pipe_shader_state pstip_fs; + struct pstip_transform_context transform; + +#define MAX 1000 + + pstip_fs = *orig_fs; /* copy to init */ + pstip_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + if (pstip_fs.tokens == NULL) + return FALSE; + + memset(&transform, 0, sizeof(transform)); + transform.wincoordInput = -1; + transform.maxInput = -1; + transform.texTemp = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = pstip_transform_inst; + transform.base.transform_declaration = pstip_transform_decl; + transform.base.transform_immediate = pstip_transform_immed; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) pstip_fs.tokens, + MAX, &transform.base); + +#if 0 /* DEBUG */ + tgsi_dump(orig_fs->tokens, 0); + tgsi_dump(pstip_fs.tokens, 0); +#endif + + pstip->fs->sampler_unit = transform.freeSampler; + assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS); + + pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs); + + return TRUE; +} + + +/** + * Load texture image with current stipple pattern. + */ +static void +pstip_update_texture(struct pstip_stage *pstip) +{ + static const uint bit31 = 1 << 31; + struct pipe_context *pipe = pstip->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_surface *surface; + const uint *stipple = pstip->state.stipple->stipple; + uint i, j; + ubyte *data; + + /* XXX: want to avoid flushing just because we use stipple: + */ + pipe->flush( pipe, PIPE_FLUSH_TEXTURE_CACHE, NULL ); + + surface = screen->get_tex_surface(screen, pstip->texture, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); + data = screen->surface_map(screen, surface, + PIPE_BUFFER_USAGE_CPU_WRITE); + + /* + * Load alpha texture. 
+ * Note: 0 means keep the fragment, 255 means kill it. + * We'll negate the texel value and use KILP which kills if value + * is negative. + */ + for (i = 0; i < 32; i++) { + for (j = 0; j < 32; j++) { + if (stipple[i] & (bit31 >> j)) { + /* fragment "on" */ + data[i * surface->stride + j] = 0; + } + else { + /* fragment "off" */ + data[i * surface->stride + j] = 255; + } + } + } + + /* unmap */ + screen->surface_unmap(screen, surface); + screen->tex_surface_release(screen, &surface); +} + + +/** + * Create the texture map we'll use for stippling. + */ +static boolean +pstip_create_texture(struct pstip_stage *pstip) +{ + struct pipe_context *pipe = pstip->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_texture texTemp; + + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */ + texTemp.last_level = 0; + texTemp.width[0] = 32; + texTemp.height[0] = 32; + texTemp.depth[0] = 1; + pf_get_block(texTemp.format, &texTemp.block); + + pstip->texture = screen->texture_create(screen, &texTemp); + if (pstip->texture == NULL) + return FALSE; + + return TRUE; +} + + +/** + * Create the sampler CSO that'll be used for stippling. 
+ */ +static boolean +pstip_create_sampler(struct pstip_stage *pstip) +{ + struct pipe_sampler_state sampler; + struct pipe_context *pipe = pstip->pipe; + + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.normalized_coords = 1; + sampler.min_lod = 0.0f; + sampler.max_lod = 0.0f; + + pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler); + if (pstip->sampler_cso == NULL) + return FALSE; + + return TRUE; +} + + +/** + * When we're about to draw our first stipple polygon in a batch, this function + * is called to tell the driver to bind our modified fragment shader. + */ +static boolean +bind_pstip_fragment_shader(struct pstip_stage *pstip) +{ + struct draw_context *draw = pstip->stage.draw; + if (!pstip->fs->pstip_fs && + !generate_pstip_fs(pstip)) + return FALSE; + + draw->suspend_flushing = TRUE; + pstip->driver_bind_fs_state(pstip->pipe, pstip->fs->pstip_fs); + draw->suspend_flushing = FALSE; + return TRUE; +} + + +static INLINE struct pstip_stage * +pstip_stage( struct draw_stage *stage ) +{ + return (struct pstip_stage *) stage; +} + + +static void +pstip_first_tri(struct draw_stage *stage, struct prim_header *header) +{ + struct pstip_stage *pstip = pstip_stage(stage); + struct pipe_context *pipe = pstip->pipe; + struct draw_context *draw = stage->draw; + uint num_samplers; + + assert(stage->draw->rasterizer->poly_stipple_enable); + + /* bind our fragprog */ + if (!bind_pstip_fragment_shader(pstip)) { + stage->tri = draw_pipe_passthrough_tri; + stage->tri(stage, header); + return; + } + + + /* how many samplers? 
*/ + /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ + num_samplers = MAX2(pstip->num_textures, pstip->num_samplers); + num_samplers = MAX2(num_samplers, pstip->fs->sampler_unit + 1); + + /* plug in our sampler, texture */ + pstip->state.samplers[pstip->fs->sampler_unit] = pstip->sampler_cso; + pipe_texture_reference(&pstip->state.textures[pstip->fs->sampler_unit], + pstip->texture); + + assert(num_samplers <= PIPE_MAX_SAMPLERS); + + draw->suspend_flushing = TRUE; + pstip->driver_bind_sampler_states(pipe, num_samplers, pstip->state.samplers); + pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures); + draw->suspend_flushing = FALSE; + + /* now really draw first triangle */ + stage->tri = draw_pipe_passthrough_tri; + stage->tri(stage, header); +} + + +static void +pstip_flush(struct draw_stage *stage, unsigned flags) +{ + struct draw_context *draw = stage->draw; + struct pstip_stage *pstip = pstip_stage(stage); + struct pipe_context *pipe = pstip->pipe; + + stage->tri = pstip_first_tri; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader, texture, sampler state */ + draw->suspend_flushing = TRUE; + pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs); + pstip->driver_bind_sampler_states(pipe, pstip->num_samplers, + pstip->state.samplers); + pstip->driver_set_sampler_textures(pipe, pstip->num_textures, + pstip->state.textures); + draw->suspend_flushing = FALSE; +} + + +static void +pstip_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +pstip_destroy(struct draw_stage *stage) +{ + struct pstip_stage *pstip = pstip_stage(stage); + uint i; + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + pipe_texture_reference(&pstip->state.textures[i], NULL); + } + + pstip->pipe->delete_sampler_state(pstip->pipe, pstip->sampler_cso); + + pipe_texture_release(&pstip->texture); + + draw_free_temp_verts( stage ); + FREE( stage ); +} + + 
+static struct pstip_stage * +draw_pstip_stage(struct draw_context *draw) +{ + struct pstip_stage *pstip = CALLOC_STRUCT(pstip_stage); + + draw_alloc_temp_verts( &pstip->stage, 8 ); + + pstip->stage.draw = draw; + pstip->stage.next = NULL; + pstip->stage.point = draw_pipe_passthrough_point; + pstip->stage.line = draw_pipe_passthrough_line; + pstip->stage.tri = pstip_first_tri; + pstip->stage.flush = pstip_flush; + pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter; + pstip->stage.destroy = pstip_destroy; + + return pstip; +} + + +static struct pstip_stage * +pstip_stage_from_pipe(struct pipe_context *pipe) +{ + struct draw_context *draw = (struct draw_context *) pipe->draw; + return pstip_stage(draw->pipeline.pstipple); +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +pstip_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = CALLOC_STRUCT(pstip_fragment_shader); + + if (aafs) { + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = pstip->driver_create_fs_state(pstip->pipe, fs); + } + + return aafs; +} + + +static void +pstip_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; + /* save current */ + pstip->fs = aafs; + /* pass-through */ + pstip->driver_bind_fs_state(pstip->pipe, + (aafs ? 
aafs->driver_fs : NULL)); +} + + +static void +pstip_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; + /* pass-through */ + pstip->driver_delete_fs_state(pstip->pipe, aafs->driver_fs); + FREE(aafs); +} + + +static void +pstip_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + uint i; + + /* save current */ + memcpy(pstip->state.samplers, sampler, num * sizeof(void *)); + for (i = num; i < PIPE_MAX_SAMPLERS; i++) { + pstip->state.samplers[i] = NULL; + } + + pstip->num_samplers = num; + /* pass-through */ + pstip->driver_bind_sampler_states(pstip->pipe, num, sampler); +} + + +static void +pstip_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + uint i; + + /* save current */ + for (i = 0; i < num; i++) { + pipe_texture_reference(&pstip->state.textures[i], texture[i]); + } + for (; i < PIPE_MAX_SAMPLERS; i++) { + pipe_texture_reference(&pstip->state.textures[i], NULL); + } + + pstip->num_textures = num; + + /* pass-through */ + pstip->driver_set_sampler_textures(pstip->pipe, num, texture); +} + + +static void +pstip_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + + /* save current */ + pstip->state.stipple = stipple; + + /* pass-through */ + pstip->driver_set_polygon_stipple(pstip->pipe, stipple); + + pstip_update_texture(pstip); +} + + +/** + * Called by drivers that want to install this polygon stipple stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for polygon stipple. 
+ */ +boolean +draw_install_pstipple_stage(struct draw_context *draw, + struct pipe_context *pipe) +{ + struct pstip_stage *pstip; + + pipe->draw = (void *) draw; + + /* + * Create / install pgon stipple drawing / prim stage + */ + pstip = draw_pstip_stage( draw ); + if (pstip == NULL) + goto fail; + + draw->pipeline.pstipple = &pstip->stage; + + pstip->pipe = pipe; + + /* create special texture, sampler state */ + if (!pstip_create_texture(pstip)) + goto fail; + + if (!pstip_create_sampler(pstip)) + goto fail; + + /* save original driver functions */ + pstip->driver_create_fs_state = pipe->create_fs_state; + pstip->driver_bind_fs_state = pipe->bind_fs_state; + pstip->driver_delete_fs_state = pipe->delete_fs_state; + + pstip->driver_bind_sampler_states = pipe->bind_sampler_states; + pstip->driver_set_sampler_textures = pipe->set_sampler_textures; + pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple; + + /* override the driver's functions */ + pipe->create_fs_state = pstip_create_fs_state; + pipe->bind_fs_state = pstip_bind_fs_state; + pipe->delete_fs_state = pstip_delete_fs_state; + + pipe->bind_sampler_states = pstip_bind_sampler_states; + pipe->set_sampler_textures = pstip_set_sampler_textures; + pipe->set_polygon_stipple = pstip_set_polygon_stipple; + + return TRUE; + + fail: + if (pstip) + pstip->stage.destroy( &pstip->stage ); + + return FALSE; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c new file mode 100644 index 0000000000..b65e2aa102 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -0,0 +1,250 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +/* Implement line stipple by cutting lines up into smaller lines. + * There are hundreds of ways to implement line stipple, this is one + * choice that should work in all situations, requires no state + * manipulations, but with a penalty in terms of large amounts of + * generated geometry. 
+ */ + + +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "draw/draw_pipe.h" + + +/** Subclass of draw_stage */ +struct stipple_stage { + struct draw_stage stage; + float counter; + uint pattern; + uint factor; +}; + + +static INLINE struct stipple_stage * +stipple_stage(struct draw_stage *stage) +{ + return (struct stipple_stage *) stage; +} + + +/** + * Compute interpolated vertex attributes for 'dst' at position 't' + * between 'v0' and 'v1'. + * XXX using linear interpolation for all attribs at this time. + */ +static void +screen_interp( struct draw_context *draw, + struct vertex_header *dst, + float t, + const struct vertex_header *v0, + const struct vertex_header *v1 ) +{ + uint attr; + for (attr = 0; attr < draw->vs.num_vs_outputs; attr++) { + const float *val0 = v0->data[attr]; + const float *val1 = v1->data[attr]; + float *newv = dst->data[attr]; + uint i; + for (i = 0; i < 4; i++) { + newv[i] = val0[i] + t * (val1[i] - val0[i]); + } + } +} + + +static void +emit_segment(struct draw_stage *stage, struct prim_header *header, + float t0, float t1) +{ + struct vertex_header *v0new = dup_vert(stage, header->v[0], 0); + struct vertex_header *v1new = dup_vert(stage, header->v[1], 1); + struct prim_header newprim = *header; + + if (t0 > 0.0) { + screen_interp( stage->draw, v0new, t0, header->v[0], header->v[1] ); + newprim.v[0] = v0new; + } + + if (t1 < 1.0) { + screen_interp( stage->draw, v1new, t1, header->v[0], header->v[1] ); + newprim.v[1] = v1new; + } + + stage->next->line( stage->next, &newprim ); +} + + +static INLINE unsigned +stipple_test(int counter, ushort pattern, int factor) +{ + int b = (counter / factor) & 0xf; + return (1 << b) & pattern; +} + + +static void +stipple_line(struct draw_stage *stage, struct prim_header *header) +{ + struct stipple_stage *stipple = stipple_stage(stage); + struct vertex_header *v0 = header->v[0]; + struct vertex_header *v1 = 
header->v[1]; + const unsigned pos = stage->draw->vs.position_output; + const float *pos0 = v0->data[pos]; + const float *pos1 = v1->data[pos]; + float start = 0; + int state = 0; + + float x0 = pos0[0]; + float x1 = pos1[0]; + float y0 = pos0[1]; + float y1 = pos1[1]; + + float dx = x0 > x1 ? x0 - x1 : x1 - x0; + float dy = y0 > y1 ? y0 - y1 : y1 - y0; + + float length = MAX2(dx, dy); + int i; + + if (header->flags & DRAW_PIPE_RESET_STIPPLE) + stipple->counter = 0; + + + /* XXX ToDo: intead of iterating pixel-by-pixel, use a look-up table. + */ + for (i = 0; i < length; i++) { + int result = stipple_test( (int) stipple->counter+i, + (ushort) stipple->pattern, stipple->factor ); + if (result != state) { + /* changing from "off" to "on" or vice versa */ + if (state) { + if (start != i) { + /* finishing an "on" segment */ + emit_segment( stage, header, start / length, i / length ); + } + } + else { + /* starting an "on" segment */ + start = (float) i; + } + state = result; + } + } + + if (state && start < length) + emit_segment( stage, header, start / length, 1.0 ); + + stipple->counter += length; +} + + +static void +reset_stipple_counter(struct draw_stage *stage) +{ + struct stipple_stage *stipple = stipple_stage(stage); + stipple->counter = 0; + stage->next->reset_stipple_counter( stage->next ); +} + +static void +stipple_reset_point(struct draw_stage *stage, struct prim_header *header) +{ + struct stipple_stage *stipple = stipple_stage(stage); + stipple->counter = 0; + stage->next->point(stage->next, header); +} + +static void +stipple_reset_tri(struct draw_stage *stage, struct prim_header *header) +{ + struct stipple_stage *stipple = stipple_stage(stage); + stipple->counter = 0; + stage->next->tri(stage->next, header); +} + + +static void +stipple_first_line(struct draw_stage *stage, + struct prim_header *header) +{ + struct stipple_stage *stipple = stipple_stage(stage); + struct draw_context *draw = stage->draw; + + stipple->pattern = 
draw->rasterizer->line_stipple_pattern; + stipple->factor = draw->rasterizer->line_stipple_factor + 1; + + stage->line = stipple_line; + stage->line( stage, header ); +} + + +static void +stipple_flush(struct draw_stage *stage, unsigned flags) +{ + stage->line = stipple_first_line; + stage->next->flush( stage->next, flags ); +} + + + + +static void +stipple_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Create line stippler stage + */ +struct draw_stage *draw_stipple_stage( struct draw_context *draw ) +{ + struct stipple_stage *stipple = CALLOC_STRUCT(stipple_stage); + + draw_alloc_temp_verts( &stipple->stage, 2 ); + + stipple->stage.draw = draw; + stipple->stage.next = NULL; + stipple->stage.point = stipple_reset_point; + stipple->stage.line = stipple_first_line; + stipple->stage.tri = stipple_reset_tri; + stipple->stage.reset_stipple_counter = reset_stipple_counter; + stipple->stage.flush = stipple_flush; + stipple->stage.destroy = stipple_destroy; + + return &stipple->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c new file mode 100644 index 0000000000..c329d92339 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c @@ -0,0 +1,199 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "draw_vs.h" +#include "draw_pipe.h" + +struct twoside_stage { + struct draw_stage stage; + float sign; /**< +1 or -1 */ + uint attrib_front0, attrib_back0; + uint attrib_front1, attrib_back1; +}; + + +static INLINE struct twoside_stage *twoside_stage( struct draw_stage *stage ) +{ + return (struct twoside_stage *)stage; +} + + + + +/** + * Copy back color(s) to front color(s). 
+ */ +static INLINE struct vertex_header * +copy_bfc( struct twoside_stage *twoside, + const struct vertex_header *v, + unsigned idx ) +{ + struct vertex_header *tmp = dup_vert( &twoside->stage, v, idx ); + + if (twoside->attrib_back0) { + COPY_4FV(tmp->data[twoside->attrib_front0], + tmp->data[twoside->attrib_back0]); + } + if (twoside->attrib_back1) { + COPY_4FV(tmp->data[twoside->attrib_front1], + tmp->data[twoside->attrib_back1]); + } + + return tmp; +} + + +/* Twoside tri: + */ +static void twoside_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct twoside_stage *twoside = twoside_stage(stage); + + if (header->det * twoside->sign < 0.0) { + /* this is a back-facing triangle */ + struct prim_header tmp; + + tmp.det = header->det; + tmp.flags = header->flags; + tmp.pad = header->pad; + /* copy back attribs to front attribs */ + tmp.v[0] = copy_bfc(twoside, header->v[0], 0); + tmp.v[1] = copy_bfc(twoside, header->v[1], 1); + tmp.v[2] = copy_bfc(twoside, header->v[2], 2); + + stage->next->tri( stage->next, &tmp ); + } + else { + stage->next->tri( stage->next, header ); + } +} + + + +static void twoside_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct twoside_stage *twoside = twoside_stage(stage); + const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; + uint i; + + twoside->attrib_front0 = 0; + twoside->attrib_front1 = 0; + twoside->attrib_back0 = 0; + twoside->attrib_back1 = 0; + + /* Find which vertex shader outputs are front/back colors */ + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR) { + if (vs->info.output_semantic_index[i] == 0) + twoside->attrib_front0 = i; + else + twoside->attrib_front1 = i; + } + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { + if (vs->info.output_semantic_index[i] == 0) + twoside->attrib_back0 = i; + else + twoside->attrib_back1 = i; + } + } + + if (!twoside->attrib_back0) + 
twoside->attrib_front0 = 0; + + if (!twoside->attrib_back1) + twoside->attrib_front1 = 0; + + /* + * We'll multiply the primitive's determinant by this sign to determine + * if the triangle is back-facing (negative). + * sign = -1 for CCW, +1 for CW + */ + twoside->sign = (stage->draw->rasterizer->front_winding == PIPE_WINDING_CCW) ? -1.0f : 1.0f; + + stage->tri = twoside_tri; + stage->tri( stage, header ); +} + + +static void twoside_flush( struct draw_stage *stage, unsigned flags ) +{ + stage->tri = twoside_first_tri; + stage->next->flush( stage->next, flags ); +} + + +static void twoside_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void twoside_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Create twoside pipeline stage. + */ +struct draw_stage *draw_twoside_stage( struct draw_context *draw ) +{ + struct twoside_stage *twoside = CALLOC_STRUCT(twoside_stage); + if (twoside == NULL) + goto fail; + + if (!draw_alloc_temp_verts( &twoside->stage, 3 )) + goto fail; + + twoside->stage.draw = draw; + twoside->stage.next = NULL; + twoside->stage.point = draw_pipe_passthrough_point; + twoside->stage.line = draw_pipe_passthrough_line; + twoside->stage.tri = twoside_first_tri; + twoside->stage.flush = twoside_flush; + twoside->stage.reset_stipple_counter = twoside_reset_stipple_counter; + twoside->stage.destroy = twoside_destroy; + + return &twoside->stage; + + fail: + if (twoside) + twoside->stage.destroy( &twoside->stage ); + + return NULL; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c new file mode 100644 index 0000000000..68835fd1a5 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -0,0 +1,203 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 
+ * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Drawing stage for handling glPolygonMode(line/point). + * Convert triangles to points or lines as needed. + */ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_memory.h" +#include "pipe/p_defines.h" +#include "draw_private.h" +#include "draw_pipe.h" + + +struct unfilled_stage { + struct draw_stage stage; + + /** [0] = front face, [1] = back face. 
+ * legal values: PIPE_POLYGON_MODE_FILL, PIPE_POLYGON_MODE_LINE, + * and PIPE_POLYGON_MODE_POINT, + */ + unsigned mode[2]; +}; + + +static INLINE struct unfilled_stage *unfilled_stage( struct draw_stage *stage ) +{ + return (struct unfilled_stage *)stage; +} + + + +static void point( struct draw_stage *stage, + struct vertex_header *v0 ) +{ + struct prim_header tmp; + tmp.v[0] = v0; + stage->next->point( stage->next, &tmp ); +} + +static void line( struct draw_stage *stage, + struct vertex_header *v0, + struct vertex_header *v1 ) +{ + struct prim_header tmp; + tmp.v[0] = v0; + tmp.v[1] = v1; + stage->next->line( stage->next, &tmp ); +} + + +static void points( struct draw_stage *stage, + struct prim_header *header ) +{ + struct vertex_header *v0 = header->v[0]; + struct vertex_header *v1 = header->v[1]; + struct vertex_header *v2 = header->v[2]; + + if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) point( stage, v0 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) point( stage, v1 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) point( stage, v2 ); +} + + +static void lines( struct draw_stage *stage, + struct prim_header *header ) +{ + struct vertex_header *v0 = header->v[0]; + struct vertex_header *v1 = header->v[1]; + struct vertex_header *v2 = header->v[2]; + + if (header->flags & DRAW_PIPE_RESET_STIPPLE) + stage->next->reset_stipple_counter( stage->next ); + + if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) line( stage, v2, v0 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) line( stage, v0, v1 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) line( stage, v1, v2 ); +} + + +/* Unfilled tri: + * + * Note edgeflags in the vertex struct is not sufficient as we will + * need to manipulate them when decomposing primitives. + * + * We currently keep the vertex edgeflag and primitive edgeflag mask + * separate until the last possible moment. 
+ */ +static void unfilled_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct unfilled_stage *unfilled = unfilled_stage(stage); + unsigned mode = unfilled->mode[header->det >= 0.0]; + + switch (mode) { + case PIPE_POLYGON_MODE_FILL: + stage->next->tri( stage->next, header ); + break; + case PIPE_POLYGON_MODE_LINE: + lines( stage, header ); + break; + case PIPE_POLYGON_MODE_POINT: + points( stage, header ); + break; + default: + assert(0); + } +} + + +static void unfilled_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct unfilled_stage *unfilled = unfilled_stage(stage); + + unfilled->mode[0] = stage->draw->rasterizer->fill_ccw; /* front */ + unfilled->mode[1] = stage->draw->rasterizer->fill_cw; /* back */ + + stage->tri = unfilled_tri; + stage->tri( stage, header ); +} + + + +static void unfilled_flush( struct draw_stage *stage, + unsigned flags ) +{ + stage->next->flush( stage->next, flags ); + + stage->tri = unfilled_first_tri; +} + + +static void unfilled_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void unfilled_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Create unfilled triangle stage. 
+ */ +struct draw_stage *draw_unfilled_stage( struct draw_context *draw ) +{ + struct unfilled_stage *unfilled = CALLOC_STRUCT(unfilled_stage); + if (unfilled == NULL) + goto fail; + + if (!draw_alloc_temp_verts( &unfilled->stage, 0 )) + goto fail; + + unfilled->stage.draw = draw; + unfilled->stage.next = NULL; + unfilled->stage.tmp = NULL; + unfilled->stage.point = draw_pipe_passthrough_point; + unfilled->stage.line = draw_pipe_passthrough_line; + unfilled->stage.tri = unfilled_first_tri; + unfilled->stage.flush = unfilled_flush; + unfilled->stage.reset_stipple_counter = unfilled_reset_stipple_counter; + unfilled->stage.destroy = unfilled_destroy; + + return &unfilled->stage; + + fail: + if (unfilled) + unfilled->stage.destroy( &unfilled->stage ); + + return NULL; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_util.c b/src/gallium/auxiliary/draw/draw_pipe_util.c new file mode 100644 index 0000000000..e22e5fed0c --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_util.c @@ -0,0 +1,137 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_memory.h" +#include "draw/draw_private.h" +#include "draw/draw_pipe.h" + + + +void +draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->point(stage->next, header); +} + +void +draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->line(stage->next, header); +} + +void +draw_pipe_passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + + + + +/* This is only used for temporary verts. + */ +#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) + + +/** + * Allocate space for temporary post-transform vertices, such as for clipping. 
+ */ +boolean draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ) +{ + assert(!stage->tmp); + + stage->tmp = NULL; + stage->nr_tmps = nr; + + if (nr != 0) + { + unsigned i; + ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr ); + + if (store == NULL) + return FALSE; + + stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr ); + if (stage->tmp == NULL) { + FREE(store); + return FALSE; + } + + for (i = 0; i < nr; i++) + stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE); + } + + return TRUE; +} + + +void draw_free_temp_verts( struct draw_stage *stage ) +{ + if (stage->tmp) { + FREE( stage->tmp[0] ); + FREE( stage->tmp ); + stage->tmp = NULL; + } +} + + +/* Reset vertex ids. This is basically a type of flush. + * + * Called only from draw_pipe_vbuf.c + */ +void draw_reset_vertex_ids(struct draw_context *draw) +{ + struct draw_stage *stage = draw->pipeline.first; + + while (stage) { + unsigned i; + + for (i = 0; i < stage->nr_tmps; i++) + stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID; + + stage = stage->next; + } + + if (draw->pipeline.verts) + { + unsigned i; + char *verts = draw->pipeline.verts; + unsigned stride = draw->pipeline.vertex_stride; + + for (i = 0; i < draw->pipeline.vertex_count; i++) { + ((struct vertex_header *)verts)->vertex_id = UNDEFINED_VERTEX_ID; + verts += stride; + } + } +} + diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c new file mode 100644 index 0000000000..f34c68728e --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -0,0 +1,317 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_memory.h" +#include "pipe/p_defines.h" +#include "draw_private.h" +#include "draw_pipe.h" +#include "draw_context.h" + +static boolean points( unsigned prim ) +{ + return (prim == PIPE_PRIM_POINTS); +} + +static boolean lines( unsigned prim ) +{ + return (prim == PIPE_PRIM_LINES || + prim == PIPE_PRIM_LINE_STRIP || + prim == PIPE_PRIM_LINE_LOOP); +} + +static boolean triangles( unsigned prim ) +{ + return prim >= PIPE_PRIM_TRIANGLES; +} + +/** + * Check if we need any special pipeline stages, or whether + * prims/verts can go through untouched. Don't test for bypass + * clipping or vs modes, this function is just about the primitive + * pipeline stages. 
+ */ +boolean +draw_need_pipeline(const struct draw_context *draw, + const struct pipe_rasterizer_state *rasterizer, + unsigned int prim ) +{ + /* Don't have to worry about triangles turning into lines/points + * and triggering the pipeline, because we have to trigger the + * pipeline *anyway* if unfilled mode is active. + */ + if (lines(prim)) + { + /* line stipple */ + if (rasterizer->line_stipple_enable && draw->pipeline.line_stipple) + return TRUE; + + /* wide lines */ + if (rasterizer->line_width > draw->pipeline.wide_line_threshold) + return TRUE; + + /* AA lines */ + if (rasterizer->line_smooth && draw->pipeline.aaline) + return TRUE; + } + + if (points(prim)) + { + /* large points */ + if (rasterizer->point_size > draw->pipeline.wide_point_threshold) + return TRUE; + + /* AA points */ + if (rasterizer->point_smooth && draw->pipeline.aapoint) + return TRUE; + + /* point sprites */ + if (rasterizer->point_sprite && draw->pipeline.point_sprite) + return TRUE; + } + + + if (triangles(prim)) + { + /* polygon stipple */ + if (rasterizer->poly_stipple_enable && draw->pipeline.pstipple) + return TRUE; + + /* unfilled polygons */ + if (rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || + rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) + return TRUE; + + /* polygon offset */ + if (rasterizer->offset_cw || rasterizer->offset_ccw) + return TRUE; + + /* two-side lighting */ + if (rasterizer->light_twoside) + return TRUE; + } + + /* polygon cull - this is difficult - hardware can cull just fine + * most of the time (though sometimes CULL_NEITHER is unsupported. + * + * Generally this isn't a reason to require the pipeline, though. + * + if (rasterizer->cull_mode) + return TRUE; + */ + + return FALSE; +} + + + +/** + * Rebuild the rendering pipeline. 
+ */ +static struct draw_stage *validate_pipeline( struct draw_stage *stage ) +{ + struct draw_context *draw = stage->draw; + struct draw_stage *next = draw->pipeline.rasterize; + int need_det = 0; + int precalc_flat = 0; + boolean wide_lines, wide_points; + + /* Set the validate's next stage to the rasterize stage, so that it + * can be found later if needed for flushing. + */ + stage->next = next; + + /* drawing wide lines? */ + wide_lines = (draw->rasterizer->line_width > draw->pipeline.wide_line_threshold + && !draw->rasterizer->line_smooth); + + /* drawing large points? */ + if (draw->rasterizer->point_sprite && draw->pipeline.point_sprite) + wide_points = TRUE; + else if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) + wide_points = FALSE; + else if (draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) + wide_points = TRUE; + else + wide_points = FALSE; + + /* + * NOTE: we build up the pipeline in end-to-start order. + * + * TODO: make the current primitive part of the state and build + * shorter pipelines for lines & points. 
+ */ + + if (draw->rasterizer->line_smooth && draw->pipeline.aaline) { + draw->pipeline.aaline->next = next; + next = draw->pipeline.aaline; + } + + if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) { + draw->pipeline.aapoint->next = next; + next = draw->pipeline.aapoint; + } + + if (wide_lines) { + draw->pipeline.wide_line->next = next; + next = draw->pipeline.wide_line; + precalc_flat = 1; + } + + if (wide_points || draw->rasterizer->point_sprite) { + draw->pipeline.wide_point->next = next; + next = draw->pipeline.wide_point; + } + + if (draw->rasterizer->line_stipple_enable && draw->pipeline.line_stipple) { + draw->pipeline.stipple->next = next; + next = draw->pipeline.stipple; + precalc_flat = 1; /* only needed for lines really */ + } + + if (draw->rasterizer->poly_stipple_enable + && draw->pipeline.pstipple) { + draw->pipeline.pstipple->next = next; + next = draw->pipeline.pstipple; + } + + if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || + draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) { + draw->pipeline.unfilled->next = next; + next = draw->pipeline.unfilled; + precalc_flat = 1; /* only needed for triangles really */ + need_det = 1; + } + + if (draw->rasterizer->flatshade && precalc_flat) { + draw->pipeline.flatshade->next = next; + next = draw->pipeline.flatshade; + } + + if (draw->rasterizer->offset_cw || + draw->rasterizer->offset_ccw) { + draw->pipeline.offset->next = next; + next = draw->pipeline.offset; + need_det = 1; + } + + if (draw->rasterizer->light_twoside) { + draw->pipeline.twoside->next = next; + next = draw->pipeline.twoside; + need_det = 1; + } + + /* Always run the cull stage as we calculate determinant there + * also. + * + * This can actually be a win as culling out the triangles can lead + * to less work emitting vertices, smaller vertex buffers, etc. + * It's difficult to say whether this will be true in general. 
+ */ + if (need_det || draw->rasterizer->cull_mode) { + draw->pipeline.cull->next = next; + next = draw->pipeline.cull; + } + + /* Clip stage + */ + if (!draw->bypass_clipping) + { + draw->pipeline.clip->next = next; + next = draw->pipeline.clip; + } + + + draw->pipeline.first = next; + return next; +} + +static void validate_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct draw_stage *pipeline = validate_pipeline( stage ); + pipeline->tri( pipeline, header ); +} + +static void validate_line( struct draw_stage *stage, + struct prim_header *header ) +{ + struct draw_stage *pipeline = validate_pipeline( stage ); + pipeline->line( pipeline, header ); +} + +static void validate_point( struct draw_stage *stage, + struct prim_header *header ) +{ + struct draw_stage *pipeline = validate_pipeline( stage ); + pipeline->point( pipeline, header ); +} + +static void validate_reset_stipple_counter( struct draw_stage *stage ) +{ + struct draw_stage *pipeline = validate_pipeline( stage ); + pipeline->reset_stipple_counter( pipeline ); +} + +static void validate_flush( struct draw_stage *stage, + unsigned flags ) +{ + /* May need to pass a backend flush on to the rasterize stage. + */ + if (stage->next) + stage->next->flush( stage->next, flags ); +} + + +static void validate_destroy( struct draw_stage *stage ) +{ + FREE( stage ); +} + + +/** + * Create validate pipeline stage. 
+ */ +struct draw_stage *draw_validate_stage( struct draw_context *draw ) +{ + struct draw_stage *stage = CALLOC_STRUCT(draw_stage); + if (stage == NULL) + return NULL; + + stage->draw = draw; + stage->next = NULL; + stage->point = validate_point; + stage->line = validate_line; + stage->tri = validate_tri; + stage->flush = validate_flush; + stage->reset_stipple_counter = validate_reset_stipple_counter; + stage->destroy = validate_destroy; + + return stage; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c new file mode 100644 index 0000000000..9825e116c3 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -0,0 +1,494 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Vertex buffer drawing stage. + * + * \author José Fonseca <jrfonsec@tungstengraphics.com> + * \author Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_debug.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "draw_vbuf.h" +#include "draw_private.h" +#include "draw_vertex.h" +#include "draw_pipe.h" +#include "translate/translate.h" +#include "translate/translate_cache.h" + + +/** + * Vertex buffer emit stage. + */ +struct vbuf_stage { + struct draw_stage stage; /**< This must be first (base class) */ + + struct vbuf_render *render; + + const struct vertex_info *vinfo; + + /** Vertex size in bytes */ + unsigned vertex_size; + + struct translate *translate; + + /* FIXME: we have no guarantee that 'unsigned' is 32bit */ + + /** Vertices in hardware format */ + unsigned *vertices; + unsigned *vertex_ptr; + unsigned max_vertices; + unsigned nr_vertices; + + /** Indices */ + ushort *indices; + unsigned max_indices; + unsigned nr_indices; + + /* Cache point size somewhere it's address won't change: + */ + float point_size; + + struct translate_cache *cache; +}; + + +/** + * Basically a cast wrapper. 
+ */ +static INLINE struct vbuf_stage * +vbuf_stage( struct draw_stage *stage ) +{ + assert(stage); + return (struct vbuf_stage *)stage; +} + + +static void vbuf_flush_indices( struct vbuf_stage *vbuf ); +static void vbuf_flush_vertices( struct vbuf_stage *vbuf ); +static void vbuf_alloc_vertices( struct vbuf_stage *vbuf ); + + +static INLINE boolean +overflow( void *map, void *ptr, unsigned bytes, unsigned bufsz ) +{ + unsigned long used = (unsigned long) ((char *)ptr - (char *)map); + return (used + bytes) > bufsz; +} + + +static INLINE void +check_space( struct vbuf_stage *vbuf, unsigned nr ) +{ + if (vbuf->nr_vertices + nr > vbuf->max_vertices ) { + vbuf_flush_vertices(vbuf); + vbuf_alloc_vertices(vbuf); + } + + if (vbuf->nr_indices + nr > vbuf->max_indices ) + vbuf_flush_indices(vbuf); +} + + + + +/** + * Extract the needed fields from post-transformed vertex and emit + * a hardware(driver) vertex. + * Recall that the vertices are constructed by the 'draw' module and + * have a couple of slots at the beginning (1-dword header, 4-dword + * clip pos) that we ignore here. We only use the vertex->data[] fields. + */ +static INLINE ushort +emit_vertex( struct vbuf_stage *vbuf, + struct vertex_header *vertex ) +{ + if(vertex->vertex_id == UNDEFINED_VERTEX_ID) { + /* Hmm - vertices are emitted one at a time - better make sure + * set_buffer is efficient. Consider a special one-shot mode for + * translate. 
+ */ + /* Note: we really do want data[0] here, not data[pos]: + */ + vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0); + vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr); + + if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); + + vbuf->vertex_ptr += vbuf->vertex_size/4; + vertex->vertex_id = vbuf->nr_vertices++; + } + + return (ushort)vertex->vertex_id; +} + + +static void +vbuf_tri( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + unsigned i; + + check_space( vbuf, 3 ); + + for (i = 0; i < 3; i++) { + vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] ); + } +} + + +static void +vbuf_line( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + unsigned i; + + check_space( vbuf, 2 ); + + for (i = 0; i < 2; i++) { + vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] ); + } +} + + +static void +vbuf_point( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + + check_space( vbuf, 1 ); + + vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[0] ); +} + + + + +/** + * Set the prim type for subsequent vertices. + * This may result in a new vertex size. The existing vbuffer (if any) + * will be flushed if needed and a new one allocated. + */ +static void +vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) +{ + struct translate_key hw_key; + unsigned dst_offset; + unsigned i; + + vbuf->render->set_primitive(vbuf->render, prim); + + /* Must do this after set_primitive() above: + * + * XXX: need some state managment to track when this needs to be + * recalculated. The driver should tell us whether there was a + * state change. 
+ */ + vbuf->vinfo = vbuf->render->get_vertex_info(vbuf->render); + + if (vbuf->vertex_size != vbuf->vinfo->size * sizeof(float)) { + vbuf_flush_vertices(vbuf); + vbuf->vertex_size = vbuf->vinfo->size * sizeof(float); + } + + /* Translate from pipeline vertices to hw vertices. + */ + dst_offset = 0; + + for (i = 0; i < vbuf->vinfo->num_attribs; i++) { + unsigned emit_sz = 0; + unsigned src_buffer = 0; + unsigned output_format; + unsigned src_offset = (vbuf->vinfo->attrib[i].src_index * 4 * sizeof(float) ); + + switch (vbuf->vinfo->attrib[i].emit) { + case EMIT_4F: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); + break; + case EMIT_3F: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + src_buffer = 1; + src_offset = 0; + break; + case EMIT_4UB: + output_format = PIPE_FORMAT_B8G8R8A8_UNORM; + emit_sz = 4 * sizeof(ubyte); + break; + default: + assert(0); + output_format = PIPE_FORMAT_NONE; + emit_sz = 0; + break; + } + + hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + hw_key.element[i].input_buffer = src_buffer; + hw_key.element[i].input_offset = src_offset; + hw_key.element[i].output_format = output_format; + hw_key.element[i].output_offset = dst_offset; + + dst_offset += emit_sz; + } + + hw_key.nr_elements = vbuf->vinfo->num_attribs; + hw_key.output_stride = vbuf->vinfo->size * 4; + + /* Don't bother with caching at this stage: + */ + if (!vbuf->translate || + translate_key_compare(&vbuf->translate->key, &hw_key) != 0) + { + translate_key_sanitize(&hw_key); + vbuf->translate = translate_cache_find(vbuf->cache, &hw_key); + + vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 
0); + } + + vbuf->point_size = vbuf->stage.draw->rasterizer->point_size; + + /* Allocate new buffer? + */ + if (!vbuf->vertices) + vbuf_alloc_vertices(vbuf); +} + + +static void +vbuf_first_tri( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + + vbuf_flush_indices( vbuf ); + stage->tri = vbuf_tri; + vbuf_set_prim(vbuf, PIPE_PRIM_TRIANGLES); + stage->tri( stage, prim ); +} + + +static void +vbuf_first_line( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + + vbuf_flush_indices( vbuf ); + stage->line = vbuf_line; + vbuf_set_prim(vbuf, PIPE_PRIM_LINES); + stage->line( stage, prim ); +} + + +static void +vbuf_first_point( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + + vbuf_flush_indices( vbuf ); + stage->point = vbuf_point; + vbuf_set_prim(vbuf, PIPE_PRIM_POINTS); + stage->point( stage, prim ); +} + + +static void +vbuf_flush_indices( struct vbuf_stage *vbuf ) +{ + if(!vbuf->nr_indices) + return; + + assert((uint) (vbuf->vertex_ptr - vbuf->vertices) == + vbuf->nr_vertices * vbuf->vertex_size / sizeof(unsigned)); + + vbuf->render->draw(vbuf->render, vbuf->indices, vbuf->nr_indices); + + vbuf->nr_indices = 0; +} + + +/** + * Flush existing vertex buffer and allocate a new one. + * + * XXX: We separate flush-on-index-full and flush-on-vb-full, but may + * raise issues uploading vertices if the hardware wants to flush when + * we flush. 
+ */ +static void +vbuf_flush_vertices( struct vbuf_stage *vbuf ) +{ + if(vbuf->vertices) { + vbuf_flush_indices(vbuf); + + /* Reset temporary vertices ids */ + if(vbuf->nr_vertices) + draw_reset_vertex_ids( vbuf->stage.draw ); + + /* Free the vertex buffer */ + vbuf->render->release_vertices(vbuf->render, + vbuf->vertices, + vbuf->vertex_size, + vbuf->nr_vertices); + vbuf->max_vertices = vbuf->nr_vertices = 0; + vbuf->vertex_ptr = vbuf->vertices = NULL; + + } +} + + +static void +vbuf_alloc_vertices( struct vbuf_stage *vbuf ) +{ + assert(!vbuf->nr_indices); + assert(!vbuf->vertices); + + /* Allocate a new vertex buffer */ + vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size; + + /* even number */ + vbuf->max_vertices = vbuf->max_vertices & ~1; + + /* Must always succeed -- driver gives us a + * 'max_vertex_buffer_bytes' which it guarantees it can allocate, + * and it will flush itself if necessary to do so. If this does + * fail, we are basically without usable hardware. + */ + vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render, + (ushort) vbuf->vertex_size, + (ushort) vbuf->max_vertices); + vbuf->vertex_ptr = vbuf->vertices; +} + + + +static void +vbuf_flush( struct draw_stage *stage, unsigned flags ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + + vbuf_flush_indices( vbuf ); + + stage->point = vbuf_first_point; + stage->line = vbuf_first_line; + stage->tri = vbuf_first_tri; + + if (flags & DRAW_FLUSH_BACKEND) + vbuf_flush_vertices( vbuf ); +} + + +static void +vbuf_reset_stipple_counter( struct draw_stage *stage ) +{ + /* XXX: Need to do something here for hardware with linestipple. 
+ */ + (void) stage; +} + + +static void vbuf_destroy( struct draw_stage *stage ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + + if(vbuf->indices) + align_free( vbuf->indices ); + + if (vbuf->render) + vbuf->render->destroy( vbuf->render ); + + if (vbuf->cache) + translate_cache_destroy(vbuf->cache); + + FREE( stage ); +} + + +/** + * Create a new primitive vbuf/render stage. + */ +struct draw_stage *draw_vbuf_stage( struct draw_context *draw, + struct vbuf_render *render ) +{ + struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage); + if (vbuf == NULL) + goto fail; + + vbuf->stage.draw = draw; + vbuf->stage.point = vbuf_first_point; + vbuf->stage.line = vbuf_first_line; + vbuf->stage.tri = vbuf_first_tri; + vbuf->stage.flush = vbuf_flush; + vbuf->stage.reset_stipple_counter = vbuf_reset_stipple_counter; + vbuf->stage.destroy = vbuf_destroy; + + vbuf->render = render; + vbuf->max_indices = MAX2(render->max_indices, UNDEFINED_VERTEX_ID-1); + + vbuf->indices = (ushort *) align_malloc( vbuf->max_indices * + sizeof(vbuf->indices[0]), + 16 ); + if (!vbuf->indices) + goto fail; + + vbuf->cache = translate_cache_create(); + if (!vbuf->cache) + goto fail; + + + vbuf->vertices = NULL; + vbuf->vertex_ptr = vbuf->vertices; + + return &vbuf->stage; + + fail: + if (vbuf) + vbuf_destroy(&vbuf->stage); + + return NULL; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c new file mode 100644 index 0000000000..184e363594 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -0,0 +1,180 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "draw_private.h" +#include "draw_pipe.h" + + +struct wideline_stage { + struct draw_stage stage; + + float half_line_width; +}; + + + +static INLINE struct wideline_stage *wideline_stage( struct draw_stage *stage ) +{ + return (struct wideline_stage *)stage; +} + + + +/** + * Draw a wide line by drawing a quad (two triangles). + * XXX need to disable polygon stipple. 
+ */ +static void wideline_line( struct draw_stage *stage, + struct prim_header *header ) +{ + /*const struct wideline_stage *wide = wideline_stage(stage);*/ + const unsigned pos = stage->draw->vs.position_output; + const float half_width = 0.5f * stage->draw->rasterizer->line_width; + + struct prim_header tri; + + struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); + struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); + struct vertex_header *v2 = dup_vert(stage, header->v[1], 2); + struct vertex_header *v3 = dup_vert(stage, header->v[1], 3); + + float *pos0 = v0->data[pos]; + float *pos1 = v1->data[pos]; + float *pos2 = v2->data[pos]; + float *pos3 = v3->data[pos]; + + const float dx = fabsf(pos0[0] - pos2[0]); + const float dy = fabsf(pos0[1] - pos2[1]); + + /* small tweak to meet GL specification */ + const float bias = 0.125f; + + /* + * Draw wide line as a quad (two tris) by "stretching" the line along + * X or Y. + * We need to tweak coords in several ways to be conformant here. 
+ */ + + if (dx > dy) { + /* x-major line */ + pos0[1] = pos0[1] - half_width - bias; + pos1[1] = pos1[1] + half_width - bias; + pos2[1] = pos2[1] - half_width - bias; + pos3[1] = pos3[1] + half_width - bias; + if (pos0[0] < pos2[0]) { + /* left to right line */ + pos0[0] -= 0.5f; + pos1[0] -= 0.5f; + pos2[0] -= 0.5f; + pos3[0] -= 0.5f; + } + else { + /* right to left line */ + pos0[0] += 0.5f; + pos1[0] += 0.5f; + pos2[0] += 0.5f; + pos3[0] += 0.5f; + } + } + else { + /* y-major line */ + pos0[0] = pos0[0] - half_width + bias; + pos1[0] = pos1[0] + half_width + bias; + pos2[0] = pos2[0] - half_width + bias; + pos3[0] = pos3[0] + half_width + bias; + if (pos0[1] < pos2[1]) { + /* top to bottom line */ + pos0[1] -= 0.5f; + pos1[1] -= 0.5f; + pos2[1] -= 0.5f; + pos3[1] -= 0.5f; + } + else { + /* bottom to top line */ + pos0[1] += 0.5f; + pos1[1] += 0.5f; + pos2[1] += 0.5f; + pos3[1] += 0.5f; + } + } + + tri.det = header->det; /* only the sign matters */ + tri.v[0] = v0; + tri.v[1] = v2; + tri.v[2] = v3; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v0; + tri.v[1] = v3; + tri.v[2] = v1; + stage->next->tri( stage->next, &tri ); +} + + +static void wideline_flush( struct draw_stage *stage, unsigned flags ) +{ + stage->next->flush( stage->next, flags ); +} + + +static void wideline_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void wideline_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +struct draw_stage *draw_wide_line_stage( struct draw_context *draw ) +{ + struct wideline_stage *wide = CALLOC_STRUCT(wideline_stage); + + draw_alloc_temp_verts( &wide->stage, 4 ); + + wide->stage.draw = draw; + wide->stage.next = NULL; + wide->stage.point = draw_pipe_passthrough_point; + wide->stage.line = wideline_line; + wide->stage.tri = draw_pipe_passthrough_tri; + wide->stage.flush = wideline_flush; + wide->stage.reset_stipple_counter = 
wideline_reset_stipple_counter; + wide->stage.destroy = wideline_destroy; + + return &wide->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c new file mode 100644 index 0000000000..e1af9e56a2 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -0,0 +1,297 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "draw_vs.h" +#include "draw_pipe.h" + + +struct widepoint_stage { + struct draw_stage stage; + + float half_point_size; + float point_size_min; + float point_size_max; + + float xbias; + float ybias; + + uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS]; + uint texcoord_mode[PIPE_MAX_SHADER_OUTPUTS]; + uint num_texcoords; + + int psize_slot; + + int point_coord_fs_input; /**< input for pointcoord (and fog) */ +}; + + + +static INLINE struct widepoint_stage * +widepoint_stage( struct draw_stage *stage ) +{ + return (struct widepoint_stage *)stage; +} + + + + +/** + * Set the vertex texcoords for sprite mode. + * Coords may be left untouched or set to a right-side-up or upside-down + * orientation. + */ +static void set_texcoords(const struct widepoint_stage *wide, + struct vertex_header *v, const float tc[4]) +{ + uint i; + for (i = 0; i < wide->num_texcoords; i++) { + if (wide->texcoord_mode[i] != PIPE_SPRITE_COORD_NONE) { + uint j = wide->texcoord_slot[i]; + v->data[j][0] = tc[0]; + if (wide->texcoord_mode[i] == PIPE_SPRITE_COORD_LOWER_LEFT) + v->data[j][1] = 1.0f - tc[1]; + else + v->data[j][1] = tc[1]; + v->data[j][2] = tc[2]; + v->data[j][3] = tc[3]; + } + } + + if (wide->point_coord_fs_input >= 0) { + /* put gl_PointCoord into extra vertex output's zw components */ + uint k = wide->stage.draw->extra_vp_outputs.slot; + v->data[k][2] = tc[0]; + v->data[k][3] = tc[1]; + } +} + + +/* If there are lots of sprite points (and why wouldn't there be?) it + * would probably be more sensible to change hardware setup to + * optimize this rather than doing the whole thing in software like + * this. 
+ */ +static void widepoint_point( struct draw_stage *stage, + struct prim_header *header ) +{ + const struct widepoint_stage *wide = widepoint_stage(stage); + const unsigned pos = stage->draw->vs.position_output; + const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite; + float half_size; + float left_adj, right_adj, bot_adj, top_adj; + + struct prim_header tri; + + /* four dups of original vertex */ + struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); + struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); + struct vertex_header *v2 = dup_vert(stage, header->v[0], 2); + struct vertex_header *v3 = dup_vert(stage, header->v[0], 3); + + float *pos0 = v0->data[pos]; + float *pos1 = v1->data[pos]; + float *pos2 = v2->data[pos]; + float *pos3 = v3->data[pos]; + + /* point size is either per-vertex or fixed size */ + if (wide->psize_slot >= 0) { + half_size = header->v[0]->data[wide->psize_slot][0]; + + /* XXX: temporary -- do this in the vertex shader?? + */ + half_size = CLAMP(half_size, + wide->point_size_min, + wide->point_size_max); + + half_size *= 0.5f; + } + else { + half_size = wide->half_point_size; + } + + left_adj = -half_size + wide->xbias; + right_adj = half_size + wide->xbias; + bot_adj = half_size + wide->ybias; + top_adj = -half_size + wide->ybias; + + pos0[0] += left_adj; + pos0[1] += top_adj; + + pos1[0] += left_adj; + pos1[1] += bot_adj; + + pos2[0] += right_adj; + pos2[1] += top_adj; + + pos3[0] += right_adj; + pos3[1] += bot_adj; + + if (sprite) { + static const float tex00[4] = { 0, 0, 0, 1 }; + static const float tex01[4] = { 0, 1, 0, 1 }; + static const float tex11[4] = { 1, 1, 0, 1 }; + static const float tex10[4] = { 1, 0, 0, 1 }; + set_texcoords( wide, v0, tex00 ); + set_texcoords( wide, v1, tex01 ); + set_texcoords( wide, v2, tex10 ); + set_texcoords( wide, v3, tex11 ); + } + + tri.det = header->det; /* only the sign matters */ + tri.v[0] = v0; + tri.v[1] = v2; + tri.v[2] = v3; + stage->next->tri( 
stage->next, &tri ); + + tri.v[0] = v0; + tri.v[1] = v3; + tri.v[2] = v1; + stage->next->tri( stage->next, &tri ); +} + + +static void widepoint_first_point( struct draw_stage *stage, + struct prim_header *header ) +{ + struct widepoint_stage *wide = widepoint_stage(stage); + struct draw_context *draw = stage->draw; + + wide->half_point_size = 0.5f * draw->rasterizer->point_size; + wide->point_size_min = draw->rasterizer->point_size_min; + wide->point_size_max = draw->rasterizer->point_size_max; + wide->xbias = 0.0; + wide->ybias = 0.0; + + if (draw->rasterizer->gl_rasterization_rules) { + wide->xbias = 0.125; + } + + /* XXX we won't know the real size if it's computed by the vertex shader! */ + if ((draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) || + (draw->rasterizer->point_sprite && draw->pipeline.point_sprite)) { + stage->point = widepoint_point; + } + else { + stage->point = draw_pipe_passthrough_point; + } + + if (draw->rasterizer->point_sprite) { + /* find vertex shader texcoord outputs */ + const struct draw_vertex_shader *vs = draw->vs.vertex_shader; + uint i, j = 0; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { + wide->texcoord_slot[j] = i; + wide->texcoord_mode[j] = draw->rasterizer->sprite_coord_mode[j]; + j++; + } + } + wide->num_texcoords = j; + + /* find fragment shader PointCoord/Fog input */ + wide->point_coord_fs_input = 0; /* XXX fix this! 
*/ + + /* setup extra vp output (point coord implemented as a texcoord) */ + draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_vp_outputs.semantic_index = 0; + draw->extra_vp_outputs.slot = draw->vs.num_vs_outputs; + } + else { + wide->point_coord_fs_input = -1; + draw->extra_vp_outputs.slot = 0; + } + + wide->psize_slot = -1; + if (draw->rasterizer->point_size_per_vertex) { + /* find PSIZ vertex output */ + const struct draw_vertex_shader *vs = draw->vs.vertex_shader; + uint i; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { + wide->psize_slot = i; + break; + } + } + } + + stage->point( stage, header ); +} + + +static void widepoint_flush( struct draw_stage *stage, unsigned flags ) +{ + stage->point = widepoint_first_point; + stage->next->flush( stage->next, flags ); +} + + +static void widepoint_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void widepoint_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +struct draw_stage *draw_wide_point_stage( struct draw_context *draw ) +{ + struct widepoint_stage *wide = CALLOC_STRUCT(widepoint_stage); + if (wide == NULL) + goto fail; + + if (!draw_alloc_temp_verts( &wide->stage, 4 )) + goto fail; + + wide->stage.draw = draw; + wide->stage.next = NULL; + wide->stage.point = widepoint_first_point; + wide->stage.line = draw_pipe_passthrough_line; + wide->stage.tri = draw_pipe_passthrough_tri; + wide->stage.flush = widepoint_flush; + wide->stage.reset_stipple_counter = widepoint_reset_stipple_counter; + wide->stage.destroy = widepoint_destroy; + + return &wide->stage; + + fail: + if (wide) + wide->stage.destroy( &wide->stage ); + + return NULL; +} diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h new file mode 100644 index 0000000000..5d531146c5 --- /dev/null +++ 
b/src/gallium/auxiliary/draw/draw_private.h @@ -0,0 +1,313 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Private data structures, etc for the draw module. + */ + + +/** + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + + +#ifndef DRAW_PRIVATE_H +#define DRAW_PRIVATE_H + + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" + +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_scan.h" + + +struct pipe_context; +struct gallivm_prog; +struct gallivm_cpu_engine; +struct draw_vertex_shader; +struct draw_context; +struct draw_stage; +struct vbuf_render; + + +/** + * Basic vertex info. 
+ * Carry some useful information around with the vertices in the prim pipe. + */ +struct vertex_header { + unsigned clipmask:12; + unsigned edgeflag:1; + unsigned pad:3; + unsigned vertex_id:16; + + float clip[4]; + + /* This will probably become float (*data)[4] soon: + */ + float data[][4]; +}; + +/* NOTE: It should match vertex_id size above */ +#define UNDEFINED_VERTEX_ID 0xffff + + +/** + * Private context for the drawing module. + */ +struct draw_context +{ + /** Drawing/primitive pipeline stages */ + struct { + struct draw_stage *first; /**< one of the following */ + + struct draw_stage *validate; + + /* stages (in logical order) */ + struct draw_stage *flatshade; + struct draw_stage *clip; + struct draw_stage *cull; + struct draw_stage *twoside; + struct draw_stage *offset; + struct draw_stage *unfilled; + struct draw_stage *stipple; + struct draw_stage *aapoint; + struct draw_stage *aaline; + struct draw_stage *pstipple; + struct draw_stage *wide_line; + struct draw_stage *wide_point; + struct draw_stage *rasterize; + + float wide_point_threshold; /**< convert pnts to tris if larger than this */ + float wide_line_threshold; /**< convert lines to tris if wider than this */ + boolean line_stipple; /**< do line stipple? */ + boolean point_sprite; /**< convert points to quads for sprites? 
*/ + + /* Temporary storage while the pipeline is being run: + */ + char *verts; + unsigned vertex_stride; + unsigned vertex_count; + } pipeline; + + + struct vbuf_render *render; + + /* Support prototype passthrough path: + */ + struct { + struct { + struct draw_pt_middle_end *fetch_emit; + struct draw_pt_middle_end *fetch_shade_emit; + struct draw_pt_middle_end *general; + } middle; + + struct { + struct draw_pt_front_end *vcache; + struct draw_pt_front_end *varray; + } front; + + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_buffers; + + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_elements; + + /* user-space vertex data, buffers */ + struct { + const unsigned *edgeflag; + + /** vertex element/index buffer (ex: glDrawElements) */ + const void *elts; + /** bytes per index (0, 1, 2 or 4) */ + unsigned eltSize; + unsigned min_index; + unsigned max_index; + + /** vertex arrays */ + const void *vbuffer[PIPE_MAX_ATTRIBS]; + + /** constant buffer (for vertex shader) */ + const void *constants; + } user; + + boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */ + boolean no_fse; /* disable FSE even when it is correct */ + } pt; + + struct { + boolean bypass_clipping; + boolean bypass_vs; + } driver; + + boolean flushing; /**< debugging/sanity */ + boolean suspend_flushing; /**< internally set */ + boolean bypass_clipping; /**< set if either api or driver bypass_clipping true */ + + boolean force_passthrough; /**< never clip or shade */ + + /* pipe state that we need: */ + const struct pipe_rasterizer_state *rasterizer; + struct pipe_viewport_state viewport; + boolean identity_viewport; + + struct { + struct draw_vertex_shader *vertex_shader; + uint num_vs_outputs; /**< convenience, from vertex_shader */ + uint position_output; + + /** TGSI program interpreter runtime state */ + struct tgsi_exec_machine machine; + + /* This (and the tgsi_exec_machine struct) probably need to 
be moved somewhere private. + */ + struct gallivm_cpu_engine *engine; + + /* Here's another one: + */ + struct aos_machine *aos_machine; + + + const float (*aligned_constants)[4]; + + const float (*aligned_constant_storage)[4]; + unsigned const_storage_size; + + + struct translate *fetch; + struct translate_cache *fetch_cache; + struct translate *emit; + struct translate_cache *emit_cache; + } vs; + + /* Clip derived state: + */ + float plane[12][4]; + unsigned nr_planes; + + /* If a prim stage introduces new vertex attributes, they'll be stored here + */ + struct { + uint semantic_name; + uint semantic_index; + int slot; + } extra_vp_outputs; + + unsigned reduced_prim; + + void *driver_private; +}; + + +/******************************************************************************* + * Vertex shader code: + */ +boolean draw_vs_init( struct draw_context *draw ); +void draw_vs_destroy( struct draw_context *draw ); + +void draw_vs_set_viewport( struct draw_context *, + const struct pipe_viewport_state * ); + +void draw_vs_set_constants( struct draw_context *, + const float (*constants)[4], + unsigned size ); + + + + +/******************************************************************************* + * Vertex processing (was passthrough) code: + */ +boolean draw_pt_init( struct draw_context *draw ); +void draw_pt_destroy( struct draw_context *draw ); +void draw_pt_reset_vertex_ids( struct draw_context *draw ); + + +/******************************************************************************* + * Primitive processing (pipeline) code: + */ + +boolean draw_pipeline_init( struct draw_context *draw ); +void draw_pipeline_destroy( struct draw_context *draw ); + + + + + +/* We use the top few bits in the elts[] parameter to convey a little + * API information. This limits the number of vertices we can address + * to only 4096 -- if that becomes a problem, we can switch to 32-bit + * draw indices. 
+ * + * These flags expected at first vertex of lines & triangles when + * unfilled and/or line stipple modes are operational. + */ +#define DRAW_PIPE_MAX_VERTICES (0x1<<12) +#define DRAW_PIPE_EDGE_FLAG_0 (0x1<<12) +#define DRAW_PIPE_EDGE_FLAG_1 (0x2<<12) +#define DRAW_PIPE_EDGE_FLAG_2 (0x4<<12) +#define DRAW_PIPE_EDGE_FLAG_ALL (0x7<<12) +#define DRAW_PIPE_RESET_STIPPLE (0x8<<12) +#define DRAW_PIPE_FLAG_MASK (0xf<<12) + +void draw_pipeline_run( struct draw_context *draw, + unsigned prim, + struct vertex_header *vertices, + unsigned vertex_count, + unsigned stride, + const ushort *elts, + unsigned count ); + +void draw_pipeline_run_linear( struct draw_context *draw, + unsigned prim, + struct vertex_header *vertices, + unsigned count, + unsigned stride ); + + + +void draw_pipeline_flush( struct draw_context *draw, + unsigned flags ); + + + +/******************************************************************************* + * Flushing + */ + +#define DRAW_FLUSH_STATE_CHANGE 0x8 +#define DRAW_FLUSH_BACKEND 0x10 + + +void draw_do_flush( struct draw_context *draw, unsigned flags ); + + + + +#endif /* DRAW_PRIVATE_H */ diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c new file mode 100644 index 0000000000..87ec6ae20c --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -0,0 +1,209 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
/**
 * Trim a vertex count down to a whole number of primitives.
 *
 * \param count  number of vertices supplied by the caller
 * \param first  number of vertices needed to form the first primitive
 * \param incr   number of additional vertices needed for each further
 *               primitive
 * \return the largest value of the form (first + k * incr) that does not
 *         exceed count, or 0 when count is too small for one primitive
 */
static unsigned trim( unsigned count, unsigned first, unsigned incr )
{
   if (count < first)
      return 0;

   /* The modulo below is undefined for a zero increment.  With nothing
    * to step by, the only complete size is the first primitive itself.
    */
   if (incr == 0)
      return first;

   return count - (count - first) % incr;
}
+ */ +static boolean +draw_pt_arrays(struct draw_context *draw, + unsigned prim, + unsigned start, + unsigned count) +{ + struct draw_pt_front_end *frontend = NULL; + struct draw_pt_middle_end *middle = NULL; + unsigned opt = 0; + + /* Sanitize primitive length: + */ + { + unsigned first, incr; + draw_pt_split_prim(prim, &first, &incr); + count = trim(count, first, incr); + if (count < first) + return TRUE; + } + + if (!draw->force_passthrough) { + if (!draw->render) { + opt |= PT_PIPELINE; + } + + if (draw_need_pipeline(draw, + draw->rasterizer, + prim)) { + opt |= PT_PIPELINE; + } + + if (!draw->bypass_clipping && !draw->pt.test_fse) { + opt |= PT_CLIPTEST; + } + + if (!draw->rasterizer->bypass_vs) { + opt |= PT_SHADE; + } + } + + if (opt == 0) + middle = draw->pt.middle.fetch_emit; + else if (opt == PT_SHADE && !draw->pt.no_fse) + middle = draw->pt.middle.fetch_shade_emit; + else + middle = draw->pt.middle.general; + + + /* Pick the right frontend + */ + if (draw->pt.user.elts || (opt & PT_PIPELINE)) { + frontend = draw->pt.front.vcache; + } else { + frontend = draw->pt.front.varray; + } + + frontend->prepare( frontend, prim, middle, opt ); + + frontend->run(frontend, + draw_pt_elt_func(draw), + draw_pt_elt_ptr(draw, start), + count); + + frontend->finish( frontend ); + + return TRUE; +} + + +boolean draw_pt_init( struct draw_context *draw ) +{ + draw->pt.test_fse = debug_get_bool_option("DRAW_FSE", FALSE); + draw->pt.no_fse = debug_get_bool_option("DRAW_NO_FSE", FALSE); + + draw->pt.front.vcache = draw_pt_vcache( draw ); + if (!draw->pt.front.vcache) + return FALSE; + + draw->pt.front.varray = draw_pt_varray(draw); + if (!draw->pt.front.varray) + return FALSE; + + draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw ); + if (!draw->pt.middle.fetch_emit) + return FALSE; + + draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw ); + if (!draw->pt.middle.fetch_shade_emit) + return FALSE; + + draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); 
+ if (!draw->pt.middle.general) + return FALSE; + + return TRUE; +} + + +void draw_pt_destroy( struct draw_context *draw ) +{ + if (draw->pt.middle.general) { + draw->pt.middle.general->destroy( draw->pt.middle.general ); + draw->pt.middle.general = NULL; + } + + if (draw->pt.middle.fetch_emit) { + draw->pt.middle.fetch_emit->destroy( draw->pt.middle.fetch_emit ); + draw->pt.middle.fetch_emit = NULL; + } + + if (draw->pt.middle.fetch_shade_emit) { + draw->pt.middle.fetch_shade_emit->destroy( draw->pt.middle.fetch_shade_emit ); + draw->pt.middle.fetch_shade_emit = NULL; + } + + if (draw->pt.front.vcache) { + draw->pt.front.vcache->destroy( draw->pt.front.vcache ); + draw->pt.front.vcache = NULL; + } + + if (draw->pt.front.varray) { + draw->pt.front.varray->destroy( draw->pt.front.varray ); + draw->pt.front.varray = NULL; + } +} + + + + +/** + * Draw vertex arrays + * This is the main entrypoint into the drawing module. + * \param prim one of PIPE_PRIM_x + * \param start index of first vertex to draw + * \param count number of vertices to draw + */ +void +draw_arrays(struct draw_context *draw, unsigned prim, + unsigned start, unsigned count) +{ + unsigned reduced_prim = draw_pt_reduced_prim(prim); + if (reduced_prim != draw->reduced_prim) { + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->reduced_prim = reduced_prim; + } + + /* drawing done here: */ + draw_pt_arrays(draw, prim, start, count); +} + +boolean draw_pt_get_edgeflag( struct draw_context *draw, + unsigned idx ) +{ + if (draw->pt.user.edgeflag) + return (draw->pt.user.edgeflag[idx/32] & (1 << (idx%32))) != 0; + else + return 1; +} diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h new file mode 100644 index 0000000000..c02f229110 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -0,0 +1,235 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 
#ifndef DRAW_PT_H
#define DRAW_PT_H

#include "pipe/p_compiler.h"

/* Maps position 'idx' within an element list to a vertex index.  'elts'
 * is opaque to callers; the provider chooses its interpretation.
 */
typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx );

struct draw_pt_middle_end;
struct draw_context;


/* Bits of the 'opt' word handed to the front and middle ends: which
 * stages a draw call needs.
 */
#define PT_SHADE      0x1
#define PT_CLIPTEST   0x2
#define PT_PIPELINE   0x4
#define PT_MAX_MIDDLE 0x8


/* The "front end" - prepare sets of fetch, draw elements for the
 * middle end.
 *
 * Two versions are declared further down:
 *    - vcache - catchall implementation, decomposes to TRI/LINE/POINT prims
 *    - varray
 * Possible later additions:
 *    - varray_split
 *    - velement, velement_split
 */
struct draw_pt_front_end {
   void (*prepare)( struct draw_pt_front_end *,
                    unsigned prim,
                    struct draw_pt_middle_end *,
                    unsigned opt );

   void (*run)( struct draw_pt_front_end *,
                pt_elt_func elt_func,
                const void *elt_ptr,
                unsigned count );

   void (*finish)( struct draw_pt_front_end * );
   void (*destroy)( struct draw_pt_front_end * );
};


/* The "middle end" - prepares actual hardware vertices for the
 * hardware backend.
 *
 * Three versions are declared further down:
 *    - fetch, vertex shade, cliptest, prim-pipeline (the general case)
 *    - fetch, shade, emit
 *    - fetch, emit (ie passthrough)
 */
struct draw_pt_middle_end {
   void (*prepare)( struct draw_pt_middle_end *,
                    unsigned prim,
                    unsigned opt,
                    unsigned *max_vertices );

   void (*run)( struct draw_pt_middle_end *,
                const unsigned *fetch_elts,
                unsigned fetch_count,
                const ushort *draw_elts,
                unsigned draw_count );

   void (*run_linear)(struct draw_pt_middle_end *,
                      unsigned start,
                      unsigned count);

   /* Transform all vertices in a linear range and then draw them with
    * the supplied element list.  May fail and return FALSE.
    */
   boolean (*run_linear_elts)( struct draw_pt_middle_end *,
                            unsigned fetch_start,
                            unsigned fetch_count,
                            const ushort *draw_elts,
                            unsigned draw_count );

   int (*get_max_vertex_count)( struct draw_pt_middle_end * );

   void (*finish)( struct draw_pt_middle_end * );
   void (*destroy)( struct draw_pt_middle_end * );
};


/* The "back end" - supplied by the driver, defined in draw_vbuf.h.
 */
struct vbuf_render;
struct vertex_header;


/* Helper functions.
 */
pt_elt_func draw_pt_elt_func( struct draw_context *draw );
const void *draw_pt_elt_ptr( struct draw_context *draw,
                             unsigned start );

/* Frontends:
 *
 * The general-purpose vcache implementation and the varray
 * implementation; could add a special case for tiny vertex buffers.
 */
struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw );
struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw);


/* Middle-ends:
 *
 * Currently one general-purpose case which can do all possibilities,
 * at the slight expense of creating a vertex_header in some cases
 * unnecessarily.
 *
 * The special case fetch_emit code avoids pipeline vertices
 * altogether and builds hardware vertices directly from API
 * vertex_elements.
 */
struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw );
struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw );
struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw);


/* More helpers:
 */
boolean draw_pt_get_edgeflag( struct draw_context *draw,
                              unsigned idx );


/*******************************************************************************
 * HW vertex emit:
 */
struct pt_emit;

void draw_pt_emit_prepare( struct pt_emit *emit,
                           unsigned prim,
                           unsigned *max_vertices );

void draw_pt_emit( struct pt_emit *emit,
                   const float (*vertex_data)[4],
                   unsigned vertex_count,
                   unsigned stride,
                   const ushort *elts,
                   unsigned count );

void draw_pt_emit_linear( struct pt_emit *emit,
                          const float (*vertex_data)[4],
                          unsigned vertex_count,
                          unsigned stride,
                          unsigned start,
                          unsigned count );

void draw_pt_emit_destroy( struct pt_emit *emit );

struct pt_emit *draw_pt_emit_create( struct draw_context *draw );


/*******************************************************************************
 * API vertex fetch:
 */

struct pt_fetch;
void draw_pt_fetch_prepare( struct pt_fetch *fetch,
                            unsigned vertex_size );

void draw_pt_fetch_run( struct pt_fetch *fetch,
                        const unsigned *elts,
                        unsigned count,
                        char *verts );

void draw_pt_fetch_run_linear( struct pt_fetch *fetch,
                               unsigned start,
                               unsigned count,
                               char *verts );

void draw_pt_fetch_destroy( struct pt_fetch *fetch );
/* ---- remainder of draw_pt.h ---- */

struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw );

/*******************************************************************************
 * Post-VS: cliptest, rhw, viewport
 */
struct pt_post_vs;

boolean draw_pt_post_vs_run( struct pt_post_vs *pvs,
                             struct vertex_header *pipeline_verts,
                             unsigned stride,
                             unsigned count );

void draw_pt_post_vs_prepare( struct pt_post_vs *pvs,
                              boolean bypass_clipping,
                              boolean identity_viewport,
                              boolean opengl );

struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw );

void draw_pt_post_vs_destroy( struct pt_post_vs *pvs );


/*******************************************************************************
 * Utils:
 */
void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr);
unsigned draw_pt_reduced_prim(unsigned prim);


#endif


/* ---- draw_pt_decompose.h ---- */

/* Primitive decomposition template.
 *
 * The including file defines FUNC, ARGS, LOCAL_VARS, POINT, LINE,
 * TRIANGLE, QUAD and FLUSH before including this text.  The body also
 * uses 'prim', 'i', 'flags' and 'flatfirst', which are not declared
 * here -- presumably they come from ARGS / LOCAL_VARS (confirm against
 * the includers).
 *
 * Each API primitive type is broken down into calls of the POINT / LINE /
 * TRIANGLE / QUAD macros on indices relative to the start of the draw,
 * followed by one FLUSH.
 */
static void FUNC( ARGS,
                  unsigned count )
{
   LOCAL_VARS;

   switch (prim) {
   case PIPE_PRIM_POINTS:
      for (i = 0; i < count; i ++) {
         POINT( (i + 0) );
      }
      break;

   case PIPE_PRIM_LINES:
      /* i+1 < count: a trailing unpaired vertex is ignored */
      for (i = 0; i+1 < count; i += 2) {
         LINE( DRAW_PIPE_RESET_STIPPLE,
               (i + 0),
               (i + 1));
      }
      break;

   case PIPE_PRIM_LINE_LOOP:
      if (count >= 2) {
         /* Only the first segment carries the stipple reset; the loop's
          * increment clause clears 'flags' after every segment.
          */
         flags = DRAW_PIPE_RESET_STIPPLE;

         for (i = 1; i < count; i++, flags = 0) {
            LINE( flags,
                  (i - 1),
                  (i ));
         }

         /* Closing segment: last vertex back to vertex 0 */
         LINE( flags,
               (i - 1),
               (0 ));
      }
      break;

   case PIPE_PRIM_LINE_STRIP:
      flags = DRAW_PIPE_RESET_STIPPLE;
      for (i = 1; i < count; i++, flags = 0) {
         LINE( flags,
               (i - 1),
               (i ));
      }
      break;

   case PIPE_PRIM_TRIANGLES:
      for (i = 0; i+2 < count; i += 3) {
         TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
                   (i + 0),
                   (i + 1),
                   (i + 2 ));
      }
      break;

   case PIPE_PRIM_TRIANGLE_STRIP:
      /* (i&1) swaps two of the three indices on every odd triangle.
       * With flatfirst, vertex i stays in the first slot; otherwise
       * vertex i+2 stays in the last slot.
       */
      if (flatfirst) {
         for (i = 0; i+2 < count; i++) {
            TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
                      (i + 0),
                      (i + 1 + (i&1)),
                      (i + 2 - (i&1)));
         }
      }
      else {
         for (i = 0; i+2 < count; i++) {
            TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
                      (i + 0 + (i&1)),
                      (i + 1 - (i&1)),
                      (i + 2 ));
         }
      }
      break;

   case PIPE_PRIM_TRIANGLE_FAN:
      if (count >= 3) {
         /* Vertex 0 is shared by every triangle; flatfirst only changes
          * which slot it is submitted in.
          */
         if (flatfirst) {
            for (i = 0; i+2 < count; i++) {
               TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
                         (i + 1),
                         (i + 2),
                         (0 ));
            }
         }
         else {
            for (i = 0; i+2 < count; i++) {
               TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
                         (0),
                         (i + 1),
                         (i + 2 ));
            }
         }
      }
      break;


   case PIPE_PRIM_QUADS:
      for (i = 0; i+3 < count; i += 4) {
         QUAD( (i + 0),
               (i + 1),
               (i + 2),
               (i + 3));
      }
      break;

   case PIPE_PRIM_QUAD_STRIP:
      /* Advances two vertices per quad; note the (2,0,1,3) ordering */
      for (i = 0; i+3 < count; i += 2) {
         QUAD( (i + 2),
               (i + 0),
               (i + 1),
               (i + 3));
      }
      break;

   case PIPE_PRIM_POLYGON:
      {
         /* These bitflags look a little odd because we submit the
          * vertices as (1,2,0) to satisfy flatshade requirements.
          */
         const ushort edge_first  = DRAW_PIPE_EDGE_FLAG_2;
         const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
         const ushort edge_last   = DRAW_PIPE_EDGE_FLAG_1;

         /* First triangle: stipple reset plus the first and middle
          * edge flags.  Later triangles start from edge_middle alone.
          */
         flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;

         for (i = 0; i+2 < count; i++, flags = edge_middle) {

            /* Final triangle of the fan additionally gets edge_last */
            if (i + 3 == count)
               flags |= edge_last;

            TRIANGLE( flags,
                      (i + 1),
                      (i + 2),
                      (0));
         }
      }
      break;

   default:
      assert(0);
      break;
   }

   FLUSH;
}


/* Only these five are undefined here; ARGS, LOCAL_VARS and FLUSH are
 * left defined for the includer to deal with.
 */
#undef TRIANGLE
#undef QUAD
#undef POINT
#undef LINE
#undef FUNC
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "draw/draw_pt.h" +#include "draw/draw_private.h" + +/* Neat get_elt func that also works for varrays drawing by encoding + * the start value into a pointer. + */ + +static unsigned elt_uint( const void *elts, unsigned idx ) +{ + return *(((const uint *)elts) + idx); +} + +static unsigned elt_ushort( const void *elts, unsigned idx ) +{ + return *(((const ushort *)elts) + idx); +} + +static unsigned elt_ubyte( const void *elts, unsigned idx ) +{ + return *(((const ubyte *)elts) + idx); +} + +static unsigned elt_vert( const void *elts, unsigned idx ) +{ + return (const ubyte *)elts - (const ubyte *)NULL + idx; +} + +pt_elt_func draw_pt_elt_func( struct draw_context *draw ) +{ + switch (draw->pt.user.eltSize) { + case 0: return &elt_vert; + case 1: return &elt_ubyte; + case 2: return &elt_ushort; + case 4: return &elt_uint; + default: return NULL; + } +} + +const void *draw_pt_elt_ptr( struct draw_context *draw, + unsigned start ) +{ + const char *elts = draw->pt.user.elts; + + switch (draw->pt.user.eltSize) { + case 0: + return (const void *)(((const ubyte *)NULL) + start); + case 1: + return (const void *)(((const ubyte *)elts) + start); + case 2: + return (const void *)(((const ushort *)elts) + start); + case 4: + return (const void *)(((const uint *)elts) + start); + default: + return NULL; + } +} diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c new file mode 100644 index 0000000000..d520b05869 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -0,0 +1,298 @@ 
+/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_memory.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "translate/translate.h" +#include "translate/translate_cache.h" + +struct pt_emit { + struct draw_context *draw; + + struct translate *translate; + + struct translate_cache *cache; + unsigned prim; + + const struct vertex_info *vinfo; +}; + +void draw_pt_emit_prepare( struct pt_emit *emit, + unsigned prim, + unsigned *max_vertices ) +{ + struct draw_context *draw = emit->draw; + const struct vertex_info *vinfo; + unsigned dst_offset; + struct translate_key hw_key; + unsigned i; + boolean ok; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + + /* XXX: may need to defensively reset this later on as clipping can + * clobber this state in the render backend. + */ + emit->prim = prim; + + ok = draw->render->set_primitive(draw->render, emit->prim); + if (!ok) { + assert(0); + return; + } + + /* Must do this after set_primitive() above: + */ + emit->vinfo = vinfo = draw->render->get_vertex_info(draw->render); + + + /* Translate from pipeline vertices to hw vertices. 
+ */ + dst_offset = 0; + for (i = 0; i < vinfo->num_attribs; i++) { + unsigned emit_sz = 0; + unsigned src_buffer = 0; + unsigned output_format; + unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) ); + + + + switch (vinfo->attrib[i].emit) { + case EMIT_4F: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); + break; + case EMIT_3F: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + src_buffer = 1; + src_offset = 0; + break; + case EMIT_4UB: + output_format = PIPE_FORMAT_B8G8R8A8_UNORM; + emit_sz = 4 * sizeof(ubyte); + break; + default: + assert(0); + output_format = PIPE_FORMAT_NONE; + emit_sz = 0; + break; + } + + hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + hw_key.element[i].input_buffer = src_buffer; + hw_key.element[i].input_offset = src_offset; + hw_key.element[i].output_format = output_format; + hw_key.element[i].output_offset = dst_offset; + + dst_offset += emit_sz; + } + + hw_key.nr_elements = vinfo->num_attribs; + hw_key.output_stride = vinfo->size * 4; + + if (!emit->translate || + translate_key_compare(&emit->translate->key, &hw_key) != 0) + { + translate_key_sanitize(&hw_key); + emit->translate = translate_cache_find(emit->cache, &hw_key); + } + + *max_vertices = (draw->render->max_vertex_buffer_bytes / + (vinfo->size * 4)); + + /* even number */ + *max_vertices = *max_vertices & ~1; +} + + +void draw_pt_emit( struct pt_emit *emit, + const float (*vertex_data)[4], + unsigned vertex_count, + unsigned stride, + const ushort *elts, + unsigned count ) +{ + struct draw_context *draw = emit->draw; + struct translate *translate = emit->translate; + struct vbuf_render 
*render = draw->render; + void *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + /* XXX: and work out some way to coordinate the render primitive + * between vbuf.c and here... + */ + if (!draw->render->set_primitive(draw->render, emit->prim)) { + assert(0); + return; + } + + hw_verts = render->allocate_vertices(render, + (ushort)translate->key.output_stride, + (ushort)vertex_count); + if (!hw_verts) { + assert(0); + return; + } + + translate->set_buffer(translate, + 0, + vertex_data, + stride ); + + translate->set_buffer(translate, + 1, + &draw->rasterizer->point_size, + 0); + + translate->run( translate, + 0, + vertex_count, + hw_verts ); + + render->draw(render, + elts, + count); + + render->release_vertices(render, + hw_verts, + translate->key.output_stride, + vertex_count); +} + + +void draw_pt_emit_linear(struct pt_emit *emit, + const float (*vertex_data)[4], + unsigned vertex_count, + unsigned stride, + unsigned start, + unsigned count) +{ + struct draw_context *draw = emit->draw; + struct translate *translate = emit->translate; + struct vbuf_render *render = draw->render; + void *hw_verts; + +#if 0 + debug_printf("Linear emit\n"); +#endif + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + /* XXX: and work out some way to coordinate the render primitive + * between vbuf.c and here... 
+ */ + if (!draw->render->set_primitive(draw->render, emit->prim)) { + assert(0); + return; + } + + hw_verts = render->allocate_vertices(render, + (ushort)translate->key.output_stride, + (ushort)count); + if (!hw_verts) { + assert(0); + return; + } + + translate->set_buffer(translate, 0, + vertex_data, stride); + + translate->set_buffer(translate, 1, + &draw->rasterizer->point_size, + 0); + + translate->run(translate, + 0, + vertex_count, + hw_verts); + + if (0) { + unsigned i; + for (i = 0; i < vertex_count; i++) { + debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i); + draw_dump_emitted_vertex( emit->vinfo, + (const uint8_t *)hw_verts + + translate->key.output_stride * i ); + } + } + + + render->draw_arrays(render, start, count); + + render->release_vertices(render, + hw_verts, + translate->key.output_stride, + vertex_count); +} + +struct pt_emit *draw_pt_emit_create( struct draw_context *draw ) +{ + struct pt_emit *emit = CALLOC_STRUCT(pt_emit); + if (!emit) + return NULL; + + emit->draw = draw; + emit->cache = translate_cache_create(); + if (!emit->cache) { + FREE(emit); + return NULL; + } + + return emit; +} + +void draw_pt_emit_destroy( struct pt_emit *emit ) +{ + if (emit->cache) + translate_cache_destroy(emit->cache); + + FREE(emit); +} diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c new file mode 100644 index 0000000000..6377f896fb --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -0,0 +1,228 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_memory.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "translate/translate.h" +#include "translate/translate_cache.h" + + +struct pt_fetch { + struct draw_context *draw; + + struct translate *translate; + + unsigned vertex_size; + boolean need_edgeflags; + + struct translate_cache *cache; +}; + +/* Perform the fetch from API vertex elements & vertex buffers, to a + * contiguous set of float[4] attributes as required for the + * vertex_shader->run_linear() method. + * + * This is used in all cases except pure passthrough + * (draw_pt_fetch_emit.c) which has its own version to translate + * directly to hw vertices. 
+ * + */ +void draw_pt_fetch_prepare( struct pt_fetch *fetch, + unsigned vertex_size ) +{ + struct draw_context *draw = fetch->draw; + unsigned i, nr = 0; + unsigned dst_offset = 0; + struct translate_key key; + + fetch->vertex_size = vertex_size; + + /* Always emit/leave space for a vertex header. + * + * It's worth considering whether the vertex headers should contain + * a pointer to the 'data', rather than having it inline. + * Something to look at after we've fully switched over to the pt + * paths. + */ + { + /* Need to set header->vertex_id = 0xffff somehow. + */ + key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; + key.element[nr].input_buffer = draw->pt.nr_vertex_buffers; + key.element[nr].input_offset = 0; + key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT; + key.element[nr].output_offset = dst_offset; + dst_offset += 1 * sizeof(float); + nr++; + + + /* Just leave the clip[] array untouched. + */ + dst_offset += 4 * sizeof(float); + } + + + for (i = 0; i < draw->pt.nr_vertex_elements; i++) { + key.element[nr].input_format = draw->pt.vertex_element[i].src_format; + key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index; + key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset; + key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + key.element[nr].output_offset = dst_offset; + + dst_offset += 4 * sizeof(float); + nr++; + } + + assert(dst_offset <= vertex_size); + + key.nr_elements = nr; + key.output_stride = vertex_size; + + + if (!fetch->translate || + translate_key_compare(&fetch->translate->key, &key) != 0) + { + translate_key_sanitize(&key); + fetch->translate = translate_cache_find(fetch->cache, &key); + + { + static struct vertex_header vh = { 0, 1, 0, 0xffff }; + fetch->translate->set_buffer(fetch->translate, + draw->pt.nr_vertex_buffers, + &vh, + 0); + } + } + + fetch->need_edgeflags = ((draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || + draw->rasterizer->fill_ccw != 
PIPE_POLYGON_MODE_FILL) && + draw->pt.user.edgeflag); +} + + + + +void draw_pt_fetch_run( struct pt_fetch *fetch, + const unsigned *elts, + unsigned count, + char *verts ) +{ + struct draw_context *draw = fetch->draw; + struct translate *translate = fetch->translate; + unsigned i; + + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { + translate->set_buffer(translate, + i, + ((char *)draw->pt.user.vbuffer[i] + + draw->pt.vertex_buffer[i].buffer_offset), + draw->pt.vertex_buffer[i].pitch ); + } + + translate->run_elts( translate, + elts, + count, + verts ); + + /* Edgeflags are hard to fit into a translate program, populate + * them separately if required. In the setup above they are + * defaulted to one, so only need this if there is reason to change + * that default: + */ + if (fetch->need_edgeflags) { + for (i = 0; i < count; i++) { + struct vertex_header *vh = (struct vertex_header *)(verts + i * fetch->vertex_size); + vh->edgeflag = draw_pt_get_edgeflag( draw, elts[i] ); + } + } +} + + +void draw_pt_fetch_run_linear( struct pt_fetch *fetch, + unsigned start, + unsigned count, + char *verts ) +{ + struct draw_context *draw = fetch->draw; + struct translate *translate = fetch->translate; + unsigned i; + + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { + translate->set_buffer(translate, + i, + ((char *)draw->pt.user.vbuffer[i] + + draw->pt.vertex_buffer[i].buffer_offset), + draw->pt.vertex_buffer[i].pitch ); + } + + translate->run( translate, + start, + count, + verts ); + + /* Edgeflags are hard to fit into a translate program, populate + * them separately if required. 
In the setup above they are + * defaulted to one, so only need this if there is reason to change + * that default: + */ + if (fetch->need_edgeflags) { + for (i = 0; i < count; i++) { + struct vertex_header *vh = (struct vertex_header *)(verts + i * fetch->vertex_size); + vh->edgeflag = draw_pt_get_edgeflag( draw, start + i ); + } + } +} + + +struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ) +{ + struct pt_fetch *fetch = CALLOC_STRUCT(pt_fetch); + if (!fetch) + return NULL; + + fetch->draw = draw; + fetch->cache = translate_cache_create(); + if (!fetch->cache) { + FREE(fetch); + return NULL; + } + + return fetch; +} + +void draw_pt_fetch_destroy( struct pt_fetch *fetch ) +{ + if (fetch->cache) + translate_cache_destroy(fetch->cache); + + FREE(fetch); +} + diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c new file mode 100644 index 0000000000..3966ad48ba --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -0,0 +1,413 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_memory.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "translate/translate.h" +#include "translate/translate_cache.h" + +/* The simplest 'middle end' in the new vertex code. + * + * The responsibilities of a middle end are to: + * - perform vertex fetch using + * - draw vertex element/buffer state + * - a list of fetch indices we received as an input + * - run the vertex shader + * - cliptest, + * - clip coord calculation + * - viewport transformation + * - if necessary, run the primitive pipeline, passing it: + * - a linear array of vertex_header vertices constructed here + * - a set of draw indices we received as an input + * - otherwise, drive the hw backend, + * - allocate space for hardware format vertices + * - translate the vertex-shader output vertices to hw format + * - calling the backend draw functions. + * + * For convenience, we provide a helper function to drive the hardware + * backend given similar inputs to those required to run the pipeline. 
+ * + * In the case of passthrough mode, many of these actions are disabled + * or noops, so we end up doing: + * + * - perform vertex fetch + * - drive the hw backend + * + * IE, basically just vertex fetch to post-vs-format vertices, + * followed by a call to the backend helper function. + */ + + +struct fetch_emit_middle_end { + struct draw_pt_middle_end base; + struct draw_context *draw; + + struct translate *translate; + const struct vertex_info *vinfo; + + /* Cache point size somewhere it's address won't change: + */ + float point_size; + + struct translate_cache *cache; +}; + + + + +static void fetch_emit_prepare( struct draw_pt_middle_end *middle, + unsigned prim, + unsigned opt, + unsigned *max_vertices ) +{ + struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; + struct draw_context *draw = feme->draw; + const struct vertex_info *vinfo; + unsigned i, dst_offset; + boolean ok; + struct translate_key key; + + + ok = draw->render->set_primitive( draw->render, + prim ); + if (!ok) { + assert(0); + return; + } + + /* Must do this after set_primitive() above: + */ + vinfo = feme->vinfo = draw->render->get_vertex_info(draw->render); + + + + /* Transform from API vertices to HW vertices, skipping the + * pipeline_vertex intermediate step. 
+ */ + dst_offset = 0; + memset(&key, 0, sizeof(key)); + + for (i = 0; i < vinfo->num_attribs; i++) { + const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->attrib[i].src_index]; + + unsigned emit_sz = 0; + unsigned input_format = src->src_format; + unsigned input_buffer = src->vertex_buffer_index; + unsigned input_offset = src->src_offset; + unsigned output_format; + + switch (vinfo->attrib[i].emit) { + case EMIT_4F: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); + break; + case EMIT_3F: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + input_format = PIPE_FORMAT_R32_FLOAT; + input_buffer = draw->pt.nr_vertex_buffers; + input_offset = 0; + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + default: + assert(0); + output_format = PIPE_FORMAT_NONE; + emit_sz = 0; + continue; + } + + key.element[i].input_format = input_format; + key.element[i].input_buffer = input_buffer; + key.element[i].input_offset = input_offset; + key.element[i].output_format = output_format; + key.element[i].output_offset = dst_offset; + + dst_offset += emit_sz; + } + + key.nr_elements = vinfo->num_attribs; + key.output_stride = vinfo->size * 4; + + /* Don't bother with caching at this stage: + */ + if (!feme->translate || + translate_key_compare(&feme->translate->key, &key) != 0) + { + translate_key_sanitize(&key); + feme->translate = translate_cache_find(feme->cache, + &key); + + + feme->translate->set_buffer(feme->translate, + draw->pt.nr_vertex_buffers, + &feme->point_size, + 0); + } + + feme->point_size = draw->rasterizer->point_size; + + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { + feme->translate->set_buffer(feme->translate, + i, + ((char 
*)draw->pt.user.vbuffer[i] + + draw->pt.vertex_buffer[i].buffer_offset), + draw->pt.vertex_buffer[i].pitch ); + } + + *max_vertices = (draw->render->max_vertex_buffer_bytes / + (vinfo->size * 4)); + + /* Return an even number of verts. + * This prevents "parity" errors when splitting long triangle strips which + * can lead to front/back culling mix-ups. + * Every other triangle in a strip has an alternate front/back orientation + * so splitting at an odd position can cause the orientation of subsequent + * triangles to get reversed. + */ + *max_vertices = *max_vertices & ~1; +} + + + + + +static void fetch_emit_run( struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; + struct draw_context *draw = feme->draw; + void *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)feme->translate->key.output_stride, + (ushort)fetch_count ); + if (!hw_verts) { + assert(0); + return; + } + + + /* Single routine to fetch vertices and emit HW verts. + */ + feme->translate->run_elts( feme->translate, + fetch_elts, + fetch_count, + hw_verts ); + + if (0) { + unsigned i; + for (i = 0; i < fetch_count; i++) { + debug_printf("\n\nvertex %d:\n", i); + draw_dump_emitted_vertex( feme->vinfo, + (const uint8_t *)hw_verts + feme->vinfo->size * 4 * i ); + } + } + + /* XXX: Draw arrays path to avoid re-emitting index list again and + * again. 
+ */ + draw->render->draw( draw->render, + draw_elts, + draw_count ); + + /* Done -- that was easy, wasn't it: + */ + draw->render->release_vertices( draw->render, + hw_verts, + feme->translate->key.output_stride, + fetch_count ); + +} + + +static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count ) +{ + struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; + struct draw_context *draw = feme->draw; + void *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)feme->translate->key.output_stride, + (ushort)count ); + if (!hw_verts) { + assert(0); + return; + } + + /* Single routine to fetch vertices and emit HW verts. + */ + feme->translate->run( feme->translate, + start, + count, + hw_verts ); + + if (0) { + unsigned i; + for (i = 0; i < count; i++) { + debug_printf("\n\nvertex %d:\n", i); + draw_dump_emitted_vertex( feme->vinfo, + (const uint8_t *)hw_verts + feme->vinfo->size * 4 * i ); + } + } + + /* XXX: Draw arrays path to avoid re-emitting index list again and + * again. + */ + draw->render->draw_arrays( draw->render, + 0, /*start*/ + count ); + + /* Done -- that was easy, wasn't it: + */ + draw->render->release_vertices( draw->render, + hw_verts, + feme->translate->key.output_stride, + count ); + +} + + +static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; + struct draw_context *draw = feme->draw; + void *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? 
+ */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)feme->translate->key.output_stride, + (ushort)count ); + if (!hw_verts) + return FALSE; + + /* Single routine to fetch vertices and emit HW verts. + */ + feme->translate->run( feme->translate, + start, + count, + hw_verts ); + + /* XXX: Draw arrays path to avoid re-emitting index list again and + * again. + */ + draw->render->draw( draw->render, + draw_elts, + draw_count ); + + /* Done -- that was easy, wasn't it: + */ + draw->render->release_vertices( draw->render, + hw_verts, + feme->translate->key.output_stride, + count ); + + return TRUE; +} + + + + +static void fetch_emit_finish( struct draw_pt_middle_end *middle ) +{ + /* nothing to do */ +} + +static void fetch_emit_destroy( struct draw_pt_middle_end *middle ) +{ + struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; + + if (feme->cache) + translate_cache_destroy(feme->cache); + + FREE(middle); +} + + +struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ) +{ + struct fetch_emit_middle_end *fetch_emit = CALLOC_STRUCT( fetch_emit_middle_end ); + if (fetch_emit == NULL) + return NULL; + + fetch_emit->cache = translate_cache_create(); + if (!fetch_emit->cache) { + FREE(fetch_emit); + return NULL; + } + + fetch_emit->base.prepare = fetch_emit_prepare; + fetch_emit->base.run = fetch_emit_run; + fetch_emit->base.run_linear = fetch_emit_run_linear; + fetch_emit->base.run_linear_elts = fetch_emit_run_linear_elts; + fetch_emit->base.finish = fetch_emit_finish; + fetch_emit->base.destroy = fetch_emit_destroy; + + fetch_emit->draw = draw; + + return &fetch_emit->base; +} + diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c new file mode 100644 index 0000000000..f7e6a1a8ee --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -0,0 +1,409 @@ 
+/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "draw/draw_vs.h" + +#include "translate/translate.h" + +struct fetch_shade_emit; + + +/* Prototype fetch, shade, emit-hw-verts all in one go. 
+ */ +struct fetch_shade_emit { + struct draw_pt_middle_end base; + struct draw_context *draw; + + + /* Temporaries: + */ + const float *constants; + unsigned pitch[PIPE_MAX_ATTRIBS]; + const ubyte *src[PIPE_MAX_ATTRIBS]; + unsigned prim; + + struct draw_vs_varient_key key; + struct draw_vs_varient *active; + + + const struct vertex_info *vinfo; +}; + + + + +static void fse_prepare( struct draw_pt_middle_end *middle, + unsigned prim, + unsigned opt, + unsigned *max_vertices ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; + struct draw_context *draw = fse->draw; + unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs; + const struct vertex_info *vinfo; + unsigned i; + unsigned nr_vbs = 0; + + + if (!draw->render->set_primitive( draw->render, + prim )) { + assert(0); + return; + } + + /* Must do this after set_primitive() above: + */ + fse->vinfo = vinfo = draw->render->get_vertex_info(draw->render); + + + + fse->key.output_stride = vinfo->size * 4; + fse->key.nr_outputs = vinfo->num_attribs; + fse->key.nr_inputs = num_vs_inputs; + + fse->key.nr_elements = MAX2(fse->key.nr_outputs, /* outputs - translate to hw format */ + fse->key.nr_inputs); /* inputs - fetch from api format */ + + fse->key.viewport = !draw->identity_viewport; + fse->key.clip = !draw->bypass_clipping; + fse->key.const_vbuffers = 0; + + memset(fse->key.element, 0, + fse->key.nr_elements * sizeof(fse->key.element[0])); + + for (i = 0; i < num_vs_inputs; i++) { + const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; + fse->key.element[i].in.format = src->src_format; + + /* Consider ignoring these, ie make generated programs + * independent of this state: + */ + fse->key.element[i].in.buffer = src->vertex_buffer_index; + fse->key.element[i].in.offset = src->src_offset; + nr_vbs = MAX2(nr_vbs, src->vertex_buffer_index + 1); + } + + for (i = 0; i < 5 && i < nr_vbs; i++) { + if (draw->pt.vertex_buffer[i].pitch == 0) + fse->key.const_vbuffers |= (1<<i); + } 
+ + if (0) debug_printf("%s: lookup const_vbuffers: %x\n", __FUNCTION__, fse->key.const_vbuffers); + + { + unsigned dst_offset = 0; + + for (i = 0; i < vinfo->num_attribs; i++) { + unsigned emit_sz = 0; + + switch (vinfo->attrib[i].emit) { + case EMIT_4F: + emit_sz = 4 * sizeof(float); + break; + case EMIT_3F: + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + emit_sz = 1 * sizeof(float); + break; + case EMIT_4UB: + emit_sz = 4 * sizeof(ubyte); + break; + default: + assert(0); + break; + } + + /* The elements in the key correspond to vertex shader output + * numbers, not to positions in the hw vertex description -- + * that's handled by the output_offset field. + */ + fse->key.element[i].out.format = vinfo->attrib[i].emit; + fse->key.element[i].out.vs_output = vinfo->attrib[i].src_index; + fse->key.element[i].out.offset = dst_offset; + + dst_offset += emit_sz; + assert(fse->key.output_stride >= dst_offset); + } + } + + + fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader, + &fse->key ); + + if (!fse->active) { + assert(0); + return ; + } + + if (0) debug_printf("%s: found const_vbuffers: %x\n", __FUNCTION__, + fse->active->key.const_vbuffers); + + /* Now set buffer pointers: + */ + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { + fse->active->set_buffer( fse->active, + i, + ((const ubyte *) draw->pt.user.vbuffer[i] + + draw->pt.vertex_buffer[i].buffer_offset), + draw->pt.vertex_buffer[i].pitch ); + } + + *max_vertices = (draw->render->max_vertex_buffer_bytes / + (vinfo->size * 4)); + + /* Return an even number of verts. + * This prevents "parity" errors when splitting long triangle strips which + * can lead to front/back culling mix-ups. + * Every other triangle in a strip has an alternate front/back orientation + * so splitting at an odd position can cause the orientation of subsequent + * triangles to get reversed. 
+ */ + *max_vertices = *max_vertices & ~1; + + /* Probably need to do this somewhere (or fix exec shader not to + * need it): + */ + if (1) { + struct draw_vertex_shader *vs = draw->vs.vertex_shader; + vs->prepare(vs, draw); + } + + + //return TRUE; +} + + + + + + + +static void fse_run_linear( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; + struct draw_context *draw = fse->draw; + char *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)count ); + + if (!hw_verts) { + assert(0); + return; + } + + /* Single routine to fetch vertices, run shader and emit HW verts. + * Clipping is done elsewhere -- either by the API or on hardware, + * or for some other reason not required... + */ + fse->active->run_linear( fse->active, + start, count, + hw_verts ); + + /* Draw arrays path to avoid re-emitting index list again and + * again. + */ + draw->render->draw_arrays( draw->render, + 0, + count ); + + if (0) { + unsigned i; + for (i = 0; i < count; i++) { + debug_printf("\n\n%s vertex %d: (stride %d, offset %d)\n", __FUNCTION__, i, + fse->key.output_stride, + fse->key.output_stride * i); + + draw_dump_emitted_vertex( fse->vinfo, + (const uint8_t *)hw_verts + fse->key.output_stride * i ); + } + } + + + draw->render->release_vertices( draw->render, + hw_verts, + fse->key.output_stride, + count ); +} + + +static void +fse_run(struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; + struct draw_context *draw = fse->draw; + void *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? 
+ */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)fetch_count ); + if (!hw_verts) { + assert(0); + return; + } + + + /* Single routine to fetch vertices, run shader and emit HW verts. + */ + fse->active->run_elts( fse->active, + fetch_elts, + fetch_count, + hw_verts ); + + draw->render->draw( draw->render, + draw_elts, + draw_count ); + + if (0) { + unsigned i; + for (i = 0; i < fetch_count; i++) { + debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i); + draw_dump_emitted_vertex( fse->vinfo, + (const uint8_t *)hw_verts + + fse->key.output_stride * i ); + } + } + + + draw->render->release_vertices( draw->render, + hw_verts, + fse->key.output_stride, + fetch_count ); + +} + + + +static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; + struct draw_context *draw = fse->draw; + char *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)count ); + + if (!hw_verts) { + return FALSE; + } + + /* Single routine to fetch vertices, run shader and emit HW verts. + * Clipping is done elsewhere -- either by the API or on hardware, + * or for some other reason not required... 
+ */ + fse->active->run_linear( fse->active, + start, count, + hw_verts ); + + + draw->render->draw( draw->render, + draw_elts, + draw_count ); + + + + draw->render->release_vertices( draw->render, + hw_verts, + fse->key.output_stride, + count ); + + return TRUE; +} + + + +static void fse_finish( struct draw_pt_middle_end *middle ) +{ +} + + +static void +fse_destroy( struct draw_pt_middle_end *middle ) +{ + FREE(middle); +} + +struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ) +{ + struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit); + if (!fse) + return NULL; + + fse->base.prepare = fse_prepare; + fse->base.run = fse_run; + fse->base.run_linear = fse_run_linear; + fse->base.run_linear_elts = fse_run_linear_elts; + fse->base.finish = fse_finish; + fse->base.destroy = fse_destroy; + fse->draw = draw; + + return &fse->base; +} diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c new file mode 100644 index 0000000000..ec3b41c320 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -0,0 +1,396 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "draw/draw_vs.h" +#include "translate/translate.h" + + +struct fetch_pipeline_middle_end { + struct draw_pt_middle_end base; + struct draw_context *draw; + + struct pt_emit *emit; + struct pt_fetch *fetch; + struct pt_post_vs *post_vs; + + unsigned vertex_data_offset; + unsigned vertex_size; + unsigned prim; + unsigned opt; +}; + + +static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, + unsigned prim, + unsigned opt, + unsigned *max_vertices ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *vs = draw->vs.vertex_shader; + + /* Add one to num_outputs because the pipeline occasionally tags on + * an additional texcoord, eg for AA lines. + */ + unsigned nr = MAX2( vs->info.num_inputs, + vs->info.num_outputs + 1 ); + + fpme->prim = prim; + fpme->opt = opt; + + /* Always leave room for the vertex header whether we need it or + * not. It's hard to get rid of it in particular because of the + * viewport code in draw_pt_post_vs.c. 
+ */ + fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); + + + + draw_pt_fetch_prepare( fpme->fetch, + fpme->vertex_size ); + + /* XXX: it's not really gl rasterization rules we care about here, + * but gl vs dx9 clip spaces. + */ + draw_pt_post_vs_prepare( fpme->post_vs, + (boolean)draw->bypass_clipping, + (boolean)draw->identity_viewport, + (boolean)draw->rasterizer->gl_rasterization_rules ); + + + if (!(opt & PT_PIPELINE)) { + draw_pt_emit_prepare( fpme->emit, + prim, + max_vertices ); + + *max_vertices = MAX2( *max_vertices, + DRAW_PIPE_MAX_VERTICES ); + } + else { + *max_vertices = DRAW_PIPE_MAX_VERTICES; + } + + /* return even number */ + *max_vertices = *max_vertices & ~1; + + /* No need to prepare the shader. + */ + vs->prepare(vs, draw); +} + + + +static void fetch_pipeline_run( struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *shader = draw->vs.vertex_shader; + unsigned opt = fpme->opt; + unsigned alloc_count = align( fetch_count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) { + /* Not much we can do here - just skip the rendering. + */ + assert(0); + return; + } + + /* Fetch into our vertex buffer + */ + draw_pt_fetch_run( fpme->fetch, + fetch_elts, + fetch_count, + (char *)pipeline_verts ); + + /* Run the shader, note that this overwrites the data[] parts of + * the pipeline verts. If there is no shader, ie a bypass shader, + * then the inputs == outputs, and are already in the correct + * place. 
+ */ + if (opt & PT_SHADE) + { + shader->run_linear(shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); + } + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + fetch_count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? + */ + if (opt & PT_PIPELINE) { + draw_pipeline_run( fpme->draw, + fpme->prim, + pipeline_verts, + fetch_count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + else { + draw_pt_emit( fpme->emit, + (const float (*)[4])pipeline_verts->data, + fetch_count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + + + FREE(pipeline_verts); +} + + +static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *shader = draw->vs.vertex_shader; + unsigned opt = fpme->opt; + unsigned alloc_count = align( count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) { + /* Not much we can do here - just skip the rendering. + */ + assert(0); + return; + } + + /* Fetch into our vertex buffer + */ + draw_pt_fetch_run_linear( fpme->fetch, + start, + count, + (char *)pipeline_verts ); + + /* Run the shader, note that this overwrites the data[] parts of + * the pipeline verts. If there is no shader, ie a bypass shader, + * then the inputs == outputs, and are already in the correct + * place. 
+ */ + if (opt & PT_SHADE) + { + shader->run_linear(shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.constants, + count, + fpme->vertex_size, + fpme->vertex_size); + } + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? + */ + if (opt & PT_PIPELINE) { + draw_pipeline_run_linear( fpme->draw, + fpme->prim, + pipeline_verts, + count, + fpme->vertex_size); + } + else { + draw_pt_emit_linear( fpme->emit, + (const float (*)[4])pipeline_verts->data, + count, + fpme->vertex_size, + 0, /*start*/ + count ); + } + + FREE(pipeline_verts); +} + + + +static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *shader = draw->vs.vertex_shader; + unsigned opt = fpme->opt; + unsigned alloc_count = align( count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) + return FALSE; + + /* Fetch into our vertex buffer + */ + draw_pt_fetch_run_linear( fpme->fetch, + start, + count, + (char *)pipeline_verts ); + + /* Run the shader, note that this overwrites the data[] parts of + * the pipeline verts. If there is no shader, ie a bypass shader, + * then the inputs == outputs, and are already in the correct + * place. 
+ */ + if (opt & PT_SHADE) + { + shader->run_linear(shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.constants, + count, + fpme->vertex_size, + fpme->vertex_size); + } + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? + */ + if (opt & PT_PIPELINE) { + draw_pipeline_run( fpme->draw, + fpme->prim, + pipeline_verts, + count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + else { + draw_pt_emit( fpme->emit, + (const float (*)[4])pipeline_verts->data, + count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + + FREE(pipeline_verts); + return TRUE; +} + + + +static void fetch_pipeline_finish( struct draw_pt_middle_end *middle ) +{ + /* nothing to do */ +} + +static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + + if (fpme->fetch) + draw_pt_fetch_destroy( fpme->fetch ); + + if (fpme->emit) + draw_pt_emit_destroy( fpme->emit ); + + if (fpme->post_vs) + draw_pt_post_vs_destroy( fpme->post_vs ); + + FREE(middle); +} + + +struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *draw ) +{ + struct fetch_pipeline_middle_end *fpme = CALLOC_STRUCT( fetch_pipeline_middle_end ); + if (!fpme) + goto fail; + + fpme->base.prepare = fetch_pipeline_prepare; + fpme->base.run = fetch_pipeline_run; + fpme->base.run_linear = fetch_pipeline_linear_run; + fpme->base.run_linear_elts = fetch_pipeline_linear_run_elts; + fpme->base.finish = fetch_pipeline_finish; + fpme->base.destroy = fetch_pipeline_destroy; + + fpme->draw = draw; + + fpme->fetch = draw_pt_fetch_create( draw ); + if (!fpme->fetch) + goto fail; + + fpme->post_vs = draw_pt_post_vs_create( draw ); + if (!fpme->post_vs) + goto fail; + + fpme->emit = draw_pt_emit_create( draw ); + if (!fpme->emit) + 
goto fail; + + return &fpme->base; + + fail: + if (fpme) + fetch_pipeline_destroy( &fpme->base ); + + return NULL; +} diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c new file mode 100644 index 0000000000..96dc706b99 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -0,0 +1,233 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_memory.h" +#include "pipe/p_context.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" + +struct pt_post_vs { + struct draw_context *draw; + + boolean (*run)( struct pt_post_vs *pvs, + struct vertex_header *vertices, + unsigned count, + unsigned stride ); +}; + + + +static INLINE float +dot4(const float *a, const float *b) +{ + return (a[0]*b[0] + + a[1]*b[1] + + a[2]*b[2] + + a[3]*b[3]); +} + + + +static INLINE unsigned +compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr) +{ + unsigned mask = 0x0; + unsigned i; + +#if 0 + debug_printf("compute clipmask %f %f %f %f\n", + clip[0], clip[1], clip[2], clip[3]); + assert(clip[3] != 0.0); +#endif + + /* Do the hardwired planes first: + */ + if (-clip[0] + clip[3] < 0) mask |= (1<<0); + if ( clip[0] + clip[3] < 0) mask |= (1<<1); + if (-clip[1] + clip[3] < 0) mask |= (1<<2); + if ( clip[1] + clip[3] < 0) mask |= (1<<3); + if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */ + if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */ + + /* Followed by any remaining ones: + */ + for (i = 6; i < nr; i++) { + if (dot4(clip, plane[i]) < 0) + mask |= (1<<i); + } + + return mask; +} + + +/* The normal case - cliptest, rhw divide, viewport transform. 
+ * + * Also handle identity viewport here at the expense of a few wasted + * instructions + */ +static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, + struct vertex_header *vertices, + unsigned count, + unsigned stride ) +{ + struct vertex_header *out = vertices; + const float *scale = pvs->draw->viewport.scale; + const float *trans = pvs->draw->viewport.translate; + const unsigned pos = pvs->draw->vs.position_output; + unsigned clipped = 0; + unsigned j; + + if (0) debug_printf("%s\n"); + + for (j = 0; j < count; j++) { + float *position = out->data[pos]; + + out->clip[0] = position[0]; + out->clip[1] = position[1]; + out->clip[2] = position[2]; + out->clip[3] = position[3]; + + out->vertex_id = 0xffff; + out->clipmask = compute_clipmask_gl(out->clip, + pvs->draw->plane, + pvs->draw->nr_planes); + clipped += out->clipmask; + + if (out->clipmask == 0) + { + /* divide by w */ + float w = 1.0f / position[3]; + + /* Viewport mapping */ + position[0] = position[0] * w * scale[0] + trans[0]; + position[1] = position[1] * w * scale[1] + trans[1]; + position[2] = position[2] * w * scale[2] + trans[2]; + position[3] = w; +#if 0 + debug_printf("post viewport: %f %f %f %f\n", + position[0], + position[1], + position[2], + position[3]); +#endif + } + + out = (struct vertex_header *)( (char *)out + stride ); + } + + return clipped != 0; +} + + + +/* If bypass_clipping is set, skip cliptest and rhw divide. 
+ */ +static boolean post_vs_viewport( struct pt_post_vs *pvs, + struct vertex_header *vertices, + unsigned count, + unsigned stride ) +{ + struct vertex_header *out = vertices; + const float *scale = pvs->draw->viewport.scale; + const float *trans = pvs->draw->viewport.translate; + const unsigned pos = pvs->draw->vs.position_output; + unsigned j; + + if (0) debug_printf("%s\n", __FUNCTION__); + for (j = 0; j < count; j++) { + float *position = out->data[pos]; + + /* Viewport mapping only, no cliptest/rhw divide + */ + position[0] = position[0] * scale[0] + trans[0]; + position[1] = position[1] * scale[1] + trans[1]; + position[2] = position[2] * scale[2] + trans[2]; + + out = (struct vertex_header *)((char *)out + stride); + } + + return FALSE; +} + + +/* If bypass_clipping is set and we have an identity viewport, nothing + * to do. + */ +static boolean post_vs_none( struct pt_post_vs *pvs, + struct vertex_header *vertices, + unsigned count, + unsigned stride ) +{ + if (0) debug_printf("%s\n", __FUNCTION__); + return FALSE; +} + +boolean draw_pt_post_vs_run( struct pt_post_vs *pvs, + struct vertex_header *pipeline_verts, + unsigned count, + unsigned stride ) +{ + return pvs->run( pvs, pipeline_verts, count, stride ); +} + + +void draw_pt_post_vs_prepare( struct pt_post_vs *pvs, + boolean bypass_clipping, + boolean identity_viewport, + boolean opengl ) +{ + if (bypass_clipping) { + if (identity_viewport) + pvs->run = post_vs_none; + else + pvs->run = post_vs_viewport; + } + else { + //if (opengl) + pvs->run = post_vs_cliptest_viewport_gl; + } +} + + +struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw ) +{ + struct pt_post_vs *pvs = CALLOC_STRUCT( pt_post_vs ); + if (!pvs) + return NULL; + + pvs->draw = draw; + + return pvs; +} + +void draw_pt_post_vs_destroy( struct pt_post_vs *pvs ) +{ + FREE(pvs); +} diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c new file mode 100644 index 
0000000000..3bc7939c55 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_util.c @@ -0,0 +1,102 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_pt.h" + +void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) +{ + switch (prim) { + case PIPE_PRIM_POINTS: + *first = 1; + *incr = 1; + break; + case PIPE_PRIM_LINES: + *first = 2; + *incr = 2; + break; + case PIPE_PRIM_LINE_STRIP: + case PIPE_PRIM_LINE_LOOP: + *first = 2; + *incr = 1; + break; + case PIPE_PRIM_TRIANGLES: + *first = 3; + *incr = 3; + break; + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + *first = 3; + *incr = 1; + break; + case PIPE_PRIM_QUADS: + *first = 4; + *incr = 4; + break; + case PIPE_PRIM_QUAD_STRIP: + *first = 4; + *incr = 2; + break; + default: + assert(0); + *first = 0; + *incr = 1; /* set to one so that count % incr works */ + break; + } +} + + +unsigned draw_pt_reduced_prim(unsigned prim) +{ + switch (prim) { + case PIPE_PRIM_POINTS: + return PIPE_PRIM_POINTS; + case PIPE_PRIM_LINES: + case PIPE_PRIM_LINE_STRIP: + case PIPE_PRIM_LINE_LOOP: + return PIPE_PRIM_LINES; + case PIPE_PRIM_TRIANGLES: + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + case PIPE_PRIM_QUADS: + case PIPE_PRIM_QUAD_STRIP: + return PIPE_PRIM_TRIANGLES; + default: + assert(0); + return PIPE_PRIM_POINTS; + } +} + + diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c new file mode 100644 index 0000000000..c15afe65f1 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -0,0 +1,193 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_pt.h" + +#define FETCH_MAX 256 +#define DRAW_MAX (FETCH_MAX+8) + +struct varray_frontend { + struct draw_pt_front_end base; + struct draw_context *draw; + + ushort draw_elts[DRAW_MAX]; + unsigned fetch_elts[FETCH_MAX]; + + unsigned driver_fetch_max; + unsigned fetch_max; + + struct draw_pt_middle_end *middle; + + unsigned input_prim; + unsigned output_prim; +}; + + +static void varray_flush_linear(struct varray_frontend *varray, + unsigned start, unsigned count) +{ + if (count) { + assert(varray->middle->run_linear); + varray->middle->run_linear(varray->middle, start, count); + } +} + +static void varray_line_loop_segment(struct varray_frontend *varray, + unsigned start, + unsigned segment_start, + unsigned segment_count, + boolean end ) +{ + assert(segment_count+1 < varray->fetch_max); + if (segment_count >= 1) { + unsigned nr = 0, i; + + for (i = 0; i < segment_count; i++) + varray->fetch_elts[nr++] = start + segment_start + i; + + if (end) + varray->fetch_elts[nr++] = start; + + assert(nr < FETCH_MAX); + + varray->middle->run(varray->middle, + varray->fetch_elts, + nr, + varray->draw_elts, /* ie. linear */ + nr); + } +} + + + +static void varray_fan_segment(struct varray_frontend *varray, + unsigned start, + unsigned segment_start, + unsigned segment_count ) +{ + assert(segment_count+1 < varray->fetch_max); + if (segment_count >= 2) { + unsigned nr = 0, i; + + if (segment_start != 0) + varray->fetch_elts[nr++] = start; + + for (i = 0 ; i < segment_count; i++) + varray->fetch_elts[nr++] = start + segment_start + i; + + assert(nr < FETCH_MAX); + + varray->middle->run(varray->middle, + varray->fetch_elts, + nr, + varray->draw_elts, /* ie. 
linear */ + nr); + } +} + + + + +#define FUNC varray_run +#include "draw_pt_varray_tmp_linear.h" + +static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINE_STRIP, /* decomposed LINELOOP */ + PIPE_PRIM_LINE_STRIP, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLE_STRIP, + PIPE_PRIM_TRIANGLE_FAN, + PIPE_PRIM_QUADS, + PIPE_PRIM_QUAD_STRIP, + PIPE_PRIM_POLYGON +}; + + + +static void varray_prepare(struct draw_pt_front_end *frontend, + unsigned prim, + struct draw_pt_middle_end *middle, + unsigned opt) +{ + struct varray_frontend *varray = (struct varray_frontend *)frontend; + + varray->base.run = varray_run; + + varray->input_prim = prim; + varray->output_prim = decompose_prim[prim]; + + varray->middle = middle; + middle->prepare(middle, varray->output_prim, opt, &varray->driver_fetch_max ); + + /* check that the max is even */ + assert((varray->driver_fetch_max & 1) == 0); + + varray->fetch_max = MIN2(FETCH_MAX, varray->driver_fetch_max); +} + + + + +static void varray_finish(struct draw_pt_front_end *frontend) +{ + struct varray_frontend *varray = (struct varray_frontend *)frontend; + varray->middle->finish(varray->middle); + varray->middle = NULL; +} + +static void varray_destroy(struct draw_pt_front_end *frontend) +{ + FREE(frontend); +} + + +struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw) +{ + ushort i; + struct varray_frontend *varray = CALLOC_STRUCT(varray_frontend); + if (varray == NULL) + return NULL; + + varray->base.prepare = varray_prepare; + varray->base.run = NULL; + varray->base.finish = varray_finish; + varray->base.destroy = varray_destroy; + varray->draw = draw; + + for (i = 0; i < DRAW_MAX; i++) { + varray->draw_elts[i] = i; + } + + return &varray->base; +} diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h new file mode 100644 index 0000000000..7c722457c3 --- /dev/null +++ 
b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h @@ -0,0 +1,238 @@ + +static void FUNC(struct draw_pt_front_end *frontend, + pt_elt_func get_elt, + const void *elts, + unsigned count) +{ + struct varray_frontend *varray = (struct varray_frontend *)frontend; + struct draw_context *draw = varray->draw; + unsigned start = (unsigned)elts; + + boolean flatfirst = (draw->rasterizer->flatshade && + draw->rasterizer->flatshade_first); + unsigned i, j; + ushort flags; + unsigned first, incr; + + varray->fetch_start = start; + + draw_pt_split_prim(varray->input_prim, &first, &incr); + +#if 0 + debug_printf("%s (%d) %d/%d\n", __FUNCTION__, + varray->input_prim, + start, count); +#endif + + switch (varray->input_prim) { + case PIPE_PRIM_POINTS: + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i < end; i++) { + POINT(varray, i + 0); + } + i = end; + fetch_init(varray, end); + varray_flush(varray); + } + break; + + case PIPE_PRIM_LINES: + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+1 < end; i += 2) { + LINE(varray, DRAW_PIPE_RESET_STIPPLE, + i + 0, i + 1); + } + i = end; + fetch_init(varray, end); + varray_flush(varray); + } + break; + + case PIPE_PRIM_LINE_LOOP: + if (count >= 2) { + flags = DRAW_PIPE_RESET_STIPPLE; + + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 1; i < end; i++, flags = 0) { + LINE(varray, flags, i - 1, i); + } + LINE(varray, flags, i - 1, 0); + i = end; + fetch_init(varray, end); + varray_flush(varray); + } + } + break; + + case PIPE_PRIM_LINE_STRIP: + flags = DRAW_PIPE_RESET_STIPPLE; + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 1; i < end; i++, flags = 0) { + LINE(varray, flags, i - 1, i); + } + i = end; + fetch_init(varray, end); + 
varray_flush(varray); + } + break; + + case PIPE_PRIM_TRIANGLES: + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+2 < end; i += 3) { + TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + i + 0, i + 1, i + 2); + } + i = end; + fetch_init(varray, end); + varray_flush(varray); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (flatfirst) { + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+2 < end; i++) { + TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + i + 0, i + 1 + (i&1), i + 2 - (i&1)); + } + i = end; + fetch_init(varray, end); + varray_flush(varray); + if (j + first + i <= count) { + varray->fetch_start -= 2; + i -= 2; + } + } + } + else { + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i + 2 < end; i++) { + TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + i + 0 + (i&1), i + 1 - (i&1), i + 2); + } + i = end; + fetch_init(varray, end); + varray_flush(varray); + if (j + first + i <= count) { + varray->fetch_start -= 2; + i -= 2; + } + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (count >= 3) { + if (flatfirst) { + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+2 < end; i++) { + TRIANGLE(varray, flags, i + 1, i + 2, 0); + } + i = end; + fetch_init(varray, end); + varray_flush(varray); + } + } + else { + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+2 < end; i++) { + TRIANGLE(varray, flags, 0, i + 1, i + 2); + } + i = end; + fetch_init(varray, end); + varray_flush(varray); + } + } + } + 
/**
 * Trim a vertex count down to a whole number of primitives.
 *
 * \param count  number of vertices available
 * \param first  vertices needed to form the first primitive
 * \param incr   vertices needed for each further primitive (non-zero)
 * \return the largest value <= count of the form first + k * incr, or 0
 *         when count < first (not even one primitive fits).
 *
 * The explicit count < first test is required: all operands are unsigned,
 * so count - first would otherwise wrap and the result would be a huge
 * bogus count that a later "count < first" check cannot catch.
 */
static unsigned trim( unsigned count, unsigned first, unsigned incr )
{
   if (count < first)
      return 0;

   return count - (count - first) % incr;
}
const void *elts, + unsigned count) +{ + struct varray_frontend *varray = (struct varray_frontend *)frontend; + unsigned start = (unsigned)elts; + + unsigned j; + unsigned first, incr; + + draw_pt_split_prim(varray->input_prim, &first, &incr); + + /* Sanitize primitive length: + */ + count = trim(count, first, incr); + if (count < first) + return; + +#if 0 + debug_printf("%s (%d) %d/%d\n", __FUNCTION__, + varray->input_prim, + start, count); +#endif + + switch (varray->input_prim) { + case PIPE_PRIM_POINTS: + case PIPE_PRIM_LINES: + case PIPE_PRIM_TRIANGLES: + case PIPE_PRIM_LINE_STRIP: + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_QUADS: + case PIPE_PRIM_QUAD_STRIP: + for (j = 0; j < count;) { + unsigned remaining = count - j; + unsigned nr = trim( MIN2(varray->driver_fetch_max, remaining), first, incr ); + varray_flush_linear(varray, start + j, nr); + j += nr; + if (nr != remaining) + j -= (first - incr); + } + break; + + case PIPE_PRIM_LINE_LOOP: + /* Always have to decompose as we've stated that this will be + * emitted as a line-strip. 
+ */ + for (j = 0; j < count;) { + unsigned remaining = count - j; + unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr ); + varray_line_loop_segment(varray, start, j, nr, nr == remaining); + j += nr; + if (nr != remaining) + j -= (first - incr); + } + break; + + + case PIPE_PRIM_POLYGON: + case PIPE_PRIM_TRIANGLE_FAN: + if (count < varray->driver_fetch_max) { + varray_flush_linear(varray, start, count); + } + else { + for ( j = 0; j < count;) { + unsigned remaining = count - j; + unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr ); + varray_fan_segment(varray, start, j, nr); + j += nr; + if (nr != remaining) + j -= (first - incr); + } + } + break; + + default: + assert(0); + break; + } +} + +#undef TRIANGLE +#undef QUAD +#undef POINT +#undef LINE +#undef FUNC diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c new file mode 100644 index 0000000000..80d7200ca6 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -0,0 +1,500 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_memory.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_pt.h" + + +#define CACHE_MAX 256 +#define FETCH_MAX 256 +#define DRAW_MAX (16*1024) + +struct vcache_frontend { + struct draw_pt_front_end base; + struct draw_context *draw; + + unsigned in[CACHE_MAX]; + ushort out[CACHE_MAX]; + + ushort draw_elts[DRAW_MAX]; + unsigned fetch_elts[FETCH_MAX]; + + unsigned draw_count; + unsigned fetch_count; + unsigned fetch_max; + + struct draw_pt_middle_end *middle; + + unsigned input_prim; + unsigned output_prim; + + unsigned middle_prim; + unsigned opt; +}; + +static INLINE void +vcache_flush( struct vcache_frontend *vcache ) +{ + if (vcache->middle_prim != vcache->output_prim) { + vcache->middle_prim = vcache->output_prim; + vcache->middle->prepare( vcache->middle, + vcache->middle_prim, + vcache->opt, + &vcache->fetch_max ); + } + + if (vcache->draw_count) { + vcache->middle->run( vcache->middle, + vcache->fetch_elts, + vcache->fetch_count, + vcache->draw_elts, + vcache->draw_count ); + } + + memset(vcache->in, ~0, sizeof(vcache->in)); + vcache->fetch_count = 0; + vcache->draw_count = 0; +} + +static INLINE void +vcache_check_flush( struct vcache_frontend *vcache ) +{ + if ( vcache->draw_count + 6 >= DRAW_MAX || + vcache->fetch_count + 4 >= FETCH_MAX ) + { + vcache_flush( 
vcache ); + } +} + + +static INLINE void +vcache_elt( struct vcache_frontend *vcache, + unsigned felt, + ushort flags ) +{ + unsigned idx = felt % CACHE_MAX; + + if (vcache->in[idx] != felt) { + assert(vcache->fetch_count < FETCH_MAX); + + vcache->in[idx] = felt; + vcache->out[idx] = (ushort)vcache->fetch_count; + vcache->fetch_elts[vcache->fetch_count++] = felt; + } + + vcache->draw_elts[vcache->draw_count++] = vcache->out[idx] | flags; +} + + + +static INLINE void +vcache_triangle( struct vcache_frontend *vcache, + unsigned i0, + unsigned i1, + unsigned i2 ) +{ + vcache_elt(vcache, i0, 0); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, i2, 0); + vcache_check_flush(vcache); +} + + +static INLINE void +vcache_triangle_flags( struct vcache_frontend *vcache, + ushort flags, + unsigned i0, + unsigned i1, + unsigned i2 ) +{ + vcache_elt(vcache, i0, flags); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, i2, 0); + vcache_check_flush(vcache); +} + +static INLINE void +vcache_line( struct vcache_frontend *vcache, + unsigned i0, + unsigned i1 ) +{ + vcache_elt(vcache, i0, 0); + vcache_elt(vcache, i1, 0); + vcache_check_flush(vcache); +} + + +static INLINE void +vcache_line_flags( struct vcache_frontend *vcache, + ushort flags, + unsigned i0, + unsigned i1 ) +{ + vcache_elt(vcache, i0, flags); + vcache_elt(vcache, i1, 0); + vcache_check_flush(vcache); +} + + +static INLINE void +vcache_point( struct vcache_frontend *vcache, + unsigned i0 ) +{ + vcache_elt(vcache, i0, 0); + vcache_check_flush(vcache); +} + +static INLINE void +vcache_quad( struct vcache_frontend *vcache, + unsigned i0, + unsigned i1, + unsigned i2, + unsigned i3 ) +{ + vcache_triangle( vcache, i0, i1, i3 ); + vcache_triangle( vcache, i1, i2, i3 ); +} + +static INLINE void +vcache_ef_quad( struct vcache_frontend *vcache, + unsigned i0, + unsigned i1, + unsigned i2, + unsigned i3 ) +{ + vcache_triangle_flags( vcache, + ( DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_2 ), + i0, 
i1, i3 ); + + vcache_triangle_flags( vcache, + ( DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1 ), + i1, i2, i3 ); +} + +/* At least for now, we're back to using a template include file for + * this. The two paths aren't too different though - it may be + * possible to reunify them. + */ +#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle_flags(vc,flags,i0,i1,i2) +#define QUAD(vc,i0,i1,i2,i3) vcache_ef_quad(vc,i0,i1,i2,i3) +#define LINE(vc,flags,i0,i1) vcache_line_flags(vc,flags,i0,i1) +#define POINT(vc,i0) vcache_point(vc,i0) +#define FUNC vcache_run_extras +#include "draw_pt_vcache_tmp.h" + +#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle(vc,i0,i1,i2) +#define QUAD(vc,i0,i1,i2,i3) vcache_quad(vc,i0,i1,i2,i3) +#define LINE(vc,flags,i0,i1) vcache_line(vc,i0,i1) +#define POINT(vc,i0) vcache_point(vc,i0) +#define FUNC vcache_run +#include "draw_pt_vcache_tmp.h" + +static INLINE void +rebase_uint_elts( const unsigned *src, + unsigned count, + int delta, + ushort *dest ) +{ + unsigned i; + + for (i = 0; i < count; i++) + dest[i] = (ushort)(src[i] + delta); +} + +static INLINE void +rebase_ushort_elts( const ushort *src, + unsigned count, + int delta, + ushort *dest ) +{ + unsigned i; + + for (i = 0; i < count; i++) + dest[i] = (ushort)(src[i] + delta); +} + +static INLINE void +rebase_ubyte_elts( const ubyte *src, + unsigned count, + int delta, + ushort *dest ) +{ + unsigned i; + + for (i = 0; i < count; i++) + dest[i] = (ushort)(src[i] + delta); +} + + + +static INLINE void +translate_uint_elts( const unsigned *src, + unsigned count, + ushort *dest ) +{ + unsigned i; + + for (i = 0; i < count; i++) + dest[i] = (ushort)(src[i]); +} + +static INLINE void +translate_ushort_elts( const ushort *src, + unsigned count, + ushort *dest ) +{ + unsigned i; + + for (i = 0; i < count; i++) + dest[i] = (ushort)(src[i]); +} + +static INLINE void +translate_ubyte_elts( const ubyte *src, + unsigned count, + ushort *dest ) +{ + unsigned i; + + for (i = 0; i < count; i++) + 
dest[i] = (ushort)(src[i]); +} + + + + +#if 0 +static INLINE enum pipe_format +format_from_get_elt( pt_elt_func get_elt ) +{ + switch (draw->pt.user.eltSize) { + case 1: return PIPE_FORMAT_R8_UNORM; + case 2: return PIPE_FORMAT_R16_UNORM; + case 4: return PIPE_FORMAT_R32_UNORM; + default: return PIPE_FORMAT_NONE; + } +} +#endif + +static INLINE void +vcache_check_run( struct draw_pt_front_end *frontend, + pt_elt_func get_elt, + const void *elts, + unsigned draw_count ) +{ + struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; + struct draw_context *draw = vcache->draw; + unsigned min_index = draw->pt.user.min_index; + unsigned max_index = draw->pt.user.max_index; + unsigned index_size = draw->pt.user.eltSize; + unsigned fetch_count = max_index + 1 - min_index; + const ushort *transformed_elts; + ushort *storage = NULL; + boolean ok; + + + if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count, + vcache->fetch_max, + draw_count); + + if (max_index == 0xffffffff || + fetch_count > draw_count) { + if (0) debug_printf("fail\n"); + goto fail; + } + + if (vcache->middle_prim != vcache->input_prim) { + vcache->middle_prim = vcache->input_prim; + vcache->middle->prepare( vcache->middle, + vcache->middle_prim, + vcache->opt, + &vcache->fetch_max ); + } + + + if (min_index == 0 && + index_size == 2) + { + transformed_elts = (const ushort *)elts; + } + else + { + storage = MALLOC( draw_count * sizeof(ushort) ); + if (!storage) + goto fail; + + if (min_index == 0) { + switch(index_size) { + case 1: + translate_ubyte_elts( (const ubyte *)elts, + draw_count, + storage ); + break; + + case 2: + translate_ushort_elts( (const ushort *)elts, + draw_count, + storage ); + break; + + case 4: + translate_uint_elts( (const uint *)elts, + draw_count, + storage ); + break; + + default: + assert(0); + return; + } + } + else { + switch(index_size) { + case 1: + rebase_ubyte_elts( (const ubyte *)elts, + draw_count, + 0 - (int)min_index, + storage ); + 
break; + + case 2: + rebase_ushort_elts( (const ushort *)elts, + draw_count, + 0 - (int)min_index, + storage ); + break; + + case 4: + rebase_uint_elts( (const uint *)elts, + draw_count, + 0 - (int)min_index, + storage ); + break; + + default: + assert(0); + return; + } + } + transformed_elts = storage; + } + + ok = vcache->middle->run_linear_elts( vcache->middle, + min_index, /* start */ + fetch_count, + transformed_elts, + draw_count ); + + FREE(storage); + + if (ok) + return; + + debug_printf("failed to execute atomic draw elts for %d/%d, splitting up\n", + fetch_count, draw_count); + + fail: + vcache_run( frontend, get_elt, elts, draw_count ); +} + + + + +static void +vcache_prepare( struct draw_pt_front_end *frontend, + unsigned prim, + struct draw_pt_middle_end *middle, + unsigned opt ) +{ + struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; + + if (opt & PT_PIPELINE) + { + vcache->base.run = vcache_run_extras; + } + else + { + vcache->base.run = vcache_check_run; + } + + vcache->input_prim = prim; + vcache->output_prim = draw_pt_reduced_prim(prim); + + vcache->middle = middle; + vcache->opt = opt; + + /* Have to run prepare here, but try and guess a good prim for + * doing so: + */ + vcache->middle_prim = (opt & PT_PIPELINE) ? 
vcache->output_prim : vcache->input_prim; + middle->prepare( middle, vcache->middle_prim, opt, &vcache->fetch_max ); +} + + + + +static void +vcache_finish( struct draw_pt_front_end *frontend ) +{ + struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; + vcache->middle->finish( vcache->middle ); + vcache->middle = NULL; +} + +static void +vcache_destroy( struct draw_pt_front_end *frontend ) +{ + FREE(frontend); +} + + +struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ) +{ + struct vcache_frontend *vcache = CALLOC_STRUCT( vcache_frontend ); + if (vcache == NULL) + return NULL; + + vcache->base.prepare = vcache_prepare; + vcache->base.run = NULL; + vcache->base.finish = vcache_finish; + vcache->base.destroy = vcache_destroy; + vcache->draw = draw; + + memset(vcache->in, ~0, sizeof(vcache->in)); + + return &vcache->base; +} diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h new file mode 100644 index 0000000000..ec05bbeab4 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h @@ -0,0 +1,177 @@ + + +static void FUNC( struct draw_pt_front_end *frontend, + pt_elt_func get_elt, + const void *elts, + unsigned count ) +{ + struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; + struct draw_context *draw = vcache->draw; + + boolean flatfirst = (draw->rasterizer->flatshade && + draw->rasterizer->flatshade_first); + unsigned i; + ushort flags; + + if (0) debug_printf("%s %d\n", __FUNCTION__, count); + + + switch (vcache->input_prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i ++) { + POINT( vcache, + get_elt(elts, i + 0) ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) { + LINE( vcache, + DRAW_PIPE_RESET_STIPPLE, + get_elt(elts, i + 0), + get_elt(elts, i + 1)); + } + break; + + case PIPE_PRIM_LINE_LOOP: + if (count >= 2) { + flags = DRAW_PIPE_RESET_STIPPLE; + + for (i = 1; i < count; i++, flags = 0) { + LINE( vcache, + 
flags, + get_elt(elts, i - 1), + get_elt(elts, i )); + } + + LINE( vcache, + flags, + get_elt(elts, i - 1), + get_elt(elts, 0 )); + } + break; + + case PIPE_PRIM_LINE_STRIP: + flags = DRAW_PIPE_RESET_STIPPLE; + for (i = 1; i < count; i++, flags = 0) { + LINE( vcache, + flags, + get_elt(elts, i - 1), + get_elt(elts, i )); + } + break; + + case PIPE_PRIM_TRIANGLES: + for (i = 0; i+2 < count; i += 3) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2 )); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, i + 0), + get_elt(elts, i + 1 + (i&1)), + get_elt(elts, i + 2 - (i&1))); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, i + 0 + (i&1)), + get_elt(elts, i + 1 - (i&1)), + get_elt(elts, i + 2 )); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (count >= 3) { + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, 0 )); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2 )); + } + } + } + break; + + + case PIPE_PRIM_QUADS: + for (i = 0; i+3 < count; i += 4) { + QUAD( vcache, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, i + 3)); + } + break; + + case PIPE_PRIM_QUAD_STRIP: + for (i = 0; i+3 < count; i += 2) { + QUAD( vcache, + get_elt(elts, i + 2), + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 3)); + } + break; + + case PIPE_PRIM_POLYGON: + { + /* These bitflags look a little odd because we submit the + * vertices 
as (1,2,0) to satisfy flatshade requirements. + */ + const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2; + const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0; + const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1; + + flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; + + for (i = 0; i+2 < count; i++, flags = edge_middle) { + + if (i + 3 == count) + flags |= edge_last; + + TRIANGLE( vcache, + flags, + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, 0)); + } + } + break; + + default: + assert(0); + break; + } + + vcache_flush( vcache ); +} + + +#undef TRIANGLE +#undef QUAD +#undef POINT +#undef LINE +#undef FUNC diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h new file mode 100644 index 0000000000..b0aa2df309 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -0,0 +1,113 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Vertex buffer drawing stage. + * + * \author Keith Whitwell <keith@tungstengraphics.com> + * \author José Fonseca <jrfonsec@tungstengraphics.com> + */ + +#ifndef DRAW_VBUF_H_ +#define DRAW_VBUF_H_ + + + +struct draw_context; +struct vertex_info; + + +/** + * Interface for hardware vertex buffer rendering. + */ +struct vbuf_render { + + /** + * Driver limits. May be tuned lower to improve cache hits on + * index list. + */ + unsigned max_indices; + unsigned max_vertex_buffer_bytes; + + /** + * Get the hardware vertex format. + * + * XXX: have this in draw_context instead? + */ + const struct vertex_info *(*get_vertex_info)( struct vbuf_render * ); + + /** + * Request a destination for vertices. + * Hardware renderers will use ttm memory, others will just malloc + * something. + */ + void *(*allocate_vertices)( struct vbuf_render *, + ushort vertex_size, + ushort nr_vertices ); + + /** + * Notify the renderer of the current primitive when it changes. + * Must succeed for TRIANGLES, LINES and POINTS. Other prims at + * the discretion of the driver, for the benefit of the passthrough + * path. + */ + boolean (*set_primitive)( struct vbuf_render *, unsigned prim ); + + /** + * DrawElements, note indices are ushort. The driver must complete + * this call, if necessary splitting the index list itself. + */ + void (*draw)( struct vbuf_render *, + const ushort *indices, + uint nr_indices ); + + /* Draw Arrays path too. 
+ */ + void (*draw_arrays)( struct vbuf_render *, + unsigned start, + uint nr ); + + /** + * Called when vbuf is done with this set of vertices: + */ + void (*release_vertices)( struct vbuf_render *, + void *vertices, + unsigned vertex_size, + unsigned vertices_used ); + + void (*destroy)( struct vbuf_render * ); +}; + + + +struct draw_stage * +draw_vbuf_stage( struct draw_context *draw, + struct vbuf_render *render ); + + +#endif /*DRAW_VBUF_H_*/ diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c new file mode 100644 index 0000000000..3214213e44 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vertex.c @@ -0,0 +1,129 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* + * Functions for specifying the post-transformation vertex layout. + * + * Author: + * Brian Paul + * Keith Whitwell + */ + + +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" + + +/** + * Compute the size of a vertex, in dwords/floats, to update the + * vinfo->size field. + */ +void +draw_compute_vertex_size(struct vertex_info *vinfo) +{ + uint i; + + vinfo->size = 0; + for (i = 0; i < vinfo->num_attribs; i++) { + switch (vinfo->attrib[i].emit) { + case EMIT_OMIT: + break; + case EMIT_4UB: + /* fall-through */ + case EMIT_1F_PSIZE: + /* fall-through */ + case EMIT_1F: + vinfo->size += 1; + break; + case EMIT_2F: + vinfo->size += 2; + break; + case EMIT_3F: + vinfo->size += 3; + break; + case EMIT_4F: + vinfo->size += 4; + break; + default: + assert(0); + } + } +} + + +void +draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) +{ + unsigned i, j; + + for (i = 0; i < vinfo->num_attribs; i++) { + j = vinfo->attrib[i].src_index; + switch (vinfo->attrib[i].emit) { + case EMIT_OMIT: + debug_printf("EMIT_OMIT:"); + break; + case EMIT_1F: + debug_printf("EMIT_1F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_1F_PSIZE: + debug_printf("EMIT_1F_PSIZE:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_2F: + debug_printf("EMIT_2F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_3F: + debug_printf("EMIT_3F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + data += sizeof(float); + break; + case EMIT_4F: + debug_printf("EMIT_4F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); 
data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_4UB: + debug_printf("EMIT_4UB:\t"); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + break; + default: + assert(0); + } + debug_printf("\n"); + } + debug_printf("\n"); +} diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h new file mode 100644 index 0000000000..a943607d7e --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -0,0 +1,163 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ **************************************************************************/
+
+/**
+ * Post-transform vertex format info.  The vertex_info struct is used by
+ * the draw_vbuf code to emit hardware-specific vertex layouts into hw
+ * vertex buffers.
+ *
+ * Author:
+ *    Brian Paul
+ */
+
+
+#ifndef DRAW_VERTEX_H
+#define DRAW_VERTEX_H
+
+
+#include "pipe/p_state.h"
+
+
+/**
+ * Vertex attribute emit modes
+ */
+enum attrib_emit {
+   EMIT_OMIT,      /**< don't emit the attribute */
+   EMIT_1F,
+   EMIT_1F_PSIZE,  /**< insert constant point size */
+   EMIT_2F,
+   EMIT_3F,
+   EMIT_4F,
+   EMIT_4UB  /**< XXX may need variations for RGBA vs BGRA, etc */
+};
+
+
+/**
+ * Attribute interpolation mode
+ */
+enum interp_mode {
+   INTERP_NONE,      /**< never interpolate vertex header info */
+   INTERP_POS,       /**< special case for frag position */
+   INTERP_CONSTANT,
+   INTERP_LINEAR,
+   INTERP_PERSPECTIVE
+};
+
+
+/**
+ * Information about hardware/rasterization vertex layout.
+ */
+struct vertex_info
+{
+   uint num_attribs;
+   uint hwfmt[4];      /**< hardware format info for this format */
+   uint size;          /**< total vertex size in dwords */
+
+   /* Keep this small and at the end of the struct to allow quick
+    * memcmp() comparisons.
+    */
+   struct {
+      ubyte interp_mode:4;      /**< INTERP_x */
+      ubyte emit:4;             /**< EMIT_x */
+      ubyte src_index;          /**< map to post-xform attribs */
+   } attrib[PIPE_MAX_SHADER_INPUTS];
+};
+
+/**
+ * Number of bytes from the start of *a up to (not including)
+ * attrib[a->num_attribs], i.e. the header plus the attrib entries in use.
+ */
+static INLINE int
+draw_vinfo_size( const struct vertex_info *a )
+{
+   return ((const char *)&a->attrib[a->num_attribs] - 
+           (const char *)a);
+}
+
+/**
+ * memcmp() the first draw_vinfo_size(a) bytes of a and b; returns 0 when
+ * they match.  The length is derived from a only.
+ */
+static INLINE int
+draw_vinfo_compare( const struct vertex_info *a,
+                    const struct vertex_info *b )
+{
+   unsigned sizea = draw_vinfo_size( a );
+   return memcmp( a, b, sizea );
+}
+
+/**
+ * Copy the used part of src (draw_vinfo_size(src) bytes) into dst.
+ */
+static INLINE void
+draw_vinfo_copy( struct vertex_info *dst,
+                 const struct vertex_info *src )
+{
+   unsigned size = draw_vinfo_size( src );
+   memcpy( dst, src, size );
+}
+
+
+
+/**
+ * Add another attribute to the given vertex_info object.
+ * \param src_index  indicates which post-transformed vertex attrib slot
+ *                   corresponds to this attribute.
+ * \return slot in which the attribute was added
+ */
+static INLINE uint
+draw_emit_vertex_attr(struct vertex_info *vinfo,
+                      enum attrib_emit emit,
+                      enum interp_mode interp, /* only used by softpipe??? */
+                      uint src_index)
+{
+   const uint n = vinfo->num_attribs;
+   assert(n < PIPE_MAX_SHADER_INPUTS);
+   vinfo->attrib[n].emit = emit;
+   vinfo->attrib[n].interp_mode = interp;
+   vinfo->attrib[n].src_index = src_index;
+   vinfo->num_attribs++;
+   return n;
+}
+
+
+extern void draw_compute_vertex_size(struct vertex_info *vinfo);
+
+void draw_dump_emitted_vertex(const struct vertex_info *vinfo, 
+                              const uint8_t *data);
+
+
+/**
+ * Map an EMIT_x mode to the PIPE_FORMAT_x used to emit it.  EMIT_OMIT and
+ * any unrecognised value yield PIPE_FORMAT_NONE.
+ */
+static INLINE unsigned draw_translate_vinfo_format(unsigned format )
+{
+   switch (format) {
+   case EMIT_1F:
+   case EMIT_1F_PSIZE:
+      return PIPE_FORMAT_R32_FLOAT;
+   case EMIT_2F:
+      return PIPE_FORMAT_R32G32_FLOAT;
+   case EMIT_3F:
+      return PIPE_FORMAT_R32G32B32_FLOAT;
+   case EMIT_4F:
+      return PIPE_FORMAT_R32G32B32A32_FLOAT;
+   case EMIT_4UB:
+      return PIPE_FORMAT_R8G8B8A8_UNORM;
+   default:
+      return PIPE_FORMAT_NONE;
+   }
+}
+
+
+#endif /* DRAW_VERTEX_H */
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
new file mode 100644
index 0000000000..7f305304ff
--- /dev/null
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -0,0 +1,267 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "pipe/p_shader_tokens.h" + +#include "draw_private.h" +#include "draw_context.h" +#include "draw_vs.h" + +#include "translate/translate.h" +#include "translate/translate_cache.h" + + + + +void draw_vs_set_constants( struct draw_context *draw, + const float (*constants)[4], + unsigned size ) +{ + if (((unsigned)constants) & 0xf) { + if (size > draw->vs.const_storage_size) { + if (draw->vs.aligned_constant_storage) + align_free((void *)draw->vs.aligned_constant_storage); + draw->vs.aligned_constant_storage = align_malloc( size, 16 ); + } + memcpy( (void*)draw->vs.aligned_constant_storage, + constants, + size ); + constants = draw->vs.aligned_constant_storage; + } + + draw->vs.aligned_constants = constants; + draw_vs_aos_machine_constants( draw->vs.aos_machine, constants ); +} + + +void draw_vs_set_viewport( struct draw_context *draw, + const struct pipe_viewport_state *viewport ) +{ + draw_vs_aos_machine_viewport( draw->vs.aos_machine, viewport ); +} + + + +struct draw_vertex_shader * +draw_create_vertex_shader(struct draw_context *draw, + const struct pipe_shader_state *shader) +{ + struct draw_vertex_shader *vs; + + vs = draw_create_vs_llvm( draw, shader ); + if (!vs) { + vs = draw_create_vs_sse( draw, shader ); + if (!vs) { + vs = draw_create_vs_ppc( draw, shader ); + if (!vs) { + vs = draw_create_vs_exec( draw, shader ); + } + } + } + + if (vs) + { + uint i; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && + vs->info.output_semantic_index[i] == 0) + vs->position_output = i; + } + } + + assert(vs); + return vs; +} + + +void +draw_bind_vertex_shader(struct draw_context *draw, + struct draw_vertex_shader *dvs) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + + 
if (dvs) + { + draw->vs.vertex_shader = dvs; + draw->vs.num_vs_outputs = dvs->info.num_outputs; + draw->vs.position_output = dvs->position_output; + dvs->prepare( dvs, draw ); + } + else { + draw->vs.vertex_shader = NULL; + draw->vs.num_vs_outputs = 0; + } +} + + +void +draw_delete_vertex_shader(struct draw_context *draw, + struct draw_vertex_shader *dvs) +{ + unsigned i; + + for (i = 0; i < dvs->nr_varients; i++) + dvs->varient[i]->destroy( dvs->varient[i] ); + + dvs->nr_varients = 0; + + dvs->delete( dvs ); +} + + + +boolean +draw_vs_init( struct draw_context *draw ) +{ + tgsi_exec_machine_init(&draw->vs.machine); + + /* FIXME: give this machine thing a proper constructor: + */ + draw->vs.machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); + if (!draw->vs.machine.Inputs) + return FALSE; + + draw->vs.machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); + if (!draw->vs.machine.Outputs) + return FALSE; + + draw->vs.emit_cache = translate_cache_create(); + if (!draw->vs.emit_cache) + return FALSE; + + draw->vs.fetch_cache = translate_cache_create(); + if (!draw->vs.fetch_cache) + return FALSE; + + draw->vs.aos_machine = draw_vs_aos_machine(); +#ifdef PIPE_ARCH_X86 + if (!draw->vs.aos_machine) + return FALSE; +#endif + + return TRUE; +} + +void +draw_vs_destroy( struct draw_context *draw ) +{ + if (draw->vs.machine.Inputs) + align_free(draw->vs.machine.Inputs); + + if (draw->vs.machine.Outputs) + align_free(draw->vs.machine.Outputs); + + if (draw->vs.fetch_cache) + translate_cache_destroy(draw->vs.fetch_cache); + + if (draw->vs.emit_cache) + translate_cache_destroy(draw->vs.emit_cache); + + if (draw->vs.aos_machine) + draw_vs_aos_machine_destroy(draw->vs.aos_machine); + + if (draw->vs.aligned_constant_storage) + align_free((void*)draw->vs.aligned_constant_storage); + + tgsi_exec_machine_free_data(&draw->vs.machine); + +} + + +struct draw_vs_varient * +draw_vs_lookup_varient( struct 
draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ) +{ + struct draw_vs_varient *varient; + unsigned i; + + /* Lookup existing varient: + */ + for (i = 0; i < vs->nr_varients; i++) + if (draw_vs_varient_key_compare(key, &vs->varient[i]->key) == 0) + return vs->varient[i]; + + /* Else have to create a new one: + */ + varient = vs->create_varient( vs, key ); + if (varient == NULL) + return NULL; + + /* Add it to our list, could be smarter: + */ + if (vs->nr_varients < Elements(vs->varient)) { + vs->varient[vs->nr_varients++] = varient; + } + else { + vs->last_varient++; + vs->last_varient %= Elements(vs->varient); + vs->varient[vs->last_varient]->destroy(vs->varient[vs->last_varient]); + vs->varient[vs->last_varient] = varient; + } + + /* Done + */ + return varient; +} + + +struct translate * +draw_vs_get_fetch( struct draw_context *draw, + struct translate_key *key ) +{ + if (!draw->vs.fetch || + translate_key_compare(&draw->vs.fetch->key, key) != 0) + { + translate_key_sanitize(key); + draw->vs.fetch = translate_cache_find(draw->vs.fetch_cache, key); + } + + return draw->vs.fetch; +} + +struct translate * +draw_vs_get_emit( struct draw_context *draw, + struct translate_key *key ) +{ + if (!draw->vs.emit || + translate_key_compare(&draw->vs.emit->key, key) != 0) + { + translate_key_sanitize(key); + draw->vs.emit = translate_cache_find(draw->vs.emit_cache, key); + } + + return draw->vs.emit; +} diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h new file mode 100644 index 0000000000..89ae158751 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -0,0 +1,224 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef DRAW_VS_H +#define DRAW_VS_H + +#include "draw_context.h" +#include "draw_private.h" + + +struct draw_context; +struct pipe_shader_state; + +struct draw_varient_input +{ + enum pipe_format format; + unsigned buffer; + unsigned offset; +}; + +struct draw_varient_output +{ + enum pipe_format format; /* output format */ + unsigned vs_output:8; /* which vertex shader output is this? 
*/ + unsigned offset:24; /* offset into output vertex */ +}; + +struct draw_varient_element { + struct draw_varient_input in; + struct draw_varient_output out; +}; + +struct draw_vs_varient_key { + unsigned output_stride; + unsigned nr_elements:8; /* max2(nr_inputs, nr_outputs) */ + unsigned nr_inputs:8; + unsigned nr_outputs:8; + unsigned viewport:1; + unsigned clip:1; + unsigned const_vbuffers:5; + struct draw_varient_element element[PIPE_MAX_ATTRIBS]; +}; + +struct draw_vs_varient; + + +struct draw_vs_varient { + struct draw_vs_varient_key key; + + struct draw_vertex_shader *vs; + + void (*set_buffer)( struct draw_vs_varient *, + unsigned i, + const void *ptr, + unsigned stride ); + + void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader, + unsigned start, + unsigned count, + void *output_buffer ); + + void (PIPE_CDECL *run_elts)( struct draw_vs_varient *shader, + const unsigned *elts, + unsigned count, + void *output_buffer ); + + void (*destroy)( struct draw_vs_varient * ); +}; + + +/** + * Private version of the compiled vertex_shader + */ +struct draw_vertex_shader { + struct draw_context *draw; + + /* This member will disappear shortly: + */ + struct pipe_shader_state state; + + struct tgsi_shader_info info; + unsigned position_output; + + /* Extracted from shader: + */ + const float (*immediates)[4]; + + /* + */ + struct draw_vs_varient *varient[16]; + unsigned nr_varients; + unsigned last_varient; + struct draw_vs_varient *(*create_varient)( struct draw_vertex_shader *shader, + const struct draw_vs_varient_key *key ); + + + void (*prepare)( struct draw_vertex_shader *shader, + struct draw_context *draw ); + + /* Run the shader - this interface will get cleaned up in the + * future: + */ + void (*run_linear)( struct draw_vertex_shader *shader, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ); + + + void (*delete)( struct draw_vertex_shader * ); +}; 
+ + +struct draw_vs_varient * +draw_vs_lookup_varient( struct draw_vertex_shader *base, + const struct draw_vs_varient_key *key ); + + +/******************************************************************************** + * Internal functions: + */ + +struct draw_vertex_shader * +draw_create_vs_exec(struct draw_context *draw, + const struct pipe_shader_state *templ); + +struct draw_vertex_shader * +draw_create_vs_sse(struct draw_context *draw, + const struct pipe_shader_state *templ); + +struct draw_vertex_shader * +draw_create_vs_ppc(struct draw_context *draw, + const struct pipe_shader_state *templ); + +struct draw_vertex_shader * +draw_create_vs_llvm(struct draw_context *draw, + const struct pipe_shader_state *templ); + + + +struct draw_vs_varient_key; +struct draw_vertex_shader; + +struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ); + + + +/******************************************************************************** + * Helpers for vs implementations that don't do their own fetch/emit varients. + * Means these can be shared between shaders. 
+ */ +struct translate; +struct translate_key; + +struct translate *draw_vs_get_fetch( struct draw_context *draw, + struct translate_key *key ); + + +struct translate *draw_vs_get_emit( struct draw_context *draw, + struct translate_key *key ); + +struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ); + + + +static INLINE int draw_vs_varient_keysize( const struct draw_vs_varient_key *key ) +{ + return 2 * sizeof(int) + key->nr_elements * sizeof(struct draw_varient_element); +} + +static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key *a, + const struct draw_vs_varient_key *b ) +{ + int keysize = draw_vs_varient_keysize(a); + return memcmp(a, b, keysize); +} + + +struct aos_machine *draw_vs_aos_machine( void ); +void draw_vs_aos_machine_destroy( struct aos_machine *machine ); + +void draw_vs_aos_machine_constants( struct aos_machine *machine, + const float (*constants)[4] ); + +void draw_vs_aos_machine_viewport( struct aos_machine *machine, + const struct pipe_viewport_state *viewport ); + + +#define MAX_TGSI_VERTICES 4 + + + +#endif diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c new file mode 100644 index 0000000000..87232865e2 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -0,0 +1,2231 @@ +/* + * Mesa 3-D graphics library + * Version: 6.3 + * + * Copyright (C) 1999-2004 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Translate tgsi vertex programs to x86/x87/SSE/SSE2 machine code + * using the rtasm runtime assembler. 
Based on the old
+ * t_vb_arb_program_sse.c
+ */
+
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_debug.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_exec.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "draw_vs.h"
+#include "draw_vs_aos.h"
+
+#include "rtasm/rtasm_x86sse.h"
+
+#ifdef PIPE_ARCH_X86
+#define DISASSEM 0
+#define FAST_MATH 1   /* makes _powerf() use util_fast_pow() instead of powf() */
+
+static const char *files[] =   /* register-file names for debug output; */
+{                              /* indexed by file id, see spill()        */
+   "NULL",
+   "CONST",
+   "IN",
+   "OUT",
+   "TEMP",
+   "SAMP",
+   "ADDR",
+   "IMM",
+   "INTERNAL",
+};
+
+static INLINE boolean eq( struct x86_reg a,    /* TRUE when a and b agree in */
+                          struct x86_reg b )   /* file, idx, mod and disp.   */
+{
+   return (a.file == b.file &&
+           a.idx == b.idx &&
+           a.mod == b.mod &&
+           a.disp == b.disp);
+}
+
+struct x86_reg aos_get_x86( struct aos_compilation *cp,
+                            unsigned which_reg, /* quick hack: 0 selects temp_EBP, anything else tmp_EAX */
+                            unsigned value )    /* X86_IMMEDIATES, X86_CONSTANTS or X86_BUFFERS */
+{
+   struct x86_reg reg;
+
+   if (which_reg == 0)
+      reg = cp->temp_EBP;
+   else
+      reg = cp->tmp_EAX;
+
+   if (cp->x86_reg[which_reg] != value) {   /* register is not recorded as holding this pointer */
+      unsigned offset;
+
+      switch (value) {
+      case X86_IMMEDIATES:
+         assert(which_reg == 0);
+         offset = Offset(struct aos_machine, immediates);
+         break;
+      case X86_CONSTANTS:
+         assert(which_reg == 1);
+         offset = Offset(struct aos_machine, constants);
+         break;
+      case X86_BUFFERS:
+         assert(which_reg == 0);
+         offset = Offset(struct aos_machine, buffer);
+         break;
+      default:
+         assert(0);
+         offset = 0;
+      }
+
+      /* Emit a load of the selected aos_machine field (addressed through
+       * machine_EDX) into the register and record what it now holds, so
+       * that a later call asking for the same value emits nothing.
+       */
+      x86_mov(cp->func, reg,
+              x86_make_disp(cp->machine_EDX, offset));
+
+      cp->x86_reg[which_reg] = value;
+   }
+
+   return reg;
+}
+
+/* Build the memory operand addressing slot 'idx' of register file 'file'.
+ * Inputs, outputs, temporaries and internals are fields of the
+ * aos_machine addressed off machine_EDX.  Immediates and constants are
+ * reached through a pointer register obtained from aos_get_x86() (which
+ * may emit a load), at 4 floats per slot.  Any other file is reported
+ * through ERROR().
+ */
+static struct x86_reg get_reg_ptr(struct aos_compilation *cp,
+                                  unsigned file,
+                                  unsigned idx )
+{
+   struct x86_reg ptr = cp->machine_EDX;
+
+   switch (file) {
+   case TGSI_FILE_INPUT:
+      assert(idx < MAX_INPUTS);
+      return x86_make_disp(ptr, Offset(struct aos_machine, input[idx]));
+
+   case TGSI_FILE_OUTPUT:   /* NOTE(review): the only file without a range assert on idx */
+      return x86_make_disp(ptr, Offset(struct aos_machine, output[idx]));
+
+   case TGSI_FILE_TEMPORARY:
+      assert(idx < MAX_TEMPS);
+      return x86_make_disp(ptr, Offset(struct aos_machine, temp[idx]));
+
+   case AOS_FILE_INTERNAL:
+      assert(idx < MAX_INTERNALS);
+      return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx]));
+
+   case TGSI_FILE_IMMEDIATE:
+      assert(idx < MAX_IMMEDIATES);  /* just a sanity check */
+      return x86_make_disp(aos_get_x86(cp, 0, X86_IMMEDIATES), idx * 4 * sizeof(float));
+
+   case TGSI_FILE_CONSTANT:
+      assert(idx < MAX_CONSTANTS);  /* just a sanity check */
+      return x86_make_disp(aos_get_x86(cp, 1, X86_CONSTANTS), idx * 4 * sizeof(float));
+
+   default:
+      ERROR(cp, "unknown reg file");
+      return x86_make_reg(0,0);
+   }
+}
+
+
+/* Bit-field constants named after the fields of the x87 control word
+ * (exception, precision and rounding bits).  None of them is referenced
+ * in the part of the file reviewed here.
+ */
+#define X87_CW_EXCEPTION_INV_OP       (1<<0)
+#define X87_CW_EXCEPTION_DENORM_OP    (1<<1)
+#define X87_CW_EXCEPTION_ZERO_DIVIDE  (1<<2)
+#define X87_CW_EXCEPTION_OVERFLOW     (1<<3)
+#define X87_CW_EXCEPTION_UNDERFLOW    (1<<4)
+#define X87_CW_EXCEPTION_PRECISION    (1<<5)
+#define X87_CW_PRECISION_SINGLE       (0<<8)
+#define X87_CW_PRECISION_RESERVED     (1<<8)
+#define X87_CW_PRECISION_DOUBLE       (2<<8)
+#define X87_CW_PRECISION_DOUBLE_EXT   (3<<8)
+#define X87_CW_PRECISION_MASK         (3<<8)
+#define X87_CW_ROUND_NEAREST          (0<<10)
+#define X87_CW_ROUND_DOWN             (1<<10)
+#define X87_CW_ROUND_UP               (2<<10)
+#define X87_CW_ROUND_ZERO             (3<<10)
+#define X87_CW_ROUND_MASK             (3<<10)
+#define X87_CW_INFINITY               (1<<12)
+
+
+
+/* Write xmm register 'idx' back to the in-memory slot of the shader
+ * register it currently holds and clear its dirty flag.  Only dirty
+ * INPUT, OUTPUT or TEMPORARY registers may be spilled; anything else is
+ * reported through ERROR() and no store is emitted.
+ */
+static void spill( struct aos_compilation *cp, unsigned idx )
+{
+   if (!cp->xmm[idx].dirty ||
+       (cp->xmm[idx].file != TGSI_FILE_INPUT && /* inputs are fetched into xmm & set dirty */
+        cp->xmm[idx].file != TGSI_FILE_OUTPUT &&
+        cp->xmm[idx].file != TGSI_FILE_TEMPORARY)) {
+      ERROR(cp, "invalid spill");
+      return;
+   }
+   else {
+      struct x86_reg oldval = get_reg_ptr(cp,
+                                          cp->xmm[idx].file,
+                                          cp->xmm[idx].idx);
+
+      if (0) debug_printf("\nspill %s[%d]",
+                          files[cp->xmm[idx].file],
+                          cp->xmm[idx].idx);
+
+      assert(cp->xmm[idx].dirty);   /* already guaranteed by the test above */
+      sse_movaps(cp->func, oldval, x86_make_reg(file_XMM, idx));
+      cp->xmm[idx].dirty = 0;
+   }
+}
+
+/* Spill every dirty xmm register, then release all eight of them. */
+void aos_spill_all( struct aos_compilation *cp )
+{
+   unsigned i;
+
+   for (i = 0; i < 8; i++) {
+      if (cp->xmm[i].dirty)
+         spill(cp, i);
+      aos_release_xmm_reg(cp, i);
+   }
+}
+
+/* Return an xmm register holding the value of 'reg' that the caller may
+ * overwrite.  'reg' itself is returned only when it is an xmm register
+ * that is not bound to a shader register; otherwise the value is copied
+ * into a freshly allocated xmm register.
+ */
+static struct x86_reg get_xmm_writable( struct aos_compilation *cp,
+                                        struct x86_reg reg )
+{
+   if (reg.file != file_XMM ||
+       cp->xmm[reg.idx].file != TGSI_FILE_NULL)
+   {
+      struct x86_reg tmp = aos_get_xmm_reg(cp);
+      sse_movaps(cp->func, tmp, reg);
+      reg = tmp;
+   }
+
+   cp->xmm[reg.idx].last_used = cp->insn_counter;
+   return reg;
+}
+
+static struct x86_reg get_xmm( struct aos_compilation *cp,   /* Like get_xmm_writable(), but an xmm   */
+                               struct x86_reg reg )          /* 'reg' is returned as-is, bound or not */
+{
+   if (reg.file != file_XMM)
+   {
+      struct x86_reg tmp = aos_get_xmm_reg(cp);
+      sse_movaps(cp->func, tmp, reg);
+      reg = tmp;
+   }
+
+   cp->xmm[reg.idx].last_used = cp->insn_counter;
+   return reg;
+}
+
+
+/* Allocate an empty xmm register, either as a temporary or later to
+ * "adopt" as a shader reg.
+ *
+ * An unbound register that has not been touched by the current
+ * instruction is preferred; the highest-numbered such register wins
+ * because the search does not stop at the first match.  Failing that,
+ * the register with the smallest last_used stamp is evicted, and it is
+ * spilled first if dirty.
+ */
+struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp )
+{
+   unsigned i;
+   unsigned oldest = 0;
+   boolean found = FALSE;
+
+   for (i = 0; i < 8; i++)
+      if (cp->xmm[i].last_used != cp->insn_counter &&
+          cp->xmm[i].file == TGSI_FILE_NULL) {
+         oldest = i;
+         found = TRUE;
+      }
+
+   if (!found) {
+      for (i = 0; i < 8; i++)
+         if (cp->xmm[i].last_used < cp->xmm[oldest].last_used)
+            oldest = i;
+   }
+
+   /* Need to write out the old value?
+    */
+   if (cp->xmm[oldest].dirty)
+      spill(cp, oldest);
+
+   assert(cp->xmm[oldest].last_used != cp->insn_counter);
+
+   cp->xmm[oldest].file = TGSI_FILE_NULL;
+   cp->xmm[oldest].idx = 0;
+   cp->xmm[oldest].dirty = 0;
+   cp->xmm[oldest].last_used = cp->insn_counter;
+   return x86_make_reg(file_XMM, oldest);
+}
+
+void aos_release_xmm_reg( struct aos_compilation *cp,   /* Unbind xmm[idx]: resets its file, idx, dirty and  */
+                          unsigned idx )                /* last_used fields.  Emits no code, does not spill. */
+{
+   cp->xmm[idx].file = TGSI_FILE_NULL;
+   cp->xmm[idx].idx = 0;
+   cp->xmm[idx].dirty = 0;
+   cp->xmm[idx].last_used = 0;
+}
+
+
+
+
+/* Mark an xmm reg as holding the current copy of a shader reg.
+ */
+void aos_adopt_xmm_reg( struct aos_compilation *cp,
+                        struct x86_reg reg,    /* must be an xmm register */
+                        unsigned file,         /* shader register file ... */
+                        unsigned idx,          /* ... and index within that file */
+                        unsigned dirty )       /* nonzero: value still has to be written back, see spill() */
+{
+   unsigned i;
+
+   if (reg.file != file_XMM) {
+      assert(0);
+      return;
+   }
+
+
+   /* If any xmm reg thinks it holds this shader reg, break the
+    * illusion.  This also releases 'reg' itself when it is already
+    * bound to the shader reg; it is bound again right after the loop.
+    */
+   for (i = 0; i < 8; i++) {
+      if (cp->xmm[i].file == file &&
+          cp->xmm[i].idx == idx)
+      {
+         /* If an xmm reg is already holding this shader reg, take into account its
+          * dirty flag...
+          */
+         dirty |= cp->xmm[i].dirty;
+         aos_release_xmm_reg(cp, i);
+      }
+   }
+
+   cp->xmm[reg.idx].file = file;
+   cp->xmm[reg.idx].idx = idx;
+   cp->xmm[reg.idx].dirty = dirty;
+   cp->xmm[reg.idx].last_used = cp->insn_counter;
+}
+
+
+/* Return a pointer to the in-memory copy of the reg, making sure it is uptodate.
+ * A dirty xmm copy is spilled but stays bound to the shader reg.
+ */
+static struct x86_reg aos_get_shader_reg_ptr( struct aos_compilation *cp,
+                                              unsigned file,
+                                              unsigned idx )
+{
+   unsigned i;
+
+   /* Ensure the in-memory copy of this reg is up-to-date
+    */
+   for (i = 0; i < 8; i++) {
+      if (cp->xmm[i].file == file &&
+          cp->xmm[i].idx == idx &&
+          cp->xmm[i].dirty) {
+         spill(cp, i);
+      }
+   }
+
+   return get_reg_ptr( cp, file, idx );
+}
+
+
+/* Like aos_get_shader_reg_ptr(), but for a destination register: any xmm
+ * copy is spilled if dirty and then released, so that the in-memory slot
+ * becomes the only copy before the caller stores to it.
+ *
+ * Note - the returned pointer may alias pointers handed out for source
+ * registers.  NOTE(review): the original comment named get_arg_ptr(),
+ * which does not exist in the visible code; presumably
+ * aos_get_shader_reg_ptr() is meant.
+ */
+static struct x86_reg get_dst_ptr( struct aos_compilation *cp,
+                                   const struct tgsi_full_dst_register *dst )
+{
+   unsigned file = dst->DstRegister.File;
+   unsigned idx = dst->DstRegister.Index;
+   unsigned i;
+
+
+   /* Ensure in-memory copy of this reg is up-to-date and invalidate
+    * any xmm copies.
+    */
+   for (i = 0; i < 8; i++) {
+      if (cp->xmm[i].file == file &&
+          cp->xmm[i].idx == idx)
+      {
+         if (cp->xmm[i].dirty)
+            spill(cp, i);
+
+         aos_release_xmm_reg(cp, i);
+      }
+   }
+
+   return get_reg_ptr( cp, file, idx );
+}
+
+
+
+
+
+/* Return an XMM reg if the argument is resident, otherwise return a
+ * base+offset pointer to the saved value.
+ */
+struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
+                                   unsigned file,
+                                   unsigned idx )
+{
+   unsigned i;
+
+   for (i = 0; i < 8; i++) {
+      if (cp->xmm[i].file == file &&
+          cp->xmm[i].idx == idx)
+      {
+         cp->xmm[i].last_used = cp->insn_counter;   /* protects it from eviction during this instruction */
+         return x86_make_reg(file_XMM, i);
+      }
+   }
+
+   /* If not found in the XMM register file, return an indirect
+    * reference to the in-memory copy:
+    */
+   return get_reg_ptr( cp, file, idx );
+}
+
+
+/* Like aos_get_shader_reg(), but always returns an xmm register: a value
+ * that is only in memory is loaded into a fresh xmm register.  The
+ * register is then bound to the shader reg with a dirty argument of FALSE.
+ */
+static struct x86_reg aos_get_shader_reg_xmm( struct aos_compilation *cp,
+                                              unsigned file,
+                                              unsigned idx )
+{
+   struct x86_reg reg = get_xmm( cp,
+                                 aos_get_shader_reg( cp, file, idx ) );
+
+   aos_adopt_xmm_reg( cp,
+                      reg,
+                      file,
+                      idx,
+                      FALSE );
+
+   return reg;
+}
+
+
+/* Fetch register 'imm' of the AOS_FILE_INTERNAL file into an xmm register. */
+struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp,
+                                     unsigned imm )
+{
+   return aos_get_shader_reg_xmm( cp, AOS_FILE_INTERNAL, imm );
+}
+
+/* As aos_get_internal_xmm(), but may return a memory operand instead. */
+struct x86_reg aos_get_internal( struct aos_compilation *cp,
+                                 unsigned imm )
+{
+   return aos_get_shader_reg( cp, AOS_FILE_INTERNAL, imm );
+}
+
+
+
+
+
+/* Emulate pshufd insn in regular SSE, if necessary:
+ * without SSE2, dst is first loaded with arg0 (unless both are the same
+ * operand) and then shuffled in place with shufps.
+ */
+static void emit_pshufd( struct aos_compilation *cp,
+                         struct x86_reg dst,
+                         struct x86_reg arg0,
+                         ubyte shuf )
+{
+   if (cp->have_sse2) {
+      sse2_pshufd(cp->func, dst, arg0, shuf);
+   }
+   else {
+      if (!eq(dst, arg0))
+         sse_movaps(cp->func, dst, arg0);
+
+      sse_shufps(cp->func, dst, dst, shuf);
+   }
+}
+
+/* Merge the channels of 'result' selected by the writemask 'mask' into
+ * 'dst', leaving the other channels of 'dst' untouched:
+ *
+ *    pshufd - build a per-channel mask in tmp from IMM_SWZ: element 2 for
+ *             channels being written, element 3 for the others
+ *             (presumably 0 and all-ones bits - confirm against the
+ *             definition of IMM_SWZ)
+ *    and    - dst, tmp      keep the channels that are not written
+ *    andn   - tmp, result   tmp = ~tmp & result, i.e. the written channels
+ *    or     - dst, tmp
+ *
+ * Always returns TRUE.
+ */
+static boolean mask_write( struct aos_compilation *cp,
+                           struct x86_reg dst,
+                           struct x86_reg result,
+                           unsigned mask )
+{
+   struct x86_reg imm_swz = aos_get_internal_xmm(cp, IMM_SWZ);
+   struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+   emit_pshufd(cp, tmp, imm_swz,
+               SHUF((mask & 1) ? 2 : 3,
+                    (mask & 2) ? 2 : 3,
+                    (mask & 4) ? 2 : 3,
+                    (mask & 8) ? 2 : 3));
+
+   sse_andps(cp->func, dst, tmp);
+   sse_andnps(cp->func, tmp, result);
+   sse_orps(cp->func, dst, tmp);
+
+   aos_release_xmm_reg(cp, tmp.idx);
+   return TRUE;
+}
+
+
+
+
+/* Helper for writemask:
+ * shuffles arg1 into dst and arg0 into a temporary with 'shuf', combines
+ * the two with shufps (low half from dst, high half from the temporary,
+ * per the x86 definition of shufps) and applies 'shuf' to dst once more.
+ * In the code visible here it is only called from emit_DST().  Always
+ * returns TRUE.
+ */
+static boolean emit_shuf_copy2( struct aos_compilation *cp,
+                                struct x86_reg dst,
+                                struct x86_reg arg0,
+                                struct x86_reg arg1,
+                                ubyte shuf )
+{
+   struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+   emit_pshufd(cp, dst, arg1, shuf);
+   emit_pshufd(cp, tmp, arg0, shuf);
+   sse_shufps(cp->func, dst, tmp, SHUF(X, Y, Z, W));
+   emit_pshufd(cp, dst, dst, shuf);
+
+   aos_release_xmm_reg(cp, tmp.idx);
+   return TRUE;
+}
+
+
+/* The identity swizzle in the 2-bits-per-channel form built by fetch_src(). */
+#define SSE_SWIZZLE_NOOP ((0<<0) | (1<<2) | (2<<4) | (3<<6))
+
+
+/* Locate a source register and perform any required (simple) swizzle.
+ *
+ * Just fail on complex swizzles at this point.
+ *
+ * When no swizzle, negation or abs is requested the operand returned by
+ * aos_get_shader_reg() is passed through unchanged (xmm or memory).
+ * Otherwise the result is built in a freshly allocated xmm register:
+ * negating some but not all channels multiplies by a vector shuffled
+ * out of IMM_SWZ, negating all channels multiplies by IMM_NEGS, and abs
+ * on all channels is computed as max(v, v * IMM_NEGS).  Abs on only some
+ * channels, the ZERO/ONE swizzles and other sign modes are reported
+ * through ERROR().
+ */
+static struct x86_reg fetch_src( struct aos_compilation *cp,
+                                 const struct tgsi_full_src_register *src )
+{
+   struct x86_reg arg0 = aos_get_shader_reg(cp,
+                                            src->SrcRegister.File,
+                                            src->SrcRegister.Index);
+   unsigned i;
+   ubyte swz = 0;
+   unsigned negs = 0;     /* bit i set: channel i is negated */
+   unsigned abs = 0;      /* bit i set: channel i takes the absolute value */
+
+   for (i = 0; i < 4; i++) {
+      unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( src, i );
+      unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, i );
+
+      switch (swizzle) {
+      case TGSI_EXTSWIZZLE_ZERO:
+      case TGSI_EXTSWIZZLE_ONE:
+         ERROR(cp, "not supporting full swizzles yet in tgsi_aos_sse2");
+         break;
+
+      default:
+         swz |= (swizzle & 0x3) << (i * 2);
+         break;
+      }
+
+      switch (neg) {
+      case TGSI_UTIL_SIGN_TOGGLE:
+         negs |= (1<<i);
+         break;
+
+      case TGSI_UTIL_SIGN_KEEP:
+         break;
+
+      case TGSI_UTIL_SIGN_CLEAR:
+         abs |= (1<<i);
+         break;
+
+      default:
+         ERROR(cp, "unsupported sign-mode");
+         break;
+      }
+   }
+
+   if (swz != SSE_SWIZZLE_NOOP || negs != 0 || abs != 0) {
+      struct x86_reg dst = aos_get_xmm_reg(cp);
+
+      if (swz != SSE_SWIZZLE_NOOP)
+         emit_pshufd(cp, dst, arg0, swz);
+      else
+         sse_movaps(cp->func, dst, arg0);
+
+      if (negs && negs != 0xf) {
+         struct x86_reg imm_swz = aos_get_internal_xmm(cp, IMM_SWZ);
+         struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+         /* Load 1,-1,0,0
+          * Use neg as arg to pshufd
+          * Multiply
+          */
+         emit_pshufd(cp, tmp, imm_swz,
+                     SHUF((negs & 1) ? 1 : 0,
+                          (negs & 2) ? 1 : 0,
+                          (negs & 4) ? 1 : 0,
+                          (negs & 8) ? 1 : 0));
+         sse_mulps(cp->func, dst, tmp);
+
+         aos_release_xmm_reg(cp, tmp.idx);
+      }
+      else if (negs) {
+         struct x86_reg imm_negs = aos_get_internal_xmm(cp, IMM_NEGS);
+         sse_mulps(cp->func, dst, imm_negs);
+      }
+
+
+      if (abs && abs != 0xf) {
+         ERROR(cp, "unsupported partial abs");
+      }
+      else if (abs) {
+         struct x86_reg neg = aos_get_internal(cp, IMM_NEGS);
+         struct x86_reg tmp = aos_get_xmm_reg(cp);
+
+         sse_movaps(cp->func, tmp, dst);
+         sse_mulps(cp->func, tmp, neg);
+         sse_maxps(cp->func, dst, tmp);
+
+         aos_release_xmm_reg(cp, tmp.idx);
+      }
+
+      return dst;
+   }
+
+   return arg0;
+}
+
+static void x87_fld_src( struct aos_compilation *cp,                  /* Push one channel of a source */
+                         const struct tgsi_full_src_register *src,   /* register onto the x87 stack, */
+                         unsigned channel )                          /* applying swizzle & sign mode */
+{
+   struct x86_reg arg0 = aos_get_shader_reg_ptr(cp,
+                                                src->SrcRegister.File,
+                                                src->SrcRegister.Index);
+
+   unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( src, channel );
+   unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, channel );
+
+   switch (swizzle) {
+   case TGSI_EXTSWIZZLE_ZERO:
+      x87_fldz( cp->func );
+      break;
+
+   case TGSI_EXTSWIZZLE_ONE:
+      x87_fld1( cp->func );
+      break;
+
+   default:
+      x87_fld( cp->func, x86_make_disp(arg0, (swizzle & 3) * sizeof(float)) );
+      break;
+   }
+
+
+   switch (neg) {
+   case TGSI_UTIL_SIGN_TOGGLE:
+      /* Flip the sign:
+       */
+      x87_fchs( cp->func );
+      break;
+
+   case TGSI_UTIL_SIGN_KEEP:
+      break;
+
+   case TGSI_UTIL_SIGN_CLEAR:
+      x87_fabs( cp->func );
+      break;
+
+   case TGSI_UTIL_SIGN_SET:   /* -|x| */
+      x87_fabs( cp->func );
+      x87_fchs( cp->func );
+      break;
+
+   default:
+      ERROR(cp, "unsupported sign-mode");
+      break;
+   }
+}
+
+
+
+
+
+
+/* Used to implement write masking.
This and most of the other instructions
+ * here would be easier to implement if there had been a translation
+ * to a 2 argument format (dst/arg0, arg1) at the shader level before
+ * attempting to translate to x86/sse code.
+ */
+static void store_dest( struct aos_compilation *cp,
+                        const struct tgsi_full_dst_register *reg,   /* destination and its writemask */
+                        struct x86_reg result )                     /* value to store, xmm or memory */
+{
+   struct x86_reg dst;
+
+   switch (reg->DstRegister.WriteMask) {
+   case 0:
+      return;
+
+   case TGSI_WRITEMASK_XYZW:   /* full write: just bind the result register, marked dirty */
+      aos_adopt_xmm_reg(cp,
+                        get_xmm_writable(cp, result),
+                        reg->DstRegister.File,
+                        reg->DstRegister.Index,
+                        TRUE);
+      return;
+   default:
+      break;
+   }
+
+   dst = aos_get_shader_reg_xmm(cp,   /* partial write: the old destination value is needed */
+                                reg->DstRegister.File,
+                                reg->DstRegister.Index);
+
+   switch (reg->DstRegister.WriteMask) {
+   case TGSI_WRITEMASK_X:
+      sse_movss(cp->func, dst, get_xmm(cp, result));
+      break;
+
+   case TGSI_WRITEMASK_ZW:
+      sse_shufps(cp->func, dst, get_xmm(cp, result), SHUF(X, Y, Z, W));
+      break;
+
+   case TGSI_WRITEMASK_XY:    /* merged into a writable copy of result, which becomes dst */
+      result = get_xmm_writable(cp, result);
+      sse_shufps(cp->func, result, dst, SHUF(X, Y, Z, W));
+      dst = result;
+      break;
+
+   case TGSI_WRITEMASK_YZW:   /* likewise: old x is moved into the copy of result */
+      result = get_xmm_writable(cp, result);
+      sse_movss(cp->func, result, dst);
+      dst = result;
+      break;
+
+   default:
+      mask_write(cp, dst, result, reg->DstRegister.WriteMask);
+      break;
+   }
+
+   aos_adopt_xmm_reg(cp,
+                     dst,
+                     reg->DstRegister.File,
+                     reg->DstRegister.Index,
+                     TRUE);
+
+}
+
+static void inject_scalar( struct aos_compilation *cp,
+                           struct x86_reg dst,
+                           struct x86_reg result,
+                           ubyte swizzle )
+{
+   /* Callers pass a swizzle that exchanges element 0 with the target
+    * channel, so applying it twice restores the order (assuming SHUF()
+    * lists the source element of each channel - TODO confirm).
+    */
+   sse_shufps(cp->func, dst, dst, swizzle);
+   sse_movss(cp->func, dst, result);
+   sse_shufps(cp->func, dst, dst, swizzle);
+}
+
+/* Store a scalar held in element 0 of 'result'.  With a single-channel
+ * writemask only that channel of the destination is replaced.  With any
+ * other non-zero mask the scalar is replicated to all four channels and
+ * passed to store_dest().
+ */
+static void store_scalar_dest( struct aos_compilation *cp,
+                               const struct tgsi_full_dst_register *reg,
+                               struct x86_reg result )
+{
+   unsigned writemask = reg->DstRegister.WriteMask;
+   struct x86_reg dst;
+
+   if (writemask != TGSI_WRITEMASK_X &&
+       writemask != TGSI_WRITEMASK_Y &&
+       writemask != TGSI_WRITEMASK_Z &&
+       writemask != TGSI_WRITEMASK_W &&
+       writemask != 0)
+   {
+      result = get_xmm_writable(cp, result); /* already true, right? */
+      sse_shufps(cp->func, result, result, SHUF(X,X,X,X));
+      store_dest(cp, reg, result);
+      return;
+   }
+
+   result = get_xmm(cp, result);
+   dst = aos_get_shader_reg_xmm(cp,
+                                reg->DstRegister.File,
+                                reg->DstRegister.Index);
+
+
+
+   switch (reg->DstRegister.WriteMask) {
+   case TGSI_WRITEMASK_X:
+      sse_movss(cp->func, dst, result);
+      break;
+
+   case TGSI_WRITEMASK_Y:
+      inject_scalar(cp, dst, result, SHUF(Y, X, Z, W));
+      break;
+
+   case TGSI_WRITEMASK_Z:
+      inject_scalar(cp, dst, result, SHUF(Z, Y, X, W));
+      break;
+
+   case TGSI_WRITEMASK_W:
+      inject_scalar(cp, dst, result, SHUF(W, Y, Z, X));
+      break;
+
+   default:   /* writemask 0: nothing is stored, dst is still adopted as dirty below */
+      break;
+   }
+
+   aos_adopt_xmm_reg(cp,
+                     dst,
+                     reg->DstRegister.File,
+                     reg->DstRegister.Index,
+                     TRUE);
+}
+
+
+/* Store st(0) to float 'channel' at 'ptr' if the writemask enables it; st(0) is not popped. */
+static void x87_fst_or_nop( struct x86_function *func,
+                            unsigned writemask,
+                            unsigned channel,
+                            struct x86_reg ptr )
+{
+   assert(ptr.file == file_REG32);
+   if (writemask & (1<<channel))
+      x87_fst( func, x86_make_disp(ptr, channel * sizeof(float)) );
+}
+
+static void x87_fstp_or_pop( struct x86_function *func,   /* Same as x87_fst_or_nop(), but st(0) */
+                             unsigned writemask,          /* is popped in either case: into the  */
+                             unsigned channel,            /* destination when enabled, otherwise */
+                             struct x86_reg ptr )         /* with an fstp onto st(0) itself.     */
+{
+   assert(ptr.file == file_REG32);
+   if (writemask & (1<<channel))
+      x87_fstp( func, x86_make_disp(ptr, channel * sizeof(float)) );
+   else
+      x87_fstp( func, x86_make_reg( file_x87, 0 ));
+}
+
+
+
+/* Store the scalar in st(0) to every channel of 'dst' enabled in its
+ * writemask, then pop it.  The destination is accessed in memory through
+ * get_dst_ptr(), which writes back and releases any xmm copy of it.
+ */
+static void x87_fstp_dest4( struct aos_compilation *cp,
+                            const struct tgsi_full_dst_register *dst )
+{
+   struct x86_reg ptr = get_dst_ptr(cp, dst);
+   unsigned writemask = dst->DstRegister.WriteMask;
+
+   x87_fst_or_nop(cp->func, writemask, 0, ptr);
+   x87_fst_or_nop(cp->func, writemask, 1, ptr);
+   x87_fst_or_nop(cp->func, writemask, 2, ptr);
+   x87_fstp_or_pop(cp->func, writemask, 3, ptr);
+}
+
+/* Save current x87 state and put it into single precision mode.
+ */
+static void save_fpu_state( struct aos_compilation *cp )
+{  /* NOTE(review): only stores the control word into machine->fpu_restore; no precision change is emitted here. */
+   x87_fnstcw( cp->func, x86_make_disp(cp->machine_EDX,
+                                       Offset(struct aos_machine, fpu_restore)));
+}
+
+static void restore_fpu_state( struct aos_compilation *cp )
+{  /* Clears pending x87 exceptions, then reloads the control word stored by save_fpu_state(). */
+   x87_fnclex(cp->func);
+   x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX,
+                                      Offset(struct aos_machine, fpu_restore)));
+}
+
+static void set_fpu_round_neg_inf( struct aos_compilation *cp )
+{  /* Loads machine->fpu_rnd_neg_inf as control word, unless cp->fpucntl says it is already in effect. */
+   if (cp->fpucntl != FPU_RND_NEG) {
+      cp->fpucntl = FPU_RND_NEG;
+      x87_fnclex(cp->func);
+      x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX,
+                                         Offset(struct aos_machine, fpu_rnd_neg_inf)));
+   }
+}
+
+static void set_fpu_round_nearest( struct aos_compilation *cp )
+{  /* Same as above, for machine->fpu_rnd_nearest. */
+   if (cp->fpucntl != FPU_RND_NEAREST) {
+      cp->fpucntl = FPU_RND_NEAREST;
+      x87_fnclex(cp->func);
+      x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX,
+                                         Offset(struct aos_machine, fpu_rnd_nearest)));
+   }
+}
+
+/* Emit code replacing st(0) with 2^st(0); the x87 stack depth is the same on exit (asserted). */
+static void x87_emit_ex2( struct aos_compilation *cp )
+{
+   struct x86_reg st0 = x86_make_reg(file_x87, 0);
+   struct x86_reg st1 = x86_make_reg(file_x87, 1);
+   int stack = cp->func->x87_stack;
+
+/* Disabled in the original, so frndint below rounds in whatever mode is
+ * currently loaded:
+ *    set_fpu_round_neg_inf( cp );
+ */
+
+   x87_fld(cp->func, st0);          /* a a */
+   x87_fprndint( cp->func );        /* int(a) a*/
+   x87_fsubr(cp->func, st1, st0);   /* int(a) frc(a) */
+   x87_fxch(cp->func, st1);         /* frc(a) int(a) */
+   x87_f2xm1(cp->func);             /* (2^frc(a))-1 int(a) */
+   x87_fld1(cp->func);              /* 1 (2^frc(a))-1 int(a) */
+   x87_faddp(cp->func, st1);        /* 2^frac(a) int(a) */
+   x87_fscale(cp->func);            /* (2^frac(a)*2^int(int(a))) int(a) */
+                                    /* 2^a int(a) */
+   x87_fstp(cp->func, st1);         /* 2^a */
+
+   assert( stack == cp->func->x87_stack);
+
+}
+
+static void PIPE_CDECL print_reg( const char *msg,     /* Run-time helper called from code  */
+                                  const float *reg )   /* generated by emit_print().        */
+{
+   debug_printf("%s: %f %f %f %f\n", msg, reg[0], reg[1], reg[2], reg[3]);
+}
+
+static void emit_print( struct aos_compilation *cp,
+                        const char *message, /* must point to a static string! */
+                        unsigned file,
+                        unsigned idx )
+{
+   struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
+   struct x86_reg arg = aos_get_shader_reg_ptr( cp, file, idx );
+   unsigned i;
+
+   /* There shouldn't be anything on the x87 stack.  Can add this
+    * capacity later if need be.
+    */
+   assert(cp->func->x87_stack == 0);
+
+   /* For absolute correctness, need to spill/invalidate all XMM regs
+    * too.  We're obviously not concerned about performance on this
+    * debug path, so here goes:
+    */
+   for (i = 0; i < 8; i++) {
+      if (cp->xmm[i].dirty)
+         spill(cp, i);
+
+      aos_release_xmm_reg(cp, i);
+   }
+
+   /* Push caller-save (ie scratch) regs.
+    */
+   x86_cdecl_caller_push_regs( cp->func );
+
+
+   /* Push the arguments:
+    * last argument first - the address of the register, then the
+    * message pointer as an immediate.
+    */
+   x86_lea( cp->func, ecx, arg );
+   x86_push( cp->func, ecx );
+   x86_push_imm32( cp->func, (int)message );
+
+   /* Call the helper.  Could call debug_printf directly, but
+    * print_reg is a nice place to put a breakpoint if need be.
+    * The two pops afterwards remove the two arguments again.
+    */
+   x86_mov_reg_imm( cp->func, ecx, (int)print_reg );
+   x86_call( cp->func, ecx );
+   x86_pop( cp->func, ecx );
+   x86_pop( cp->func, ecx );
+
+   /* Pop caller-save regs
+    */
+   x86_cdecl_caller_pop_regs( cp->func );
+
+   /* Done...
+    */
+}
+
+/**
+ * The traditional instructions.  All operate on internal registers
+ * and ignore write masks and swizzling issues.
+ */ + +static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); + struct x86_reg tmp = aos_get_xmm_reg(cp); + + sse_movaps(cp->func, tmp, arg0); + sse_mulps(cp->func, tmp, neg); + sse_maxps(cp->func, tmp, arg0); + + store_dest(cp, &op->FullDstRegisters[0], tmp); + return TRUE; +} + +static boolean emit_ADD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg dst = get_xmm_writable(cp, arg0); + + sse_addps(cp->func, dst, arg1); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + +static boolean emit_COS( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + x87_fld_src(cp, &op->FullSrcRegisters[0], 0); + x87_fcos(cp->func); + x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + return TRUE; +} + +/* The dotproduct instructions don't really do that well in sse: + * XXX: produces wrong results -- disabled. + */ +static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg tmp = aos_get_xmm_reg(cp); + struct x86_reg dst = get_xmm_writable(cp, arg0); + + sse_mulps(cp->func, dst, arg1); + /* Now the hard bit: sum the first 3 values: + */ + sse_movhlps(cp->func, tmp, dst); + sse_addss(cp->func, dst, tmp); /* a*x+c*z, b*y, ?, ? 
*/ + emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z)); + sse_addss(cp->func, dst, tmp); + + aos_release_xmm_reg(cp, tmp.idx); + store_scalar_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + +static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg tmp = aos_get_xmm_reg(cp); + struct x86_reg dst = get_xmm_writable(cp, arg0); + + sse_mulps(cp->func, dst, arg1); + + /* Now the hard bit: sum the values: + */ + sse_movhlps(cp->func, tmp, dst); + sse_addps(cp->func, dst, tmp); /* a*x+c*z, b*y+d*w, a*x+c*z, b*y+d*w */ + emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z)); + sse_addss(cp->func, dst, tmp); + + aos_release_xmm_reg(cp, tmp.idx); + store_scalar_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + +static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg tmp = aos_get_xmm_reg(cp); + struct x86_reg dst = get_xmm_writable(cp, arg0); + + sse_mulps(cp->func, dst, arg1); + + /* Now the hard bit: sum the values (from DP3): + */ + sse_movhlps(cp->func, tmp, dst); + sse_addss(cp->func, dst, tmp); /* a*x+c*z, b*y, ?, ? 
*/ + emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z)); + sse_addss(cp->func, dst, tmp); + emit_pshufd(cp, tmp, arg1, SHUF(W,W,W,W)); + sse_addss(cp->func, dst, tmp); + + aos_release_xmm_reg(cp, tmp.idx); + store_scalar_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + +static boolean emit_DST( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg dst = aos_get_xmm_reg(cp); + struct x86_reg tmp = aos_get_xmm_reg(cp); + struct x86_reg ones = aos_get_internal(cp, IMM_ONES); + +/* dst[0] = 1.0 * 1.0F; */ +/* dst[1] = arg0[1] * arg1[1]; */ +/* dst[2] = arg0[2] * 1.0; */ +/* dst[3] = 1.0 * arg1[3]; */ + + emit_shuf_copy2(cp, dst, arg0, ones, SHUF(X,W,Z,Y)); + emit_shuf_copy2(cp, tmp, arg1, ones, SHUF(X,Z,Y,W)); + sse_mulps(cp->func, dst, tmp); + + aos_release_xmm_reg(cp, tmp.idx); + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + +static boolean emit_LG2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + x87_fld1(cp->func); /* 1 */ + x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 1 */ + x87_fyl2x(cp->func); /* log2(a0) */ + x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + return TRUE; +} + + +static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + x87_fld_src(cp, &op->FullSrcRegisters[0], 0); + x87_emit_ex2(cp); + x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + return TRUE; +} + + +static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); + unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + int i; + + set_fpu_round_neg_inf( cp ); + + /* Load all sources first to avoid aliasing + */ + for (i = 3; i >= 0; i--) { + if (writemask & (1<<i)) { + x87_fld_src(cp, &op->FullSrcRegisters[0], i); + } + } + + 
for (i = 0; i < 4; i++) { + if (writemask & (1<<i)) { + x87_fprndint( cp->func ); + x87_fstp(cp->func, x86_make_disp(dst, i*4)); + } + } + + return TRUE; +} + + +static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); + unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + int i; + + set_fpu_round_nearest( cp ); + + /* Load all sources first to avoid aliasing + */ + for (i = 3; i >= 0; i--) { + if (writemask & (1<<i)) { + x87_fld_src(cp, &op->FullSrcRegisters[0], i); + } + } + + for (i = 0; i < 4; i++) { + if (writemask & (1<<i)) { + x87_fprndint( cp->func ); + x87_fstp(cp->func, x86_make_disp(dst, i*4)); + } + } + + return TRUE; +} + + +static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); + struct x86_reg st0 = x86_make_reg(file_x87, 0); + struct x86_reg st1 = x86_make_reg(file_x87, 1); + unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + int i; + + set_fpu_round_neg_inf( cp ); + + /* suck all the source values onto the stack before writing out any + * dst, which may alias... 
+ */ + for (i = 3; i >= 0; i--) { + if (writemask & (1<<i)) { + x87_fld_src(cp, &op->FullSrcRegisters[0], i); + } + } + + for (i = 0; i < 4; i++) { + if (writemask & (1<<i)) { + x87_fld(cp->func, st0); /* a a */ + x87_fprndint( cp->func ); /* flr(a) a */ + x87_fsubp(cp->func, st1); /* frc(a) */ + x87_fstp(cp->func, x86_make_disp(dst, i*4)); + } + } + + return TRUE; +} + + + + + + +static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); + unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + unsigned lit_count = cp->lit_count++; + struct x86_reg result, arg0; + unsigned i; + +#if 1 + /* For absolute correctness, need to spill/invalidate all XMM regs + * too. + */ + for (i = 0; i < 8; i++) { + if (cp->xmm[i].dirty) + spill(cp, i); + aos_release_xmm_reg(cp, i); + } +#endif + + if (writemask != TGSI_WRITEMASK_XYZW) + result = x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, tmp[0])); + else + result = get_dst_ptr(cp, &op->FullDstRegisters[0]); + + + arg0 = fetch_src( cp, &op->FullSrcRegisters[0] ); + if (arg0.file == file_XMM) { + struct x86_reg tmp = x86_make_disp(cp->machine_EDX, + Offset(struct aos_machine, tmp[1])); + sse_movaps( cp->func, tmp, arg0 ); + arg0 = tmp; + } + + + + /* Push caller-save (ie scratch) regs. 
+ */ + x86_cdecl_caller_push_regs( cp->func ); + + /* Push the arguments: + */ + x86_push_imm32( cp->func, lit_count ); + + x86_lea( cp->func, ecx, arg0 ); + x86_push( cp->func, ecx ); + + x86_lea( cp->func, ecx, result ); + x86_push( cp->func, ecx ); + + x86_push( cp->func, cp->machine_EDX ); + + if (lit_count < MAX_LIT_INFO) { + x86_mov( cp->func, ecx, x86_make_disp( cp->machine_EDX, + Offset(struct aos_machine, lit_info) + + lit_count * sizeof(struct lit_info) + + Offset(struct lit_info, func))); + } + else { + x86_mov_reg_imm( cp->func, ecx, (int)aos_do_lit ); + } + + x86_call( cp->func, ecx ); + + x86_pop( cp->func, ecx ); /* fixme... */ + x86_pop( cp->func, ecx ); + x86_pop( cp->func, ecx ); + x86_pop( cp->func, ecx ); + + x86_cdecl_caller_pop_regs( cp->func ); + + if (writemask != TGSI_WRITEMASK_XYZW) { + store_dest( cp, + &op->FullDstRegisters[0], + get_xmm_writable( cp, result ) ); + } + + return TRUE; +} + +#if 0 +static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); + unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + + if (writemask & TGSI_WRITEMASK_YZ) { + struct x86_reg st1 = x86_make_reg(file_x87, 1); + struct x86_reg st2 = x86_make_reg(file_x87, 2); + + /* a1' = a1 <= 0 ? 1 : a1; + */ + x87_fldz(cp->func); /* 1 0 */ +#if 1 + x87_fld1(cp->func); /* 1 0 */ +#else + /* Correct but slow due to fp exceptions generated in fyl2x - fix me. 
+ */ + x87_fldz(cp->func); /* 1 0 */ +#endif + x87_fld_src(cp, &op->FullSrcRegisters[0], 1); /* a1 1 0 */ + x87_fcomi(cp->func, st2); /* a1 1 0 */ + x87_fcmovb(cp->func, st1); /* a1' 1 0 */ + x87_fstp(cp->func, st1); /* a1' 0 */ + x87_fstp(cp->func, st1); /* a1' */ + + x87_fld_src(cp, &op->FullSrcRegisters[0], 3); /* a3 a1' */ + x87_fxch(cp->func, st1); /* a1' a3 */ + + + /* Compute pow(a1, a3) + */ + x87_fyl2x(cp->func); /* a3*log2(a1) */ + x87_emit_ex2( cp ); /* 2^(a3*log2(a1)) */ + + + /* a0' = max2(a0, 0): + */ + x87_fldz(cp->func); /* 0 r2 */ + x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 0 r2 */ + x87_fcomi(cp->func, st1); + x87_fcmovb(cp->func, st1); /* a0' 0 r2 */ + + x87_fst_or_nop(cp->func, writemask, 1, dst); /* result[1] = a0' */ + + x87_fcomi(cp->func, st1); /* a0' 0 r2 */ + x87_fcmovnbe(cp->func, st2); /* r2' 0' r2 */ + + x87_fstp_or_pop(cp->func, writemask, 2, dst); /* 0 r2 */ + x87_fpop(cp->func); /* r2 */ + x87_fpop(cp->func); + } + + if (writemask & TGSI_WRITEMASK_XW) { + x87_fld1(cp->func); + x87_fst_or_nop(cp->func, writemask, 0, dst); + x87_fstp_or_pop(cp->func, writemask, 3, dst); + } + + return TRUE; +} +#endif + + + +static boolean emit_MAX( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg dst = get_xmm_writable(cp, arg0); + + sse_maxps(cp->func, dst, arg1); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + + +static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg dst = get_xmm_writable(cp, arg0); + + sse_minps(cp->func, dst, arg1); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + +static boolean emit_MOV( struct aos_compilation *cp, const 
struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg dst = get_xmm_writable(cp, arg0); + + /* potentially nothing to do: the fetch above and the store below are the whole move */ + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} +/* MUL: dst = src0 * src1 using packed MULPS on a writable register holding src0. Always returns TRUE. */ +static boolean emit_MUL( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg dst = get_xmm_writable(cp, arg0); + + sse_mulps(cp->func, dst, arg1); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + +/* MAD: dst = src0 * src1 + src2, emitted as MULPS followed by ADDPS. Always returns TRUE. */ +static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg2 = fetch_src(cp, &op->FullSrcRegisters[2]); + + /* If we can't clobber old contents of arg0, get a temporary & copy + * it there, then clobber it... + */ + arg0 = get_xmm_writable(cp, arg0); + + sse_mulps(cp->func, arg0, arg1); + sse_addps(cp->func, arg0, arg2); + store_dest(cp, &op->FullDstRegisters[0], arg0); + return TRUE; +} + + + +/* A wrapper for powf(). + * Makes sure it is cdecl and operates on floats; calls util_fast_pow() instead when FAST_MATH is set. + */ +static float PIPE_CDECL _powerf( float x, float y ) +{ +#if FAST_MATH + return util_fast_pow(x, y); +#else + return powf( x, y ); +#endif +} +/* cdecl wrapper around util_fast_exp2(); its address is embedded in the code generated by emit_EXPBASE2. Only built with FAST_MATH. */ +#if FAST_MATH +static float PIPE_CDECL _exp2(float x) +{ + return util_fast_exp2(x); +} +#endif + + +/* Really not sufficient -- need to check for conditions that could + * generate inf/nan values, which will slow things down hugely.
+ */ +static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ +#if 0 + x87_fld_src(cp, &op->FullSrcRegisters[1], 0); /* a1.x */ + x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0.x a1.x */ + x87_fyl2x(cp->func); /* a1*log2(a0) */ + + x87_emit_ex2( cp ); /* 2^(a1*log2(a0)) */ + + x87_fstp_dest4(cp, &op->FullDstRegisters[0]); +#else + uint i; /* enabled path: emit a call to the _powerf() C helper instead of inline x87 code */ + + /* For absolute correctness, need to spill/invalidate all XMM regs + * too. + */ + for (i = 0; i < 8; i++) { + if (cp->xmm[i].dirty) + spill(cp, i); + aos_release_xmm_reg(cp, i); + } + + /* Push caller-save (ie scratch) regs. + */ + x86_cdecl_caller_push_regs( cp->func ); + /* Reserve 8 bytes of stack for the two float arguments */ + x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -8) ); + /* Store src1.x at [esp+4] and src0.x at [esp+0], i.e. the call is _powerf(src0.x, src1.x) */ + x87_fld_src( cp, &op->FullSrcRegisters[1], 0 ); + x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 4 ) ); + x87_fld_src( cp, &op->FullSrcRegisters[0], 0 ); + x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) ); + + /* tmp_EAX has been pushed & will be restored below */ + x86_mov_reg_imm( cp->func, cp->tmp_EAX, (unsigned long) _powerf ); + x86_call( cp->func, cp->tmp_EAX ); + /* Release the 8-byte argument area */ + x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, 8) ); + + x86_cdecl_caller_pop_regs( cp->func ); + + /* Note retval on x87 stack: + */ + cp->func->x87_stack++; + + x87_fstp_dest4( cp, &op->FullDstRegisters[0] ); +#endif + return TRUE; +} + +/* EX2 emitted as a call to the _exp2() helper: same spill / push / call / pop sequence as emit_POW, with a single 4-byte argument. Only built with FAST_MATH. */ +#if FAST_MATH +static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + uint i; + + /* For absolute correctness, need to spill/invalidate all XMM regs + * too. + */ + for (i = 0; i < 8; i++) { + if (cp->xmm[i].dirty) + spill(cp, i); + aos_release_xmm_reg(cp, i); + } + + /* Push caller-save (ie scratch) regs.
+ */ + x86_cdecl_caller_push_regs( cp->func ); + /* Reserve 4 bytes of stack for the single float argument */ + x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -4) ); + + x87_fld_src( cp, &op->FullSrcRegisters[0], 0 ); + x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) ); + + /* tmp_EAX has been pushed & will be restored below */ + x86_mov_reg_imm( cp->func, cp->tmp_EAX, (unsigned long) _exp2 ); + x86_call( cp->func, cp->tmp_EAX ); + /* Release the 4-byte argument area */ + x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, 4) ); + + x86_cdecl_caller_pop_regs( cp->func ); + + /* Note retval on x87 stack: + */ + cp->func->x87_stack++; + + x87_fstp_dest4( cp, &op->FullDstRegisters[0] ); + + return TRUE; +} +#endif + +/* RCP: scalar reciprocal of src0. With cp->have_sse2 set the RCPSS estimate is used unrefined; otherwise 1.0 (IMM_ONES) is divided by src0 with MOVSS + DIVSS. Always returns TRUE. */ +static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg dst = aos_get_xmm_reg(cp); + + if (cp->have_sse2) { + sse2_rcpss(cp->func, dst, arg0); + /* extend precision here... + */ + } + else { + struct x86_reg ones = aos_get_internal(cp, IMM_ONES); + sse_movss(cp->func, dst, ones); + sse_divss(cp->func, dst, arg0); + } + + store_scalar_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + + +/* Although rsqrtps() and rcpps() are low precision on some/all SSE + * implementations, it is possible to improve its precision at + * fairly low cost, using a newton/raphson step, as below: + * + * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a) + * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)] + * or: + * x1 = rsqrtps(a) * [1.5 - .5 * a * rsqrtps(a) * rsqrtps(a)] + * + * + * See: http://softwarecommunity.intel.com/articles/eng/1818.htm + */ +static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + /* The if (0) branch is the disabled variant: raw RSQRTSS estimate with no refinement step */ + if (0) { + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg r = aos_get_xmm_reg(cp); + sse_rsqrtss(cp->func, r, arg0); + store_scalar_dest(cp, &op->FullDstRegisters[0], r); + return TRUE; + } + else { + struct x86_reg arg0 =
fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg r = aos_get_xmm_reg(cp); + /* The IMM_RSQ internal holds -.5 at byte offset 0 and 1.5 at byte offset 4 */ + struct x86_reg neg_half = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ ); + struct x86_reg one_point_five = x86_make_disp( neg_half, 4 ); + struct x86_reg src = get_xmm_writable( cp, arg0 ); + + sse_rsqrtss( cp->func, r, src ); /* rsqrtss(a) */ + sse_mulss( cp->func, src, neg_half ); /* -.5 * a */ + sse_mulss( cp->func, src, r ); /* -.5 * a * r */ + sse_mulss( cp->func, src, r ); /* -.5 * a * r * r */ + sse_addss( cp->func, src, one_point_five ); /* 1.5 - .5 * a * r * r */ + sse_mulss( cp->func, r, src ); /* r * (1.5 - .5 * a * r * r) */ + + store_scalar_dest(cp, &op->FullDstRegisters[0], r); + return TRUE; + } +} + +/* SGE: CMPPS with the not-less-than predicate builds a per-component mask from src0 and src1, which is ANDed with IMM_ONES so each component becomes 1.0 or 0.0. */ +static boolean emit_SGE( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg ones = aos_get_internal(cp, IMM_ONES); + struct x86_reg dst = get_xmm_writable(cp, arg0); + + sse_cmpps(cp->func, dst, arg1, cc_NotLessThan); + sse_andps(cp->func, dst, ones); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} +/* SIN: load component 0 of src0 onto the x87 stack, apply FSIN and store the result through x87_fstp_dest4. */ +static boolean emit_SIN( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + x87_fld_src(cp, &op->FullSrcRegisters[0], 0); + x87_fsin(cp->func); + x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + return TRUE; +} + + +/* SLT: same as emit_SGE but with the less-than predicate. */ +static boolean emit_SLT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg ones = aos_get_internal(cp, IMM_ONES); + struct x86_reg dst = get_xmm_writable(cp, arg0); + + sse_cmpps(cp->func, dst, arg1, cc_LessThan); + sse_andps(cp->func, dst, ones); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} +/* SUB: dst = src0 - src1 using packed SUBPS. */ +static boolean emit_SUB( struct aos_compilation *cp, const struct
tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg dst = get_xmm_writable(cp, arg0); + + sse_subps(cp->func, dst, arg1); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + +/* XPD: cross product of src0 and src1 built from two shuffled multiplies, a subtract and a final shuffle; component 3 of the result is undefined (see the formulas below). */ +static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg tmp0 = aos_get_xmm_reg(cp); + struct x86_reg tmp1 = aos_get_xmm_reg(cp); + + emit_pshufd(cp, tmp1, arg1, SHUF(Y, Z, X, W)); + sse_mulps(cp->func, tmp1, arg0); + emit_pshufd(cp, tmp0, arg0, SHUF(Y, Z, X, W)); + sse_mulps(cp->func, tmp0, arg1); + sse_subps(cp->func, tmp1, tmp0); + sse_shufps(cp->func, tmp1, tmp1, SHUF(Y, Z, X, W)); + +/* dst[2] = arg0[0] * arg1[1] - arg0[1] * arg1[0]; */ +/* dst[0] = arg0[1] * arg1[2] - arg0[2] * arg1[1]; */ +/* dst[1] = arg0[2] * arg1[0] - arg0[0] * arg1[2]; */ +/* dst[3] is undef */ + + /* tmp0 is scratch only; tmp1 carries the result to store_dest */ + aos_release_xmm_reg(cp, tmp0.idx); + store_dest(cp, &op->FullDstRegisters[0], tmp1); + return TRUE; +} + + +/* Dispatch one TGSI instruction to its emit_* routine by opcode and return that routine result; asserts the x87 stack is empty on entry. */ +static boolean +emit_instruction( struct aos_compilation *cp, + struct tgsi_full_instruction *inst ) +{ + x87_assert_stack_empty(cp->func); + + switch( inst->Instruction.Opcode ) { + case TGSI_OPCODE_MOV: + return emit_MOV( cp, inst ); + + case TGSI_OPCODE_LIT: + return emit_LIT(cp, inst); + + case TGSI_OPCODE_RCP: + return emit_RCP(cp, inst); + + case TGSI_OPCODE_RSQ: + return emit_RSQ(cp, inst); + + case TGSI_OPCODE_EXP: + /*return emit_EXP(cp, inst);*/ + return FALSE; + + case TGSI_OPCODE_LOG: + /*return emit_LOG(cp, inst);*/ + return FALSE; + + case TGSI_OPCODE_MUL: + return emit_MUL(cp, inst); + + case TGSI_OPCODE_ADD: + return emit_ADD(cp, inst); + + case TGSI_OPCODE_DP3: + return emit_DP3(cp, inst); + + case TGSI_OPCODE_DP4: + return emit_DP4(cp, inst); + + case
TGSI_OPCODE_DST: + return emit_DST(cp, inst); + + case TGSI_OPCODE_MIN: + return emit_MIN(cp, inst); + + case TGSI_OPCODE_MAX: + return emit_MAX(cp, inst); + + case TGSI_OPCODE_SLT: + return emit_SLT(cp, inst); + + case TGSI_OPCODE_SGE: + return emit_SGE(cp, inst); + + case TGSI_OPCODE_MAD: + return emit_MAD(cp, inst); + + case TGSI_OPCODE_SUB: + return emit_SUB(cp, inst); + + case TGSI_OPCODE_LERP: +// return emit_LERP(cp, inst); + return FALSE; + + case TGSI_OPCODE_FRAC: + return emit_FRC(cp, inst); + + case TGSI_OPCODE_CLAMP: +// return emit_CLAMP(cp, inst); + return FALSE; + + case TGSI_OPCODE_FLOOR: + return emit_FLR(cp, inst); + + case TGSI_OPCODE_ROUND: + return emit_RND(cp, inst); + + case TGSI_OPCODE_EXPBASE2: +#if FAST_MATH + return emit_EXPBASE2(cp, inst); +#elif 0 + /* this seems to fail for "larger" exponents. + * See glean tvertProg1's EX2 test. + */ + return emit_EX2(cp, inst); +#else + return FALSE; +#endif + + case TGSI_OPCODE_LOGBASE2: + return emit_LG2(cp, inst); + + case TGSI_OPCODE_POWER: + return emit_POW(cp, inst); + + case TGSI_OPCODE_CROSSPRODUCT: + return emit_XPD(cp, inst); + + case TGSI_OPCODE_ABS: + return emit_ABS(cp, inst); + + case TGSI_OPCODE_DPH: + return emit_DPH(cp, inst); + + case TGSI_OPCODE_COS: + return emit_COS(cp, inst); + + case TGSI_OPCODE_SIN: + return emit_SIN(cp, inst); + + case TGSI_OPCODE_END: + return TRUE; + + default: /* any opcode without an emitter: build_vertex_program treats FALSE as failure */ + return FALSE; + } +} + +/* Emit the viewport transform of the position output: pos = pos * machine->scale + machine->translate, then re-adopt the register as a dirty output. */ +static boolean emit_viewport( struct aos_compilation *cp ) +{ + struct x86_reg pos = aos_get_shader_reg_xmm(cp, + TGSI_FILE_OUTPUT, + cp->vaos->draw->vs.position_output ); + + struct x86_reg scale = x86_make_disp(cp->machine_EDX, + Offset(struct aos_machine, scale)); + + struct x86_reg translate = x86_make_disp(cp->machine_EDX, + Offset(struct aos_machine, translate)); + + sse_mulps(cp->func, pos, scale); + sse_addps(cp->func, pos, translate); + + aos_adopt_xmm_reg( cp, + pos, + TGSI_FILE_OUTPUT, + cp->vaos->draw->vs.position_output, + TRUE ); + return TRUE; +} + + +/*
This is useful to be able to see the results on softpipe. Doesn't + * do proper clipping, just assumes the backend can do it during + * rasterization -- for debug only... + */ +static boolean emit_rhw_viewport( struct aos_compilation *cp ) +{ + struct x86_reg tmp = aos_get_xmm_reg(cp); + struct x86_reg pos = aos_get_shader_reg_xmm(cp, + TGSI_FILE_OUTPUT, + cp->vaos->draw->vs.position_output); + + struct x86_reg scale = x86_make_disp(cp->machine_EDX, + Offset(struct aos_machine, scale)); + + struct x86_reg translate = x86_make_disp(cp->machine_EDX, + Offset(struct aos_machine, translate)); + + + /* tmp = RCPSS estimate of 1/w, replicated into all four channels. NOTE(review): sse2_rcpss is used here without testing cp->have_sse2 */ + emit_pshufd(cp, tmp, pos, SHUF(W, W, W, W)); + sse2_rcpss(cp->func, tmp, tmp); + sse_shufps(cp->func, tmp, tmp, SHUF(X, X, X, X)); + /* pos = pos * scale * (1/w) + translate */ + sse_mulps(cp->func, pos, scale); + sse_mulps(cp->func, pos, tmp); + sse_addps(cp->func, pos, translate); + + /* Set pos[3] = w + */ + mask_write(cp, pos, tmp, TGSI_WRITEMASK_W); + + aos_adopt_xmm_reg( cp, + pos, + TGSI_FILE_OUTPUT, + cp->vaos->draw->vs.position_output, + TRUE ); + return TRUE; +} + + +#if 0 +static boolean note_immediate( struct aos_compilation *cp, + struct tgsi_full_immediate *imm ) +{ + unsigned pos = cp->num_immediates++; + unsigned j; + + for (j = 0; j < imm->Immediate.Size; j++) { + cp->vaos->machine->immediate[pos][j] = imm->u.ImmediateFloat32[j].Float; + } + + return TRUE; +} +#endif + + + +/* Walk the shader tokens and record in cp->output_last_write[] the index of the last instruction that writes each TGSI_FILE_OUTPUT register. */ +static void find_last_write_outputs( struct aos_compilation *cp ) +{ + struct tgsi_parse_context parse; + unsigned this_instruction = 0; + unsigned i; + + tgsi_parse_init( &parse, cp->vaos->base.vs->state.tokens ); + + while (!tgsi_parse_end_of_tokens( &parse )) { + + tgsi_parse_token( &parse ); + /* Only instruction tokens are counted */ + if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) + continue; + + for (i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++) { + if (parse.FullToken.FullInstruction.FullDstRegisters[i].DstRegister.File == + TGSI_FILE_OUTPUT) + { + unsigned idx = parse.FullToken.FullInstruction.FullDstRegisters[i].DstRegister.Index; +
cp->output_last_write[idx] = this_instruction; /* NOTE(review): idx is not range-checked against the array size here */ + } + } + + this_instruction++; + } + + tgsi_parse_free( &parse ); +} + +/* Argument positions, as passed to x86_fn_arg, of the generated function: machine, start index or elts pointer, count, output buffer */ +#define ARG_MACHINE 1 +#define ARG_START_ELTS 2 +#define ARG_COUNT 3 +#define ARG_OUTBUF 4 + +/* Generate the x86 code for one entry point of the variant: varient->func[0] when linear is TRUE, func[1] otherwise. Returns FALSE when input fetch, an instruction emitter or output emission fails, or when cp.error was set. */ +static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, + boolean linear ) +{ + struct tgsi_parse_context parse; + struct aos_compilation cp; + unsigned fixup, label; + + util_init_math(); + + tgsi_parse_init( &parse, varient->base.vs->state.tokens ); + + memset(&cp, 0, sizeof(cp)); + + cp.insn_counter = 1; + cp.vaos = varient; + cp.have_sse2 = 1; /* NOTE(review): hard-coded; no CPU feature check is visible here */ + cp.func = &varient->func[ linear ? 0 : 1 ]; + /* Fixed register assignment used throughout the generated code */ + cp.tmp_EAX = x86_make_reg(file_REG32, reg_AX); + cp.idx_EBX = x86_make_reg(file_REG32, reg_BX); + cp.outbuf_ECX = x86_make_reg(file_REG32, reg_CX); + cp.machine_EDX = x86_make_reg(file_REG32, reg_DX); + cp.count_ESI = x86_make_reg(file_REG32, reg_SI); + cp.temp_EBP = x86_make_reg(file_REG32, reg_BP); + cp.stack_ESP = x86_make_reg( file_REG32, reg_SP ); + + x86_init_func(cp.func); + + find_last_write_outputs(&cp); + /* Save EBX, ESI and EBP; they are popped in reverse order before the final ret */ + x86_push(cp.func, cp.idx_EBX); + x86_push(cp.func, cp.count_ESI); + x86_push(cp.func, cp.temp_EBP); + + + /* Load arguments into regs: + */ + x86_mov(cp.func, cp.machine_EDX, x86_fn_arg(cp.func, ARG_MACHINE)); + x86_mov(cp.func, cp.idx_EBX, x86_fn_arg(cp.func, ARG_START_ELTS)); + x86_mov(cp.func, cp.count_ESI, x86_fn_arg(cp.func, ARG_COUNT)); + x86_mov(cp.func, cp.outbuf_ECX, x86_fn_arg(cp.func, ARG_OUTBUF)); + + + /* Compare count to zero and possibly bail. + */ + x86_xor(cp.func, cp.tmp_EAX, cp.tmp_EAX); + x86_cmp(cp.func, cp.count_ESI, cp.tmp_EAX); + fixup = x86_jcc_forward(cp.func, cc_E); + + + save_fpu_state( &cp ); + set_fpu_round_nearest( &cp ); + + aos_init_inputs( &cp, linear ); /* return value is not checked */ + + cp.x86_reg[0] = 0; + cp.x86_reg[1] = 0; + + /* Note address for loop jump + */ + label = x86_get_label(cp.func); + { + /* Fetch inputs... TODO: fetch lazily...
+ */ + if (!aos_fetch_inputs( &cp, linear )) + goto fail; + + /* Emit the shader: + */ + while( !tgsi_parse_end_of_tokens( &parse ) && !cp.error ) + { + tgsi_parse_token( &parse ); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: +#if 0 + if (!note_immediate( &cp, &parse.FullToken.FullImmediate )) + goto fail; +#endif + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (DISASSEM) + tgsi_dump_instruction( &parse.FullToken.FullInstruction, cp.insn_counter ); + + if (!emit_instruction( &cp, &parse.FullToken.FullInstruction )) + goto fail; + break; + } + + x87_assert_stack_empty(cp.func); + cp.insn_counter++; + + if (DISASSEM) + debug_printf("\n"); + } + + /* Drop the bookkeeping for every XMM register that does not hold a shader output; nothing is written back for them */ + { + unsigned i; + for (i = 0; i < 8; i++) { + if (cp.xmm[i].file != TGSI_FILE_OUTPUT) { + cp.xmm[i].file = TGSI_FILE_NULL; + cp.xmm[i].dirty = 0; + } + } + } + + if (cp.error) + goto fail; + + if (cp.vaos->base.key.clip) { + /* not really handling clipping, just do the rhw so we can + * see the results... + */ + emit_rhw_viewport(&cp); + } + else if (cp.vaos->base.key.viewport) { + emit_viewport(&cp); + } + + /* Emit output... TODO: do this eagerly after the last write to a + * given output. + */ + if (!aos_emit_outputs( &cp )) + goto fail; + + + /* Next vertex: + */ + x86_lea(cp.func, + cp.outbuf_ECX, + x86_make_disp(cp.outbuf_ECX, + cp.vaos->base.key.output_stride)); + + /* Incr index + */ + aos_incr_inputs( &cp, linear ); + } + /* decr count, loop if not zero + */ + x86_dec(cp.func, cp.count_ESI); + x86_jcc(cp.func, cc_NZ, label); + + restore_fpu_state(&cp); + + /* Land forward jump here: + */ + x86_fixup_fwd_jump(cp.func, fixup); + + /* Exit mmx state?
+ */ + if (cp.func->need_emms) + mmx_emms(cp.func); + /* Restore the registers pushed in the prologue, in reverse order */ + x86_pop(cp.func, cp.temp_EBP); + x86_pop(cp.func, cp.count_ESI); + x86_pop(cp.func, cp.idx_EBX); + + x87_assert_stack_empty(cp.func); + x86_ret(cp.func); + + tgsi_parse_free( &parse ); + return !cp.error; + + fail: + tgsi_parse_free( &parse ); + return FALSE; +} + + +/* Record base pointer and stride for vertex buffer number buf; indices at or above vaos->nr_vb are ignored. */ +static void vaos_set_buffer( struct draw_vs_varient *varient, + unsigned buf, + const void *ptr, + unsigned stride ) +{ + struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + + if (buf < vaos->nr_vb) { + vaos->buffer[buf].base_ptr = (char *)ptr; + vaos->buffer[buf].stride = stride; + } + + if (0) debug_printf("%s %d/%d: %p %d\n", __FUNCTION__, buf, vaos->nr_vb, ptr, stride); +} + + +/* Run the generated elts-path function: refresh the per-run machine fields (point size, constants, immediates, buffers) and call gen_run_elts. */ +static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, + const unsigned *elts, + unsigned count, + void *output_buffer ) +{ + struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct aos_machine *machine = vaos->draw->vs.aos_machine; + + if (0) debug_printf("%s %d\n", __FUNCTION__, count); + + machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; + machine->constants = vaos->draw->vs.aligned_constants; + machine->immediates = vaos->base.vs->immediates; + machine->buffer = vaos->buffer; + + vaos->gen_run_elts( machine, + elts, + count, + output_buffer ); +} +/* Run the generated linear-path function: same machine setup as vaos_run_elts, then call gen_run_linear and spot-check three internal constants. */ +static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, + unsigned start, + unsigned count, + void *output_buffer ) +{ + struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct aos_machine *machine = vaos->draw->vs.aos_machine; + + if (0) debug_printf("%s %d %d const: %x\n", __FUNCTION__, start, count, + vaos->base.key.const_vbuffers); + + machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; + machine->constants = vaos->draw->vs.aligned_constants; + machine->immediates = vaos->base.vs->immediates; + machine->buffer = vaos->buffer; + +
vaos->gen_run_linear( machine, + start, + count, + output_buffer ); + + /* Sanity spot checks to make sure we didn't trash our constants */ + assert(machine->internal[IMM_ONES][0] == 1.0f); + assert(machine->internal[IMM_IDENTITY][0] == 0.0f); + assert(machine->internal[IMM_NEGS][0] == -1.0f); +} + + +/* Destroy a variant: free the buffer array, release both generated functions, then free the variant itself. */ +static void vaos_destroy( struct draw_vs_varient *varient ) +{ + struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + + FREE( vaos->buffer ); + + x86_release_func( &vaos->func[0] ); + x86_release_func( &vaos->func[1] ); + + FREE(vaos); +} + + +/* Allocate an SSE variant for vs/key and generate both entry points (linear and elts). Returns NULL after cleaning up if allocation or code generation fails. */ +static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ) +{ + unsigned i; + struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse); + + if (!vaos) + goto fail; + + vaos->base.key = *key; + vaos->base.vs = vs; + vaos->base.set_buffer = vaos_set_buffer; + vaos->base.destroy = vaos_destroy; + vaos->base.run_linear = vaos_run_linear; + vaos->base.run_elts = vaos_run_elts; + + vaos->draw = vs->draw; + /* nr_vb = highest vertex buffer index referenced by any input element, plus one */ + for (i = 0; i < key->nr_inputs; i++) + vaos->nr_vb = MAX2( vaos->nr_vb, key->element[i].in.buffer + 1 ); + /* NOTE(review): with no inputs nr_vb stays 0 and this requests zero bytes; whether MALLOC then returns NULL and takes the fail path is not visible here - confirm */ + vaos->buffer = MALLOC( vaos->nr_vb * sizeof(vaos->buffer[0]) ); + if (!vaos->buffer) + goto fail; + + debug_printf("nr_vb: %d const: %x\n", vaos->nr_vb, vaos->base.key.const_vbuffers); + +#if 0 + tgsi_dump(vs->state.tokens, 0); +#endif + /* TRUE builds func[0] (linear path), FALSE builds func[1] (elts path) */ + if (!build_vertex_program( vaos, TRUE )) + goto fail; + + if (!build_vertex_program( vaos, FALSE )) + goto fail; + + vaos->gen_run_linear = (vaos_run_linear_func)x86_get_func(&vaos->func[0]); + if (!vaos->gen_run_linear) + goto fail; + + vaos->gen_run_elts = (vaos_run_elts_func)x86_get_func(&vaos->func[1]); + if (!vaos->gen_run_elts) + goto fail; + + return &vaos->base; + + fail: + if (vaos && vaos->buffer) + FREE(vaos->buffer); + + if (vaos) + x86_release_func( &vaos->func[0] ); + + if (vaos) + x86_release_func( &vaos->func[1] ); + + FREE(vaos); + + return NULL; +} + +
+struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ) +{ /* Public constructor: try the SSE code generator first */ + struct draw_vs_varient *varient = varient_aos_sse( vs, key ); + /* SSE variant could not be built: fall back to the generic variant */ + if (varient == NULL) { + varient = draw_vs_varient_generic( vs, key ); + } + + return varient; +} + + + +#endif /* PIPE_ARCH_X86 */ diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h new file mode 100644 index 0000000000..264387517b --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -0,0 +1,253 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef DRAW_VS_AOS_H +#define DRAW_VS_AOS_H + +#include "pipe/p_config.h" + +#ifdef PIPE_ARCH_X86 + +struct tgsi_token; +struct x86_function; + +#include "pipe/p_state.h" +#include "rtasm/rtasm_x86sse.h" + + + + +/* Vector component indices, used as arguments to SHUF() */ +#define X 0 +#define Y 1 +#define Z 2 +#define W 3 +/* Sizes of the register arrays in struct aos_machine */ +#define MAX_INPUTS PIPE_MAX_ATTRIBS +#define MAX_OUTPUTS PIPE_MAX_SHADER_OUTPUTS +#define MAX_TEMPS TGSI_EXEC_NUM_TEMPS +#define MAX_CONSTANTS 1024 /** only used for sanity checking */ +#define MAX_IMMEDIATES 1024 /** only used for sanity checking */ +#define MAX_INTERNALS 8 /** see IMM_x values below */ +/* Pseudo register file selecting the IMM_* internal constants (passed to get_reg_ptr in emit_RSQ) */ +#define AOS_FILE_INTERNAL TGSI_FILE_COUNT + +#define FPU_RND_NEG 1 +#define FPU_RND_NEAREST 2 + +struct aos_machine; +typedef void (PIPE_CDECL *lit_func)( struct aos_machine *, + float *result, + const float *in, + unsigned count ); + +void PIPE_CDECL aos_do_lit( struct aos_machine *machine, + float *result, + const float *in, + unsigned count ); + +struct shine_tab { + float exponent; + float values[258]; + unsigned last_used; +}; + +struct lit_info { + lit_func func; + struct shine_tab *shine_tab; +}; + +#define MAX_SHINE_TAB 4 +#define MAX_LIT_INFO 16 +/* One vertex buffer binding: base_ptr and stride are set by vaos_set_buffer; ptr is read and advanced by the generated code on the linear path */ +struct aos_buffer { + const void *base_ptr; + unsigned stride; + void *ptr; /* updated per vertex */ +}; + + + + +/* This is the temporary storage used by all the aos_sse vs varients. + * Create one per context and reuse by passing a pointer in at + * vs_varient creation??
+ */ +struct aos_machine { + float input [MAX_INPUTS ][4]; + float output [MAX_OUTPUTS ][4]; + float temp [MAX_TEMPS ][4]; + float internal [MAX_INTERNALS ][4]; /* indexed by the IMM_* values */ + + float scale[4]; /* viewport */ + float translate[4]; /* viewport */ + + float tmp[2][4]; /* scratch space for LIT */ + + struct shine_tab shine_tab[MAX_SHINE_TAB]; + struct lit_info lit_info[MAX_LIT_INFO]; + unsigned now; + + + ushort fpu_rnd_nearest; + ushort fpu_rnd_neg_inf; + ushort fpu_restore; + ushort fpucntl; /* one of FPU_* above */ + + const float (*immediates)[4]; /* points to shader data */ + const float (*constants)[4]; /* points to draw data */ + + const struct aos_buffer *buffer; /* points to the buffer array of the running variant; set by vaos_run_elts / vaos_run_linear */ +}; + + + +/* State kept while generating one x86 function (one instance per build_vertex_program call). */ +struct aos_compilation { + struct x86_function *func; + struct draw_vs_varient_aos_sse *vaos; + + unsigned insn_counter; + unsigned num_immediates; + unsigned count; + unsigned lit_count; + /* Bookkeeping for each of the eight XMM registers: which shader register (file, idx) it holds and whether it is dirty */ + struct { + unsigned idx:16; + unsigned file:8; + unsigned dirty:8; + unsigned last_used; + } xmm[8]; + + unsigned x86_reg[2]; /* one of X86_* */ + + boolean input_fetched[PIPE_MAX_ATTRIBS]; + unsigned output_last_write[PIPE_MAX_ATTRIBS]; /* NOTE(review): indexed by output register index in find_last_write_outputs but sized by PIPE_MAX_ATTRIBS - confirm the bound */ + + boolean have_sse2; + boolean error; + short fpucntl; + + /* these are actually known values, but putting them in a struct + * like this is helpful to keep them in sync across the file.
+ */ + struct x86_reg tmp_EAX; + struct x86_reg idx_EBX; /* either start+i or &elt[i] */ + struct x86_reg outbuf_ECX; + struct x86_reg machine_EDX; + struct x86_reg count_ESI; /* decrements to zero */ + struct x86_reg temp_EBP; + struct x86_reg stack_ESP; +}; +/* XMM register allocation helpers */ +struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp ); +void aos_release_xmm_reg( struct aos_compilation *cp, unsigned idx ); + +void aos_adopt_xmm_reg( struct aos_compilation *cp, + struct x86_reg reg, + unsigned file, + unsigned idx, + unsigned dirty ); + +void aos_spill_all( struct aos_compilation *cp ); + +struct x86_reg aos_get_shader_reg( struct aos_compilation *cp, + unsigned file, + unsigned idx ); +/* Input / output code generation; each is called with the same linear flag that build_vertex_program received */ +boolean aos_init_inputs( struct aos_compilation *cp, boolean linear ); +boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ); +boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear ); + +boolean aos_emit_outputs( struct aos_compilation *cp ); + +/* Row indices into aos_machine.internal[] */ +#define IMM_ONES 0 /* 1, 1,1,1 */ +#define IMM_SWZ 1 /* 1,-1,0, 0xffffffff */ +#define IMM_IDENTITY 2 /* 0, 0,0,1 */ +#define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */ +#define IMM_255 4 /* 255, 255, 255, 255 */ +#define IMM_NEGS 5 /* -1,-1,-1,-1 */ +#define IMM_RSQ 6 /* -.5,1.5,_,_ */ +#define IMM_PSIZE 7 /* not really an immediate - updated each run */ + +struct x86_reg aos_get_internal( struct aos_compilation *cp, + unsigned imm ); +struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp, + unsigned imm ); + +/* Flag a translation failure by setting cp->error; the debug message is compiled out by the if (0). */ +#define ERROR(cp, msg) \ +do { \ + if (0) debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, msg); \ + cp->error = 1; \ +} while (0) + +/* Values for the entries of aos_compilation.x86_reg[] */ +#define X86_NULL 0 +#define X86_IMMEDIATES 1 +#define X86_CONSTANTS 2 +#define X86_BUFFERS 3 + +struct x86_reg aos_get_x86( struct aos_compilation *cp, + unsigned which_reg, + unsigned value ); + +/* Signature of the generated elts-path entry point */ +typedef void (PIPE_CDECL *vaos_run_elts_func)( struct aos_machine *, + const unsigned *elts, + unsigned count, + void *output_buffer); + +typedef void
(PIPE_CDECL *vaos_run_linear_func)( struct aos_machine *, + unsigned start, + unsigned count, + void *output_buffer); + +/* An SSE-compiled shader variant: func[0] holds the linear-path code and func[1] the elts-path code; gen_run_linear / gen_run_elts are the callable pointers obtained from them. */ +struct draw_vs_varient_aos_sse { + struct draw_vs_varient base; + struct draw_context *draw; + + struct aos_buffer *buffer; /* nr_vb entries, allocated in varient_aos_sse */ + unsigned nr_vb; + + vaos_run_linear_func gen_run_linear; + vaos_run_elts_func gen_run_elts; + + + struct x86_function func[2]; +}; + + +#endif /* PIPE_ARCH_X86 */ + +#endif /* DRAW_VS_AOS_H */ + diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c new file mode 100644 index 0000000000..39f75b50b7 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -0,0 +1,462 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_exec.h" +#include "draw_vs.h" +#include "draw_vs_aos.h" +#include "draw_vertex.h" + +#include "rtasm/rtasm_x86sse.h" + +#ifdef PIPE_ARCH_X86 + +/* Note - don't yet have to worry about interacting with the code in + * draw_vs_aos.c as there is no intermingling of generated code... + * That may have to change, we'll see. + */ +static void emit_load_R32G32B32A32( struct aos_compilation *cp, + struct x86_reg data, + struct x86_reg src_ptr ) +{ + sse_movups(cp->func, data, src_ptr); /* four floats in one unaligned 16-byte load */ +} +/* Load three floats and set w to 1. The enabled path reads 12 bytes (MOVSS at offset 8, MOVLPS at offset 0); the disabled path uses a 16-byte MOVUPS. */ +static void emit_load_R32G32B32( struct aos_compilation *cp, + struct x86_reg data, + struct x86_reg src_ptr ) +{ +#if 1 + sse_movss(cp->func, data, x86_make_disp(src_ptr, 8)); + /* data = z ? ? ? */ + sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) ); + /* data = z ? 0 1 */ + sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) ); + /* data = ? 0 z 1 */ + sse_movlps(cp->func, data, src_ptr); + /* data = x y z 1 */ +#else + sse_movups(cp->func, data, src_ptr); + /* data = x y z ? */ + sse2_pshufd(cp->func, data, data, SHUF(W,X,Y,Z) ); + /* data = ?
x y z */ + sse_movss(cp->func, data, aos_get_internal_xmm( cp, IMM_ONES ) ); + /* data = 1 x y z */ + sse2_pshufd(cp->func, data, data, SHUF(Y,Z,W,X) ); + /* data = x y z 1 */ +#endif +} +/* Load two floats: start from IMM_IDENTITY (0,0,0,1) and overwrite the low two components, giving (x, y, 0, 1). */ +static void emit_load_R32G32( struct aos_compilation *cp, + struct x86_reg data, + struct x86_reg src_ptr ) +{ + sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) ); + sse_movlps(cp->func, data, src_ptr); +} + +/* Load one float with MOVSS and OR in IMM_IDENTITY (0,0,0,1); intended result is (x, 0, 0, 1), which relies on MOVSS from memory clearing the upper three components. */ +static void emit_load_R32( struct aos_compilation *cp, + struct x86_reg data, + struct x86_reg src_ptr ) +{ + sse_movss(cp->func, data, src_ptr); + sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) ); +} + +/* Load four unsigned bytes and convert to floats: two PUNPCKLBW steps against IMM_IDENTITY (whose low half is zero) widen each byte to 32 bits, then convert to float and multiply by IMM_INV_255. */ +static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp, + struct x86_reg data, + struct x86_reg src_ptr ) +{ + sse_movss(cp->func, data, src_ptr); + sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY )); + sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY )); + sse2_cvtdq2ps(cp->func, data, data); + sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255)); +} + + + +/* Extended swizzles? Maybe later.
+ */ +static void emit_swizzle( struct aos_compilation *cp, + struct x86_reg dest, + struct x86_reg src, + ubyte shuffle ) +{ + sse_shufps(cp->func, dest, src, shuffle); +} + + + +static boolean get_buffer_ptr( struct aos_compilation *cp, + boolean linear, + unsigned buf_idx, + struct x86_reg elt, + struct x86_reg ptr) +{ + struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), + buf_idx * sizeof(struct aos_buffer)); + + struct x86_reg buf_stride = x86_make_disp(buf, + Offset(struct aos_buffer, stride)); + if (linear) { + struct x86_reg buf_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, ptr)); + + + /* Calculate pointer to current attrib: + */ + x86_mov(cp->func, ptr, buf_ptr); + x86_mov(cp->func, elt, buf_stride); + x86_add(cp->func, elt, ptr); + if (buf_idx == 0) sse_prefetchnta(cp->func, x86_make_disp(elt, 192)); + x86_mov(cp->func, buf_ptr, elt); + } + else { + struct x86_reg buf_base_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, base_ptr)); + + + /* Calculate pointer to current attrib: + */ + x86_mov(cp->func, ptr, buf_stride); + x86_imul(cp->func, ptr, elt); + x86_add(cp->func, ptr, buf_base_ptr); + } + + cp->insn_counter++; + + return TRUE; +} + + +static boolean load_input( struct aos_compilation *cp, + unsigned idx, + struct x86_reg bufptr ) +{ + unsigned format = cp->vaos->base.key.element[idx].in.format; + unsigned offset = cp->vaos->base.key.element[idx].in.offset; + struct x86_reg dataXMM = aos_get_xmm_reg(cp); + + /* Figure out source pointer address: + */ + struct x86_reg src = x86_make_disp(bufptr, offset); + + aos_adopt_xmm_reg( cp, + dataXMM, + TGSI_FILE_INPUT, + idx, + TRUE ); + + switch (format) { + case PIPE_FORMAT_R32_FLOAT: + emit_load_R32(cp, dataXMM, src); + break; + case PIPE_FORMAT_R32G32_FLOAT: + emit_load_R32G32(cp, dataXMM, src); + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + emit_load_R32G32B32(cp, dataXMM, src); + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + emit_load_R32G32B32A32(cp, dataXMM, src); + 
break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + emit_load_R8G8B8A8_UNORM(cp, dataXMM, src); + emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W)); + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + emit_load_R8G8B8A8_UNORM(cp, dataXMM, src); + break; + default: + ERROR(cp, "unhandled input format"); + return FALSE; + } + + return TRUE; +} + +static boolean load_inputs( struct aos_compilation *cp, + unsigned buffer, + struct x86_reg ptr ) +{ + unsigned i; + + for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) { + if (cp->vaos->base.key.element[i].in.buffer == buffer) { + + if (!load_input( cp, i, ptr )) + return FALSE; + + cp->insn_counter++; + } + } + + return TRUE; +} + +boolean aos_init_inputs( struct aos_compilation *cp, boolean linear ) +{ + unsigned i; + for (i = 0; i < cp->vaos->nr_vb; i++) { + struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), + i * sizeof(struct aos_buffer)); + + struct x86_reg buf_base_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, base_ptr)); + + if (cp->vaos->base.key.const_vbuffers & (1<<i)) { + struct x86_reg ptr = cp->tmp_EAX; + + x86_mov(cp->func, ptr, buf_base_ptr); + + /* Load all inputs for this constant vertex buffer + */ + load_inputs( cp, i, x86_deref(ptr) ); + + /* Then just force them out to aos_machine.input[] + */ + aos_spill_all( cp ); + + } + else if (linear) { + + struct x86_reg elt = cp->idx_EBX; + struct x86_reg ptr = cp->tmp_EAX; + + struct x86_reg buf_stride = x86_make_disp(buf, + Offset(struct aos_buffer, stride)); + + struct x86_reg buf_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, ptr)); + + + /* Calculate pointer to current attrib: + */ + x86_mov(cp->func, ptr, buf_stride); + x86_imul(cp->func, ptr, elt); + x86_add(cp->func, ptr, buf_base_ptr); + + + /* In the linear case, keep the buffer pointer instead of the + * index number. 
+ */ + if (cp->vaos->nr_vb == 1) + x86_mov( cp->func, elt, ptr ); + else + x86_mov( cp->func, buf_ptr, ptr ); + + cp->insn_counter++; + } + } + + return TRUE; +} + +boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ) +{ + unsigned j; + + for (j = 0; j < cp->vaos->nr_vb; j++) { + if (cp->vaos->base.key.const_vbuffers & (1<<j)) { + /* just retreive pre-transformed input */ + } + else if (linear && cp->vaos->nr_vb == 1) { + load_inputs( cp, 0, cp->idx_EBX ); + } + else { + struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX); + struct x86_reg ptr = cp->tmp_EAX; + + if (!get_buffer_ptr( cp, linear, j, elt, ptr )) + return FALSE; + + if (!load_inputs( cp, j, ptr )) + return FALSE; + } + } + + return TRUE; +} + +boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear ) +{ + if (linear && cp->vaos->nr_vb == 1) { + struct x86_reg stride = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), + (0 * sizeof(struct aos_buffer) + + Offset(struct aos_buffer, stride))); + + x86_add(cp->func, cp->idx_EBX, stride); + sse_prefetchnta(cp->func, x86_make_disp(cp->idx_EBX, 192)); + } + else if (linear) { + /* Nothing to do */ + } + else { + x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4)); + } + + return TRUE; +} + + + + + + +static void emit_store_R32G32B32A32( struct aos_compilation *cp, + struct x86_reg dst_ptr, + struct x86_reg dataXMM ) +{ + sse_movups(cp->func, dst_ptr, dataXMM); +} + +static void emit_store_R32G32B32( struct aos_compilation *cp, + struct x86_reg dst_ptr, + struct x86_reg dataXMM ) +{ + sse_movlps(cp->func, dst_ptr, dataXMM); + sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! 
destructive */ + sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM); +} + +static void emit_store_R32G32( struct aos_compilation *cp, + struct x86_reg dst_ptr, + struct x86_reg dataXMM ) +{ + sse_movlps(cp->func, dst_ptr, dataXMM); +} + +static void emit_store_R32( struct aos_compilation *cp, + struct x86_reg dst_ptr, + struct x86_reg dataXMM ) +{ + sse_movss(cp->func, dst_ptr, dataXMM); +} + + + +static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp, + struct x86_reg dst_ptr, + struct x86_reg dataXMM ) +{ + sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255)); + sse2_cvtps2dq(cp->func, dataXMM, dataXMM); + sse2_packssdw(cp->func, dataXMM, dataXMM); + sse2_packuswb(cp->func, dataXMM, dataXMM); + sse_movss(cp->func, dst_ptr, dataXMM); +} + + + + + +static boolean emit_output( struct aos_compilation *cp, + struct x86_reg ptr, + struct x86_reg dataXMM, + unsigned format ) +{ + switch (format) { + case EMIT_1F: + case EMIT_1F_PSIZE: + emit_store_R32(cp, ptr, dataXMM); + break; + case EMIT_2F: + emit_store_R32G32(cp, ptr, dataXMM); + break; + case EMIT_3F: + emit_store_R32G32B32(cp, ptr, dataXMM); + break; + case EMIT_4F: + emit_store_R32G32B32A32(cp, ptr, dataXMM); + break; + case EMIT_4UB: + if (1) { + emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W)); + emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); + } + else { + emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); + } + break; + default: + ERROR(cp, "unhandled output format"); + return FALSE; + } + + return TRUE; +} + + + +boolean aos_emit_outputs( struct aos_compilation *cp ) +{ + unsigned i; + + for (i = 0; i < cp->vaos->base.key.nr_outputs; i++) { + unsigned format = cp->vaos->base.key.element[i].out.format; + unsigned offset = cp->vaos->base.key.element[i].out.offset; + unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output; + + struct x86_reg data; + + if (format == EMIT_1F_PSIZE) { + data = aos_get_internal_xmm( cp, IMM_PSIZE ); + } + else { + data = aos_get_shader_reg( cp, + 
TGSI_FILE_OUTPUT, + vs_output ); + } + + if (data.file != file_XMM) { + struct x86_reg tmp = aos_get_xmm_reg( cp ); + sse_movaps(cp->func, tmp, data); + data = tmp; + } + + if (!emit_output( cp, + x86_make_disp( cp->outbuf_ECX, offset ), + data, + format )) + return FALSE; + + aos_release_xmm_reg( cp, data.idx ); + + cp->insn_counter++; + } + + return TRUE; +} + +#endif diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c new file mode 100644 index 0000000000..b358bd2df4 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c @@ -0,0 +1,324 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "pipe/p_config.h" + + +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_exec.h" +#include "draw_vs.h" +#include "draw_vs_aos.h" +#include "draw_vertex.h" + +#ifdef PIPE_ARCH_X86 + +#include "rtasm/rtasm_x86sse.h" + + +#define X87_CW_EXCEPTION_INV_OP (1<<0) +#define X87_CW_EXCEPTION_DENORM_OP (1<<1) +#define X87_CW_EXCEPTION_ZERO_DIVIDE (1<<2) +#define X87_CW_EXCEPTION_OVERFLOW (1<<3) +#define X87_CW_EXCEPTION_UNDERFLOW (1<<4) +#define X87_CW_EXCEPTION_PRECISION (1<<5) +#define X87_CW_PRECISION_SINGLE (0<<8) +#define X87_CW_PRECISION_RESERVED (1<<8) +#define X87_CW_PRECISION_DOUBLE (2<<8) +#define X87_CW_PRECISION_DOUBLE_EXT (3<<8) +#define X87_CW_PRECISION_MASK (3<<8) +#define X87_CW_ROUND_NEAREST (0<<10) +#define X87_CW_ROUND_DOWN (1<<10) +#define X87_CW_ROUND_UP (2<<10) +#define X87_CW_ROUND_ZERO (3<<10) +#define X87_CW_ROUND_MASK (3<<10) +#define X87_CW_INFINITY (1<<12) + + +void PIPE_CDECL aos_do_lit( struct aos_machine *machine, + float *result, + const float *in, + unsigned count ) +{ + if (in[0] > 0) + { + if (in[1] <= 0.0) + { + result[0] = 1.0F; + result[1] = in[0]; + result[2] = 1.0; + result[3] = 1.0F; + } + else + { + const float epsilon = 1.0F / 256.0F; + float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon)); + result[0] = 1.0F; + result[1] = in[0]; + result[2] = powf(in[1], exponent); + result[3] = 1.0; + } + } + else + { + result[0] = 1.0F; + result[1] = 0.0; + result[2] = 0.0; + result[3] = 1.0F; + } +} + + +static void PIPE_CDECL do_lit_lut( struct aos_machine *machine, + float *result, + const float *in, + unsigned count ) +{ + if (in[0] > 0) + { + if (in[1] <= 0.0) + { + result[0] = 1.0F; + result[1] = in[0]; + result[2] = 1.0; + result[3] = 1.0F; + return; + } + + if (machine->lit_info[count].shine_tab->exponent != 
in[3]) { + machine->lit_info[count].func = aos_do_lit; + goto no_luck; + } + + if (in[1] <= 1.0) + { + const float *tab = machine->lit_info[count].shine_tab->values; + float f = in[1] * 256; + int k = (int)f; + float frac = f - (float)k; + + result[0] = 1.0F; + result[1] = in[0]; + result[2] = tab[k] + frac*(tab[k+1]-tab[k]); + result[3] = 1.0; + return; + } + + no_luck: + { + const float epsilon = 1.0F / 256.0F; + float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon)); + result[0] = 1.0F; + result[1] = in[0]; + result[2] = powf(in[1], exponent); + result[3] = 1.0; + } + } + else + { + result[0] = 1.0F; + result[1] = 0.0; + result[2] = 0.0; + result[3] = 1.0F; + } +} + + +static void do_populate_lut( struct shine_tab *tab, + float unclamped_exponent ) +{ + const float epsilon = 1.0F / 256.0F; + float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon)); + unsigned i; + + tab->exponent = unclamped_exponent; /* for later comparison */ + + tab->values[0] = 0; + if (exponent == 0) { + for (i = 1; i < 258; i++) { + tab->values[i] = 1.0; + } + } + else { + for (i = 1; i < 258; i++) { + tab->values[i] = powf((float)i * epsilon, exponent); + } + } +} + + + + +static void PIPE_CDECL populate_lut( struct aos_machine *machine, + float *result, + const float *in, + unsigned count ) +{ + unsigned i, tab; + + /* Search for an existing table for this value. Note that without + * static analysis we don't really know if in[3] will be constant, + * but it usually is... + */ + for (tab = 0; tab < 4; tab++) { + if (machine->shine_tab[tab].exponent == in[3]) { + goto found; + } + } + + for (tab = 0, i = 1; i < 4; i++) { + if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used) + tab = i; + } + + if (machine->shine_tab[tab].last_used == machine->now) { + /* No unused tables (this is not a ffvertex program...). 
Just + * call pow each time: + */ + machine->lit_info[count].func = aos_do_lit; + machine->lit_info[count].func( machine, result, in, count ); + return; + } + else { + do_populate_lut( &machine->shine_tab[tab], in[3] ); + } + + found: + machine->shine_tab[tab].last_used = machine->now; + machine->lit_info[count].shine_tab = &machine->shine_tab[tab]; + machine->lit_info[count].func = do_lit_lut; + machine->lit_info[count].func( machine, result, in, count ); +} + + +void draw_vs_aos_machine_constants( struct aos_machine *machine, + const float (*constants)[4] ) +{ + machine->constants = constants; + + { + unsigned i; + for (i = 0; i < MAX_LIT_INFO; i++) { + machine->lit_info[i].func = populate_lut; + machine->now++; + } + } +} + + +void draw_vs_aos_machine_viewport( struct aos_machine *machine, + const struct pipe_viewport_state *viewport ) +{ + memcpy(machine->scale, viewport->scale, 4 * sizeof(float)); + memcpy(machine->translate, viewport->translate, 4 * sizeof(float)); +} + + + +void draw_vs_aos_machine_destroy( struct aos_machine *machine ) +{ + align_free(machine); +} + +struct aos_machine *draw_vs_aos_machine( void ) +{ + struct aos_machine *machine; + unsigned i; + float inv = 1.0f/255.0f; + float f255 = 255.0f; + + machine = align_malloc(sizeof(struct aos_machine), 16); + if (!machine) + return NULL; + + memset(machine, 0, sizeof(*machine)); + + ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f); + *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff; + + ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f); + ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f); + ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f); + ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv); + ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255); + ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f); + + + machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP | + X87_CW_EXCEPTION_DENORM_OP | + 
X87_CW_EXCEPTION_ZERO_DIVIDE | + X87_CW_EXCEPTION_OVERFLOW | + X87_CW_EXCEPTION_UNDERFLOW | + X87_CW_EXCEPTION_PRECISION | + (1<<6) | + X87_CW_ROUND_NEAREST | + X87_CW_PRECISION_DOUBLE_EXT); + + assert(machine->fpu_rnd_nearest == 0x37f); + + machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP | + X87_CW_EXCEPTION_DENORM_OP | + X87_CW_EXCEPTION_ZERO_DIVIDE | + X87_CW_EXCEPTION_OVERFLOW | + X87_CW_EXCEPTION_UNDERFLOW | + X87_CW_EXCEPTION_PRECISION | + (1<<6) | + X87_CW_ROUND_DOWN | + X87_CW_PRECISION_DOUBLE_EXT); + + for (i = 0; i < MAX_SHINE_TAB; i++) + do_populate_lut( &machine->shine_tab[i], 1.0f ); + + return machine; +} + +#else + +void draw_vs_aos_machine_viewport( struct aos_machine *machine, + const struct pipe_viewport_state *viewport ) +{ +} + +void draw_vs_aos_machine_constants( struct aos_machine *machine, + const float (*constants)[4] ) +{ +} + +void draw_vs_aos_machine_destroy( struct aos_machine *machine ) +{ +} + +struct aos_machine *draw_vs_aos_machine( void ) +{ + return NULL; +} +#endif + diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c new file mode 100644 index 0000000000..13d4fcfdbf --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -0,0 +1,194 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" + +#include "draw_private.h" +#include "draw_context.h" +#include "draw_vs.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" + + +struct exec_vertex_shader { + struct draw_vertex_shader base; + struct tgsi_exec_machine *machine; +}; + +static struct exec_vertex_shader *exec_vertex_shader( struct draw_vertex_shader *vs ) +{ + return (struct exec_vertex_shader *)vs; +} + + +/* Not required for run_linear. 
+ */ +static void +vs_exec_prepare( struct draw_vertex_shader *shader, + struct draw_context *draw ) +{ + struct exec_vertex_shader *evs = exec_vertex_shader(shader); + + /* Specify the vertex program to interpret/execute. + * Avoid rebinding when possible. + */ + if (evs->machine->Tokens != shader->state.tokens) { + tgsi_exec_machine_bind_shader(evs->machine, + shader->state.tokens, + PIPE_MAX_SAMPLERS, + NULL /*samplers*/ ); + } +} + + + + +/* Simplified vertex shader interface for the pt paths. Given the + * complexity of code-generating all the above operations together, + * it's time to try doing all the other stuff separately. + */ +static void +vs_exec_run_linear( struct draw_vertex_shader *shader, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) +{ + struct exec_vertex_shader *evs = exec_vertex_shader(shader); + struct tgsi_exec_machine *machine = evs->machine; + unsigned int i, j; + unsigned slot; + + machine->Consts = constants; + + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { + unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); + + /* Swizzle inputs. + */ + for (j = 0; j < max_vertices; j++) { +#if 0 + debug_printf("%d) Input vert:\n", i + j); + for (slot = 0; slot < shader->info.num_inputs; slot++) { + debug_printf("\t%d: %f %f %f %f\n", slot, + input[slot][0], + input[slot][1], + input[slot][2], + input[slot][3]); + } +#endif + + for (slot = 0; slot < shader->info.num_inputs; slot++) { + machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; + machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; + machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; + machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; + } + + input = (const float (*)[4])((const char *)input + input_stride); + } + + /* run interpreter */ + tgsi_exec_machine_run( machine ); + + /* Unswizzle all output results. 
+ */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < shader->info.num_outputs; slot++) { + output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + + } + +#if 0 + debug_printf("%d) Post xform vert:\n", i + j); + for (slot = 0; slot < shader->info.num_outputs; slot++) { + debug_printf("\t%d: %f %f %f %f\n", slot, + output[slot][0], + output[slot][1], + output[slot][2], + output[slot][3]); + } +#endif + + output = (float (*)[4])((char *)output + output_stride); + } + + } +} + + + + +static void +vs_exec_delete( struct draw_vertex_shader *dvs ) +{ + FREE((void*) dvs->state.tokens); + FREE( dvs ); +} + + +struct draw_vertex_shader * +draw_create_vs_exec(struct draw_context *draw, + const struct pipe_shader_state *state) +{ + struct exec_vertex_shader *vs = CALLOC_STRUCT( exec_vertex_shader ); + + if (vs == NULL) + return NULL; + + /* we make a private copy of the tokens */ + vs->base.state.tokens = tgsi_dup_tokens(state->tokens); + if (!vs->base.state.tokens) { + FREE(vs); + return NULL; + } + + tgsi_scan_shader(state->tokens, &vs->base.info); + + vs->base.draw = draw; + vs->base.prepare = vs_exec_prepare; + vs->base.run_linear = vs_exec_run_linear; + vs->base.delete = vs_exec_delete; + vs->base.create_varient = draw_vs_varient_generic; + vs->machine = &draw->vs.machine; + + return &vs->base; +} diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c new file mode 100644 index 0000000000..727977bc3a --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -0,0 +1,161 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "draw_private.h" +#include "draw_context.h" +#include "draw_vs.h" + +#include "tgsi/tgsi_parse.h" + +#ifdef MESA_LLVM + +#include "gallivm/gallivm.h" + +struct draw_llvm_vertex_shader { + struct draw_vertex_shader base; + struct gallivm_prog *llvm_prog; + struct tgsi_exec_machine *machine; +}; + + +static void +vs_llvm_prepare( struct draw_vertex_shader *base, + struct draw_context *draw ) +{ +} + + + + +static void +vs_llvm_run_linear( struct draw_vertex_shader *base, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) +{ + struct draw_llvm_vertex_shader *shader = + (struct draw_llvm_vertex_shader *)base; + + gallivm_cpu_vs_exec(shader->llvm_prog, shader->machine, + input, base->info.num_inputs, output, base->info.num_outputs, + constants, count, input_stride, output_stride); +} + + + +static void +vs_llvm_delete( struct draw_vertex_shader *base ) +{ + struct draw_llvm_vertex_shader *shader = + (struct draw_llvm_vertex_shader *)base; + + /* Do something to free compiled shader: + */ + + FREE( (void*) shader->base.state.tokens ); + FREE( shader ); +} + + + + +struct draw_vertex_shader * +draw_create_vs_llvm(struct draw_context *draw, + const struct pipe_shader_state *templ) +{ + struct draw_llvm_vertex_shader *vs; + + vs = CALLOC_STRUCT( draw_llvm_vertex_shader ); + if (vs == NULL) + return NULL; + + /* we make a private copy of the tokens */ + vs->base.state.tokens = tgsi_dup_tokens(templ->tokens); + if (!vs->base.state.tokens) { + FREE(vs); + return NULL; + } + + tgsi_scan_shader(vs->base.state.tokens, &vs->base.info); + + vs->base.draw = draw; + vs->base.prepare = vs_llvm_prepare; + vs->base.create_varient = 
draw_vs_varient_generic; + vs->base.run_linear = vs_llvm_run_linear; + vs->base.delete = vs_llvm_delete; + vs->machine = &draw->vs.machine; + + { + struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS); + gallivm_ir_set_layout(ir, GALLIVM_SOA); + gallivm_ir_set_components(ir, 4); + gallivm_ir_fill_from_tgsi(ir, vs->base.state.tokens); + vs->llvm_prog = gallivm_ir_compile(ir); + gallivm_ir_delete(ir); + } + + draw->vs.engine = gallivm_global_cpu_engine(); + + /* XXX: Why are there two versions of this? Shouldn't creating the + * engine be a separate operation to compiling a shader? + */ + if (!draw->vs.engine) { + draw->vs.engine = gallivm_cpu_engine_create(vs->llvm_prog); + } + else { + gallivm_cpu_jit_compile(draw->vs.engine, vs->llvm_prog); + } + + return &vs->base; +} + + + + + +#else + +struct draw_vertex_shader * +draw_create_vs_llvm(struct draw_context *draw, + const struct pipe_shader_state *shader) +{ + return NULL; +} + +#endif diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c new file mode 100644 index 0000000000..8eff6d4fda --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -0,0 +1,274 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_config.h" + +#include "draw_vs.h" + +#if defined(PIPE_ARCH_PPC) + +#include "pipe/p_shader_tokens.h" + +#include "draw_private.h" +#include "draw_context.h" + +#include "rtasm/rtasm_cpu.h" +#include "rtasm/rtasm_ppc.h" +#include "tgsi/tgsi_ppc.h" +#include "tgsi/tgsi_parse.h" + + + +typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4], + float (*outputs)[4][4], + float (*temps)[4][4], + float (*immeds)[4][4], + float (*consts)[4], + const float *builtins); + +#if 0 + const struct tgsi_exec_vector *input, + struct tgsi_exec_vector *output, + float (*constant)[4], /* 3 */ + struct tgsi_exec_vector *temporary, /* 4 */ + float (*immediates)[4], /* 5 */ + const float (*aos_input)[4], /* 6 */ + uint num_inputs, /* 7 */ + uint input_stride, /* 8 */ + float (*aos_output)[4], /* 9 */ + uint num_outputs, /* 10 */ + uint output_stride ); /* 11 */ +#endif + +struct draw_ppc_vertex_shader { + struct draw_vertex_shader base; + struct ppc_function ppc_program; + + codegen_function func; + + struct tgsi_exec_machine *machine; +}; + + +static void +vs_ppc_prepare( struct draw_vertex_shader *base, + struct draw_context *draw ) +{ +} + + + +/* Simplified vertex shader interface for the pt paths. 
Given the + * complexity of code-generating all the above operations together, + * it's time to try doing all the other stuff separately. + */ +static void +vs_ppc_run_linear( struct draw_vertex_shader *base, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) +{ + struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base; + struct tgsi_exec_machine *machine = shader->machine; + unsigned int i; + +#define MAX_VERTICES 4 + + /* loop over verts */ + for (i = 0; i < count; i += MAX_VERTICES) { + const uint max_vertices = MIN2(MAX_VERTICES, count - i); + float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB; + float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB; + float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB; + uint attr; + + /* convert (up to) four input verts to SoA format */ + for (attr = 0; attr < base->info.num_inputs; attr++) { + const float *vIn = (const float *) input; + uint vert; + for (vert = 0; vert < max_vertices; vert++) { +#if 0 + if (attr==0) + printf("Input v%d a%d: %f %f %f %f\n", + vert, attr, vIn[0], vIn[1], vIn[2], vIn[3]); +#endif + inputs_soa[attr][0][vert] = vIn[attr * 4 + 0]; + inputs_soa[attr][1][vert] = vIn[attr * 4 + 1]; + inputs_soa[attr][2][vert] = vIn[attr * 4 + 2]; + inputs_soa[attr][3][vert] = vIn[attr * 4 + 3]; + vIn += input_stride / 4; + } + } + + /* run compiled shader + */ +#if 0 + shader->func(machine->Inputs, + machine->Outputs, + (float (*)[4])constants, + machine->Temps, + (float (*)[4])shader->base.immediates, + input, + base->info.num_inputs, + input_stride, + output, + base->info.num_outputs, + output_stride ); +#else + shader->func(inputs_soa, outputs_soa, temps_soa, + (float (*)[4][4]) shader->base.immediates, + (float (*)[4]) constants, + ppc_builtin_constants); + + /*output[0][0] = input[0][0] * 0.5;*/ +#endif + + /* convert (up to) four output verts from SoA back to AoS 
format */ + for (attr = 0; attr < base->info.num_outputs; attr++) { + float *vOut = (float *) output; + uint vert; + for (vert = 0; vert < max_vertices; vert++) { + vOut[attr * 4 + 0] = outputs_soa[attr][0][vert]; + vOut[attr * 4 + 1] = outputs_soa[attr][1][vert]; + vOut[attr * 4 + 2] = outputs_soa[attr][2][vert]; + vOut[attr * 4 + 3] = outputs_soa[attr][3][vert]; +#if 0 + if (attr==0) + printf("Output v%d a%d: %f %f %f %f\n", + vert, attr, vOut[0], vOut[1], vOut[2], vOut[3]); +#endif + vOut += output_stride / 4; + } + } + + /* advance to next group of four input/output verts */ + input = (const float (*)[4])((const char *)input + input_stride * max_vertices); + output = (float (*)[4])((char *)output + output_stride * max_vertices); + } +} + + + + +static void +vs_ppc_delete( struct draw_vertex_shader *base ) +{ + struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base; + + ppc_release_func( &shader->ppc_program ); + + align_free( (void *) shader->base.immediates ); + + FREE( (void*) shader->base.state.tokens ); + FREE( shader ); +} + + +struct draw_vertex_shader * +draw_create_vs_ppc(struct draw_context *draw, + const struct pipe_shader_state *templ) +{ + struct draw_ppc_vertex_shader *vs; + + vs = CALLOC_STRUCT( draw_ppc_vertex_shader ); + if (vs == NULL) + return NULL; + + /* we make a private copy of the tokens */ + vs->base.state.tokens = tgsi_dup_tokens(templ->tokens); + if (!vs->base.state.tokens) + goto fail; + + tgsi_scan_shader(templ->tokens, &vs->base.info); + + vs->base.draw = draw; +#if 0 + if (1) + vs->base.create_varient = draw_vs_varient_aos_ppc; + else +#endif + vs->base.create_varient = draw_vs_varient_generic; + vs->base.prepare = vs_ppc_prepare; + vs->base.run_linear = vs_ppc_run_linear; + vs->base.delete = vs_ppc_delete; + + vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * 4 * + sizeof(float), 16); + + vs->machine = &draw->vs.machine; + + ppc_init_func( &vs->ppc_program, 2000 ); /* XXX fix limit */ + + if 
(!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, + &vs->ppc_program, + (float (*)[4])vs->base.immediates, + TRUE )) + goto fail; + + vs->func = (codegen_function) ppc_get_func( &vs->ppc_program ); + if (!vs->func) { + goto fail; + } + + return &vs->base; + +fail: + /* + debug_error("tgsi_emit_ppc() failed, falling back to interpreter\n"); + */ + + ppc_release_func( &vs->ppc_program ); + + FREE(vs); + return NULL; +} + + + +#else /* PIPE_ARCH_PPC */ + + +struct draw_vertex_shader * +draw_create_vs_ppc( struct draw_context *draw, + const struct pipe_shader_state *templ ) +{ + return (void *) 0; +} + + +#endif /* PIPE_ARCH_PPC */ diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c new file mode 100644 index 0000000000..b11ae31662 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -0,0 +1,212 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_config.h" + +#include "draw_vs.h" + +#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE) + +#include "pipe/p_shader_tokens.h" + +#include "draw_private.h" +#include "draw_context.h" + +#include "rtasm/rtasm_cpu.h" +#include "rtasm/rtasm_x86sse.h" +#include "tgsi/tgsi_sse2.h" +#include "tgsi/tgsi_parse.h" + +#define SSE_MAX_VERTICES 4 + +typedef void (PIPE_CDECL *codegen_function) ( + const struct tgsi_exec_vector *input, /* 1 */ + struct tgsi_exec_vector *output, /* 2 */ + float (*constant)[4], /* 3 */ + struct tgsi_exec_vector *temporary, /* 4 */ + float (*immediates)[4], /* 5 */ + const float (*aos_input)[4], /* 6 */ + uint num_inputs, /* 7 */ + uint input_stride, /* 8 */ + float (*aos_output)[4], /* 9 */ + uint num_outputs, /* 10 */ + uint output_stride ); /* 11 */ + +struct draw_sse_vertex_shader { + struct draw_vertex_shader base; + struct x86_function sse2_program; + + codegen_function func; + + struct tgsi_exec_machine *machine; +}; + + +static void +vs_sse_prepare( struct draw_vertex_shader *base, + struct draw_context *draw ) +{ +} + + + +/* Simplified vertex shader interface for the pt paths. Given the + * complexity of code-generating all the above operations together, + * it's time to try doing all the other stuff separately. 
+ */ +static void +vs_sse_run_linear( struct draw_vertex_shader *base, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) +{ + struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; + struct tgsi_exec_machine *machine = shader->machine; + unsigned int i; + + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { + unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); + + /* run compiled shader + */ + shader->func(machine->Inputs, + machine->Outputs, + (float (*)[4])constants, + machine->Temps, + (float (*)[4])shader->base.immediates, + input, + base->info.num_inputs, + input_stride, + output, + base->info.num_outputs, + output_stride ); + + input = (const float (*)[4])((const char *)input + input_stride * max_vertices); + output = (float (*)[4])((char *)output + output_stride * max_vertices); + } +} + + + + +static void +vs_sse_delete( struct draw_vertex_shader *base ) +{ + struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; + + x86_release_func( &shader->sse2_program ); + + align_free( (void *) shader->base.immediates ); + + FREE( (void*) shader->base.state.tokens ); + FREE( shader ); +} + + +struct draw_vertex_shader * +draw_create_vs_sse(struct draw_context *draw, + const struct pipe_shader_state *templ) +{ + struct draw_sse_vertex_shader *vs; + + if (!rtasm_cpu_has_sse2()) + return NULL; + + vs = CALLOC_STRUCT( draw_sse_vertex_shader ); + if (vs == NULL) + return NULL; + + /* we make a private copy of the tokens */ + vs->base.state.tokens = tgsi_dup_tokens(templ->tokens); + if (!vs->base.state.tokens) + goto fail; + + tgsi_scan_shader(templ->tokens, &vs->base.info); + + vs->base.draw = draw; + if (1) + vs->base.create_varient = draw_vs_varient_aos_sse; + else + vs->base.create_varient = draw_vs_varient_generic; + vs->base.prepare = vs_sse_prepare; + vs->base.run_linear = vs_sse_run_linear; + vs->base.delete = 
vs_sse_delete; + + vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * + sizeof(float), 16); + + vs->machine = &draw->vs.machine; + + x86_init_func( &vs->sse2_program ); + + if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens, + &vs->sse2_program, + (float (*)[4])vs->base.immediates, + TRUE )) + goto fail; + + vs->func = (codegen_function) x86_get_func( &vs->sse2_program ); + if (!vs->func) { + goto fail; + } + + return &vs->base; + +fail: + debug_error("tgsi_emit_sse2() failed, falling back to interpreter\n"); + + x86_release_func( &vs->sse2_program ); + + FREE(vs); + return NULL; +} + + + +#else + +struct draw_vertex_shader * +draw_create_vs_sse( struct draw_context *draw, + const struct pipe_shader_state *templ ) +{ + return (void *) 0; +} + + +#endif + diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c new file mode 100644 index 0000000000..7ee567d478 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -0,0 +1,322 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_memory.h" +#include "util/u_math.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_vs.h" +#include "translate/translate.h" +#include "translate/translate_cache.h" + +/* A first pass at incorporating vertex fetch/emit functionality into + */ +struct draw_vs_varient_generic { + struct draw_vs_varient base; + + struct draw_vertex_shader *shader; + struct draw_context *draw; + + /* Basic plan is to run these two translate functions before/after + * the vertex shader's existing run_linear() routine to simulate + * the inclusion of this functionality into the shader... + * + * Next will look at actually including it. 
+ */ + struct translate *fetch; + struct translate *emit; + + unsigned temp_vertex_stride; +}; + + + + + +static void vsvg_set_buffer( struct draw_vs_varient *varient, + unsigned buffer, + const void *ptr, + unsigned stride ) +{ + struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + + vsvg->fetch->set_buffer(vsvg->fetch, + buffer, + ptr, + stride); +} + + +/* Mainly for debug at this stage: + */ +static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg, + unsigned count, + void *output_buffer ) +{ + char *ptr = (char *)output_buffer; + const float *scale = vsvg->base.vs->draw->viewport.scale; + const float *trans = vsvg->base.vs->draw->viewport.translate; + unsigned stride = vsvg->temp_vertex_stride; + unsigned j; + + ptr += vsvg->base.vs->position_output * 4 * sizeof(float); + + for (j = 0; j < count; j++, ptr += stride) { + float *data = (float *)ptr; + float w = 1.0f / data[3]; + + data[0] = data[0] * w * scale[0] + trans[0]; + data[1] = data[1] * w * scale[1] + trans[1]; + data[2] = data[2] * w * scale[2] + trans[2]; + data[3] = w; + } +} + +static void do_viewport( struct draw_vs_varient_generic *vsvg, + unsigned count, + void *output_buffer ) +{ + char *ptr = (char *)output_buffer; + const float *scale = vsvg->base.vs->draw->viewport.scale; + const float *trans = vsvg->base.vs->draw->viewport.translate; + unsigned stride = vsvg->temp_vertex_stride; + unsigned j; + + ptr += vsvg->base.vs->position_output * 4 * sizeof(float); + + for (j = 0; j < count; j++, ptr += stride) { + float *data = (float *)ptr; + + data[0] = data[0] * scale[0] + trans[0]; + data[1] = data[1] * scale[1] + trans[1]; + data[2] = data[2] * scale[2] + trans[2]; + } +} + + +static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, + const unsigned *elts, + unsigned count, + void *output_buffer) +{ + struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + unsigned temp_vertex_stride = vsvg->temp_vertex_stride; + 
void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride ); + + if (0) debug_printf("%s %d \n", __FUNCTION__, count); + + /* Want to do this in small batches for cache locality? + */ + + vsvg->fetch->run_elts( vsvg->fetch, + elts, + count, + temp_buffer ); + + vsvg->base.vs->run_linear( vsvg->base.vs, + temp_buffer, + temp_buffer, + (const float (*)[4])vsvg->base.vs->draw->pt.user.constants, + count, + temp_vertex_stride, + temp_vertex_stride); + + + if (vsvg->base.key.clip) { + /* not really handling clipping, just do the rhw so we can + * see the results... + */ + do_rhw_viewport( vsvg, + count, + temp_buffer ); + } + else if (vsvg->base.key.viewport) { + do_viewport( vsvg, + count, + temp_buffer ); + } + + + vsvg->emit->set_buffer( vsvg->emit, + 0, + temp_buffer, + temp_vertex_stride ); + + vsvg->emit->set_buffer( vsvg->emit, + 1, + &vsvg->draw->rasterizer->point_size, + 0); + + vsvg->emit->run( vsvg->emit, + 0, count, + output_buffer ); + + FREE(temp_buffer); +} + + +static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, + unsigned start, + unsigned count, + void *output_buffer ) +{ + struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + unsigned temp_vertex_stride = vsvg->temp_vertex_stride; + void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride ); + + if (0) debug_printf("%s %d %d (sz %d, %d)\n", __FUNCTION__, start, count, + vsvg->base.key.output_stride, + temp_vertex_stride); + + vsvg->fetch->run( vsvg->fetch, + start, + count, + temp_buffer ); + + vsvg->base.vs->run_linear( vsvg->base.vs, + temp_buffer, + temp_buffer, + (const float (*)[4])vsvg->base.vs->draw->pt.user.constants, + count, + temp_vertex_stride, + temp_vertex_stride); + + if (vsvg->base.key.clip) { + /* not really handling clipping, just do the rhw so we can + * see the results... 
+ */ + do_rhw_viewport( vsvg, + count, + temp_buffer ); + } + else if (vsvg->base.key.viewport) { + do_viewport( vsvg, + count, + temp_buffer ); + } + + vsvg->emit->set_buffer( vsvg->emit, + 0, + temp_buffer, + temp_vertex_stride ); + + vsvg->emit->set_buffer( vsvg->emit, + 1, + &vsvg->draw->rasterizer->point_size, + 0); + + vsvg->emit->run( vsvg->emit, + 0, count, + output_buffer ); + + FREE(temp_buffer); +} + + + + + +static void vsvg_destroy( struct draw_vs_varient *varient ) +{ + FREE(varient); +} + + +struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ) +{ + unsigned i; + struct translate_key fetch, emit; + + struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic ); + if (vsvg == NULL) + return NULL; + + vsvg->base.key = *key; + vsvg->base.vs = vs; + vsvg->base.set_buffer = vsvg_set_buffer; + vsvg->base.run_elts = vsvg_run_elts; + vsvg->base.run_linear = vsvg_run_linear; + vsvg->base.destroy = vsvg_destroy; + + vsvg->draw = vs->draw; + + vsvg->temp_vertex_stride = MAX2(key->nr_inputs, + vsvg->base.vs->info.num_outputs) * 4 * sizeof(float); + + /* Build free-standing fetch and emit functions: + */ + fetch.nr_elements = key->nr_inputs; + fetch.output_stride = vsvg->temp_vertex_stride; + for (i = 0; i < key->nr_inputs; i++) { + fetch.element[i].input_format = key->element[i].in.format; + fetch.element[i].input_buffer = key->element[i].in.buffer; + fetch.element[i].input_offset = key->element[i].in.offset; + fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fetch.element[i].output_offset = i * 4 * sizeof(float); + assert(fetch.element[i].output_offset < fetch.output_stride); + } + + + emit.nr_elements = key->nr_outputs; + emit.output_stride = key->output_stride; + for (i = 0; i < key->nr_outputs; i++) { + if (key->element[i].out.format != EMIT_1F_PSIZE) + { + emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit.element[i].input_buffer = 0; 
+ emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float); + emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format); + emit.element[i].output_offset = key->element[i].out.offset; + assert(emit.element[i].input_offset <= fetch.output_stride); + } + else { + emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT; + emit.element[i].input_buffer = 1; + emit.element[i].input_offset = 0; + emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT; + emit.element[i].output_offset = key->element[i].out.offset; + } + } + + vsvg->fetch = draw_vs_get_fetch( vs->draw, &fetch ); + vsvg->emit = draw_vs_get_emit( vs->draw, &emit ); + + return &vsvg->base; +} + + + + + diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile new file mode 100644 index 0000000000..c3f7bfba93 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/Makefile @@ -0,0 +1,92 @@ +# -*-makefile-*- +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = gallivm + + +GALLIVM_SOURCES = \ + gallivm.cpp \ + gallivm_cpu.cpp \ + instructions.cpp \ + loweringpass.cpp \ + tgsitollvm.cpp \ + storage.cpp \ + storagesoa.cpp \ + instructionssoa.cpp + +INC_SOURCES = gallivm_builtins.cpp gallivmsoabuiltins.cpp + +CPP_SOURCES = \ + $(GALLIVM_SOURCES) + +C_SOURCES = +ASM_SOURCES = + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(CPP_SOURCES:.cpp=.o) \ + $(ASM_SOURCES:.S=.o) + +### Include directories +INCLUDES = \ + -I. 
\ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/auxiliary \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/include + + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(LLVM_CFLAGS) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDES) $(LLVM_CXXFLAGS) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + +##### TARGETS ##### + +default:: depend symlinks $(LIBNAME) + + +$(LIBNAME): $(OBJECTS) Makefile + $(TOP)/bin/mklib -o $@ -static $(OBJECTS) + + +depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \ + $(ASM_SOURCES) $(INC_SOURCES) 2> /dev/null + + +gallivm_builtins.cpp: llvm_builtins.c + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin + (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@ + rm temp1.bin + +gallivmsoabuiltins.cpp: soabuiltins.c + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin + (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@ + rm temp2.bin + +# Emacs tags +tags: + etags `find . 
-name \*.[ch]` `find ../include` + + +# Remove .o and backup files +clean: + -rm -f *.o */*.o *~ *.so *~ server/*.o + -rm -f depend depend.bak + -rm -f gallivm_builtins.cpp + -rm -f gallivmsoabuiltins.cpp + +symlinks: + + +include depend diff --git a/src/gallium/auxiliary/gallivm/SConscript b/src/gallium/auxiliary/gallivm/SConscript new file mode 100644 index 0000000000..c0aa51b90a --- /dev/null +++ b/src/gallium/auxiliary/gallivm/SConscript @@ -0,0 +1,16 @@ +Import('*') + +gallivm = env.ConvenienceLibrary( + target = 'gallivm', + source = [ + 'gallivm.cpp', + 'gallivm_cpu.cpp', + 'instructions.cpp', + 'loweringpass.cpp', + 'tgsitollvm.cpp', + 'storage.cpp', + 'storagesoa.cpp', + 'instructionssoa.cpp', + ]) + +auxiliaries.insert(0, gallivm) diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp new file mode 100644 index 0000000000..29adeea47d --- /dev/null +++ b/src/gallium/auxiliary/gallivm/gallivm.cpp @@ -0,0 +1,332 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +#ifdef MESA_LLVM + +#include "gallivm.h" +#include "gallivm_p.h" + +#include "instructions.h" +#include "loweringpass.h" +#include "storage.h" +#include "tgsitollvm.h" + +#include "pipe/p_context.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_dump.h" + +#include <llvm/Module.h> +#include <llvm/CallingConv.h> +#include <llvm/Constants.h> +#include <llvm/DerivedTypes.h> +#include <llvm/Instructions.h> +#include <llvm/ModuleProvider.h> +#include <llvm/Pass.h> +#include <llvm/PassManager.h> +#include <llvm/ParameterAttributes.h> +#include <llvm/Support/PatternMatch.h> +#include <llvm/ExecutionEngine/JIT.h> +#include <llvm/ExecutionEngine/Interpreter.h> +#include <llvm/ExecutionEngine/GenericValue.h> +#include <llvm/Support/MemoryBuffer.h> +#include <llvm/LinkAllPasses.h> +#include <llvm/Analysis/Verifier.h> +#include <llvm/Analysis/LoopPass.h> +#include <llvm/Target/TargetData.h> +#include <llvm/Bitcode/ReaderWriter.h> +#include <llvm/Transforms/Utils/Cloning.h> + +#include <sstream> +#include <fstream> +#include <iostream> + +static int GLOBAL_ID = 0; + +using namespace llvm; + +static inline +void AddStandardCompilePasses(PassManager &PM) +{ + PM.add(new LoweringPass()); + PM.add(createVerifierPass()); // Verify that input is correct + + PM.add(createLowerSetJmpPass()); // Lower 
llvm.setjmp/.longjmp + + //PM.add(createStripSymbolsPass(true)); + + PM.add(createRaiseAllocationsPass()); // call %malloc -> malloc inst + PM.add(createCFGSimplificationPass()); // Clean up disgusting code + PM.add(createPromoteMemoryToRegisterPass());// Kill useless allocas + PM.add(createGlobalOptimizerPass()); // Optimize out global vars + PM.add(createGlobalDCEPass()); // Remove unused fns and globs + PM.add(createIPConstantPropagationPass());// IP Constant Propagation + PM.add(createDeadArgEliminationPass()); // Dead argument elimination + PM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE + PM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + + PM.add(createPruneEHPass()); // Remove dead EH info + + PM.add(createFunctionInliningPass()); // Inline small functions + PM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args + + PM.add(createTailDuplicationPass()); // Simplify cfg by copying code + PM.add(createInstructionCombiningPass()); // Cleanup for scalarrepl. + PM.add(createCFGSimplificationPass()); // Merge & remove BBs + PM.add(createScalarReplAggregatesPass()); // Break up aggregate allocas + PM.add(createInstructionCombiningPass()); // Combine silly seq's + PM.add(createCondPropagationPass()); // Propagate conditionals + + PM.add(createTailCallEliminationPass()); // Eliminate tail calls + PM.add(createCFGSimplificationPass()); // Merge & remove BBs + PM.add(createReassociatePass()); // Reassociate expressions + PM.add(createLoopRotatePass()); + PM.add(createLICMPass()); // Hoist loop invariants + PM.add(createLoopUnswitchPass()); // Unswitch loops. + PM.add(createLoopIndexSplitPass()); // Index split loops. 
+ PM.add(createInstructionCombiningPass()); // Clean up after LICM/reassoc + PM.add(createIndVarSimplifyPass()); // Canonicalize indvars + PM.add(createLoopUnrollPass()); // Unroll small loops + PM.add(createInstructionCombiningPass()); // Clean up after the unroller + PM.add(createGVNPass()); // Remove redundancies + PM.add(createSCCPPass()); // Constant prop with SCCP + + // Run instcombine after redundancy elimination to exploit opportunities + // opened up by them. + PM.add(createInstructionCombiningPass()); + PM.add(createCondPropagationPass()); // Propagate conditionals + + PM.add(createDeadStoreEliminationPass()); // Delete dead stores + PM.add(createAggressiveDCEPass()); // SSA based 'Aggressive DCE' + PM.add(createCFGSimplificationPass()); // Merge & remove BBs + PM.add(createSimplifyLibCallsPass()); // Library Call Optimizations + PM.add(createDeadTypeEliminationPass()); // Eliminate dead types + PM.add(createConstantMergePass()); // Merge dup global constants +} + +void gallivm_prog_delete(struct gallivm_prog *prog) +{ + delete prog->module; + prog->module = 0; + prog->function = 0; + free(prog); +} + +static inline void +constant_interpolation(float (*inputs)[16][4], + const struct tgsi_interp_coef *coefs, + unsigned attrib, + unsigned chan) +{ + unsigned i; + + for (i = 0; i < QUAD_SIZE; ++i) { + inputs[i][attrib][chan] = coefs[attrib].a0[chan]; + } +} + +static inline void +linear_interpolation(float (*inputs)[16][4], + const struct tgsi_interp_coef *coefs, + unsigned attrib, + unsigned chan) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + const float x = inputs[i][0][0]; + const float y = inputs[i][0][1]; + + inputs[i][attrib][chan] = + coefs[attrib].a0[chan] + + coefs[attrib].dadx[chan] * x + + coefs[attrib].dady[chan] * y; + } +} + +static inline void +perspective_interpolation(float (*inputs)[16][4], + const struct tgsi_interp_coef *coefs, + unsigned attrib, + unsigned chan ) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + 
const float x = inputs[i][0][0]; + const float y = inputs[i][0][1]; + /* WPOS.w here is really 1/w */ + const float w = 1.0f / inputs[i][0][3]; + assert(inputs[i][0][3] != 0.0); + + inputs[i][attrib][chan] = + (coefs[attrib].a0[chan] + + coefs[attrib].dadx[chan] * x + + coefs[attrib].dady[chan] * y) * w; + } +} + +void gallivm_ir_dump(struct gallivm_ir *ir, const char *file_prefix) +{ + if (!ir || !ir->module) + return; + + if (file_prefix) { + std::ostringstream stream; + stream << file_prefix; + stream << ir->id; + stream << ".ll"; + std::string name = stream.str(); + std::ofstream out(name.c_str()); + if (!out) { + std::cerr<<"Can't open file : "<<stream.str()<<std::endl;; + return; + } + out << (*ir->module); + out.close(); + } else { + const llvm::Module::FunctionListType &funcs = ir->module->getFunctionList(); + llvm::Module::FunctionListType::const_iterator itr; + std::cout<<"; ---------- Start shader "<<ir->id<<std::endl; + for (itr = funcs.begin(); itr != funcs.end(); ++itr) { + const llvm::Function &func = (*itr); + std::string name = func.getName(); + const llvm::Function *found = 0; + if (name.find("vs_shader") != std::string::npos || + name.find("fs_shader") != std::string::npos || + name.find("function") != std::string::npos) + found = &func; + if (found) { + std::cout<<*found<<std::endl; + } + } + std::cout<<"; ---------- End shader "<<ir->id<<std::endl; + } +} + + +void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog, + float (*inputs)[16][4], + const struct tgsi_interp_coef *coef) +{ + for (int i = 0; i < prog->num_interp; ++i) { + const gallivm_interpolate &interp = prog->interpolators[i]; + switch (interp.type) { + case TGSI_INTERPOLATE_CONSTANT: + constant_interpolation(inputs, coef, interp.attrib, interp.chan); + break; + + case TGSI_INTERPOLATE_LINEAR: + linear_interpolation(inputs, coef, interp.attrib, interp.chan); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + perspective_interpolation(inputs, coef, interp.attrib, interp.chan); 
+ break; + + default: + assert( 0 ); + } + } +} + + +struct gallivm_ir * gallivm_ir_new(enum gallivm_shader_type type) +{ + struct gallivm_ir *ir = + (struct gallivm_ir *)calloc(1, sizeof(struct gallivm_ir)); + ++GLOBAL_ID; + ir->id = GLOBAL_ID; + ir->type = type; + + return ir; +} + +void gallivm_ir_set_layout(struct gallivm_ir *ir, + enum gallivm_vector_layout layout) +{ + ir->layout = layout; +} + +void gallivm_ir_set_components(struct gallivm_ir *ir, int num) +{ + ir->num_components = num; +} + +void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir, + const struct tgsi_token *tokens) +{ + std::cout << "Creating llvm from: " <<std::endl; + tgsi_dump(tokens, 0); + + llvm::Module *mod = tgsi_to_llvmir(ir, tokens); + ir->module = mod; + gallivm_ir_dump(ir, 0); +} + +void gallivm_ir_delete(struct gallivm_ir *ir) +{ + delete ir->module; + free(ir); +} + +struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir) +{ + struct gallivm_prog *prog = + (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog)); + + std::cout << "Before optimizations:"<<std::endl; + ir->module->dump(); + std::cout<<"-------------------------------"<<std::endl; + + PassManager veri; + veri.add(createVerifierPass()); + veri.run(*ir->module); + llvm::Module *mod = llvm::CloneModule(ir->module); + prog->num_consts = ir->num_consts; + memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators)); + prog->num_interp = ir->num_interp; + + /* Run optimization passes over it */ + PassManager passes; + passes.add(new TargetData(mod)); + AddStandardCompilePasses(passes); + passes.run(*mod); + prog->module = mod; + + std::cout << "After optimizations:"<<std::endl; + mod->dump(); + + return prog; +} + +#endif /* MESA_LLVM */ diff --git a/src/gallium/auxiliary/gallivm/gallivm.h b/src/gallium/auxiliary/gallivm/gallivm.h new file mode 100644 index 0000000000..36a64a7747 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/gallivm.h @@ -0,0 +1,118 @@ 
+/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ + +#ifndef GALLIVM_H +#define GALLIVM_H + +/* + LLVM representation consists of two stages - layout independent + intermediate representation gallivm_ir and driver specific + gallivm_prog. TGSI is first being translated into gallivm_ir + after that driver can set number of options on gallivm_ir and + have it compiled into gallivm_prog. 
gallivm_prog can be either + executed (assuming there's LLVM JIT backend for the current + target) or machine code generation can be done (assuming there's + a LLVM code generator for thecurrent target) + */ +#if defined __cplusplus +extern "C" { +#endif + +#include "pipe/p_state.h" + +#ifdef MESA_LLVM + +struct tgsi_token; + +struct gallivm_ir; +struct gallivm_prog; +struct gallivm_cpu_engine; +struct tgsi_interp_coef; +struct tgsi_sampler; +struct tgsi_exec_vector; + +enum gallivm_shader_type { + GALLIVM_VS, + GALLIVM_FS +}; + +enum gallivm_vector_layout { + GALLIVM_AOS, + GALLIVM_SOA +}; + +struct gallivm_ir *gallivm_ir_new(enum gallivm_shader_type type); +void gallivm_ir_set_layout(struct gallivm_ir *ir, + enum gallivm_vector_layout layout); +void gallivm_ir_set_components(struct gallivm_ir *ir, int num); +void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir, + const struct tgsi_token *tokens); +void gallivm_ir_delete(struct gallivm_ir *ir); + + +struct gallivm_prog *gallivm_ir_compile(struct gallivm_ir *ir); + +void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog, + float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], + const struct tgsi_interp_coef *coefs); +void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix); + + +struct gallivm_cpu_engine *gallivm_cpu_engine_create(struct gallivm_prog *prog); +struct gallivm_cpu_engine *gallivm_global_cpu_engine(); +int gallivm_cpu_vs_exec(struct gallivm_prog *prog, + struct tgsi_exec_machine *machine, + const float (*input)[4], + unsigned num_inputs, + float (*output)[4], + unsigned num_outputs, + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride); +int gallivm_cpu_fs_exec(struct gallivm_prog *prog, + float x, float y, + float (*dests)[PIPE_MAX_SHADER_INPUTS][4], + float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], + float (*consts)[4], + struct tgsi_sampler *samplers); +void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *ee, struct gallivm_prog *prog); 
+void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *ee); + + +#endif /* MESA_LLVM */ + +#if defined __cplusplus +} +#endif + +#endif diff --git a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp new file mode 100644 index 0000000000..fcc5c05794 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp @@ -0,0 +1,140 @@ +static const unsigned char llvm_builtins_data[] = { +0x42,0x43,0xc0,0xde,0x21,0x0c,0x00,0x00,0x27,0x02,0x00,0x00,0x01,0x10,0x00,0x00, +0x10,0x00,0x00,0x00,0x07,0x81,0x23,0x91,0x41,0xc8,0x04,0x49,0x06,0x10,0x32,0x39, +0x92,0x01,0x84,0x0c,0x25,0x05,0x08,0x19,0x1e,0x04,0x8b,0x62,0x80,0x14,0x45,0x02, +0x42,0x92,0x0b,0x42,0xa4,0x10,0x32,0x14,0x38,0x08,0x18,0x49,0x0a,0x32,0x44,0x24, +0x48,0x0a,0x90,0x21,0x23,0x44,0x72,0x80,0x8c,0x14,0x21,0x86,0x0a,0x8a,0x0a,0x64, +0x0c,0x1f,0x00,0x00,0x49,0x18,0x00,0x00,0x03,0x00,0x00,0x00,0x0b,0x84,0xff,0xff, +0xff,0xff,0x1f,0xc0,0x00,0x00,0x00,0x00,0x51,0x20,0x00,0x00,0x12,0x00,0x00,0x00, +0x32,0x22,0x48,0x09,0x20,0x65,0x82,0x84,0x00,0x26,0x45,0x48,0x05,0x09,0x26,0x45, +0xc6,0x05,0x42,0x52,0x26,0x08,0xae,0x19,0x80,0x61,0x04,0x02,0x98,0x23,0x00,0x83, +0x29,0x80,0x21,0x00,0xb2,0x73,0x04,0x01,0x51,0x8a,0xf4,0x08,0x92,0xa4,0x39,0x47, +0x80,0x50,0x2b,0x03,0x00,0xa0,0x08,0x21,0x5c,0x46,0x2b,0x44,0x08,0x21,0xd4,0x40, +0x14,0x01,0x80,0x11,0x80,0x22,0x88,0x00,0x13,0x30,0x7c,0xc0,0x03,0x3b,0xf8,0x05, +0x3b,0xa0,0x83,0x36,0xa8,0x07,0x77,0x58,0x07,0x77,0x78,0x87,0x7b,0x70,0x87,0x36, +0x60,0x87,0x74,0x70,0x87,0x7a,0xc0,0x87,0x36,0x38,0x07,0x77,0xa8,0x87,0x0d,0xf7, +0x50,0x0e,0x6d,0x00,0x0f,0x7a,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60, +0x07,0x74,0xd0,0x06,0xe9,0x10,0x07,0x7a,0x80,0x07,0x7a,0x80,0x07,0x6d,0x90,0x0e, +0x78,0xa0,0x07,0x78,0xa0,0x07,0x78,0xd0,0x06,0xe9,0x10,0x07,0x76,0xa0,0x07,0x71, +0x60,0x07,0x7a,0x10,0x07,0x76,0xd0,0x06,0xe9,0x30,0x07,0x72,0xa0,0x07,0x73,0x20, 
+0x07,0x7a,0x30,0x07,0x72,0xd0,0x06,0xe9,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07, +0x7a,0x60,0x07,0x74,0xd0,0x06,0xe6,0x30,0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x7a, +0x30,0x07,0x72,0xd0,0x06,0xe6,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60, +0x07,0x74,0xd0,0x06,0xf6,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60,0x07, +0x74,0xd0,0x06,0xf6,0x10,0x07,0x72,0x80,0x07,0x7a,0x10,0x07,0x72,0x80,0x07,0x7a, +0x10,0x07,0x72,0x80,0x07,0x6d,0x10,0x0e,0x70,0xa0,0x07,0x70,0xa0,0x07,0x76,0x40, +0x07,0x6d,0x60,0x0e,0x78,0x00,0x07,0x7a,0x10,0x07,0x72,0x80,0x07,0x7a,0x10,0x07, +0x72,0x80,0x07,0x3a,0x0f,0x84,0x48,0x20,0x23,0x24,0x40,0x00,0x62,0x67,0x88,0x9f, +0x19,0x92,0x24,0x00,0x10,0x04,0x00,0x00,0x00,0x43,0x92,0x04,0x08,0x00,0x00,0x00, +0x00,0x60,0x48,0xa2,0x00,0x40,0x10,0x00,0x00,0x00,0x0c,0x49,0x16,0x00,0x08,0x02, +0x00,0x00,0x80,0x21,0x89,0x02,0x00,0x41,0x00,0x00,0x00,0x30,0x24,0x61,0x80,0x00, +0x00,0x00,0x00,0x00,0x86,0x24,0x07,0x10,0x00,0x00,0x00,0x00,0xc0,0x90,0x44,0x01, +0x80,0x20,0x00,0x00,0x00,0x18,0x92,0x1c,0x40,0x00,0x00,0x00,0x00,0x00,0x43,0x12, +0x05,0x00,0x82,0x00,0x00,0x00,0x60,0x48,0x52,0x00,0x40,0x10,0x00,0x00,0x00,0x64, +0x81,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x32,0x1e,0x98,0x10,0x19,0x11,0x4c,0x90, +0x8c,0x09,0x26,0x47,0xc6,0x04,0x43,0x8a,0x8a,0x59,0x8b,0x43,0x50,0xd2,0x09,0x02, +0x81,0xd2,0x73,0x50,0xc9,0x0c,0x2a,0x99,0x41,0x25,0x33,0xa8,0x64,0x56,0x28,0x66, +0x2d,0x0e,0x41,0xcf,0x2a,0x15,0x04,0x4a,0xcf,0x41,0x25,0x33,0xa8,0x64,0x06,0x95, +0xcc,0xa0,0x92,0x59,0x01,0x00,0x00,0x00,0x53,0x82,0x26,0x0c,0x04,0x00,0x00,0x00, +0x22,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x05,0x00,0x00,0x00, +0x04,0xc6,0x08,0x40,0x10,0x04,0xe1,0x70,0x18,0x23,0x00,0x41,0x10,0x84,0xc3,0x60, +0x04,0x00,0x00,0x00,0xc3,0x0d,0xce,0x43,0x4c,0x37,0x3c,0x8e,0x34,0xdc,0x30,0x41, +0xc2,0x74,0x03,0x34,0x51,0xc3,0x0d,0x4d,0x44,0x4c,0x37,0x44,0x8d,0x35,0x56,0x01, +0x04,0xc3,0x55,0x21,0x16,0x0e,0x04,0x00,0x0f,0x00,0x00,0x00,0xd6,0x10,0x00,0xe6, 
+0x10,0x04,0x76,0x81,0x00,0x3e,0x30,0x0c,0x91,0x4f,0x1b,0x05,0x21,0x30,0x8f,0x6d, +0x13,0x48,0xe0,0x03,0xc3,0x10,0xf9,0xb4,0x55,0x20,0x81,0x0f,0x0c,0x43,0xe4,0xd7, +0x66,0x41,0x08,0xcc,0xa3,0x1f,0x40,0x41,0x34,0x53,0x84,0x99,0xc4,0x20,0x30,0x8f, +0x61,0x10,0x02,0xb0,0x2c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x27,0x00,0x00,0x00,0x13,0x04,0x43,0x2c,0x10,0x00,0x00,0x00,0x08,0x00,0x00,0x00, +0x24,0x8a,0xa0,0x0c,0x46,0x00,0x4a,0x80,0xc2,0x1c,0x84,0x55,0x55,0xd6,0x1c,0x84, +0x45,0x51,0x16,0x81,0x19,0x80,0x11,0x80,0x31,0x02,0x10,0x04,0x41,0xfc,0x03,0x00, +0x63,0x08,0x0d,0x34,0xdc,0x70,0x55,0xc2,0x2c,0x43,0x20,0x60,0x73,0x0c,0xd3,0x15, +0x8d,0x21,0x34,0xd1,0x18,0x42,0xf3,0x8c,0x55,0x00,0x81,0xa0,0x6d,0x73,0x0c,0x19, +0xe7,0x60,0x87,0x52,0x38,0x10,0x00,0x00,0x10,0x00,0x00,0x00,0x27,0x50,0x20,0x05, +0xd1,0x0c,0x17,0x60,0x20,0xc5,0x74,0x10,0x8d,0x65,0x14,0x13,0xf3,0xd4,0xb4,0x6d, +0x14,0x13,0xf3,0xd4,0xb8,0x69,0x14,0x13,0xf3,0xd4,0xb6,0x75,0x14,0x13,0xf3,0xd4, +0xba,0x35,0x0c,0x13,0xf3,0xd8,0x05,0x31,0x31,0x8f,0x6e,0x1c,0x84,0x00,0x2c,0xcb, +0x01,0x14,0x44,0x33,0x45,0x98,0x61,0x0c,0x02,0xf3,0x00,0x00,0x00,0x00,0x00,0x00, +0x61,0x20,0x00,0x00,0x81,0x00,0x00,0x00,0x13,0x04,0x4d,0x2c,0x10,0x00,0x00,0x00, +0x04,0x00,0x00,0x00,0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x91, +0x11,0x00,0x00,0x00,0x63,0x08,0x4d,0x64,0x16,0xc1,0xe1,0x86,0xab,0x22,0x66,0x19, +0x02,0x01,0x1b,0x43,0x70,0xa2,0x59,0x82,0x61,0x0c,0xe1,0x89,0x66,0x09,0x86,0x81, +0x0a,0x20,0x0b,0x34,0x61,0x8e,0x81,0xda,0xa2,0x31,0x84,0x46,0xb2,0x8e,0xe0,0x70, +0x83,0x57,0x11,0xb3,0x0c,0x44,0xf1,0x8d,0x21,0x38,0xd2,0x2c,0x81,0x31,0x86,0xf0, +0x48,0xb3,0x04,0xc6,0x40,0x05,0x00,0x06,0x44,0x18,0x14,0x73,0x0c,0x9c,0x18,0x48, +0x63,0x08,0xcd,0x64,0x64,0x40,0x70,0xb8,0xa1,0x0c,0x2a,0x62,0x96,0xe1,0x40,0xcc, +0x60,0x0c,0xc1,0x99,0x66,0x09,0x92,0x31,0x84,0x67,0x9a,0x25,0x48,0x06,0x2a,0x80, +0x33,0x38,0xd0,0x00,0x99,0x63,0x18,0x83,0x34,0x98,0xc6,0x10,0x1a,0xc8,0xd6,0x80, 
+0xe0,0x70,0x03,0x1b,0x54,0xc4,0x2c,0x83,0xb2,0xb4,0xc1,0x18,0x82,0x03,0xcd,0x12, +0x30,0x63,0x08,0x0f,0x34,0x4b,0xc0,0x0c,0x54,0x00,0x6e,0xa0,0xbc,0xc1,0x32,0xc7, +0xa0,0x06,0x70,0x00,0x61,0x1c,0x84,0x03,0x01,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, +0x76,0x52,0x4c,0xcc,0x73,0xd3,0x24,0x05,0x64,0xec,0xcd,0x8d,0xcc,0xe5,0x87,0x46, +0xc6,0x50,0x8a,0x89,0x79,0xee,0xdb,0x54,0x8a,0x89,0x79,0xee,0xdd,0x1a,0x88,0x89, +0x79,0x68,0x73,0x20,0x26,0xe6,0xa9,0xed,0x81,0x98,0x98,0xc7,0x36,0x0b,0x62,0x62, +0x9e,0xdb,0x32,0x88,0x89,0x79,0x72,0xd3,0x20,0x26,0xe6,0xd9,0x8d,0x83,0x98,0x98, +0xa7,0xb7,0x95,0x62,0x62,0x9e,0xbb,0x27,0x2d,0x20,0x63,0x6f,0x6e,0x64,0x2e,0x3a, +0x34,0x35,0x56,0x62,0x08,0x4e,0x53,0xd9,0xba,0xb5,0x14,0x02,0xf3,0xe0,0xf5,0x25, +0x2c,0x82,0xd3,0x0c,0xbe,0xe0,0x34,0xd3,0x8d,0x9b,0x88,0x21,0x38,0xcd,0x60,0xd7, +0x24,0x01,0x63,0xec,0xcd,0x8d,0xcc,0x45,0x87,0x44,0x80,0x8c,0xbd,0xb9,0x91,0xb9, +0xfc,0xc4,0xd0,0x90,0x02,0x8c,0xb1,0x37,0x37,0x32,0x97,0x1f,0x73,0x29,0x26,0xe6, +0xc1,0x71,0x7b,0x29,0x26,0xe6,0xc1,0x77,0xfb,0x28,0x04,0xe6,0xa9,0x6f,0x52,0x01, +0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x13,0x73,0x63,0x18,0x83,0xc0,0x3c,0xb6,0x41,0x08, +0x4e,0x33,0x58,0x47,0x31,0x31,0x4f,0x5d,0x1f,0xc3,0x22,0x38,0xcd,0xe0,0x0b,0x4e, +0x33,0xe1,0xbc,0xa5,0x18,0x82,0xd3,0x0c,0x77,0x6e,0x20,0xc5,0xc4,0x3c,0xb5,0x4e, +0x3a,0x40,0xc6,0xde,0xdc,0xc8,0x5c,0x7e,0x64,0x70,0x2c,0xa4,0x98,0x98,0xa7,0xee, +0x6f,0x20,0x11,0x9c,0x66,0xf0,0x05,0xa7,0x99,0xec,0x82,0x10,0x9c,0xa6,0x32,0x93, +0x42,0x60,0x1e,0x7b,0xb7,0x98,0x62,0x62,0x9e,0xbc,0x36,0x16,0x43,0x70,0x9a,0x0a, +0xa7,0x6d,0xa4,0x98,0x98,0xc7,0xbe,0x8d,0xa4,0x98,0x98,0xc7,0xce,0x0d,0xc6,0x10, +0x9c,0x66,0xc0,0x7b,0x12,0x02,0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x33,0x13,0x73,0x06, +0x8b,0xe0,0x34,0x83,0x2f,0x38,0xcd,0x64,0xd3,0x07,0x50,0x10,0xcd,0x14,0x61,0xe6, +0x61,0x08,0x4e,0x53,0xd5,0x36,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x4a,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x07,0x00,0x00,0x00, 
+0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0xb9,0x61,0x0c,0x04,0x10, +0x1e,0xe1,0x19,0xc6,0x40,0x02,0xe1,0x11,0x1e,0x00,0x00,0x00,0x63,0x08,0xcd,0x63, +0x15,0xc1,0x31,0x84,0x06,0xb2,0x8b,0xe0,0x18,0x42,0x13,0x59,0x46,0x70,0x0c,0xa1, +0x71,0x6c,0x23,0x38,0x16,0x02,0x04,0xc7,0x64,0x61,0x1a,0x37,0x16,0x01,0x04,0x48, +0x35,0xc7,0x20,0x79,0xcf,0x58,0x04,0x10,0x20,0xd5,0x1c,0xc3,0x07,0x06,0xd0,0x58, +0x04,0x10,0x20,0xd5,0x1c,0x43,0x18,0x88,0x41,0x34,0x16,0x01,0x04,0x48,0x35,0xc7, +0x30,0x06,0x64,0xe0,0x98,0x47,0xd0,0xc0,0x80,0xa0,0x89,0x01,0x41,0x23,0x03,0x82, +0x63,0x21,0x40,0x70,0x50,0x66,0x70,0x06,0x68,0x90,0x06,0x58,0x06,0xe1,0x40,0x00, +0x25,0x00,0x00,0x00,0x56,0x52,0x4c,0xcc,0x73,0xd3,0x56,0x41,0x4c,0xcc,0x53,0xdb, +0x05,0x31,0x31,0xcf,0x6d,0x19,0xc4,0xc4,0x3c,0xba,0x6d,0x10,0x13,0xf3,0xf4,0xd6, +0x41,0x08,0xc0,0xb2,0x18,0x46,0x21,0x38,0x4d,0x85,0x9b,0x46,0x21,0x38,0x4d,0xb5, +0x9b,0x8a,0x21,0x00,0xcb,0x82,0xdf,0x66,0x62,0x08,0x4e,0x53,0xdd,0xb7,0x9d,0x18, +0x82,0xd3,0x54,0xb7,0x6e,0x28,0x86,0xe0,0x34,0xd5,0xdd,0xdb,0x47,0x31,0x31,0x4f, +0x9d,0x9b,0x87,0x21,0x00,0xcb,0x52,0xdf,0x06,0x62,0x08,0xc0,0xb2,0xd4,0xbc,0x59, +0x10,0x82,0xd3,0x54,0x96,0x62,0x08,0x4e,0x53,0xe1,0xb6,0x85,0x14,0x13,0xf3,0xd8, +0xb4,0x8d,0x14,0x13,0xf3,0xd8,0xb9,0x89,0x18,0x02,0xb0,0x2c,0xf6,0x6d,0x24,0x86, +0x00,0x2c,0x8b,0xcd,0x1b,0x87,0x21,0x38,0x4d,0x55,0xd3,0xd6,0x30,0x54,0xc0,0x72, +0x00,0x05,0xd1,0x4c,0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x19,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00, +0x24,0x4a,0x60,0x04,0x80,0xc2,0x0c,0x00,0x00,0x00,0x00,0x00,0x63,0x08,0xcd,0x33, +0x16,0x01,0x04,0x48,0x34,0xc7,0x00,0x49,0xcf,0x58,0x04,0x10,0x28,0xd1,0x1c,0xc3, +0x44,0x39,0x58,0x85,0x03,0x01,0x00,0x00,0x0a,0x00,0x00,0x00,0x26,0x41,0x08,0xc0, +0xb2,0x18,0x45,0x21,0x00,0xcb,0xb2,0x5b,0x04,0x31,0x31,0x8f,0x6d,0x13,0xc4,0xc4, +0x3c,0xb9,0x35,0x0c,0x15,0xb0,0x58,0x05,0x31,0x31,0x4f,0x7f,0x00,0x05,0xd1,0x4c, 
+0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x1b,0x00,0x00,0x00, +0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0xca,0x60,0x04, +0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x00,0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0xca, +0x34,0xc7,0x20,0x51,0xcf,0x1c,0x43,0x45,0x41,0x73,0x0c,0x16,0x15,0xcd,0x31,0x5c, +0x94,0x83,0x58,0x38,0x10,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x76,0x51,0x4c,0xcc, +0x53,0xdb,0x86,0x51,0x4c,0xcc,0x53,0xe7,0x36,0x41,0x4c,0xcc,0x63,0x5b,0x05,0x31, +0x31,0x8f,0x6e,0x16,0xc4,0xc4,0x3c,0xbd,0x51,0x10,0x02,0xb0,0x2c,0xd6,0x30,0x54, +0xc0,0x72,0x00,0x05,0xd1,0x4c,0x11,0x06,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x2c,0x00,0x00,0x00,0x13,0x04,0x45,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00, +0x24,0xca,0xa0,0x04,0x46,0x00,0x8a,0x80,0xc0,0x08,0x00,0x00,0x63,0x08,0x0d,0x34, +0xdc,0x30,0x49,0xc4,0x2c,0x03,0x11,0x50,0x63,0x08,0xcd,0x33,0xdc,0x50,0x49,0xc4, +0x2c,0x03,0x21,0x58,0x63,0x08,0x4d,0x34,0xdc,0x70,0x49,0xc4,0x2c,0x03,0x31,0x60, +0x63,0x08,0x8d,0x33,0xdc,0x90,0x49,0x84,0x69,0x22,0x70,0xc3,0x27,0x1c,0x08,0x00, +0x17,0x00,0x00,0x00,0x96,0x51,0x4c,0xcc,0x53,0xdf,0x66,0x41,0x08,0xcc,0x83,0xdb, +0x04,0x31,0x31,0x4f,0x6d,0x15,0xc4,0xc4,0x3c,0xb7,0x61,0x10,0x02,0xf3,0xf0,0x76, +0x41,0x4c,0xcc,0xb3,0x1f,0x81,0x11,0x11,0x13,0x15,0x35,0x37,0x90,0x2c,0x4e,0xf4, +0x47,0x87,0x54,0xd7,0x17,0x70,0x2c,0x4e,0xf4,0x47,0x87,0x74,0x02,0xc8,0xe2,0x44, +0x7f,0x74,0x48,0xb9,0x69,0x14,0x02,0xf3,0xd4,0xb8,0x6d,0x18,0x11,0x31,0x55,0xc0, +0x62,0x0d,0x43,0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61,0x46,0x31,0x08,0xcc,0x03, +0x00,0x00,0x00,0x00,0x71,0x20,0x00,0x00,0x12,0x00,0x00,0x00,0x66,0x40,0x54,0x82, +0x23,0x19,0xc3,0xa0,0x20,0x8b,0x1d,0x18,0x4f,0x84,0x34,0x53,0x61,0x03,0xc4,0xe3, +0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5,0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c, +0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73,0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84, +0x84,0x34,0x85,0x25,0x0c,0x92,0x20,0x59,0xc1,0x20,0x30,0x8f,0x2d,0x10,0x95,0x84, 
+0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp new file mode 100644 index 0000000000..3a2f2878a3 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp @@ -0,0 +1,245 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +#ifdef MESA_LLVM + +#include "gallivm.h" +#include "gallivm_p.h" + +#include "instructions.h" +#include "loweringpass.h" +#include "storage.h" +#include "tgsitollvm.h" + +#include "pipe/p_context.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_dump.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include <llvm/Module.h> +#include <llvm/CallingConv.h> +#include <llvm/Constants.h> +#include <llvm/DerivedTypes.h> +#include <llvm/Instructions.h> +#include <llvm/ModuleProvider.h> +#include <llvm/Pass.h> +#include <llvm/PassManager.h> +#include <llvm/ParameterAttributes.h> +#include <llvm/Support/PatternMatch.h> +#include <llvm/ExecutionEngine/JIT.h> +#include <llvm/ExecutionEngine/Interpreter.h> +#include <llvm/ExecutionEngine/GenericValue.h> +#include <llvm/Support/MemoryBuffer.h> +#include <llvm/LinkAllPasses.h> +#include <llvm/Analysis/Verifier.h> +#include <llvm/Analysis/LoopPass.h> +#include <llvm/Target/TargetData.h> +#include <llvm/Bitcode/ReaderWriter.h> +#include <llvm/Transforms/Utils/Cloning.h> + +#include <sstream> +#include <fstream> +#include <iostream> + +struct gallivm_cpu_engine { + llvm::ExecutionEngine *engine; +}; + +static struct gallivm_cpu_engine *CPU = 0; + +typedef int (*fragment_shader_runner)(float x, float y, + float (*dests)[16][4], + float (*inputs)[16][4], + int num_attribs, + float (*consts)[4], int num_consts, + struct tgsi_sampler *samplers); + +int gallivm_cpu_fs_exec(struct gallivm_prog *prog, + float fx, float fy, + float (*dests)[16][4], + float (*inputs)[16][4], + float (*consts)[4], + struct tgsi_sampler *samplers) +{ + fragment_shader_runner runner = reinterpret_cast<fragment_shader_runner>(prog->function); + assert(runner); + + return runner(fx, fy, dests, inputs, prog->num_interp, + consts, prog->num_consts, + samplers); +} + 
+static inline llvm::Function *func_for_shader(struct gallivm_prog *prog) +{ + llvm::Module *mod = prog->module; + llvm::Function *func = 0; + + switch (prog->type) { + case GALLIVM_VS: + func = mod->getFunction("vs_shader"); + break; + case GALLIVM_FS: + func = mod->getFunction("fs_shader"); + break; + default: + assert(!"Unknown shader type!"); + break; + } + return func; +} + +/*! + This function creates a CPU based execution engine for the given gallivm_prog. + gallivm_cpu_engine should be used as a singleton throughout the library. Before + executing gallivm_prog_exec one needs to call gallivm_cpu_jit_compile. + The gallivm_prog instance which is being passed to the constructor is being + automatically JIT compiled so one shouldn't call gallivm_cpu_jit_compile + with it again. + */ +struct gallivm_cpu_engine * gallivm_cpu_engine_create(struct gallivm_prog *prog) +{ + struct gallivm_cpu_engine *cpu = (struct gallivm_cpu_engine *) + calloc(1, sizeof(struct gallivm_cpu_engine)); + llvm::Module *mod = static_cast<llvm::Module*>(prog->module); + llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); + llvm::ExecutionEngine *ee = llvm::ExecutionEngine::create(mp, false); + ee->DisableLazyCompilation(); + cpu->engine = ee; + + llvm::Function *func = func_for_shader(prog); + + prog->function = ee->getPointerToFunction(func); + CPU = cpu; + return cpu; +} + + +/*! + This function JIT compiles the given gallivm_prog with the given cpu based execution engine. + The reference to the generated machine code entry point will be stored + in the gallivm_prog program. After executing this function one can call gallivm_prog_exec + in order to execute the gallivm_prog on the CPU. 
+ */ +void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog *prog) +{ + llvm::Module *mod = static_cast<llvm::Module*>(prog->module); + llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); + llvm::ExecutionEngine *ee = cpu->engine; + assert(ee); + /*FIXME : why was this disabled ? we need it for pow/sqrt/... */ + ee->DisableLazyCompilation(false); + ee->addModuleProvider(mp); + + llvm::Function *func = func_for_shader(prog); + prog->function = ee->getPointerToFunction(func); +} + +void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *cpu) +{ + free(cpu); +} + +struct gallivm_cpu_engine * gallivm_global_cpu_engine() +{ + return CPU; +} + + +typedef void (*vertex_shader_runner)(void *ainputs, + void *dests, + float (*aconsts)[4], + void *temps); + +#define MAX_TGSI_VERTICES 4 +/*! + This function is used to execute the gallivm_prog in software. Before calling + this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile + function. + */ +int gallivm_cpu_vs_exec(struct gallivm_prog *prog, + struct tgsi_exec_machine *machine, + const float (*input)[4], + unsigned num_inputs, + float (*output)[4], + unsigned num_outputs, + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) +{ + unsigned int i, j; + unsigned slot; + vertex_shader_runner runner = reinterpret_cast<vertex_shader_runner>(prog->function); + assert(runner); + + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { + unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); + + /* Swizzle inputs. 
+ */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < num_inputs; slot++) { + machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; + machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; + machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; + machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; + } + + input = (const float (*)[4])((const char *)input + input_stride); + } + + /* run shader */ + runner(machine->Inputs, + machine->Outputs, + (float (*)[4]) constants, + machine->Temps); + + /* Unswizzle all output results + */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < num_outputs; slot++) { + output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + output = (float (*)[4])((char *)output + output_stride); + } + } + + return 0; +} + +#endif diff --git a/src/gallium/auxiliary/gallivm/gallivm_p.h b/src/gallium/auxiliary/gallivm/gallivm_p.h new file mode 100644 index 0000000000..ebf3e11cd5 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/gallivm_p.h @@ -0,0 +1,110 @@ +#ifndef GALLIVM_P_H +#define GALLIVM_P_H + +#ifdef MESA_LLVM + +#include "gallivm.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_compiler.h" + +namespace llvm { + class Module; +} + +#if defined __cplusplus +extern "C" { +#endif + +enum gallivm_shader_type; +enum gallivm_vector_layout; + +struct gallivm_interpolate { + int attrib; + int chan; + int type; +}; + +struct gallivm_ir { + llvm::Module *module; + int id; + enum gallivm_shader_type type; + enum gallivm_vector_layout layout; + int num_components; + int num_consts; + + /* FIXME: this might not be enough for some shaders */ + struct gallivm_interpolate interpolators[32*4]; + int num_interp; +}; + +struct gallivm_prog { + llvm::Module *module; + void *function; + + int id; + enum gallivm_shader_type type; + + int num_consts; + + /* FIXME: 
this might not be enough for some shaders */ + struct gallivm_interpolate interpolators[32*4]; + int num_interp; +}; + +static INLINE void gallivm_swizzle_components(int swizzle, + int *xc, int *yc, + int *zc, int *wc) +{ + int x = swizzle / 1000; swizzle -= x * 1000; + int y = swizzle / 100; swizzle -= y * 100; + int z = swizzle / 10; swizzle -= z * 10; + int w = swizzle; + + if (xc) *xc = x; + if (yc) *yc = y; + if (zc) *zc = z; + if (wc) *wc = w; +} + +static INLINE boolean gallivm_is_swizzle(int swizzle) +{ + const int NO_SWIZZLE = TGSI_SWIZZLE_X * 1000 + TGSI_SWIZZLE_Y * 100 + + TGSI_SWIZZLE_Z * 10 + TGSI_SWIZZLE_W; + return swizzle != NO_SWIZZLE; +} + +static INLINE int gallivm_x_swizzle(int swizzle) +{ + int x; + gallivm_swizzle_components(swizzle, &x, 0, 0, 0); + return x; +} + +static INLINE int gallivm_y_swizzle(int swizzle) +{ + int y; + gallivm_swizzle_components(swizzle, 0, &y, 0, 0); + return y; +} + +static INLINE int gallivm_z_swizzle(int swizzle) +{ + int z; + gallivm_swizzle_components(swizzle, 0, 0, &z, 0); + return z; +} + +static INLINE int gallivm_w_swizzle(int swizzle) +{ + int w; + gallivm_swizzle_components(swizzle, 0, 0, 0, &w); + return w; +} + +#endif /* MESA_LLVM */ + +#if defined __cplusplus +} +#endif + +#endif diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp new file mode 100644 index 0000000000..599975d5ad --- /dev/null +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -0,0 +1,1193 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +#ifdef MESA_LLVM + +#include "instructions.h" + +#include "storage.h" + +#include "util/u_memory.h" + +#include <llvm/CallingConv.h> +#include <llvm/Constants.h> +#include <llvm/DerivedTypes.h> +#include <llvm/Function.h> +#include <llvm/InstrTypes.h> +#include <llvm/Instructions.h> +#include <llvm/ParameterAttributes.h> +#include <llvm/Support/MemoryBuffer.h> +#include <llvm/Bitcode/ReaderWriter.h> + +#include <sstream> +#include <fstream> +#include <iostream> + +using namespace llvm; + +#include "gallivm_builtins.cpp" + +#if 0 +llvm::Value *arrayFromChannels(std::vector<llvm::Value*> &vals) +{ + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + ArrayType *vectorArray = ArrayType::get(vectorType, 4); +} +#endif + +static inline std::string createFuncName(int label) +{ + std::ostringstream stream; + stream << "function"; + stream << label; + return stream.str(); +} + +Instructions::Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, + Storage *storage) + : m_mod(mod), m_func(func), m_builder(block), m_idx(0), + m_storage(storage) +{ + m_floatVecType = VectorType::get(Type::FloatTy, 4); + + m_llvmFSqrt = 0; + m_llvmFAbs = 0; + m_llvmPow = 0; + m_llvmFloor = 0; + m_llvmFlog = 0; + m_llvmFexp = 0; + m_llvmLit = 0; + m_fmtPtr = 0; + + MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( + (const char*)&llvm_builtins_data[0], + (const char*)&llvm_builtins_data[Elements(llvm_builtins_data)-1]); + m_mod = ParseBitcodeFile(buffer); +} + +llvm::BasicBlock * Instructions::currentBlock() const +{ + return m_builder.GetInsertBlock(); +} + +llvm::Value * Instructions::abs(llvm::Value *in) +{ + std::vector<llvm::Value*> vec = extractVector(in); + Value *xabs = callFAbs(vec[0]); + Value *yabs = callFAbs(vec[1]); + Value *zabs = callFAbs(vec[2]); + Value *wabs = callFAbs(vec[3]); + return 
vectorFromVals(xabs, yabs, zabs, wabs); +} + +llvm::Value * Instructions::add(llvm::Value *in1, llvm::Value *in2) +{ + return m_builder.CreateAdd(in1, in2, name("add")); +} + +llvm::Value * Instructions::arl(llvm::Value *in) +{ + return floor(in); +} + +void Instructions::beginLoop() +{ + BasicBlock *begin = BasicBlock::Create(name("loop"), m_func,0); + BasicBlock *end = BasicBlock::Create(name("endloop"), m_func,0); + + m_builder.CreateBr(begin); + Loop loop; + loop.begin = begin; + loop.end = end; + m_builder.SetInsertPoint(begin); + m_loopStack.push(loop); +} + +void Instructions::bgnSub(unsigned label) +{ + llvm::Function *func = findFunction(label); + + Function::arg_iterator args = func->arg_begin(); + Value *ptr_INPUT = args++; + ptr_INPUT->setName("INPUT"); + m_storage->pushArguments(ptr_INPUT); + + llvm::BasicBlock *entry = BasicBlock::Create("entry", func, 0); + + m_func = func; + m_builder.SetInsertPoint(entry); +} + +void Instructions::brk() +{ + assert(!m_loopStack.empty()); + BasicBlock *unr = BasicBlock::Create(name("unreachable"), m_func,0); + m_builder.CreateBr(m_loopStack.top().end); + m_builder.SetInsertPoint(unr); +} + +void Instructions::cal(int label, llvm::Value *input) +{ + std::vector<Value*> params; + params.push_back(input); + llvm::Function *func = findFunction(label); + + m_builder.CreateCall(func, params.begin(), params.end()); +} + +llvm::Value * Instructions::ceil(llvm::Value *in) +{ + std::vector<llvm::Value*> vec = extractVector(in); + return vectorFromVals(callCeil(vec[0]), callCeil(vec[1]), + callCeil(vec[2]), callCeil(vec[3])); +} + +llvm::Value * Instructions::clamp(llvm::Value *in1) +{ + llvm::Value *zero = constVector(0.0f, 0.0f, 0.0f, 0.0f); + llvm::Value *one = constVector(1.0f, 1.0f, 1.0f, 1.0f); + return min( max(zero, in1), one); +} + +llvm::Value * Instructions::cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) +{ + llvm::Function *func = m_mod->getFunction("cmp"); + assert(func); + + std::vector<Value*> params; 
+ params.push_back(in1); + params.push_back(in2); + params.push_back(in3); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end(), name("cmpres")); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::cnd(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) +{ + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + std::vector<llvm::Value*> vec3 = extractVector(in3); + Constant *half = ConstantFP::get(APFloat(0.5f)); + + Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], half, name("xcmp")); + Value *selx = m_builder.CreateSelect(xcmp, vec2[0], vec3[0], + name("selx")); + + Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], half, name("ycmp")); + Value *sely = m_builder.CreateSelect(ycmp, vec2[1], vec3[1], + name("sely")); + + Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], half, name("zcmp")); + Value *selz = m_builder.CreateSelect(zcmp, vec2[2], vec3[2], + name("selz")); + + Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], half, name("wcmp")); + Value *selw = m_builder.CreateSelect(wcmp, vec2[3], vec3[3], + name("selw")); + + return vectorFromVals(selx, sely, selz, selw); +} + +llvm::Value * Instructions::cnd0(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) +{ + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + std::vector<llvm::Value*> vec3 = extractVector(in3); + Constant *zero = Constant::getNullValue(Type::FloatTy); + + Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], zero, name("xcmp")); + Value *selx = m_builder.CreateSelect(xcmp, vec2[0], vec3[0], + name("selx")); + + Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], zero, name("ycmp")); + Value *sely = m_builder.CreateSelect(ycmp, vec2[1], vec3[1], + name("sely")); + + Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], zero, name("zcmp")); + Value *selz = m_builder.CreateSelect(zcmp, vec2[2], vec3[2], + name("selz")); + + Value *wcmp = 
m_builder.CreateFCmpOGE(vec1[3], zero, name("wcmp")); + Value *selw = m_builder.CreateSelect(wcmp, vec2[3], vec3[3], + name("selw")); + + return vectorFromVals(selx, sely, selz, selw); +} + +llvm::Value * Instructions::cos(llvm::Value *in) +{ +#if 0 + llvm::Function *func = m_mod->getFunction("vcos"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("cosres")); + call->setTailCall(false); + return call; +#else + std::vector<llvm::Value*> elems = extractVector(in); + Function *func = m_mod->getFunction("cosf"); + assert(func); + CallInst *cos = m_builder.CreateCall(func, elems[0], name("cosres")); + cos->setCallingConv(CallingConv::C); + cos->setTailCall(true); + return vectorFromVals(cos, cos, cos, cos); +#endif +} + +llvm::Value * Instructions::cross(llvm::Value *in1, llvm::Value *in2) +{ + Value *x1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("x1")); + Value *y1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(1), + name("y1")); + Value *z1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(2), + name("z1")); + + Value *x2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(0), + name("x2")); + Value *y2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(1), + name("y2")); + Value *z2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(2), + name("z2")); + Value *y1z2 = mul(y1, z2); + Value *z1y2 = mul(z1, y2); + + Value *z1x2 = mul(z1, x2); + Value *x1z2 = mul(x1, z2); + + Value *x1y2 = mul(x1, y2); + Value *y1x2 = mul(y1, x2); + + return vectorFromVals(sub(y1z2, z1y2), sub(z1x2, x1z2), sub(x1y2, y1x2)); +} + +llvm::Value * Instructions::ddx(llvm::Value *in) +{ + // FIXME + assert(0); +} + +llvm::Value * Instructions::ddy(llvm::Value *in) +{ + // FIXME + assert(0); +} + +llvm::Value * Instructions::div(llvm::Value *in1, llvm::Value *in2) +{ + return m_builder.CreateFDiv(in1, in2, name("div")); +} + +llvm::Value * Instructions::dot2add(llvm::Value 
*in1, llvm::Value *in2, llvm::Value *in3)
+{
+   /* NOTE(review): the addend is read from channel 2 of in3 - confirm this
+    * matches the intended definition of the instruction. */
+   Value *mulRes = mul(in1, in2);
+   Value *x = m_builder.CreateExtractElement(mulRes,
+                                             m_storage->constantInt(0),
+                                             name("extractx"));
+   Value *y = m_builder.CreateExtractElement(mulRes,
+                                             m_storage->constantInt(1),
+                                             name("extracty"));
+   Value *z = m_builder.CreateExtractElement(in3,
+                                             m_storage->constantInt(2),
+                                             name("extractz"));
+   Value *xy = m_builder.CreateAdd(x, y,name("xy"));
+   Value *dot2add = m_builder.CreateAdd(xy, z, name("dot2add"));
+   return vectorFromVals(dot2add, dot2add, dot2add, dot2add);
+}
+
+/* Two-component dot product, replicated to all channels. */
+llvm::Value * Instructions::dp2(llvm::Value *in1, llvm::Value *in2)
+{
+   Value *mulRes = mul(in1, in2);
+   Value *x = m_builder.CreateExtractElement(mulRes,
+                                             m_storage->constantInt(0),
+                                             name("extractx"));
+   Value *y = m_builder.CreateExtractElement(mulRes,
+                                             m_storage->constantInt(1),
+                                             name("extracty"));
+   Value *xy = m_builder.CreateAdd(x, y,name("xy"));
+   return vectorFromVals(xy, xy, xy, xy);
+}
+
+/* Three-component dot product, replicated to all channels. */
+llvm::Value * Instructions::dp3(llvm::Value *in1, llvm::Value *in2)
+{
+   Value *mulRes = mul(in1, in2);
+   Value *x = m_builder.CreateExtractElement(mulRes,
+                                             m_storage->constantInt(0),
+                                             name("extractx"));
+   Value *y = m_builder.CreateExtractElement(mulRes,
+                                             m_storage->constantInt(1),
+                                             name("extracty"));
+   Value *z = m_builder.CreateExtractElement(mulRes,
+                                             m_storage->constantInt(2),
+                                             name("extractz"));
+   Value *xy = m_builder.CreateAdd(x, y,name("xy"));
+   Value *dot3 = m_builder.CreateAdd(xy, z, name("dot3"));
+   return vectorFromVals(dot3, dot3, dot3, dot3);
+}
+
+/* Four-component dot product, replicated to all channels. */
+llvm::Value * Instructions::dp4(llvm::Value *in1, llvm::Value *in2)
+{
+   Value *mulRes = mul(in1, in2);
+   std::vector<llvm::Value*> vec = extractVector(mulRes);
+   Value *xy = m_builder.CreateAdd(vec[0], vec[1], name("xy"));
+   Value *xyz = m_builder.CreateAdd(xy, vec[2], name("xyz"));
+   Value *dot4 = m_builder.CreateAdd(xyz, vec[3], name("dot4"));
+   return vectorFromVals(dot4, dot4, dot4, dot4);
+}
+
+/* Homogeneous dot product:
+ *    in1.x*in2.x + in1.y*in2.y + in1.z*in2.z + in2.w
+ * replicated to all channels.  The last term is in2.w itself, not the
+ * product in1.w*in2.w (that would be dp4). */
+llvm::Value * Instructions::dph(llvm::Value *in1, llvm::Value *in2)
+{
+   Value *mulRes = mul(in1, in2);
+   std::vector<llvm::Value*> vec1 = extractVector(mulRes);
+   Value *w2 = m_builder.CreateExtractElement(in2,
+                                              m_storage->constantInt(3),
+                                              name("w2"));
+   Value *xy = m_builder.CreateAdd(vec1[0], vec1[1], name("xy"));
+   Value *xyz = m_builder.CreateAdd(xy, vec1[2], name("xyz"));
+   Value *dph = m_builder.CreateAdd(xyz, w2, name("dph"));
+   return vectorFromVals(dph, dph, dph, dph);
+}
+
+/* Builds (1, in1.y*in2.y, in1.z, in2.w). */
+llvm::Value * Instructions::dst(llvm::Value *in1, llvm::Value *in2)
+{
+   Value *y1 = m_builder.CreateExtractElement(in1,
+                                              m_storage->constantInt(1),
+                                              name("y1"));
+   Value *z = m_builder.CreateExtractElement(in1,
+                                             m_storage->constantInt(2),
+                                             name("z"));
+   Value *y2 = m_builder.CreateExtractElement(in2,
+                                              m_storage->constantInt(1),
+                                              name("y2"));
+   Value *w = m_builder.CreateExtractElement(in2,
+                                             m_storage->constantInt(3),
+                                             name("w"));
+   Value *ry = m_builder.CreateMul(y1, y2, name("tyuy"));
+   return vectorFromVals(ConstantFP::get(APFloat(1.f)),
+                         ry, z, w);
+}
+
+/* Ends the "then" part of the innermost if: branches to a new end block and
+ * continues emitting into the block previously on top of the if stack,
+ * which becomes the else block. */
+void Instructions::elseop()
+{
+   assert(!m_ifStack.empty());
+   BasicBlock *ifend = BasicBlock::Create(name("ifend"), m_func,0);
+   m_builder.CreateBr(ifend);
+   m_builder.SetInsertPoint(m_ifStack.top());
+   currentBlock()->setName(name("ifelse"));
+   m_ifStack.pop();
+   m_ifStack.push(ifend);
+}
+
+/* Closes the innermost if by branching to, and continuing in, its end block. */
+void Instructions::endif()
+{
+   assert(!m_ifStack.empty());
+   m_builder.CreateBr(m_ifStack.top());
+   m_builder.SetInsertPoint(m_ifStack.top());
+   m_ifStack.pop();
+}
+
+/* Closes the innermost loop: branches back to its begin block and continues
+ * emitting into its end block. */
+void Instructions::endLoop()
+{
+   assert(!m_loopStack.empty());
+   Loop loop = m_loopStack.top();
+   m_builder.CreateBr(loop.begin);
+   loop.end->moveAfter(currentBlock());
+   m_builder.SetInsertPoint(loop.end);
+   m_loopStack.pop();
+}
+
+/* Emits a void return. */
+void Instructions::end()
+{
+   m_builder.CreateRetVoid();
+}
+
+/* Leaves the current subroutine; no function or insert point is active
+ * afterwards. */
+void Instructions::endSub()
+{
+   m_func = 0;
+   m_builder.SetInsertPoint(0);
+}
+
+/* Per-component callFExp. */
+llvm::Value * Instructions::exp(llvm::Value *in)
+{
+   std::vector<llvm::Value*> vec = extractVector(in);
+   return vectorFromVals(callFExp(vec[0]), callFExp(vec[1]),
+                         callFExp(vec[2]), callFExp(vec[3]));
+}
+
+llvm::Value *
Instructions::ex2(llvm::Value *in) +{ + llvm::Value *val = callPow(ConstantFP::get(APFloat(2.f)), + m_builder.CreateExtractElement( + in, m_storage->constantInt(0), + name("x1"))); + return vectorFromVals(val, val, val, val); +} + +llvm::Value * Instructions::floor(llvm::Value *in) +{ + std::vector<llvm::Value*> vec = extractVector(in); + return vectorFromVals(callFloor(vec[0]), callFloor(vec[1]), + callFloor(vec[2]), callFloor(vec[3])); +} + +llvm::Value * Instructions::frc(llvm::Value *in) +{ + llvm::Value *flr = floor(in); + return sub(in, flr); +} + +void Instructions::ifop(llvm::Value *in) +{ + BasicBlock *ifthen = BasicBlock::Create(name("ifthen"), m_func,0); + BasicBlock *ifend = BasicBlock::Create(name("ifthenend"), m_func,0); + + //BasicBlock *yblock = new BasicBlock(name("yblock"), m_func,0); + //BasicBlock *zblock = new BasicBlock(name("zblock"), m_func,0); + //BasicBlock *wblock = new BasicBlock(name("wblock"), m_func,0); + + Constant *float0 = Constant::getNullValue(Type::FloatTy); + + Value *x = m_builder.CreateExtractElement(in, m_storage->constantInt(0), + name("extractx")); + Value *xcmp = m_builder.CreateFCmpUNE(x, float0, name("xcmp")); + m_builder.CreateCondBr(xcmp, ifthen, ifend); + //m_builder.SetInsertPoint(yblock); + + m_builder.SetInsertPoint(ifthen); + m_ifStack.push(ifend); +} + +llvm::Value * Instructions::kil(llvm::Value *in) +{ + llvm::Function *func = m_mod->getFunction("kil"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("kilpres")); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::lerp(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in3) +{ + llvm::Value *m = mul(in1, in2); + llvm::Value *vec1 = constVector(1.f, 1.f, 1.f, 1.f); + llvm::Value *s = sub(vec1, in1); + return add(m, mul(s, in3)); +} + +llvm::Value * Instructions::lg2(llvm::Value *in) +{ + std::vector<llvm::Value*> vec = extractVector(in); + llvm::Value *const_vec = constVector(1.442695f, 1.442695f, + 1.442695f, 
1.442695f); + return mul(vectorFromVals(callFLog(vec[0]), callFLog(vec[1]), + callFLog(vec[2]), callFLog(vec[3])), const_vec); +} + +llvm::Value * Instructions::lit(llvm::Value *in) +{ + if (!m_llvmLit) { + m_llvmLit = m_mod->getFunction("lit"); + } + CallInst *call = m_builder.CreateCall(m_llvmLit, in, name("litres")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::log(llvm::Value *in) +{ + std::vector<llvm::Value*> vec = extractVector(in); + return vectorFromVals(callFLog(vec[0]), callFLog(vec[1]), + callFLog(vec[2]), callFLog(vec[3])); +} + +llvm::Value * Instructions::madd(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in3) +{ + Value *mulRes = mul(in1, in2); + return add(mulRes, in3); +} + +llvm::Value * Instructions::max(llvm::Value *in1, llvm::Value *in2) +{ + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], + name("xcmp")); + Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0], + name("selx")); + + Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], + name("ycmp")); + Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1], + name("sely")); + + Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], + name("zcmp")); + Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2], + name("selz")); + + Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], + name("wcmp")); + Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3], + name("selw")); + + return vectorFromVals(selx, sely, selz, selw); +} + +llvm::Value * Instructions::min(llvm::Value *in1, llvm::Value *in2) +{ + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp")); + Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0], + name("selx")); + + Value *ycmp = 
m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp")); + Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1], + name("sely")); + + Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp")); + Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2], + name("selz")); + + Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp")); + Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3], + name("selw")); + + return vectorFromVals(selx, sely, selz, selw); +} + +llvm::Value * Instructions::mul(llvm::Value *in1, llvm::Value *in2) +{ + return m_builder.CreateMul(in1, in2, name("mul")); +} + +llvm::Value * Instructions::neg(llvm::Value *in) +{ + Value *neg = m_builder.CreateNeg(in, name("neg")); + return neg; +} + +llvm::Value * Instructions::nrm(llvm::Value *in) +{ + llvm::Value *v = rsq(in); + return mul(v, in); +} + +llvm::Value * Instructions::pow(llvm::Value *in1, llvm::Value *in2) +{ + Value *x1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("x1")); + Value *x2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(0), + name("x2")); + llvm::Value *val = callPow(x1, x2); + return vectorFromVals(val, val, val, val); +} + +llvm::Value * Instructions::rcp(llvm::Value *in1) +{ + Value *x1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("x1")); + Value *res = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), + x1, name("rcp")); + return vectorFromVals(res, res, res, res); +} + +llvm::Value * Instructions::rsq(llvm::Value *in1) +{ + Value *x = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("extractx")); + Value *abs = callFAbs(x); + Value *sqrt = callFSqrt(abs); + + Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), + sqrt, + name("rsqrt")); + return vectorFromVals(rsqrt, rsqrt, rsqrt, rsqrt); +} + +llvm::Value * Instructions::scs(llvm::Value *in) +{ + llvm::Function *func = m_mod->getFunction("scs"); + assert(func); + + CallInst 
*call = m_builder.CreateCall(func, in, name("scsres")); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::seq(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOEQ(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOEQ(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOEQ(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOEQ(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} + +llvm::Value * Instructions::sfl(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + return vectorFromVals(const0f, const0f, const0f, const0f); +} + +llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, 
name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} + +llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} + +llvm::Value * Instructions::sin(llvm::Value *in) +{ + llvm::Function *func = m_mod->getFunction("vsin"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("sinres")); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::sle(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOLE(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOLE(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, 
name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOLE(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOLE(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} + +llvm::Value * Instructions::slt(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} + +llvm::Value * Instructions::sne(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpONE(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpONE(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpONE(vec1[2], vec2[2], name("zcmp")); + 
Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpONE(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} + +llvm::Value * Instructions::str(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + + return vectorFromVals(const1f, const1f, const1f, const1f); +} + +llvm::Value * Instructions::sub(llvm::Value *in1, llvm::Value *in2) +{ + Value *res = m_builder.CreateSub(in1, in2, name("sub")); + return res; +} + +llvm::Value * Instructions::trunc(llvm::Value *in) +{ + std::vector<llvm::Value*> vec = extractVector(in); + Value *icastx = m_builder.CreateFPToSI(vec[0], IntegerType::get(32), + name("ftoix")); + Value *icasty = m_builder.CreateFPToSI(vec[1], IntegerType::get(32), + name("ftoiy")); + Value *icastz = m_builder.CreateFPToSI(vec[2], IntegerType::get(32), + name("ftoiz")); + Value *icastw = m_builder.CreateFPToSI(vec[3], IntegerType::get(32), + name("ftoiw")); + Value *fx = m_builder.CreateSIToFP(icastx, Type::FloatTy, + name("fx")); + Value *fy = m_builder.CreateSIToFP(icasty, Type::FloatTy, + name("fy")); + Value *fz = m_builder.CreateSIToFP(icastz, Type::FloatTy, + name("fz")); + Value *fw = m_builder.CreateSIToFP(icastw, Type::FloatTy, + name("fw")); + return vectorFromVals(fx, fy, fz, fw); +} + +llvm::Value * Instructions::x2d(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) +{ + std::vector<llvm::Value*> vec1 = extractVector(in1); + std::vector<llvm::Value*> vec2 = extractVector(in2); + std::vector<llvm::Value*> vec3 = extractVector(in3); + + Value *x2x3 = m_builder.CreateMul( vec2[0], vec3[0], name("x2x3")); + Value *y2y3 = m_builder.CreateMul( vec2[1], vec3[1], name("y2y3")); + Value *x1px2x3 = m_builder.CreateAdd (vec1[0], x2x3, name("x1 + x2x3")); + Value *x1px2x3py2y3 = m_builder.CreateAdd (x1px2x3, y2y3, name("x1 + x2x3 + y2y3")); + + Value 
*x2z3 = m_builder.CreateMul( vec2[0], vec3[2], name("x2z3")); + Value *y2w3 = m_builder.CreateMul( vec2[1], vec3[3], name("y2w3")); + Value *y1px2z3 = m_builder.CreateAdd (vec1[1], x2z3, name("y1 + x2z3")); + Value *y1px2z3py2w3 = m_builder.CreateAdd (y1px2z3, y2w3, name("y1 + x2z3 + y2w3")); + + return vectorFromVals(x1px2x3py2y3, y1px2z3py2w3, x1px2x3py2y3, y1px2z3py2w3); +} + +void Instructions::printVector(llvm::Value *val) +{ + static const char *frmt = "Vector is [%f, %f, %f, %f]\x0A"; + + if (!m_fmtPtr) { + Constant *format = ConstantArray::get(frmt, true); + ArrayType *arrayTy = ArrayType::get(IntegerType::get(8), strlen(frmt) + 1); + GlobalVariable* globalFormat = new GlobalVariable( + /*Type=*/arrayTy, + /*isConstant=*/true, + /*Linkage=*/GlobalValue::InternalLinkage, + /*Initializer=*/0, // has initializer, specified below + /*Name=*/name(".str"), + m_mod); + globalFormat->setInitializer(format); + + Constant* const_int0 = Constant::getNullValue(IntegerType::get(32)); + std::vector<Constant*> const_ptr_21_indices; + const_ptr_21_indices.push_back(const_int0); + const_ptr_21_indices.push_back(const_int0); + m_fmtPtr = ConstantExpr::getGetElementPtr(globalFormat, + &const_ptr_21_indices[0], const_ptr_21_indices.size()); + } + + Function *func_printf = m_mod->getFunction("printf"); + if (!func_printf) + func_printf = declarePrintf(); + assert(func_printf); + std::vector<llvm::Value*> vec = extractVector(val); + Value *dx = m_builder.CreateFPExt(vec[0], Type::DoubleTy, name("dx")); + Value *dy = m_builder.CreateFPExt(vec[1], Type::DoubleTy, name("dy")); + Value *dz = m_builder.CreateFPExt(vec[2], Type::DoubleTy, name("dz")); + Value *dw = m_builder.CreateFPExt(vec[3], Type::DoubleTy, name("dw")); + std::vector<Value*> params; + params.push_back(m_fmtPtr); + params.push_back(dx); + params.push_back(dy); + params.push_back(dz); + params.push_back(dw); + CallInst *call = m_builder.CreateCall(func_printf, params.begin(), params.end(), + name("printf")); + 
call->setCallingConv(CallingConv::C); + call->setTailCall(true); +} + +const char * Instructions::name(const char *prefix) +{ + ++m_idx; + snprintf(m_name, 32, "%s%d", prefix, m_idx); + return m_name; +} + +llvm::Value * Instructions::callCeil(llvm::Value *val) +{ + if (!m_llvmCeil) { + // predeclare the intrinsic + std::vector<const Type*> ceilArgs; + ceilArgs.push_back(Type::FloatTy); + AttrListPtr ceilPal; + FunctionType* ceilType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/ceilArgs, + /*isVarArg=*/false); + m_llvmCeil = Function::Create( + /*Type=*/ceilType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"ceilf", m_mod); + m_llvmCeil->setCallingConv(CallingConv::C); + m_llvmCeil->setAttributes(ceilPal); + } + CallInst *call = m_builder.CreateCall(m_llvmCeil, val, + name("ceilf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value *Instructions::callFAbs(llvm::Value *val) +{ + if (!m_llvmFAbs) { + // predeclare the intrinsic + std::vector<const Type*> fabsArgs; + fabsArgs.push_back(Type::FloatTy); + AttrListPtr fabsPal; + FunctionType* fabsType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/fabsArgs, + /*isVarArg=*/false); + m_llvmFAbs = Function::Create( + /*Type=*/fabsType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"fabs", m_mod); + m_llvmFAbs->setCallingConv(CallingConv::C); + m_llvmFAbs->setAttributes(fabsPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFAbs, val, + name("fabs")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::callFExp(llvm::Value *val) +{ + if (!m_llvmFexp) { + // predeclare the intrinsic + std::vector<const Type*> fexpArgs; + fexpArgs.push_back(Type::FloatTy); + AttrListPtr fexpPal; + FunctionType* fexpType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/fexpArgs, + /*isVarArg=*/false); + m_llvmFexp = Function::Create( + /*Type=*/fexpType, + 
/*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"expf", m_mod); + m_llvmFexp->setCallingConv(CallingConv::C); + m_llvmFexp->setAttributes(fexpPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFexp, val, + name("expf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::callFLog(llvm::Value *val) +{ + if (!m_llvmFlog) { + // predeclare the intrinsic + std::vector<const Type*> flogArgs; + flogArgs.push_back(Type::FloatTy); + AttrListPtr flogPal; + FunctionType* flogType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/flogArgs, + /*isVarArg=*/false); + m_llvmFlog = Function::Create( + /*Type=*/flogType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"logf", m_mod); + m_llvmFlog->setCallingConv(CallingConv::C); + m_llvmFlog->setAttributes(flogPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFlog, val, + name("logf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::callFloor(llvm::Value *val) +{ + if (!m_llvmFloor) { + // predeclare the intrinsic + std::vector<const Type*> floorArgs; + floorArgs.push_back(Type::FloatTy); + AttrListPtr floorPal; + FunctionType* floorType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/floorArgs, + /*isVarArg=*/false); + m_llvmFloor = Function::Create( + /*Type=*/floorType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"floorf", m_mod); + m_llvmFloor->setCallingConv(CallingConv::C); + m_llvmFloor->setAttributes(floorPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFloor, val, + name("floorf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value *Instructions::callFSqrt(llvm::Value *val) +{ + if (!m_llvmFSqrt) { + // predeclare the intrinsic + std::vector<const Type*> fsqrtArgs; + fsqrtArgs.push_back(Type::FloatTy); + AttrListPtr fsqrtPal; + FunctionType* fsqrtType = FunctionType::get( + 
/*Result=*/Type::FloatTy, + /*Params=*/fsqrtArgs, + /*isVarArg=*/false); + m_llvmFSqrt = Function::Create( + /*Type=*/fsqrtType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"llvm.sqrt.f32", m_mod); + m_llvmFSqrt->setCallingConv(CallingConv::C); + m_llvmFSqrt->setAttributes(fsqrtPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFSqrt, val, + name("sqrt")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2) +{ + if (!m_llvmPow) { + // predeclare the intrinsic + std::vector<const Type*> powArgs; + powArgs.push_back(Type::FloatTy); + powArgs.push_back(Type::FloatTy); + AttrListPtr powPal; + FunctionType* powType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/powArgs, + /*isVarArg=*/false); + m_llvmPow = Function::Create( + /*Type=*/powType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"llvm.pow.f32", m_mod); + m_llvmPow->setCallingConv(CallingConv::C); + m_llvmPow->setAttributes(powPal); + } + std::vector<Value*> params; + params.push_back(val1); + params.push_back(val2); + CallInst *call = m_builder.CreateCall(m_llvmPow, params.begin(), params.end(), + name("pow")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::vectorFromVals(llvm::Value *x, llvm::Value *y, + llvm::Value *z, llvm::Value *w) +{ + Constant *const_vec = Constant::getNullValue(m_floatVecType); + Value *res = m_builder.CreateInsertElement(const_vec, x, + m_storage->constantInt(0), + name("vecx")); + res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1), + name("vecxy")); + res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2), + name("vecxyz")); + if (w) + res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3), + name("vecxyzw")); + return res; +} + +llvm::Value * Instructions::constVector(float x, float y, float z, float w) +{ + 
std::vector<Constant*> vec(4); + vec[0] = ConstantFP::get(APFloat(x)); + vec[1] = ConstantFP::get(APFloat(y)); + vec[2] = ConstantFP::get(APFloat(z)); + vec[3] = ConstantFP::get(APFloat(w)); + return ConstantVector::get(m_floatVecType, vec); +} + +llvm::Function * Instructions::declarePrintf() +{ + std::vector<const Type*> args; + AttrListPtr params; + FunctionType* funcTy = FunctionType::get( + /*Result=*/IntegerType::get(32), + /*Params=*/args, + /*isVarArg=*/true); + Function* func_printf = Function::Create( + /*Type=*/funcTy, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"printf", m_mod); + func_printf->setCallingConv(CallingConv::C); + func_printf->setAttributes(params); + return func_printf; +} + +llvm::Function * Instructions::declareFunc(int label) +{ + PointerType *vecPtr = PointerType::getUnqual(m_floatVecType); + std::vector<const Type*> args; + args.push_back(vecPtr); + args.push_back(vecPtr); + args.push_back(vecPtr); + args.push_back(vecPtr); + AttrListPtr params; + FunctionType *funcType = FunctionType::get( + /*Result=*/Type::VoidTy, + /*Params=*/args, + /*isVarArg=*/false); + std::string name = createFuncName(label); + Function *func = Function::Create( + /*Type=*/funcType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/name.c_str(), m_mod); + func->setCallingConv(CallingConv::C); + func->setAttributes(params); + return func; +} + +llvm::Function * Instructions::findFunction(int label) +{ + llvm::Function *func = m_functions[label]; + if (!func) { + func = declareFunc(label); + m_functions[label] = func; + } + return func; +} + +std::vector<llvm::Value*> Instructions::extractVector(llvm::Value *vec) +{ + std::vector<llvm::Value*> elems(4); + elems[0] = m_builder.CreateExtractElement(vec, m_storage->constantInt(0), + name("x")); + elems[1] = m_builder.CreateExtractElement(vec, m_storage->constantInt(1), + name("y")); + elems[2] = m_builder.CreateExtractElement(vec, m_storage->constantInt(2), + name("z")); + elems[3] = 
m_builder.CreateExtractElement(vec, m_storage->constantInt(3), + name("w")); + return elems; +} + + +#endif //MESA_LLVM + + diff --git a/src/gallium/auxiliary/gallivm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h new file mode 100644 index 0000000000..e18571251e --- /dev/null +++ b/src/gallium/auxiliary/gallivm/instructions.h @@ -0,0 +1,175 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ + +#ifndef INSTRUCTIONS_H +#define INSTRUCTIONS_H + +#include <llvm/BasicBlock.h> +#include <llvm/Module.h> +#include <llvm/Value.h> +#include <llvm/Support/IRBuilder.h> + +#include <map> +#include <stack> + +namespace llvm { + class VectorType; + class Function; +} + +class Storage; + +class Instructions +{ +public: + Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, + Storage *storage); + + llvm::BasicBlock *currentBlock() const; + + llvm::Value *abs(llvm::Value *in1); + llvm::Value *add(llvm::Value *in1, llvm::Value *in2); + llvm::Value *arl(llvm::Value *in1); + void beginLoop(); + void bgnSub(unsigned); + void brk(); + void cal(int label, llvm::Value *input); + llvm::Value *ceil(llvm::Value *in); + llvm::Value *clamp(llvm::Value *in); + llvm::Value *cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); + llvm::Value *cnd(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); + llvm::Value *cnd0(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); + llvm::Value *cos(llvm::Value *in); + llvm::Value *cross(llvm::Value *in1, llvm::Value *in2); + llvm::Value *ddx(llvm::Value *in); + llvm::Value *ddy(llvm::Value *in); + llvm::Value *div(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dot2add(llvm::Value *in, llvm::Value *in2, llvm::Value *in3); + llvm::Value *dp2(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dp3(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dp4(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dph(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dst(llvm::Value *in1, llvm::Value *in2); + void elseop(); + void endif(); + void endLoop(); + void end(); + void endSub(); + llvm::Value *exp(llvm::Value *in); + llvm::Value *ex2(llvm::Value *in); + llvm::Value *floor(llvm::Value *in); + llvm::Value *frc(llvm::Value *in); + void ifop(llvm::Value *in); + 
llvm::Value *kil(llvm::Value *in); + llvm::Value *lerp(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in3); + llvm::Value *lg2(llvm::Value *in); + llvm::Value *lit(llvm::Value *in); + llvm::Value *log(llvm::Value *in); + llvm::Value *madd(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in3); + llvm::Value *max(llvm::Value *in1, llvm::Value *in2); + llvm::Value *min(llvm::Value *in1, llvm::Value *in2); + llvm::Value *mul(llvm::Value *in1, llvm::Value *in2); + llvm::Value *neg(llvm::Value *in); + llvm::Value *nrm(llvm::Value *in); + llvm::Value *pow(llvm::Value *in1, llvm::Value *in2); + llvm::Value *rcp(llvm::Value *in); + llvm::Value *rsq(llvm::Value *in); + llvm::Value *scs(llvm::Value *in); + llvm::Value *seq(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sfl(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sge(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sgt(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sin(llvm::Value *in); + llvm::Value *sle(llvm::Value *in1, llvm::Value *in2); + llvm::Value *slt(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sne(llvm::Value *in1, llvm::Value *in2); + llvm::Value *str(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sub(llvm::Value *in1, llvm::Value *in2); + llvm::Value *trunc(llvm::Value *in); + llvm::Value *x2d(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); + + void printVector(llvm::Value *val); +private: + const char *name(const char *prefix); + + llvm::Value *callCeil(llvm::Value *val); + llvm::Value *callFAbs(llvm::Value *val); + llvm::Value *callFExp(llvm::Value *val); + llvm::Value *callFLog(llvm::Value *val); + llvm::Value *callFloor(llvm::Value *val); + llvm::Value *callFSqrt(llvm::Value *val); + llvm::Value *callPow(llvm::Value *val1, llvm::Value *val2); + + llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, + llvm::Value *z, llvm::Value *w=0); + + llvm::Value *constVector(float x, float y, float z, float w); + + llvm::Function *declarePrintf(); + llvm::Function 
*declareFunc(int label); + + llvm::Function *findFunction(int label); + + std::vector<llvm::Value*> extractVector(llvm::Value *vec); +private: + llvm::Module *m_mod; + llvm::Function *m_func; + char m_name[32]; + llvm::IRBuilder<> m_builder; + int m_idx; + + llvm::VectorType *m_floatVecType; + + llvm::Function *m_llvmCeil; + llvm::Function *m_llvmFSqrt; + llvm::Function *m_llvmFAbs; + llvm::Function *m_llvmPow; + llvm::Function *m_llvmFloor; + llvm::Function *m_llvmFlog; + llvm::Function *m_llvmFexp; + llvm::Function *m_llvmLit; + + llvm::Constant *m_fmtPtr; + + std::stack<llvm::BasicBlock*> m_ifStack; + struct Loop { + llvm::BasicBlock *begin; + llvm::BasicBlock *end; + }; + std::stack<Loop> m_loopStack; + std::map<int, llvm::Function*> m_functions; + Storage *m_storage; +}; + +#endif diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp new file mode 100644 index 0000000000..d5600fd22d --- /dev/null +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -0,0 +1,522 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +#include "instructionssoa.h" + +#include "storagesoa.h" + +#include "pipe/p_shader_tokens.h" +#include "util/u_memory.h" + +#include <llvm/CallingConv.h> +#include <llvm/Constants.h> +#include <llvm/Module.h> +#include <llvm/Function.h> +#include <llvm/Instructions.h> +#include <llvm/Transforms/Utils/Cloning.h> +#include <llvm/ParameterAttributes.h> +#include <llvm/Support/MemoryBuffer.h> +#include <llvm/Bitcode/ReaderWriter.h> + + +#include <iostream> + + +/* disable some warnings. 
this file is autogenerated */ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif +using namespace llvm; +#include "gallivmsoabuiltins.cpp" +#if defined(__GNUC__) +#pragma GCC diagnostic warning "-Wunused-variable" +#endif + +InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func, + llvm::BasicBlock *block, StorageSoa *storage) + : m_builder(block), + m_storage(storage), + m_idx(0) +{ + createFunctionMap(); + createBuiltins(); +} + +const char * InstructionsSoa::name(const char *prefix) const +{ + ++m_idx; + snprintf(m_name, 32, "%s%d", prefix, m_idx); + return m_name; +} + +llvm::Value * InstructionsSoa::vectorFromVals(llvm::Value *x, llvm::Value *y, + llvm::Value *z, llvm::Value *w) +{ + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + Constant *constVector = Constant::getNullValue(vectorType); + Value *res = m_builder.CreateInsertElement(constVector, x, + m_storage->constantInt(0), + name("vecx")); + res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1), + name("vecxy")); + res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2), + name("vecxyz")); + if (w) + res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3), + name("vecxyzw")); + return res; +} + +void InstructionsSoa::end() +{ + m_builder.CreateRetVoid(); +} + +std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector) +{ + std::vector<llvm::Value*> res(4); + res[0] = m_builder.CreateExtractElement(vector, + m_storage->constantInt(0), + name("extract1X")); + res[1] = m_builder.CreateExtractElement(vector, + m_storage->constantInt(1), + name("extract2X")); + res[2] = m_builder.CreateExtractElement(vector, + m_storage->constantInt(2), + name("extract3X")); + res[3] = m_builder.CreateExtractElement(vector, + m_storage->constantInt(3), + name("extract4X")); + + return res; +} + +llvm::IRBuilder<>* InstructionsSoa::getIRBuilder() +{ + return &m_builder; +} + +void 
InstructionsSoa::createFunctionMap() +{ + m_functionsMap[TGSI_OPCODE_ABS] = "abs"; + m_functionsMap[TGSI_OPCODE_DP3] = "dp3"; + m_functionsMap[TGSI_OPCODE_DP4] = "dp4"; + m_functionsMap[TGSI_OPCODE_MIN] = "min"; + m_functionsMap[TGSI_OPCODE_MAX] = "max"; + m_functionsMap[TGSI_OPCODE_POWER] = "pow"; + m_functionsMap[TGSI_OPCODE_LIT] = "lit"; + m_functionsMap[TGSI_OPCODE_RSQ] = "rsq"; + m_functionsMap[TGSI_OPCODE_SLT] = "slt"; +} + +void InstructionsSoa::createDependencies() +{ + { + std::vector<std::string> powDeps(2); + powDeps[0] = "powf"; + powDeps[1] = "powvec"; + m_builtinDependencies["pow"] = powDeps; + } + { + std::vector<std::string> absDeps(2); + absDeps[0] = "fabsf"; + absDeps[1] = "absvec"; + m_builtinDependencies["abs"] = absDeps; + } + { + std::vector<std::string> maxDeps(1); + maxDeps[0] = "maxvec"; + m_builtinDependencies["max"] = maxDeps; + } + { + std::vector<std::string> minDeps(1); + minDeps[0] = "minvec"; + m_builtinDependencies["min"] = minDeps; + } + { + std::vector<std::string> litDeps(4); + litDeps[0] = "minvec"; + litDeps[1] = "maxvec"; + litDeps[2] = "powf"; + litDeps[3] = "powvec"; + m_builtinDependencies["lit"] = litDeps; + } + { + std::vector<std::string> rsqDeps(4); + rsqDeps[0] = "sqrtf"; + rsqDeps[1] = "sqrtvec"; + rsqDeps[2] = "fabsf"; + rsqDeps[3] = "absvec"; + m_builtinDependencies["rsq"] = rsqDeps; + } +} + +llvm::Function * InstructionsSoa::function(int op) +{ + if (m_functions.find(op) != m_functions.end()) + return m_functions[op]; + + std::string name = m_functionsMap[op]; + + std::cout <<"For op = "<<op<<", func is '"<<name<<"'"<<std::endl; + + std::vector<std::string> deps = m_builtinDependencies[name]; + for (unsigned int i = 0; i < deps.size(); ++i) { + llvm::Function *func = m_builtins->getFunction(deps[i]); + std::cout <<"\tinjecting dep = '"<<func->getName()<<"'"<<std::endl; + injectFunction(func); + } + + llvm::Function *originalFunc = m_builtins->getFunction(name); + injectFunction(originalFunc, op); + return 
m_functions[op]; +} + +llvm::Module * InstructionsSoa::currentModule() const +{ + BasicBlock *block = m_builder.GetInsertBlock(); + if (!block || !block->getParent()) + return 0; + + return block->getParent()->getParent(); +} + +void InstructionsSoa::createBuiltins() +{ + MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( + (const char*)&soabuiltins_data[0], + (const char*)&soabuiltins_data[Elements(soabuiltins_data)]); + m_builtins = ParseBitcodeFile(buffer); + std::cout<<"Builtins created at "<<m_builtins<<std::endl; + assert(m_builtins); + createDependencies(); +} + + +std::vector<llvm::Value*> InstructionsSoa::abs(const std::vector<llvm::Value*> in1) +{ + llvm::Function *func = function(TGSI_OPCODE_ABS); + return callBuiltin(func, in1); +} + +std::vector<llvm::Value*> InstructionsSoa::add(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<llvm::Value*> res(4); + + res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx")); + res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy")); + res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz")); + res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw")); + + return res; +} + +std::vector<llvm::Value*> InstructionsSoa::arl(const std::vector<llvm::Value*> in) +{ + std::vector<llvm::Value*> res(4); + + //Extract x's + llvm::Value *x1 = m_builder.CreateExtractElement(in[0], + m_storage->constantInt(0), + name("extractX")); + //cast it to an unsigned int + x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast")); + + res[0] = x1;//vectorFromVals(x1, x2, x3, x4); + //only x is valid. 
the others shouldn't be necessary + /* + res[1] = Constant::getNullValue(m_floatVecType); + res[2] = Constant::getNullValue(m_floatVecType); + res[3] = Constant::getNullValue(m_floatVecType); + */ + + return res; +} + +std::vector<llvm::Value*> InstructionsSoa::dp3(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_DP3); + return callBuiltin(func, in1, in2); +} + +std::vector<llvm::Value*> InstructionsSoa::lit(const std::vector<llvm::Value*> in) +{ + llvm::Function *func = function(TGSI_OPCODE_LIT); + return callBuiltin(func, in); +} + +std::vector<llvm::Value*> InstructionsSoa::madd(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2, + const std::vector<llvm::Value*> in3) +{ + std::vector<llvm::Value*> res = mul(in1, in2); + return add(res, in3); +} + +std::vector<llvm::Value*> InstructionsSoa::max(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_MAX); + return callBuiltin(func, in1, in2); +} + +std::vector<llvm::Value*> InstructionsSoa::min(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_MIN); + return callBuiltin(func, in1, in2); +} + +std::vector<llvm::Value*> InstructionsSoa::mul(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<llvm::Value*> res(4); + + res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx")); + res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly")); + res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz")); + res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw")); + + return res; +} + +std::vector<llvm::Value*> InstructionsSoa::pow(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_POWER); + return callBuiltin(func, in1, in2); +} + +std::vector<llvm::Value*> 
InstructionsSoa::rsq(const std::vector<llvm::Value*> in) +{ + llvm::Function *func = function(TGSI_OPCODE_RSQ); + return callBuiltin(func, in); +} + +std::vector<llvm::Value*> InstructionsSoa::slt(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_SLT); + return callBuiltin(func, in1, in2); +} + +std::vector<llvm::Value*> InstructionsSoa::sub(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<llvm::Value*> res(4); + + res[0] = m_builder.CreateSub(in1[0], in2[0], name("subx")); + res[1] = m_builder.CreateSub(in1[1], in2[1], name("suby")); + res[2] = m_builder.CreateSub(in1[2], in2[2], name("subz")); + res[3] = m_builder.CreateSub(in1[3], in2[3], name("subw")); + + return res; +} + +void checkFunction(Function *func) +{ + for (Function::const_iterator BI = func->begin(), BE = func->end(); + BI != BE; ++BI) { + const BasicBlock &BB = *BI; + for (BasicBlock::const_iterator II = BB.begin(), IE = BB.end(); + II != IE; ++II) { + const Instruction &I = *II; + std::cout<< "Instr = "<<I; + for (unsigned op = 0, E = I.getNumOperands(); op != E; ++op) { + const Value *Op = I.getOperand(op); + std::cout<< "\top = "<<Op<<"("<<op<<")"<<std::endl; + //I->setOperand(op, V); + } + } + } +} + +llvm::Value * InstructionsSoa::allocaTemp() +{ + VectorType *vector = VectorType::get(Type::FloatTy, 4); + ArrayType *vecArray = ArrayType::get(vector, 4); + AllocaInst *alloca = new AllocaInst(vecArray, name("tmpRes"), + m_builder.GetInsertBlock()); + + std::vector<Value*> indices; + indices.push_back(m_storage->constantInt(0)); + indices.push_back(m_storage->constantInt(0)); + GetElementPtrInst *getElem = GetElementPtrInst::Create(alloca, + indices.begin(), + indices.end(), + name("allocaPtr"), + m_builder.GetInsertBlock()); + return getElem; +} + +std::vector<llvm::Value*> InstructionsSoa::allocaToResult(llvm::Value *allocaPtr) +{ + GetElementPtrInst *xElemPtr = 
GetElementPtrInst::Create(allocaPtr, + m_storage->constantInt(0), + name("xPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *yElemPtr = GetElementPtrInst::Create(allocaPtr, + m_storage->constantInt(1), + name("yPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *zElemPtr = GetElementPtrInst::Create(allocaPtr, + m_storage->constantInt(2), + name("zPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *wElemPtr = GetElementPtrInst::Create(allocaPtr, + m_storage->constantInt(3), + name("wPtr"), + m_builder.GetInsertBlock()); + + std::vector<llvm::Value*> res(4); + res[0] = new LoadInst(xElemPtr, name("xRes"), false, m_builder.GetInsertBlock()); + res[1] = new LoadInst(yElemPtr, name("yRes"), false, m_builder.GetInsertBlock()); + res[2] = new LoadInst(zElemPtr, name("zRes"), false, m_builder.GetInsertBlock()); + res[3] = new LoadInst(wElemPtr, name("wRes"), false, m_builder.GetInsertBlock()); + + return res; +} + +std::vector<llvm::Value*> InstructionsSoa::dp4(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_DP4); + return callBuiltin(func, in1, in2); +} + +std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1) +{ + std::vector<Value*> params; + + llvm::Value *allocaPtr = allocaTemp(); + params.push_back(allocaPtr); + params.push_back(in1[0]); + params.push_back(in1[1]); + params.push_back(in1[2]); + params.push_back(in1[3]); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + + return allocaToResult(allocaPtr); +} + +std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<Value*> params; + + llvm::Value *allocaPtr = allocaTemp(); + params.push_back(allocaPtr); + params.push_back(in1[0]); + params.push_back(in1[1]); + 
params.push_back(in1[2]); + params.push_back(in1[3]); + params.push_back(in2[0]); + params.push_back(in2[1]); + params.push_back(in2[2]); + params.push_back(in2[3]); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + + return allocaToResult(allocaPtr); +} + +std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2, + const std::vector<llvm::Value*> in3) +{ + std::vector<Value*> params; + + llvm::Value *allocaPtr = allocaTemp(); + params.push_back(allocaPtr); + params.push_back(in1[0]); + params.push_back(in1[1]); + params.push_back(in1[2]); + params.push_back(in1[3]); + params.push_back(in2[0]); + params.push_back(in2[1]); + params.push_back(in2[2]); + params.push_back(in2[3]); + params.push_back(in3[0]); + params.push_back(in3[1]); + params.push_back(in3[2]); + params.push_back(in3[3]); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + + return allocaToResult(allocaPtr); +} + +void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op) +{ + assert(originalFunc); + std::cout << "injecting function originalFunc " <<originalFunc->getName() <<std::endl; + if (op != TGSI_OPCODE_LAST) { + /* in this case it's possible the function has been already + * injected as part of the dependency chain, which gets + * injected below */ + llvm::Function *func = currentModule()->getFunction(originalFunc->getName()); + if (func) { + m_functions[op] = func; + return; + } + } + llvm::Function *func = 0; + if (originalFunc->isDeclaration()) { + func = Function::Create(originalFunc->getFunctionType(), GlobalValue::ExternalLinkage, + originalFunc->getName(), currentModule()); + func->setCallingConv(CallingConv::C); + const AttrListPtr pal; + func->setAttributes(pal); + currentModule()->dump(); + } 
else { + DenseMap<const Value*, Value *> val; + val[m_builtins->getFunction("fabsf")] = currentModule()->getFunction("fabsf"); + val[m_builtins->getFunction("powf")] = currentModule()->getFunction("powf"); + val[m_builtins->getFunction("sqrtf")] = currentModule()->getFunction("sqrtf"); + func = CloneFunction(originalFunc, val); +#if 0 + std::cout <<" replacing "<<m_builtins->getFunction("powf") + <<", with " <<currentModule()->getFunction("powf")<<std::endl; + std::cout<<"1111-------------------------------"<<std::endl; + checkFunction(originalFunc); + std::cout<<"2222-------------------------------"<<std::endl; + checkFunction(func); + std::cout <<"XXXX = " <<val[m_builtins->getFunction("powf")]<<std::endl; +#endif + currentModule()->getFunctionList().push_back(func); + } + if (op != TGSI_OPCODE_LAST) { + m_functions[op] = func; + } +} + + diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h new file mode 100644 index 0000000000..d6831e0a6b --- /dev/null +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -0,0 +1,116 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INSTRUCTIONSSOA_H +#define INSTRUCTIONSSOA_H + +#include <pipe/p_shader_tokens.h> +#include <llvm/Support/IRBuilder.h> + +#include <map> +#include <vector> + +namespace llvm { + class Module; + class Function; + class BasicBlock; + class Value; +} +class StorageSoa; + +class InstructionsSoa +{ +public: + InstructionsSoa(llvm::Module *mod, llvm::Function *func, + llvm::BasicBlock *block, StorageSoa *storage); + + std::vector<llvm::Value*> abs(const std::vector<llvm::Value*> in1); + std::vector<llvm::Value*> arl(const std::vector<llvm::Value*> in); + std::vector<llvm::Value*> add(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> dp3(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> dp4(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> lit(const std::vector<llvm::Value*> in); + std::vector<llvm::Value*> madd(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2, + const std::vector<llvm::Value*> in3); + std::vector<llvm::Value*> max(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> min(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> mul(const std::vector<llvm::Value*> in1, + const 
std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> pow(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> rsq(const std::vector<llvm::Value*> in1); + std::vector<llvm::Value*> slt(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> sub(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + void end(); + + std::vector<llvm::Value*> extractVector(llvm::Value *vector); + llvm::IRBuilder<>* getIRBuilder(); +private: + const char * name(const char *prefix) const; + llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, + llvm::Value *z, llvm::Value *w); + void createFunctionMap(); + void createBuiltins(); + void createDependencies(); + llvm::Function *function(int); + llvm::Module *currentModule() const; + llvm::Value *allocaTemp(); + std::vector<llvm::Value*> allocaToResult(llvm::Value *allocaPtr); + std::vector<llvm::Value*> callBuiltin(llvm::Function *func, + const std::vector<llvm::Value*> in1); + std::vector<llvm::Value*> callBuiltin(llvm::Function *func, + const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2); + std::vector<llvm::Value*> callBuiltin(llvm::Function *func, + const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2, + const std::vector<llvm::Value*> in3); + void injectFunction(llvm::Function *originalFunc, int op = TGSI_OPCODE_LAST); +private: + llvm::IRBuilder<> m_builder; + StorageSoa *m_storage; + + std::map<int, std::string> m_functionsMap; + std::map<int, llvm::Function*> m_functions; + llvm::Module *m_builtins; + std::map<std::string, std::vector<std::string> > m_builtinDependencies; + +private: + mutable int m_idx; + mutable char m_name[32]; +}; + + +#endif diff --git a/src/gallium/auxiliary/gallivm/llvm_builtins.c b/src/gallium/auxiliary/gallivm/llvm_builtins.c new file mode 100644 index 0000000000..d5a003a48b --- /dev/null +++ 
b/src/gallium/auxiliary/gallivm/llvm_builtins.c @@ -0,0 +1,114 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +typedef __attribute__(( ext_vector_type(4) )) float float4; + +extern float powf(float a, float b); + +inline float approx(float a, float b) +{ + if (b < -128.0f) b = -128.0f; + if (b > 128.0f) b = 128.0f; + if (a < 0) a = 0; + return powf(a, b); +} + +inline float4 lit(float4 tmp) +{ + float4 result; + result.x = 1.0; + result.w = 1.0; + if (tmp.x > 0) { + result.y = tmp.x; + result.z = approx(tmp.y, tmp.w); + } else { + result.y = 0; + result.z = 0; + } + return result; +} + +inline float4 cmp(float4 tmp0, float4 tmp1, float4 tmp2) +{ + float4 result; + + result.x = (tmp0.x < 0.0) ? tmp1.x : tmp2.x; + result.y = (tmp0.y < 0.0) ? tmp1.y : tmp2.y; + result.z = (tmp0.z < 0.0) ? tmp1.z : tmp2.z; + result.w = (tmp0.w < 0.0) ? tmp1.w : tmp2.w; + + return result; +} + +extern float cosf(float val); +extern float sinf(float val); + +inline float4 vcos(float4 val) +{ + float4 result; + printf("VEC IN is %f %f %f %f\n", val.x, val.y, val.z, val.w); + result.x = cosf(val.x); + result.y = cosf(val.x); + result.z = cosf(val.x); + result.w = cosf(val.x); + printf("VEC OUT is %f %f %f %f\n", result.x, result.y, result.z, result.w); + return result; +} + +inline float4 scs(float4 val) +{ + float4 result; + float tmp = val.x; + result.x = cosf(tmp); + result.y = sinf(tmp); + return result; +} + + +inline float4 vsin(float4 val) +{ + float4 result; + float tmp = val.x; + float res = sinf(tmp); + result.x = res; + result.y = res; + result.z = res; + result.w = res; + return result; +} + +inline int kil(float4 val) +{ + if (val.x < 0 || val.y < 0 || val.z < 0 || val.w < 0) + return 1; + else + return 0; +} diff --git a/src/gallium/auxiliary/gallivm/loweringpass.cpp b/src/gallium/auxiliary/gallivm/loweringpass.cpp new file mode 100644 index 0000000000..556dbec366 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/loweringpass.cpp @@ -0,0 
+1,17 @@ +#include "loweringpass.h" + +using namespace llvm; + +char LoweringPass::ID = 0; +RegisterPass<LoweringPass> X("lowering", "Lowering Pass"); + +LoweringPass::LoweringPass() + : ModulePass((intptr_t)&ID) +{ +} + +bool LoweringPass::runOnModule(Module &m) +{ + llvm::cerr << "Hello: " << m.getModuleIdentifier() << "\n"; + return false; +} diff --git a/src/gallium/auxiliary/gallivm/loweringpass.h b/src/gallium/auxiliary/gallivm/loweringpass.h new file mode 100644 index 0000000000..f62dcf6ba7 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/loweringpass.h @@ -0,0 +1,15 @@ +#ifndef LOWERINGPASS_H +#define LOWERINGPASS_H + +#include "llvm/Pass.h" +#include "llvm/Module.h" + +struct LoweringPass : public llvm::ModulePass +{ + static char ID; + LoweringPass(); + + virtual bool runOnModule(llvm::Module &m); +}; + +#endif diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c new file mode 100644 index 0000000000..cb85e1734e --- /dev/null +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -0,0 +1,210 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * This file is compiled with clang into the LLVM bitcode + * + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +typedef __attribute__(( ext_vector_type(4) )) float float4; + + +extern float fabsf(float val); + +/* helpers */ + +float4 absvec(float4 vec) +{ + float4 res; + res.x = fabsf(vec.x); + res.y = fabsf(vec.y); + res.z = fabsf(vec.z); + res.w = fabsf(vec.w); + + return res; +} + +float4 maxvec(float4 a, float4 b) +{ + return (float4){(a.x > b.x) ? a.x : b.x, + (a.y > b.y) ? a.y : b.y, + (a.z > b.z) ? a.z : b.z, + (a.w > b.w) ? a.w : b.w}; +} + +float4 minvec(float4 a, float4 b) +{ + return (float4){(a.x < b.x) ? a.x : b.x, + (a.y < b.y) ? a.y : b.y, + (a.z < b.z) ? a.z : b.z, + (a.w < b.w) ? a.w : b.w}; +} + +extern float powf(float num, float p); +extern float sqrtf(float x); + +float4 powvec(float4 vec, float4 q) +{ + float4 p; + p.x = powf(vec.x, q.x); + p.y = powf(vec.y, q.y); + p.z = powf(vec.z, q.z); + p.w = powf(vec.w, q.w); + return p; +} + +float4 sqrtvec(float4 vec) +{ + float4 p; + p.x = sqrtf(vec.x); + p.y = sqrtf(vec.y); + p.z = sqrtf(vec.z); + p.w = sqrtf(vec.w); + return p; +} + +float4 sltvec(float4 v1, float4 v2) +{ + float4 p; + p.x = (v1.x < v2.x) ? 1.0 : 0.0; + p.y = (v1.y < v2.y) ? 1.0 : 0.0; + p.z = (v1.z < v2.z) ? 1.0 : 0.0; + p.w = (v1.w < v2.w) ? 
1.0 : 0.0; + return p; +} + + +/* instructions */ + +void abs(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) +{ + res[0] = absvec(tmp0x); + res[1] = absvec(tmp0y); + res[2] = absvec(tmp0z); + res[3] = absvec(tmp0w); +} + +void dp3(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) + + (tmp0z * tmp1z); + + res[0] = dot; + res[1] = dot; + res[2] = dot; + res[3] = dot; +} + +void dp4(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) + + (tmp0z * tmp1z) + (tmp0w * tmp1w); + + res[0] = dot; + res[1] = dot; + res[2] = dot; + res[3] = dot; +} + +void lit(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) +{ + const float4 zerovec = (float4) {0.0, 0.0, 0.0, 0.0}; + const float4 min128 = (float4) {-128.f, -128.f, -128.f, -128.f}; + const float4 plus128 = (float4) {128.f, 128.f, 128.f, 128.f}; + + res[0] = (float4){1.0, 1.0, 1.0, 1.0}; + if (tmp0x.x > 0) { + float4 tmpy = maxvec(tmp0y, zerovec); + float4 tmpw = minvec(tmp0w, plus128); + tmpw = maxvec(tmpw, min128); + res[1] = tmp0x; + res[2] = powvec(tmpy, tmpw); + } else { + res[1] = zerovec; + res[2] = zerovec; + } + res[3] = (float4){1.0, 1.0, 1.0, 1.0}; +} + +void min(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + res[0] = minvec(tmp0x, tmp1x); + res[1] = minvec(tmp0y, tmp1y); + res[2] = minvec(tmp0z, tmp1z); + res[3] = minvec(tmp0w, tmp1w); +} + + +void max(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + res[0] = maxvec(tmp0x, tmp1x); + res[1] = maxvec(tmp0y, tmp1y); + res[2] = maxvec(tmp0z, tmp1z); + res[3] = maxvec(tmp0w, tmp1w); +} + +void 
pow(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + res[0] = powvec(tmp0x, tmp1x); + res[1] = res[0]; + res[2] = res[0]; + res[3] = res[0]; +} + +void rsq(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) +{ + const float4 onevec = (float4) {1., 1., 1., 1.}; + res[0] = onevec/sqrtvec(absvec(tmp0x)); + res[1] = onevec/sqrtvec(absvec(tmp0y)); + res[2] = onevec/sqrtvec(absvec(tmp0z)); + res[3] = onevec/sqrtvec(absvec(tmp0w)); +} + +void slt(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + res[0] = sltvec(tmp0x, tmp1x); + res[1] = sltvec(tmp0y, tmp1y); + res[2] = sltvec(tmp0z, tmp1z); + res[3] = sltvec(tmp0w, tmp1w); +} + diff --git a/src/gallium/auxiliary/gallivm/storage.cpp b/src/gallium/auxiliary/gallivm/storage.cpp new file mode 100644 index 0000000000..6f373f6dd5 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/storage.cpp @@ -0,0 +1,364 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +#ifdef MESA_LLVM + +#include "storage.h" + +#include "gallivm_p.h" + +#include "pipe/p_shader_tokens.h" +#include <llvm/BasicBlock.h> +#include <llvm/Module.h> +#include <llvm/Value.h> + +#include <llvm/CallingConv.h> +#include <llvm/Constants.h> +#include <llvm/DerivedTypes.h> +#include <llvm/InstrTypes.h> +#include <llvm/Instructions.h> + +using namespace llvm; + +Storage::Storage(llvm::BasicBlock *block, llvm::Value *input) + : m_block(block), + m_INPUT(input), + m_addrs(32), + m_idx(0) +{ + m_floatVecType = VectorType::get(Type::FloatTy, 4); + m_intVecType = VectorType::get(IntegerType::get(32), 4); + + m_undefFloatVec = UndefValue::get(m_floatVecType); + m_undefIntVec = UndefValue::get(m_intVecType); + m_extSwizzleVec = 0; + + m_numConsts = 0; +} + +//can only build vectors with all members in the [0, 9] range +llvm::Constant *Storage::shuffleMask(int vec) +{ + if (!m_extSwizzleVec) { + std::vector<Constant*> elems; + elems.push_back(ConstantFP::get(APFloat(0.f))); + elems.push_back(ConstantFP::get(APFloat(1.f))); + elems.push_back(ConstantFP::get(APFloat(0.f))); + elems.push_back(ConstantFP::get(APFloat(1.f))); + m_extSwizzleVec = ConstantVector::get(m_floatVecType, elems); + } + + if (m_intVecs.find(vec) != m_intVecs.end()) { + return m_intVecs[vec]; + } + int origVec = vec; + Constant* const_vec = 0; + if (origVec == 0) 
{ + const_vec = Constant::getNullValue(m_intVecType); + } else { + int x = gallivm_x_swizzle(vec); + int y = gallivm_y_swizzle(vec); + int z = gallivm_z_swizzle(vec); + int w = gallivm_w_swizzle(vec); + std::vector<Constant*> elems; + elems.push_back(constantInt(x)); + elems.push_back(constantInt(y)); + elems.push_back(constantInt(z)); + elems.push_back(constantInt(w)); + const_vec = ConstantVector::get(m_intVecType, elems); + } + + m_intVecs[origVec] = const_vec; + return const_vec; +} + +llvm::ConstantInt *Storage::constantInt(int idx) +{ + if (m_constInts.find(idx) != m_constInts.end()) { + return m_constInts[idx]; + } + ConstantInt *const_int = ConstantInt::get(APInt(32, idx)); + m_constInts[idx] = const_int; + return const_int; +} + +llvm::Value *Storage::inputElement(int idx, llvm::Value *indIdx) +{ + Value *val = element(InputsArg, idx, indIdx); + LoadInst *load = new LoadInst(val, name("input"), false, m_block); + load->setAlignment(8); + + return load; +} + +llvm::Value *Storage::constElement(int idx, llvm::Value *indIdx) +{ + m_numConsts = ((idx + 1) > m_numConsts) ? 
(idx + 1) : m_numConsts; + + Value *elem = element(ConstsArg, idx, indIdx); + LoadInst *load = new LoadInst(elem, name("const"), false, m_block); + load->setAlignment(8); + return load; +} + +llvm::Value *Storage::shuffleVector(llvm::Value *vec, int shuffle) +{ + Constant *mask = shuffleMask(shuffle); + ShuffleVectorInst *res = + new ShuffleVectorInst(vec, m_extSwizzleVec, mask, + name("shuffle"), m_block); + return res; +} + + +llvm::Value *Storage::tempElement(int idx, llvm::Value *indIdx) +{ + Value *elem = element(TempsArg, idx, indIdx); + + LoadInst *load = new LoadInst(elem, name("temp"), false, m_block); + load->setAlignment(8); + + return load; +} + +void Storage::setTempElement(int idx, llvm::Value *val, int mask) +{ + if (mask != TGSI_WRITEMASK_XYZW) { + llvm::Value *templ = 0; + if (m_tempWriteMap[idx]) + templ = tempElement(idx); + val = maskWrite(val, mask, templ); + } + Value *elem = element(TempsArg, idx); + StoreInst *st = new StoreInst(val, elem, false, m_block); + st->setAlignment(8); + m_tempWriteMap[idx] = true; +} + +void Storage::setOutputElement(int dstIdx, llvm::Value *val, int mask) +{ + if (mask != TGSI_WRITEMASK_XYZW) { + llvm::Value *templ = 0; + if (m_destWriteMap[dstIdx]) + templ = outputElement(dstIdx); + val = maskWrite(val, mask, templ); + } + + Value *elem = element(DestsArg, dstIdx); + StoreInst *st = new StoreInst(val, elem, false, m_block); + st->setAlignment(8); + m_destWriteMap[dstIdx] = true; +} + +llvm::Value *Storage::maskWrite(llvm::Value *src, int mask, llvm::Value *templ) +{ + llvm::Value *dst = templ; + if (!dst) + dst = Constant::getNullValue(m_floatVecType); + if ((mask & TGSI_WRITEMASK_X)) { + llvm::Value *x = new ExtractElementInst(src, unsigned(0), + name("x"), m_block); + dst = InsertElementInst::Create(dst, x, unsigned(0), + name("dstx"), m_block); + } + if ((mask & TGSI_WRITEMASK_Y)) { + llvm::Value *y = new ExtractElementInst(src, unsigned(1), + name("y"), m_block); + dst = InsertElementInst::Create(dst, y, 
unsigned(1), + name("dsty"), m_block); + } + if ((mask & TGSI_WRITEMASK_Z)) { + llvm::Value *z = new ExtractElementInst(src, unsigned(2), + name("z"), m_block); + dst = InsertElementInst::Create(dst, z, unsigned(2), + name("dstz"), m_block); + } + if ((mask & TGSI_WRITEMASK_W)) { + llvm::Value *w = new ExtractElementInst(src, unsigned(3), + name("w"), m_block); + dst = InsertElementInst::Create(dst, w, unsigned(3), + name("dstw"), m_block); + } + return dst; +} + +const char * Storage::name(const char *prefix) +{ + ++m_idx; + snprintf(m_name, 32, "%s%d", prefix, m_idx); + return m_name; +} + +int Storage::numConsts() const +{ + return m_numConsts; +} + +llvm::Value * Storage::addrElement(int idx) const +{ + Value *ret = m_addrs[idx]; + if (!ret) + return m_undefFloatVec; + return ret; +} + +void Storage::setAddrElement(int idx, llvm::Value *val, int mask) +{ + if (mask != TGSI_WRITEMASK_XYZW) { + llvm::Value *templ = m_addrs[idx]; + val = maskWrite(val, mask, templ); + } + m_addrs[idx] = val; +} + +llvm::Value * Storage::extractIndex(llvm::Value *vec) +{ + llvm::Value *x = new ExtractElementInst(vec, unsigned(0), + name("x"), m_block); + return new FPToSIInst(x, IntegerType::get(32), name("intidx"), m_block); +} + +void Storage::setCurrentBlock(llvm::BasicBlock *block) +{ + m_block = block; +} + +llvm::Value * Storage::outputElement(int idx, llvm::Value *indIdx) +{ + Value *elem = element(DestsArg, idx, indIdx); + LoadInst *load = new LoadInst(elem, name("output"), false, m_block); + load->setAlignment(8); + + return load; +} + +llvm::Value * Storage::inputPtr() const +{ + return m_INPUT; +} + +void Storage::pushArguments(llvm::Value *input) +{ + m_argStack.push(m_INPUT); + + m_INPUT = input; +} + +void Storage::popArguments() +{ + m_INPUT = m_argStack.top(); + m_argStack.pop(); +} + +void Storage::pushTemps() +{ + m_extSwizzleVec = 0; +} + +void Storage::popTemps() +{ +} + +llvm::Value * Storage::immediateElement(int idx) +{ + return m_immediates[idx]; +} + +void 
Storage::addImmediate(float *val) +{ + std::vector<Constant*> vec(4); + vec[0] = ConstantFP::get(APFloat(val[0])); + vec[1] = ConstantFP::get(APFloat(val[1])); + vec[2] = ConstantFP::get(APFloat(val[2])); + vec[3] = ConstantFP::get(APFloat(val[3])); + m_immediates.push_back(ConstantVector::get(m_floatVecType, vec)); +} + + +llvm::Value * Storage::elemPtr(Args arg) +{ + std::vector<Value*> indices; + indices.push_back(constantInt(0)); + indices.push_back(constantInt(static_cast<int>(arg))); + GetElementPtrInst *getElem = GetElementPtrInst::Create(m_INPUT, + indices.begin(), + indices.end(), + name("input_ptr"), + m_block); + return new LoadInst(getElem, name("input_field"), false, m_block); +} + +llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx, + llvm::Value *indIdx ) +{ + GetElementPtrInst *getElem = 0; + + if (indIdx) { + getElem = GetElementPtrInst::Create(ptr, + BinaryOperator::create(Instruction::Add, + indIdx, + constantInt(idx), + name("add"), + m_block), + name("field"), + m_block); + } else { + getElem = GetElementPtrInst::Create(ptr, + constantInt(idx), + name("field"), + m_block); + } + return getElem; +} + +llvm::Value * Storage::element(Args arg, int idx, llvm::Value *indIdx ) +{ + Value *val = elemPtr(arg); + return elemIdx(val, idx, indIdx); +} + +void Storage::setKilElement(llvm::Value *val) +{ + std::vector<Value*> indices; + indices.push_back(constantInt(0)); + indices.push_back(constantInt(static_cast<int>(KilArg))); + GetElementPtrInst *elem = GetElementPtrInst::Create(m_INPUT, + indices.begin(), + indices.end(), + name("kil_ptr"), + m_block); + StoreInst *st = new StoreInst(val, elem, false, m_block); + st->setAlignment(8); +} + +#endif //MESA_LLVM + + diff --git a/src/gallium/auxiliary/gallivm/storage.h b/src/gallium/auxiliary/gallivm/storage.h new file mode 100644 index 0000000000..8574f7554e --- /dev/null +++ b/src/gallium/auxiliary/gallivm/storage.h @@ -0,0 +1,133 @@ 
+/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ **************************************************************************/
+
+ /*
+  * Authors:
+  *   Zack Rusin zack@tungstengraphics.com
+  */
+
+#ifndef STORAGE_H
+#define STORAGE_H
+
+#include <map>
+#include <set>
+#include <stack>
+#include <vector>
+
+namespace llvm {
+   class BasicBlock;
+   class Constant;
+   class ConstantInt;
+   class LoadInst;
+   class Value;
+   class VectorType;
+}
+
+/*
+ * Helper that emits the LLVM loads and stores used to read and write the
+ * shader register files (inputs, outputs, temporaries, constants, kill
+ * flag).  All files are reached through a single aggregate pointer; the
+ * field order inside that aggregate is given by the private Args enum.
+ * Instructions are appended to the current basic block.
+ */
+class Storage
+{
+public:
+   // 'block' receives the emitted instructions; 'input' points to the
+   // argument aggregate that holds the register-file pointers.
+   Storage(llvm::BasicBlock *block,
+           llvm::Value *input);
+
+   // Argument aggregate pointer currently in use.
+   llvm::Value *inputPtr() const;
+
+   // Switches the block that receives newly emitted instructions.
+   void setCurrentBlock(llvm::BasicBlock *block);
+
+   // Cached 32-bit integer constant.
+   llvm::ConstantInt *constantInt(int);
+   // Cached <4 x i32> shuffle mask for a packed swizzle value.
+   llvm::Constant *shuffleMask(int vec);
+   // Register loads; a non-null indIdx is added to idx at run time.
+   llvm::Value *inputElement(int idx, llvm::Value *indIdx =0);
+   llvm::Value *constElement(int idx, llvm::Value *indIdx =0);
+   llvm::Value *outputElement(int idx, llvm::Value *indIdx =0);
+   llvm::Value *tempElement(int idx, llvm::Value *indIdx =0);
+   // Constant vector registered by the idx-th addImmediate() call.
+   llvm::Value *immediateElement(int idx);
+
+   // Register stores; 'mask' is a TGSI_WRITEMASK_* combination and
+   // channels outside it keep their previous contents.
+   void setOutputElement(int dstIdx, llvm::Value *val, int mask);
+   void setTempElement(int idx, llvm::Value *val, int mask);
+
+   // Address registers are tracked as values, not stored to memory.
+   llvm::Value *addrElement(int idx) const;
+   void setAddrElement(int idx, llvm::Value *val, int mask);
+
+   // Stores 'val' into the KilArg field of the argument aggregate.
+   void setKilElement(llvm::Value *val);
+
+   // Applies the swizzle encoded in 'shuffle' to 'vec'.
+   llvm::Value *shuffleVector(llvm::Value *vec, int shuffle);
+
+   // Element 0 of 'vec' converted from float to a signed 32-bit integer.
+   llvm::Value *extractIndex(llvm::Value *vec);
+
+   // One more than the highest index passed to constElement() so far.
+   int numConsts() const;
+
+   // Save/restore the argument aggregate pointer.
+   void pushArguments(llvm::Value *input);
+   void popArguments();
+   // pushTemps() only resets the cached swizzle helper vector and
+   // popTemps() is empty; neither touches m_tempStack.
+   void pushTemps();
+   void popTemps();
+
+   // Registers a constant <4 x float> built from val[0..3].
+   void addImmediate(float *val);
+
+private:
+   // Merges the channels of 'src' selected by 'mask' into 'templ'
+   // (or into a zero vector when 'templ' is null).
+   llvm::Value *maskWrite(llvm::Value *src, int mask, llvm::Value *templ);
+   // Returns "<prefix><counter>" in m_name; overwritten by the next call.
+   const char *name(const char *prefix);
+
+   // Field indices inside the argument aggregate.
+   enum Args {
+      DestsArg   = 0,
+      InputsArg  = 1,
+      TempsArg   = 2,
+      ConstsArg  = 3,
+      KilArg     = 4
+   };
+   // Loads field 'arg' of the argument aggregate.
+   llvm::Value *elemPtr(Args arg);
+   // GEP to element idx (+ indIdx) of 'ptr'.
+   llvm::Value *elemIdx(llvm::Value *ptr, int idx,
+                        llvm::Value *indIdx = 0);
+   // elemPtr() followed by elemIdx().
+   llvm::Value *element(Args arg, int idx, llvm::Value *indIdx = 0);
+
+private:
+   llvm::BasicBlock *m_block;   // block that receives new instructions
+   llvm::Value *m_INPUT;        // current argument aggregate pointer
+
+   std::map<int, llvm::ConstantInt*> m_constInts;  // constantInt() cache
+   std::map<int, llvm::Constant*>    m_intVecs;    // shuffleMask() cache
+   std::vector<llvm::Value*>         m_addrs;      // address register values
+   std::vector<llvm::Constant*>      m_immediates; // addImmediate() results
+
+   llvm::VectorType *m_floatVecType;   // <4 x float>
+   llvm::VectorType *m_intVecType;     // <4 x i32>
+
+   char        m_name[32];   // scratch buffer returned by name()
+   int         m_idx;        // counter appended to generated names
+
+   int         m_numConsts;
+
+   // Registers already stored to through setOutputElement()/setTempElement().
+   std::map<int, bool > m_destWriteMap;
+   std::map<int, bool > m_tempWriteMap;
+
+   llvm::Value      *m_undefFloatVec;
+   llvm::Value      *m_undefIntVec;
+   llvm::Value      *m_extSwizzleVec;  // <0, 1, 0, 1>, built lazily
+
+   std::stack<llvm::Value*> m_argStack;   // saved m_INPUT values
+   // Not referenced anywhere in storage.cpp.
+   std::stack<std::vector<llvm::Value*> > m_tempStack;
+};
+
+#endif
diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp
new file mode 100644
index 0000000000..4fc075cf6d
--- /dev/null
+++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp
@@ -0,0 +1,420 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "storagesoa.h" + +#include "gallivm_p.h" + +#include "pipe/p_shader_tokens.h" +#include "pipe/p_debug.h" + +#include <llvm/BasicBlock.h> +#include <llvm/Module.h> +#include <llvm/Value.h> + +#include <llvm/CallingConv.h> +#include <llvm/Constants.h> +#include <llvm/DerivedTypes.h> +#include <llvm/InstrTypes.h> +#include <llvm/Instructions.h> + +using namespace llvm; + + +StorageSoa::StorageSoa(llvm::BasicBlock *block, + llvm::Value *input, + llvm::Value *output, + llvm::Value *consts, + llvm::Value *temps) + : m_block(block), + m_input(input), + m_output(output), + m_consts(consts), + m_temps(temps), + m_immediates(0), + m_idx(0) +{ +} + +void StorageSoa::addImmediate(float *vec) +{ + std::vector<float> vals(4); + vals[0] = vec[0]; + vals[1] = vec[1]; + vals[2] = vec[2]; + vals[3] = vec[3]; + m_immediatesToFlush.push_back(vals); +} + +void StorageSoa::declareImmediates() +{ + if (m_immediatesToFlush.empty()) + return; + + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + ArrayType *vectorChannels = ArrayType::get(vectorType, 4); + ArrayType *arrayType = ArrayType::get(vectorChannels, m_immediatesToFlush.size()); + + m_immediates = new GlobalVariable( + /*Type=*/arrayType, + /*isConstant=*/false, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Initializer=*/0, // has initializer, specified below + /*Name=*/name("immediates"), + currentModule()); + + std::vector<Constant*> arrayVals; + for (unsigned int i = 0; i < m_immediatesToFlush.size(); ++i) { + std::vector<float> vec = m_immediatesToFlush[i]; + std::vector<float> vals(4); + std::vector<Constant*> channelArray; + + vals[0] = vec[0]; 
vals[1] = vec[1]; vals[2] = vec[2]; vals[3] = vec[3]; + llvm::Constant *xChannel = createConstGlobalVector(vals); + + vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1]; + llvm::Constant *yChannel = createConstGlobalVector(vals); + + vals[0] = vec[2]; vals[1] = vec[2]; vals[2] = vec[2]; vals[3] = vec[2]; + llvm::Constant *zChannel = createConstGlobalVector(vals); + + vals[0] = vec[3]; vals[1] = vec[3]; vals[2] = vec[3]; vals[3] = vec[3]; + llvm::Constant *wChannel = createConstGlobalVector(vals); + channelArray.push_back(xChannel); + channelArray.push_back(yChannel); + channelArray.push_back(zChannel); + channelArray.push_back(wChannel); + Constant *constChannels = ConstantArray::get(vectorChannels, + channelArray); + arrayVals.push_back(constChannels); + } + Constant *constArray = ConstantArray::get(arrayType, arrayVals); + m_immediates->setInitializer(constArray); + + m_immediatesToFlush.clear(); +} + +llvm::Value *StorageSoa::addrElement(int idx) const +{ + std::map<int, llvm::Value*>::const_iterator itr = m_addresses.find(idx); + if (itr == m_addresses.end()) { + debug_printf("Trying to access invalid shader 'address'\n"); + return 0; + } + llvm::Value * res = (*itr).second; + + res = new LoadInst(res, name("addr"), false, m_block); + + return res; +} + +std::vector<llvm::Value*> StorageSoa::inputElement(llvm::Value *idx) +{ + std::vector<llvm::Value*> res(4); + + res[0] = element(m_input, idx, 0); + res[1] = element(m_input, idx, 1); + res[2] = element(m_input, idx, 2); + res[3] = element(m_input, idx, 3); + + return res; +} + +llvm::Value* StorageSoa::unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value* vector, int cc) +{ + std::vector<llvm::Value*> x(4); + x[0] = m_builder->CreateExtractElement(vector, + constantInt(cc), + name("x")); + + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + Constant *constVector = Constant::getNullValue(vectorType); + Value *res = m_builder->CreateInsertElement(constVector, x[0], + 
constantInt(0), + name("vecx")); + res = m_builder->CreateInsertElement(res, x[0], constantInt(1), + name("vecxx")); + res = m_builder->CreateInsertElement(res, x[0], constantInt(2), + name("vecxxx")); + res = m_builder->CreateInsertElement(res, x[0], constantInt(3), + name("vecxxxx")); + return res; +} + +std::vector<llvm::Value*> StorageSoa::constElement(llvm::IRBuilder<>* m_builder, llvm::Value *idx) +{ + llvm::Value* res; + std::vector<llvm::Value*> res2(4); + llvm::Value *xChannel, *yChannel, *zChannel, *wChannel; + + xChannel = elementPointer(m_consts, idx, 0); + + res = alignedArrayLoad(xChannel); + + res2[0]=unpackConstElement(m_builder, res,0); + res2[1]=unpackConstElement(m_builder, res,1); + res2[2]=unpackConstElement(m_builder, res,2); + res2[3]=unpackConstElement(m_builder, res,3); + + return res2; +} + +std::vector<llvm::Value*> StorageSoa::outputElement(llvm::Value *idx) +{ + std::vector<llvm::Value*> res(4); + + res[0] = element(m_output, idx, 0); + res[1] = element(m_output, idx, 1); + res[2] = element(m_output, idx, 2); + res[3] = element(m_output, idx, 3); + + return res; +} + +std::vector<llvm::Value*> StorageSoa::tempElement(llvm::Value *idx) +{ + std::vector<llvm::Value*> res(4); + + res[0] = element(m_temps, idx, 0); + res[1] = element(m_temps, idx, 1); + res[2] = element(m_temps, idx, 2); + res[3] = element(m_temps, idx, 3); + + return res; +} + +std::vector<llvm::Value*> StorageSoa::immediateElement(llvm::Value *idx) +{ + std::vector<llvm::Value*> res(4); + + res[0] = element(m_immediates, idx, 0); + res[1] = element(m_immediates, idx, 1); + res[2] = element(m_immediates, idx, 2); + res[3] = element(m_immediates, idx, 3); + + return res; +} + +llvm::Value * StorageSoa::elementPointer(llvm::Value *ptr, llvm::Value *index, + int channel) const +{ + std::vector<Value*> indices; + if (m_immediates == ptr) + indices.push_back(constantInt(0)); + indices.push_back(index); + indices.push_back(constantInt(channel)); + + GetElementPtrInst *getElem = 
GetElementPtrInst::Create(ptr, + indices.begin(), + indices.end(), + name("ptr"), + m_block); + return getElem; +} + +llvm::Value * StorageSoa::element(llvm::Value *ptr, llvm::Value *index, + int channel) const +{ + llvm::Value *res = elementPointer(ptr, index, channel); + LoadInst *load = new LoadInst(res, name("element"), false, m_block); + //load->setAlignment(8); + return load; +} + +const char * StorageSoa::name(const char *prefix) const +{ + ++m_idx; + snprintf(m_name, 32, "%s%d", prefix, m_idx); + return m_name; +} + +llvm::ConstantInt * StorageSoa::constantInt(int idx) const +{ + if (m_constInts.find(idx) != m_constInts.end()) { + return m_constInts[idx]; + } + ConstantInt *constInt = ConstantInt::get(APInt(32, idx)); + m_constInts[idx] = constInt; + return constInt; +} + +llvm::Value *StorageSoa::alignedArrayLoad(llvm::Value *val) +{ + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + PointerType *vectorPtr = PointerType::get(vectorType, 0); + + CastInst *cast = new BitCastInst(val, vectorPtr, name("toVector"), m_block); + LoadInst *load = new LoadInst(cast, name("alignLoad"), false, m_block); + load->setAlignment(8); + return load; +} + +llvm::Module * StorageSoa::currentModule() const +{ + if (!m_block || !m_block->getParent()) + return 0; + + return m_block->getParent()->getParent(); +} + +llvm::Constant * StorageSoa::createConstGlobalFloat(const float val) +{ + Constant*c = ConstantFP::get(APFloat(val)); + return c; +} + +llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &vec) +{ + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + std::vector<Constant*> immValues; + ConstantFP *constx = ConstantFP::get(APFloat(vec[0])); + ConstantFP *consty = ConstantFP::get(APFloat(vec[1])); + ConstantFP *constz = ConstantFP::get(APFloat(vec[2])); + ConstantFP *constw = ConstantFP::get(APFloat(vec[3])); + immValues.push_back(constx); + immValues.push_back(consty); + immValues.push_back(constz); + 
immValues.push_back(constw); + Constant *constVector = ConstantVector::get(vectorType, immValues); + + return constVector; +} + +std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, int swizzle, + llvm::IRBuilder<>* m_builder,llvm::Value *indIdx) +{ + std::vector<llvm::Value*> val(4); + + //if we have an indirect index, always use that + // if not use the integer offset to create one + llvm::Value *realIndex = 0; + if (indIdx) + realIndex = indIdx; + else + realIndex = constantInt(idx); + debug_printf("XXXXXXXXX realIdx = %p, indIdx = %p\n", realIndex, indIdx); + + switch(type) { + case TGSI_FILE_INPUT: + val = inputElement(realIndex); + break; + case TGSI_FILE_OUTPUT: + val = outputElement(realIndex); + break; + case TGSI_FILE_TEMPORARY: + val = tempElement(realIndex); + break; + case TGSI_FILE_CONSTANT: + val = constElement(m_builder, realIndex); + break; + case TGSI_FILE_IMMEDIATE: + val = immediateElement(realIndex); + break; + case TGSI_FILE_ADDRESS: + debug_printf("Address not handled in the load phase!\n"); + assert(0); + break; + default: + debug_printf("Unknown load!\n"); + assert(0); + break; + } + if (!gallivm_is_swizzle(swizzle)) + return val; + + std::vector<llvm::Value*> res(4); + + res[0] = val[gallivm_x_swizzle(swizzle)]; + res[1] = val[gallivm_y_swizzle(swizzle)]; + res[2] = val[gallivm_z_swizzle(swizzle)]; + res[3] = val[gallivm_w_swizzle(swizzle)]; + return res; +} + +void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val, + int mask) +{ + llvm::Value *out = 0; + switch(type) { + case TGSI_FILE_OUTPUT: + out = m_output; + break; + case TGSI_FILE_TEMPORARY: + out = m_temps; + break; + case TGSI_FILE_INPUT: + out = m_input; + break; + case TGSI_FILE_ADDRESS: { + llvm::Value *addr = m_addresses[idx]; + if (!addr) { + addAddress(idx); + addr = m_addresses[idx]; + assert(addr); + } + new StoreInst(val[0], addr, false, m_block); + return; + break; + } + default: + debug_printf("Can't save 
output of this type: %d !\n", type); + assert(0); + break; + } + llvm::Value *realIndex = constantInt(idx); + if ((mask & TGSI_WRITEMASK_X)) { + llvm::Value *xChannel = elementPointer(out, realIndex, 0); + new StoreInst(val[0], xChannel, false, m_block); + } + if ((mask & TGSI_WRITEMASK_Y)) { + llvm::Value *yChannel = elementPointer(out, realIndex, 1); + new StoreInst(val[1], yChannel, false, m_block); + } + if ((mask & TGSI_WRITEMASK_Z)) { + llvm::Value *zChannel = elementPointer(out, realIndex, 2); + new StoreInst(val[2], zChannel, false, m_block); + } + if ((mask & TGSI_WRITEMASK_W)) { + llvm::Value *wChannel = elementPointer(out, realIndex, 3); + new StoreInst(val[3], wChannel, false, m_block); + } +} + +void StorageSoa::addAddress(int idx) +{ + GlobalVariable *val = new GlobalVariable( + /*Type=*/IntegerType::get(32), + /*isConstant=*/false, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Initializer=*/0, // has initializer, specified below + /*Name=*/name("address"), + currentModule()); + val->setInitializer(Constant::getNullValue(IntegerType::get(32))); + + debug_printf("adding to %d\n", idx); + m_addresses[idx] = val; +} diff --git a/src/gallium/auxiliary/gallivm/storagesoa.h b/src/gallium/auxiliary/gallivm/storagesoa.h new file mode 100644 index 0000000000..f21ca6ec43 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/storagesoa.h @@ -0,0 +1,107 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef STORAGESOA_H +#define STORAGESOA_H + +#include <pipe/p_shader_tokens.h> +#include <llvm/Support/IRBuilder.h> + +#include <vector> +#include <list> +#include <map> + +namespace llvm { + class BasicBlock; + class Constant; + class ConstantInt; + class GlobalVariable; + class LoadInst; + class Value; + class VectorType; + class Module; +} + +class StorageSoa +{ +public: + StorageSoa(llvm::BasicBlock *block, + llvm::Value *input, + llvm::Value *output, + llvm::Value *consts, + llvm::Value *temps); + + + std::vector<llvm::Value*> load(enum tgsi_file_type type, int idx, int swizzle, + llvm::IRBuilder<>* m_builder, llvm::Value *indIdx =0); + void store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val, + int mask); + + void addImmediate(float *vec); + void declareImmediates(); + + void addAddress(int idx); + + llvm::Value * addrElement(int idx) const; + + llvm::ConstantInt *constantInt(int) const; +private: + llvm::Value *elementPointer(llvm::Value *ptr, llvm::Value *indIdx, + int channel) const; + llvm::Value *element(llvm::Value *ptr, llvm::Value *idx, + int channel) const; + const char *name(const char *prefix) const; + llvm::Value *alignedArrayLoad(llvm::Value *val); + llvm::Module *currentModule() const; + llvm::Constant *createConstGlobalFloat(const float val); + llvm::Constant *createConstGlobalVector(const std::vector<float> &vec); + + std::vector<llvm::Value*> inputElement(llvm::Value *indIdx); + llvm::Value* unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx, int cc); + std::vector<llvm::Value*> constElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx); + std::vector<llvm::Value*> outputElement(llvm::Value *indIdx); + std::vector<llvm::Value*> tempElement(llvm::Value *indIdx); + std::vector<llvm::Value*> immediateElement(llvm::Value *indIdx); +private: + llvm::BasicBlock *m_block; + + llvm::Value *m_input; + llvm::Value 
*m_output; + llvm::Value *m_consts; + llvm::Value *m_temps; + llvm::GlobalVariable *m_immediates; + + std::map<int, llvm::Value*> m_addresses; + + std::vector<std::vector<float> > m_immediatesToFlush; + + mutable std::map<int, llvm::ConstantInt*> m_constInts; + mutable char m_name[32]; + mutable int m_idx; +}; + +#endif diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp new file mode 100644 index 0000000000..1191a6cae9 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -0,0 +1,1169 @@ +#include "tgsitollvm.h" + +#include "gallivm.h" +#include "gallivm_p.h" + +#include "storage.h" +#include "instructions.h" +#include "storagesoa.h" +#include "instructionssoa.h" + +#include "pipe/p_shader_tokens.h" + +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_build.h" +#include "tgsi/tgsi_dump.h" + + +#include <llvm/Module.h> +#include <llvm/CallingConv.h> +#include <llvm/Constants.h> +#include <llvm/DerivedTypes.h> +#include <llvm/Instructions.h> +#include <llvm/ModuleProvider.h> +#include <llvm/Pass.h> +#include <llvm/PassManager.h> +#include <llvm/ParameterAttributes.h> +#include <llvm/Support/PatternMatch.h> +#include <llvm/ExecutionEngine/JIT.h> +#include <llvm/ExecutionEngine/Interpreter.h> +#include <llvm/ExecutionEngine/GenericValue.h> +#include <llvm/Support/MemoryBuffer.h> +#include <llvm/LinkAllPasses.h> +#include <llvm/Analysis/Verifier.h> +#include <llvm/Analysis/LoopPass.h> +#include <llvm/Target/TargetData.h> +#include <llvm/Bitcode/ReaderWriter.h> +#include <llvm/Transforms/Utils/Cloning.h> + + +#include <sstream> +#include <fstream> +#include <iostream> + +using namespace llvm; + +static inline FunctionType *vertexShaderFunctionType() +{ + //Function takes three arguments, + // the calling code has to make sure the types it will + // pass are castable to the following: + // [4 x <4 x float>] inputs, + // [4 x <4 x float>] output, + 
// [1 x [4 x float]] consts, + // [4 x <4 x float>] temps + + std::vector<const Type*> funcArgs; + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + ArrayType *vectorArray = ArrayType::get(vectorType, 4); + PointerType *vectorArrayPtr = PointerType::get(vectorArray, 0); + + ArrayType *floatArray = ArrayType::get(Type::FloatTy, 4); + ArrayType *constsArray = ArrayType::get(floatArray, 1); + PointerType *constsArrayPtr = PointerType::get(constsArray, 0); + + funcArgs.push_back(vectorArrayPtr);//inputs + funcArgs.push_back(vectorArrayPtr);//output + funcArgs.push_back(constsArrayPtr);//consts + funcArgs.push_back(vectorArrayPtr);//temps + + FunctionType *functionType = FunctionType::get( + /*Result=*/Type::VoidTy, + /*Params=*/funcArgs, + /*isVarArg=*/false); + + return functionType; +} + +/* Append a copy of *interp to ir->interpolators and bump ir->num_interp. + * NOTE(review): no capacity check is visible here -- confirm callers cannot + * overflow ir->interpolators. + */ +static inline void +add_interpolator(struct gallivm_ir *ir, + struct gallivm_interpolate *interp) +{ + ir->interpolators[ir->num_interp] = *interp; + ++ir->num_interp; +} + +/* For TGSI_FILE_INPUT declarations, record one interpolator per + * (attribute, channel) selected by the usage mask; declarations of any + * other file are ignored. module, storage and fd are not used here. + */ +static void +translate_declaration(struct gallivm_ir *prog, + llvm::Module *module, + Storage *storage, + struct tgsi_full_declaration *decl, + struct tgsi_full_declaration *fd) +{ + if (decl->Declaration.File == TGSI_FILE_INPUT) { + unsigned first, last, mask; + uint interp_method; + + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + /* Do not touch WPOS.xy */ + if (first == 0) { + mask &= ~TGSI_WRITEMASK_XY; + if (mask == TGSI_WRITEMASK_NONE) { + first++; + if (first > last) { + return; + } + } + } + + interp_method = decl->Declaration.Interpolate; + + if (mask == TGSI_WRITEMASK_XYZW) { + unsigned i, j; + + for (i = first; i <= last; i++) { + for (j = 0; j < NUM_CHANNELS; j++) { + //interp( mach, i, j ); + struct gallivm_interpolate interp; + interp.type = interp_method; + interp.attrib = i; + interp.chan = j; + add_interpolator(prog, &interp); + } + } + } else { + unsigned i, j; + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask 
& (1 << j) ) { + for( i = first; i <= last; i++ ) { + struct gallivm_interpolate interp; + interp.type = interp_method; + interp.attrib = i; + interp.chan = j; + add_interpolator(prog, &interp); + } + } + } + } + } +} + +/* SoA variant: only TGSI_FILE_ADDRESS declarations are acted on, by + * registering the first index of the declared range with the storage. + */ +static void +translate_declarationir(struct gallivm_ir *, + llvm::Module *, + StorageSoa *storage, + struct tgsi_full_declaration *decl, + struct tgsi_full_declaration *) +{ + if (decl->Declaration.File == TGSI_FILE_ADDRESS) { + int idx = decl->DeclarationRange.First; + storage->addAddress(idx); + } +} + +/* Copy the float32 values of a TGSI immediate into a 4-float vector and + * hand it to the storage. Channels the immediate does not supply are + * passed as 0.0f, and at most four values are copied. + * NOTE(review): presumably addImmediate reads all four floats -- confirm + * against Storage. + */ +static void +translate_immediate(Storage *storage, + struct tgsi_full_immediate *imm) +{ + /* zero-fill so that channels missing from the immediate are defined */ + float vec[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; + int i; + /* i < 4 keeps the write inside vec whatever Immediate.Size claims */ + for (i = 0; i < 4 && i < imm->Immediate.Size - 1; ++i) { + switch (imm->Immediate.DataType) { + case TGSI_IMM_FLOAT32: + vec[i] = imm->u.ImmediateFloat32[i].Float; + break; + default: + assert(0); + } + } + storage->addImmediate(vec); +} + + +/* SoA counterpart of translate_immediate: same copy, same zero-fill and + * same four-value bound, but the vector goes to a StorageSoa. + */ +static void +translate_immediateir(StorageSoa *storage, + struct tgsi_full_immediate *imm) +{ + /* zero-fill so that channels missing from the immediate are defined */ + float vec[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; + int i; + /* i < 4 keeps the write inside vec whatever Immediate.Size claims */ + for (i = 0; i < 4 && i < imm->Immediate.Size - 1; ++i) { + switch (imm->Immediate.DataType) { + case TGSI_IMM_FLOAT32: + vec[i] = imm->u.ImmediateFloat32[i].Float; + break; + default: + assert(0); + } + } + storage->addImmediate(vec); +} + +/* Pack the four extended-swizzle selectors of src into one integer as + * decimal digits: sel0*1000 + sel1*100 + sel2*10 + sel3. + */ +static inline int +swizzleInt(struct tgsi_full_src_register *src) +{ + int swizzle = 0; + int start = 1000; + + for (int k = 0; k < 4; ++k) { + swizzle += tgsi_util_get_full_src_register_extswizzle(src, k) * start; + start /= 10; + } + return swizzle; +} + +/* Return val shuffled by the swizzle of src when gallivm_is_swizzle() + * accepts the packed swizzle, otherwise return val unchanged. + */ +static inline llvm::Value * +swizzleVector(llvm::Value *val, struct tgsi_full_src_register *src, + Storage *storage) +{ + int swizzle = swizzleInt(src); + + if (gallivm_is_swizzle(swizzle)) { + /*fprintf(stderr, "XXXXXXXX swizzle = %d\n", swizzle);*/ + val = storage->shuffleVector(val, swizzle); + } + return val; +} + +static void +translate_instruction(llvm::Module *module, + Storage *storage, + Instructions *instr, + struct tgsi_full_instruction *inst, + struct tgsi_full_instruction *fi, + unsigned 
instno) +{ + llvm::Value *inputs[4]; + inputs[0] = 0; + inputs[1] = 0; + inputs[2] = 0; + inputs[3] = 0; + + for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + llvm::Value *val = 0; + llvm::Value *indIdx = 0; + + if (src->SrcRegister.Indirect) { + indIdx = storage->addrElement(src->SrcRegisterInd.Index); + indIdx = storage->extractIndex(indIdx); + } + if (src->SrcRegister.File == TGSI_FILE_CONSTANT) { + val = storage->constElement(src->SrcRegister.Index, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { + val = storage->inputElement(src->SrcRegister.Index, indIdx); + // FIXME we should not be generating elements for temporaries, this creates useless memory writes + } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { + val = storage->tempElement(src->SrcRegister.Index); + } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { + val = storage->outputElement(src->SrcRegister.Index, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { + val = storage->immediateElement(src->SrcRegister.Index); + } else { + fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File); + return; + } + + inputs[i] = swizzleVector(val, src, storage); + } + + /*if (inputs[0]) + instr->printVector(inputs[0]); + if (inputs[1]) + instr->printVector(inputs[1]);*/ + llvm::Value *out = 0; + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: { + out = instr->arl(inputs[0]); + } + break; + case TGSI_OPCODE_MOV: { + out = inputs[0]; + } + break; + case TGSI_OPCODE_LIT: { + out = instr->lit(inputs[0]); + } + break; + case TGSI_OPCODE_RCP: { + out = instr->rcp(inputs[0]); + } + break; + case TGSI_OPCODE_RSQ: { + out = instr->rsq(inputs[0]); + } + break; + case TGSI_OPCODE_EXP: { + out = instr->exp(inputs[0]); + } + break; + case TGSI_OPCODE_LOG: { + out = instr->log(inputs[0]); + } + break; + case TGSI_OPCODE_MUL: { + out = instr->mul(inputs[0], inputs[1]); + } 
+ break; + case TGSI_OPCODE_ADD: { + out = instr->add(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DP3: { + out = instr->dp3(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DP4: { + out = instr->dp4(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DST: { + out = instr->dst(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_MIN: { + out = instr->min(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_MAX: { + out = instr->max(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SLT: { + out = instr->slt(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SGE: { + out = instr->sge(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_MAD: { + out = instr->madd(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_SUB: { + out = instr->sub(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_LERP: { + out = instr->lerp(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_CND: { + out = instr->cnd(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_CND0: { + out = instr->cnd0(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_DOT2ADD: { + out = instr->dot2add(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_INDEX: + break; + case TGSI_OPCODE_NEGATE: { + out = instr->neg(inputs[0]); + } + break; + case TGSI_OPCODE_FRAC: { + out = instr->frc(inputs[0]); + } + break; + case TGSI_OPCODE_CLAMP: { + out = instr->clamp(inputs[0]); + } + break; + case TGSI_OPCODE_FLOOR: { + out = instr->floor(inputs[0]); + } + break; + case TGSI_OPCODE_ROUND: + break; + case TGSI_OPCODE_EXPBASE2: { + out = instr->ex2(inputs[0]); + } + break; + case TGSI_OPCODE_LOGBASE2: { + out = instr->lg2(inputs[0]); + } + break; + case TGSI_OPCODE_POWER: { + out = instr->pow(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_CROSSPRODUCT: { + out = instr->cross(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_MULTIPLYMATRIX: + break; + case TGSI_OPCODE_ABS: { + out = instr->abs(inputs[0]); 
+ } + break; + case TGSI_OPCODE_RCC: + break; + case TGSI_OPCODE_DPH: { + out = instr->dph(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_COS: { + out = instr->cos(inputs[0]); + } + break; + case TGSI_OPCODE_DDX: { + out = instr->ddx(inputs[0]); + } + break; + case TGSI_OPCODE_DDY: { + out = instr->ddy(inputs[0]); + } + break; + case TGSI_OPCODE_KILP: + break; + case TGSI_OPCODE_PK2H: + break; + case TGSI_OPCODE_PK2US: + break; + case TGSI_OPCODE_PK4B: + break; + case TGSI_OPCODE_PK4UB: + break; + case TGSI_OPCODE_RFL: + break; + case TGSI_OPCODE_SEQ: { + out = instr->seq(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SFL: { + out = instr->sfl(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SGT: { + out = instr->sgt(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SIN: { + out = instr->sin(inputs[0]); + } + break; + case TGSI_OPCODE_SLE: { + out = instr->sle(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SNE: { + out = instr->sne(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_STR: { + out = instr->str(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_TEX: + break; + case TGSI_OPCODE_TXD: + break; + case TGSI_OPCODE_UP2H: + break; + case TGSI_OPCODE_UP2US: + break; + case TGSI_OPCODE_UP4B: + break; + case TGSI_OPCODE_UP4UB: + break; + case TGSI_OPCODE_X2D: { + out = instr->x2d(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_ARA: + break; + case TGSI_OPCODE_ARR: + break; + case TGSI_OPCODE_BRA: + break; + case TGSI_OPCODE_CAL: { + instr->cal(inst->InstructionExtLabel.Label, storage->inputPtr()); + return; + } + break; + case TGSI_OPCODE_RET: { + instr->end(); + return; + } + break; + case TGSI_OPCODE_SSG: + break; + case TGSI_OPCODE_CMP: { + out = instr->cmp(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_SCS: { + out = instr->scs(inputs[0]); + } + break; + case TGSI_OPCODE_TXB: + break; + case TGSI_OPCODE_NRM4: + case TGSI_OPCODE_NRM: { + out = instr->nrm(inputs[0]); + } + break; + 
case TGSI_OPCODE_DIV: { + out = instr->div(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DP2: { + out = instr->dp2(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_TXL: + break; + case TGSI_OPCODE_BRK: { + instr->brk(); + return; + } + break; + case TGSI_OPCODE_IF: { + instr->ifop(inputs[0]); + storage->setCurrentBlock(instr->currentBlock()); + return; //just update the state + } + break; + case TGSI_OPCODE_LOOP: + break; + case TGSI_OPCODE_REP: + break; + case TGSI_OPCODE_ELSE: { + instr->elseop(); + storage->setCurrentBlock(instr->currentBlock()); + return; //only state update + } + break; + case TGSI_OPCODE_ENDIF: { + instr->endif(); + storage->setCurrentBlock(instr->currentBlock()); + return; //just update the state + } + break; + case TGSI_OPCODE_ENDLOOP: + break; + case TGSI_OPCODE_ENDREP: + break; + case TGSI_OPCODE_PUSHA: + break; + case TGSI_OPCODE_POPA: + break; + case TGSI_OPCODE_CEIL: + break; + case TGSI_OPCODE_I2F: + break; + case TGSI_OPCODE_NOT: + break; + case TGSI_OPCODE_TRUNC: { + out = instr->trunc(inputs[0]); + } + break; + case TGSI_OPCODE_SHL: + break; + case TGSI_OPCODE_SHR: + break; + case TGSI_OPCODE_AND: + break; + case TGSI_OPCODE_OR: + break; + case TGSI_OPCODE_MOD: + break; + case TGSI_OPCODE_XOR: + break; + case TGSI_OPCODE_SAD: + break; + case TGSI_OPCODE_TXF: + break; + case TGSI_OPCODE_TXQ: + break; + case TGSI_OPCODE_CONT: + break; + case TGSI_OPCODE_EMIT: + break; + case TGSI_OPCODE_ENDPRIM: + break; + case TGSI_OPCODE_BGNLOOP2: { + instr->beginLoop(); + storage->setCurrentBlock(instr->currentBlock()); + return; + } + break; + case TGSI_OPCODE_BGNSUB: { + instr->bgnSub(instno); + storage->setCurrentBlock(instr->currentBlock()); + storage->pushTemps(); + return; + } + break; + case TGSI_OPCODE_ENDLOOP2: { + instr->endLoop(); + storage->setCurrentBlock(instr->currentBlock()); + return; + } + break; + case TGSI_OPCODE_ENDSUB: { + instr->endSub(); + storage->setCurrentBlock(instr->currentBlock()); + 
storage->popArguments(); + storage->popTemps(); + return; + } + break; + case TGSI_OPCODE_NOISE1: + break; + case TGSI_OPCODE_NOISE2: + break; + case TGSI_OPCODE_NOISE3: + break; + case TGSI_OPCODE_NOISE4: + break; + case TGSI_OPCODE_NOP: + break; + case TGSI_OPCODE_M4X3: + break; + case TGSI_OPCODE_M3X4: + break; + case TGSI_OPCODE_M3X3: + break; + case TGSI_OPCODE_M3X2: + break; + case TGSI_OPCODE_CALLNZ: + break; + case TGSI_OPCODE_IFC: + break; + case TGSI_OPCODE_BREAKC: + break; + case TGSI_OPCODE_KIL: { + out = instr->kil(inputs[0]); + storage->setKilElement(out); + return; + } + break; + case TGSI_OPCODE_END: + instr->end(); + return; + break; + default: + fprintf(stderr, "ERROR: Unknown opcode %d\n", + inst->Instruction.Opcode); + assert(0); + break; + } + + if (!out) { + fprintf(stderr, "ERROR: unsupported opcode %d\n", + inst->Instruction.Opcode); + assert(!"Unsupported opcode"); + } + + /* # not sure if we need this */ + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + case TGSI_SAT_ZERO_ONE: + /*TXT( "_SAT" );*/ + break; + case TGSI_SAT_MINUS_PLUS_ONE: + /*TXT( "_SAT[-1,1]" );*/ + break; + default: + assert( 0 ); + } + + /* store results */ + for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + + if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { + storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + // FIXME we should not be generating elements for temporaries, this creates useless memory writes + } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { + storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { + storage->setAddrElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else { + fprintf(stderr, "ERROR: unsupported LLVM destination!"); + assert(!"wrong destination"); + } + } +} + + +static void 
+translate_instructionir(llvm::Module *module, + StorageSoa *storage, + InstructionsSoa *instr, + struct tgsi_full_instruction *inst, + struct tgsi_full_instruction *fi, + unsigned instno) +{ + std::vector< std::vector<llvm::Value*> > inputs(inst->Instruction.NumSrcRegs); + + for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + std::vector<llvm::Value*> val; + llvm::Value *indIdx = 0; + int swizzle = swizzleInt(src); + + if (src->SrcRegister.Indirect) { + indIdx = storage->addrElement(src->SrcRegisterInd.Index); + } + val = storage->load((enum tgsi_file_type)src->SrcRegister.File, + src->SrcRegister.Index, swizzle, instr->getIRBuilder(), indIdx); + + inputs[i] = val; + } + + std::vector<llvm::Value*> out(4); + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: { + out = instr->arl(inputs[0]); + } + break; + case TGSI_OPCODE_MOV: { + out = inputs[0]; + } + break; + case TGSI_OPCODE_LIT: { + out = instr->lit(inputs[0]); + } + break; + case TGSI_OPCODE_RCP: { + } + break; + case TGSI_OPCODE_RSQ: { + out = instr->rsq(inputs[0]); + } + break; + case TGSI_OPCODE_EXP: + break; + case TGSI_OPCODE_LOG: + break; + case TGSI_OPCODE_MUL: { + out = instr->mul(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_ADD: { + out = instr->add(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DP3: { + out = instr->dp3(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DP4: { + out = instr->dp4(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DST: { + } + break; + case TGSI_OPCODE_MIN: { + out = instr->min(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_MAX: { + out = instr->max(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SLT: { + out = instr->slt(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SGE: { + } + break; + case TGSI_OPCODE_MAD: { + out = instr->madd(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_SUB: { + out = instr->sub(inputs[0], 
inputs[1]); + } + break; + case TGSI_OPCODE_LERP: { + } + break; + case TGSI_OPCODE_CND: + break; + case TGSI_OPCODE_CND0: + break; + case TGSI_OPCODE_DOT2ADD: + break; + case TGSI_OPCODE_INDEX: + break; + case TGSI_OPCODE_NEGATE: + break; + case TGSI_OPCODE_FRAC: { + } + break; + case TGSI_OPCODE_CLAMP: + break; + case TGSI_OPCODE_FLOOR: { + } + break; + case TGSI_OPCODE_ROUND: + break; + case TGSI_OPCODE_EXPBASE2: { + } + break; + case TGSI_OPCODE_LOGBASE2: { + } + break; + case TGSI_OPCODE_POWER: { + out = instr->pow(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_CROSSPRODUCT: { + } + break; + case TGSI_OPCODE_MULTIPLYMATRIX: + break; + case TGSI_OPCODE_ABS: { + out = instr->abs(inputs[0]); + } + break; + case TGSI_OPCODE_RCC: + break; + case TGSI_OPCODE_DPH: { + } + break; + case TGSI_OPCODE_COS: { + } + break; + case TGSI_OPCODE_DDX: + break; + case TGSI_OPCODE_DDY: + break; + case TGSI_OPCODE_KILP: + break; + case TGSI_OPCODE_PK2H: + break; + case TGSI_OPCODE_PK2US: + break; + case TGSI_OPCODE_PK4B: + break; + case TGSI_OPCODE_PK4UB: + break; + case TGSI_OPCODE_RFL: + break; + case TGSI_OPCODE_SEQ: + break; + case TGSI_OPCODE_SFL: + break; + case TGSI_OPCODE_SGT: { + } + break; + case TGSI_OPCODE_SIN: { + } + break; + case TGSI_OPCODE_SLE: + break; + case TGSI_OPCODE_SNE: + break; + case TGSI_OPCODE_STR: + break; + case TGSI_OPCODE_TEX: + break; + case TGSI_OPCODE_TXD: + break; + case TGSI_OPCODE_UP2H: + break; + case TGSI_OPCODE_UP2US: + break; + case TGSI_OPCODE_UP4B: + break; + case TGSI_OPCODE_UP4UB: + break; + case TGSI_OPCODE_X2D: + break; + case TGSI_OPCODE_ARA: + break; + case TGSI_OPCODE_ARR: + break; + case TGSI_OPCODE_BRA: + break; + case TGSI_OPCODE_CAL: { + } + break; + case TGSI_OPCODE_RET: { + } + break; + case TGSI_OPCODE_SSG: + break; + case TGSI_OPCODE_CMP: { + } + break; + case TGSI_OPCODE_SCS: { + } + break; + case TGSI_OPCODE_TXB: + break; + case TGSI_OPCODE_NRM: + break; + case TGSI_OPCODE_DIV: + break; + case TGSI_OPCODE_DP2: + 
break; + case TGSI_OPCODE_TXL: + break; + case TGSI_OPCODE_BRK: { + } + break; + case TGSI_OPCODE_IF: { + } + break; + case TGSI_OPCODE_LOOP: + break; + case TGSI_OPCODE_REP: + break; + case TGSI_OPCODE_ELSE: { + } + break; + case TGSI_OPCODE_ENDIF: { + } + break; + case TGSI_OPCODE_ENDLOOP: + break; + case TGSI_OPCODE_ENDREP: + break; + case TGSI_OPCODE_PUSHA: + break; + case TGSI_OPCODE_POPA: + break; + case TGSI_OPCODE_CEIL: + break; + case TGSI_OPCODE_I2F: + break; + case TGSI_OPCODE_NOT: + break; + case TGSI_OPCODE_TRUNC: { + } + break; + case TGSI_OPCODE_SHL: + break; + case TGSI_OPCODE_SHR: + break; + case TGSI_OPCODE_AND: + break; + case TGSI_OPCODE_OR: + break; + case TGSI_OPCODE_MOD: + break; + case TGSI_OPCODE_XOR: + break; + case TGSI_OPCODE_SAD: + break; + case TGSI_OPCODE_TXF: + break; + case TGSI_OPCODE_TXQ: + break; + case TGSI_OPCODE_CONT: + break; + case TGSI_OPCODE_EMIT: + break; + case TGSI_OPCODE_ENDPRIM: + break; + case TGSI_OPCODE_BGNLOOP2: { + } + break; + case TGSI_OPCODE_BGNSUB: { + } + break; + case TGSI_OPCODE_ENDLOOP2: { + } + break; + case TGSI_OPCODE_ENDSUB: { + } + break; + case TGSI_OPCODE_NOISE1: + break; + case TGSI_OPCODE_NOISE2: + break; + case TGSI_OPCODE_NOISE3: + break; + case TGSI_OPCODE_NOISE4: + break; + case TGSI_OPCODE_NOP: + break; + case TGSI_OPCODE_M4X3: + break; + case TGSI_OPCODE_M3X4: + break; + case TGSI_OPCODE_M3X3: + break; + case TGSI_OPCODE_M3X2: + break; + case TGSI_OPCODE_NRM4: + break; + case TGSI_OPCODE_CALLNZ: + break; + case TGSI_OPCODE_IFC: + break; + case TGSI_OPCODE_BREAKC: + break; + case TGSI_OPCODE_KIL: { + } + break; + case TGSI_OPCODE_END: + instr->end(); + return; + break; + default: + fprintf(stderr, "ERROR: Unknown opcode %d\n", + inst->Instruction.Opcode); + assert(0); + break; + } + + if (!out[0]) { + fprintf(stderr, "ERROR: unsupported opcode %d\n", + inst->Instruction.Opcode); + assert(!"Unsupported opcode"); + } + + /* store results */ + for (int i = 0; i < inst->Instruction.NumDstRegs; 
++i) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + storage->store((enum tgsi_file_type)dst->DstRegister.File, + dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } +} + +llvm::Module * +tgsi_to_llvm(struct gallivm_ir *ir, const struct tgsi_token *tokens) +{ + llvm::Module *mod = new Module("shader"); + struct tgsi_parse_context parse; + struct tgsi_full_instruction fi; + struct tgsi_full_declaration fd; + unsigned instno = 0; + Function* shader = mod->getFunction("execute_shader"); + std::ostringstream stream; + if (ir->type == GALLIVM_VS) { + stream << "vs_shader"; + } else { + stream << "fs_shader"; + } + stream << ir->id; + std::string func_name = stream.str(); + shader->setName(func_name.c_str()); + + Function::arg_iterator args = shader->arg_begin(); + Value *ptr_INPUT = args++; + ptr_INPUT->setName("input"); + + BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0); + + tgsi_parse_init(&parse, tokens); + + fi = tgsi_default_full_instruction(); + fd = tgsi_default_full_declaration(); + Storage storage(label_entry, ptr_INPUT); + Instructions instr(mod, shader, label_entry, &storage); + while(!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + translate_declaration(ir, mod, &storage, + &parse.FullToken.FullDeclaration, + &fd); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + translate_immediate(&storage, + &parse.FullToken.FullImmediate); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + translate_instruction(mod, &storage, &instr, + &parse.FullToken.FullInstruction, + &fi, instno); + ++instno; + break; + + default: + assert(0); + } + } + + tgsi_parse_free(&parse); + + ir->num_consts = storage.numConsts(); + return mod; +} + +llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, + const struct tgsi_token *tokens) +{ + llvm::Module *mod = new Module("shader"); + struct tgsi_parse_context parse; + struct tgsi_full_instruction fi; + 
struct tgsi_full_declaration fd; + unsigned instno = 0; + std::ostringstream stream; + if (ir->type == GALLIVM_VS) { + stream << "vs_shader"; + } else { + stream << "fs_shader"; + } + //stream << ir->id; + std::string func_name = stream.str(); + Function *shader = llvm::cast<Function>(mod->getOrInsertFunction( + func_name.c_str(), + vertexShaderFunctionType())); + + Function::arg_iterator args = shader->arg_begin(); + Value *input = args++; + input->setName("inputs"); + Value *output = args++; + output->setName("outputs"); + Value *consts = args++; + consts->setName("consts"); + Value *temps = args++; + temps->setName("temps"); + + BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0); + + tgsi_parse_init(&parse, tokens); + + fi = tgsi_default_full_instruction(); + fd = tgsi_default_full_declaration(); + + StorageSoa storage(label_entry, input, output, consts, temps); + InstructionsSoa instr(mod, shader, label_entry, &storage); + + while(!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + translate_declarationir(ir, mod, &storage, + &parse.FullToken.FullDeclaration, + &fd); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + translate_immediateir(&storage, + &parse.FullToken.FullImmediate); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + storage.declareImmediates(); + translate_instructionir(mod, &storage, &instr, + &parse.FullToken.FullInstruction, + &fi, instno); + ++instno; + break; + + default: + assert(0); + } + } + + tgsi_parse_free(&parse); + + return mod; +} diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.h b/src/gallium/auxiliary/gallivm/tgsitollvm.h new file mode 100644 index 0000000000..7ada04d629 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.h @@ -0,0 +1,20 @@ +#ifndef TGSITOLLVM_H +#define TGSITOLLVM_H + + +namespace llvm { + class Module; +} + +struct gallivm_ir; +struct tgsi_token; + + +llvm::Module * tgsi_to_llvm(struct gallivm_ir 
*ir, + const struct tgsi_token *tokens); + + +llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, + const struct tgsi_token *tokens); + +#endif diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile new file mode 100644 index 0000000000..f9b39d9ce0 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -0,0 +1,22 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = pipebuffer + +C_SOURCES = \ + pb_buffer_fenced.c \ + pb_buffer_malloc.c \ + pb_bufmgr_alt.c \ + pb_bufmgr_cache.c \ + pb_bufmgr_debug.c \ + pb_bufmgr_fenced.c \ + pb_bufmgr_mm.c \ + pb_bufmgr_pool.c \ + pb_bufmgr_slab.c \ + pb_validate.c \ + pb_winsys.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript new file mode 100644 index 0000000000..56a40dda0d --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -0,0 +1,19 @@ +Import('*') + +pipebuffer = env.ConvenienceLibrary( + target = 'pipebuffer', + source = [ + 'pb_buffer_fenced.c', + 'pb_buffer_malloc.c', + 'pb_bufmgr_alt.c', + 'pb_bufmgr_cache.c', + 'pb_bufmgr_debug.c', + 'pb_bufmgr_fenced.c', + 'pb_bufmgr_mm.c', + 'pb_bufmgr_pool.c', + 'pb_bufmgr_slab.c', + 'pb_validate.c', + 'pb_winsys.c', + ]) + +auxiliaries.insert(0, pipebuffer) diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h new file mode 100644 index 0000000000..8505d333bd --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -0,0 +1,245 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Generic code for buffers. + * + * Behind a pipe buffer handle there can be DMA buffers, client (or user) + * buffers, regular malloced buffers, etc. This file provides an abstract base + * buffer handle that allows the driver to cope with all those kinds of buffers + * in a more flexible way. + * + * There is no obligation of a winsys driver to use this library. And a pipe + * driver should be completely agnostic about it. + * + * \author José Fonseca <jrfonseca@tungstengraphics.com> + */ + +#ifndef PB_BUFFER_H_ +#define PB_BUFFER_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct pb_vtbl; + +/** + * Buffer description. 
+ * + * Used when allocating the buffer. + */ +struct pb_desc +{ + unsigned alignment; + unsigned usage; +}; + + +/** + * Base class for all pb_* buffers. + */ +struct pb_buffer +{ + struct pipe_buffer base; + + /** + * Pointer to the virtual function table. + * + * Avoid accessing this table directly. Use the inline functions below + * instead to avoid mistakes. + */ + const struct pb_vtbl *vtbl; +}; + + +/** + * Virtual function table for the buffer storage operations. + * + * Note that creation is not done through this table. + */ +struct pb_vtbl +{ + void (*destroy)( struct pb_buffer *buf ); + + /** + * Map the entire data store of a buffer object into the client's address. + * flags is a bitmask of PIPE_BUFFER_FLAG_READ/WRITE. + */ + void *(*map)( struct pb_buffer *buf, + unsigned flags ); + + void (*unmap)( struct pb_buffer *buf ); + + /** + * Get the base buffer and the offset. + * + * A buffer can be subdivided in smaller buffers. This method should return + * the underlying buffer, and the relative offset. + * + * Buffers without an underlying base buffer should return themselves, with + * a zero offset. + * + * Note that this will increase the reference count of the base buffer. + */ + void (*get_base_buffer)( struct pb_buffer *buf, + struct pb_buffer **base_buf, + unsigned *offset ); +}; + + +static INLINE struct pipe_buffer * +pb_pipe_buffer( struct pb_buffer *pbuf ) +{ + assert(pbuf); + return &pbuf->base; +} + + +static INLINE struct pb_buffer * +pb_buffer( struct pipe_buffer *buf ) +{ + assert(buf); + /* Could add a magic cookie check on debug builds. 
+ */ + return (struct pb_buffer *)buf; +} + + +/* Accessor functions for pb->vtbl: + * + * Each accessor asserts that buf is non-NULL and additionally bails out on a + * NULL buf before dispatching through the vtable. + */ +static INLINE void * +pb_map(struct pb_buffer *buf, + unsigned flags) +{ + assert(buf); + if(!buf) + return NULL; + return buf->vtbl->map(buf, flags); +} + + +static INLINE void +pb_unmap(struct pb_buffer *buf) +{ + assert(buf); + if(!buf) + return; + buf->vtbl->unmap(buf); +} + + +/** + * Fetch the base buffer and the offset of buf through its vtable. + * + * For a NULL buf the caller's outputs are cleared (*base_buf = NULL, + * *offset = 0) so that stale values are never read back. + */ +static INLINE void +pb_get_base_buffer( struct pb_buffer *buf, + struct pb_buffer **base_buf, + unsigned *offset ) +{ + assert(buf); + if(!buf) { + /* write through the out-pointers, not to the local parameter copies */ + if(base_buf) + *base_buf = NULL; + if(offset) + *offset = 0; + return; + } + buf->vtbl->get_base_buffer(buf, base_buf, offset); +} + + +static INLINE void +pb_destroy(struct pb_buffer *buf) +{ + assert(buf); + if(!buf) + return; + buf->vtbl->destroy(buf); +} + + +/** + * Make *dst refer to src: src gains a reference first, then the buffer + * previously held in *dst loses one and is destroyed when its count reaches + * zero. + * + * XXX: thread safety issues! + */ +static INLINE void +pb_reference(struct pb_buffer **dst, + struct pb_buffer *src) +{ + if (src) + src->base.refcount++; + + if (*dst && --(*dst)->base.refcount == 0) + pb_destroy( *dst ); + + *dst = src; +} + + +/** + * Utility function to check whether the provided alignment is consistent with + * the requested or not. + * + * A requested alignment of zero is treated as "no constraint" and is always + * satisfied; this also avoids a modulo by zero. + */ +static INLINE boolean +pb_check_alignment(size_t requested, size_t provided) +{ + if(!requested) + return TRUE; + return requested <= provided && (provided % requested) == 0 ? TRUE : FALSE; +} + + +/** + * Utility function to check whether the provided usage flags include all of + * the requested ones. + */ +static INLINE boolean +pb_check_usage(unsigned requested, unsigned provided) +{ + return (requested & provided) == requested ? TRUE : FALSE; +} + + +/** + * Malloc-based buffer to store data that can't be used by the graphics + * hardware. 
+ */ +struct pb_buffer * +pb_malloc_buffer_create(size_t size, + const struct pb_desc *desc); + + +void +pb_init_winsys(struct pipe_winsys *winsys); + + +#ifdef __cplusplus +} +#endif + +#endif /*PB_BUFFER_H_*/ diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c new file mode 100644 index 0000000000..17b2781052 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -0,0 +1,462 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Implementation of fenced buffers. 
+ * + * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com> + * \author Thomas Hellström <thomas-at-tungstengraphics-dot-com> + */ + + +#include "pipe/p_config.h" + +#if defined(PIPE_OS_LINUX) +#include <unistd.h> +#include <sched.h> +#endif + +#include "pipe/p_compiler.h" +#include "pipe/p_error.h" +#include "pipe/p_debug.h" +#include "pipe/p_winsys.h" +#include "pipe/p_thread.h" +#include "util/u_memory.h" +#include "util/u_double_list.h" + +#include "pb_buffer.h" +#include "pb_buffer_fenced.h" + + + +/** + * Convenience macro (type safe). + */ +#define SUPER(__derived) (&(__derived)->base) + +#define PIPE_BUFFER_USAGE_CPU_READ_WRITE \ + ( PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE ) +#define PIPE_BUFFER_USAGE_GPU_READ_WRITE \ + ( PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE ) +#define PIPE_BUFFER_USAGE_WRITE \ + ( PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_GPU_WRITE ) + + +struct fenced_buffer_list +{ + pipe_mutex mutex; + + struct pipe_winsys *winsys; + + size_t numDelayed; + + struct list_head delayed; +}; + + +/** + * Wrapper around a pipe buffer which adds fencing and reference counting. + */ +struct fenced_buffer +{ + struct pb_buffer base; + + struct pb_buffer *buffer; + + /* FIXME: protect access with mutex */ + + /** + * A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current + * buffer usage. 
+ */ + unsigned flags; + + unsigned mapcount; + struct pipe_fence_handle *fence; + + struct list_head head; + struct fenced_buffer_list *list; +}; + + +static INLINE struct fenced_buffer * +fenced_buffer(struct pb_buffer *buf) +{ + assert(buf); + assert(buf->vtbl == &fenced_buffer_vtbl); + return (struct fenced_buffer *)buf; +} + + +static INLINE void +_fenced_buffer_add(struct fenced_buffer *fenced_buf) +{ + struct fenced_buffer_list *fenced_list = fenced_buf->list; + + assert(fenced_buf->base.base.refcount); + assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); + assert(fenced_buf->fence); + + assert(!fenced_buf->head.prev); + assert(!fenced_buf->head.next); + LIST_ADDTAIL(&fenced_buf->head, &fenced_list->delayed); + ++fenced_list->numDelayed; +} + + +/** + * Actually destroy the buffer. + */ +static INLINE void +_fenced_buffer_destroy(struct fenced_buffer *fenced_buf) +{ + assert(!fenced_buf->base.base.refcount); + assert(!fenced_buf->fence); + pb_reference(&fenced_buf->buffer, NULL); + FREE(fenced_buf); +} + + +static INLINE void +_fenced_buffer_remove(struct fenced_buffer_list *fenced_list, + struct fenced_buffer *fenced_buf) +{ + struct pipe_winsys *winsys = fenced_list->winsys; + + assert(fenced_buf->fence); + assert(fenced_buf->list == fenced_list); + + winsys->fence_reference(winsys, &fenced_buf->fence, NULL); + fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; + + assert(fenced_buf->head.prev); + assert(fenced_buf->head.next); + LIST_DEL(&fenced_buf->head); +#ifdef DEBUG + fenced_buf->head.prev = NULL; + fenced_buf->head.next = NULL; +#endif + + assert(fenced_list->numDelayed); + --fenced_list->numDelayed; + + if(!fenced_buf->base.base.refcount) + _fenced_buffer_destroy(fenced_buf); +} + + +static INLINE enum pipe_error +_fenced_buffer_finish(struct fenced_buffer *fenced_buf) +{ + struct fenced_buffer_list *fenced_list = fenced_buf->list; + struct pipe_winsys *winsys = fenced_list->winsys; + +#if 0 + debug_warning("waiting for GPU"); 
+#endif + + assert(fenced_buf->fence); + if(fenced_buf->fence) { + if(winsys->fence_finish(winsys, fenced_buf->fence, 0) != 0) { + return PIPE_ERROR; + } + /* Remove from the fenced list */ + /* TODO: remove consequents */ + _fenced_buffer_remove(fenced_list, fenced_buf); + } + + fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; + return PIPE_OK; +} + + +/** + * Free as many fenced buffers from the list head as possible. + */ +static void +_fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, + int wait) +{ + struct pipe_winsys *winsys = fenced_list->winsys; + struct list_head *curr, *next; + struct fenced_buffer *fenced_buf; + struct pipe_fence_handle *prev_fence = NULL; + + curr = fenced_list->delayed.next; + next = curr->next; + while(curr != &fenced_list->delayed) { + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + + if(fenced_buf->fence != prev_fence) { + int signaled; + if (wait) + signaled = winsys->fence_finish(winsys, fenced_buf->fence, 0); + else + signaled = winsys->fence_signalled(winsys, fenced_buf->fence, 0); + if (signaled != 0) + break; + prev_fence = fenced_buf->fence; + } + else { + assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0); + } + + _fenced_buffer_remove(fenced_list, fenced_buf); + + curr = next; + next = curr->next; + } +} + + +static void +fenced_buffer_destroy(struct pb_buffer *buf) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_buffer_list *fenced_list = fenced_buf->list; + + pipe_mutex_lock(fenced_list->mutex); + assert(fenced_buf->base.base.refcount == 0); + if (fenced_buf->fence) { + struct pipe_winsys *winsys = fenced_list->winsys; + if(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0) { + struct list_head *curr, *prev; + curr = &fenced_buf->head; + prev = curr->prev; + do { + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0); + _fenced_buffer_remove(fenced_list, 
fenced_buf); + curr = prev; + prev = curr->prev; + } while (curr != &fenced_list->delayed); + } + else { + /* delay destruction */ + } + } + else { + _fenced_buffer_destroy(fenced_buf); + } + pipe_mutex_unlock(fenced_list->mutex); +} + + +static void * +fenced_buffer_map(struct pb_buffer *buf, + unsigned flags) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + void *map; + + assert(!(flags & ~PIPE_BUFFER_USAGE_CPU_READ_WRITE)); + flags &= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + + /* Check for GPU read/write access */ + if(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) { + /* Wait for the GPU to finish writing */ + _fenced_buffer_finish(fenced_buf); + } + +#if 0 + /* Check for CPU write access (read is OK) */ + if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { + /* this is legal -- just for debugging */ + debug_warning("concurrent CPU writes"); + } +#endif + + map = pb_map(fenced_buf->buffer, flags); + if(map) { + ++fenced_buf->mapcount; + fenced_buf->flags |= flags; + } + + return map; +} + + +static void +fenced_buffer_unmap(struct pb_buffer *buf) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + assert(fenced_buf->mapcount); + if(fenced_buf->mapcount) { + pb_unmap(fenced_buf->buffer); + --fenced_buf->mapcount; + if(!fenced_buf->mapcount) + fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE; + } +} + + +static void +fenced_buffer_get_base_buffer(struct pb_buffer *buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + pb_get_base_buffer(fenced_buf->buffer, base_buf, offset); +} + + +const struct pb_vtbl +fenced_buffer_vtbl = { + fenced_buffer_destroy, + fenced_buffer_map, + fenced_buffer_unmap, + fenced_buffer_get_base_buffer +}; + + +struct pb_buffer * +fenced_buffer_create(struct fenced_buffer_list *fenced_list, + struct pb_buffer *buffer) +{ + struct fenced_buffer *buf; + + if(!buffer) + return NULL; + + buf = CALLOC_STRUCT(fenced_buffer); + if(!buf) { + 
pb_reference(&buffer, NULL); + return NULL; + } + + buf->base.base.refcount = 1; + buf->base.base.alignment = buffer->base.alignment; + buf->base.base.usage = buffer->base.usage; + buf->base.base.size = buffer->base.size; + + buf->base.vtbl = &fenced_buffer_vtbl; + buf->buffer = buffer; + buf->list = fenced_list; + + return &buf->base; +} + + +void +buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct fenced_buffer *fenced_buf; + struct fenced_buffer_list *fenced_list; + struct pipe_winsys *winsys; + /* FIXME: receive this as a parameter */ + unsigned flags = fence ? PIPE_BUFFER_USAGE_GPU_READ_WRITE : 0; + + /* This is a public function, so be extra cautious with the buffer passed, + * as happens frequently to receive null buffers, or pointer to buffers + * other than fenced buffers. */ + assert(buf); + if(!buf) + return; + assert(buf->vtbl == &fenced_buffer_vtbl); + if(buf->vtbl != &fenced_buffer_vtbl) + return; + + fenced_buf = fenced_buffer(buf); + fenced_list = fenced_buf->list; + winsys = fenced_list->winsys; + + if(!fence || fence == fenced_buf->fence) { + /* Handle the same fence case specially, not only because it is a fast + * path, but mostly to avoid serializing two writes with the same fence, + * as that would bring the hardware down to synchronous operation without + * any benefit. 
+ */ + fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; + return; + } + + pipe_mutex_lock(fenced_list->mutex); + if (fenced_buf->fence) + _fenced_buffer_remove(fenced_list, fenced_buf); + if (fence) { + winsys->fence_reference(winsys, &fenced_buf->fence, fence); + fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; + _fenced_buffer_add(fenced_buf); + } + pipe_mutex_unlock(fenced_list->mutex); +} + + +struct fenced_buffer_list * +fenced_buffer_list_create(struct pipe_winsys *winsys) +{ + struct fenced_buffer_list *fenced_list; + + fenced_list = CALLOC_STRUCT(fenced_buffer_list); + if (!fenced_list) + return NULL; + + fenced_list->winsys = winsys; + + LIST_INITHEAD(&fenced_list->delayed); + + fenced_list->numDelayed = 0; + + pipe_mutex_init(fenced_list->mutex); + + return fenced_list; +} + + +void +fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, + int wait) +{ + pipe_mutex_lock(fenced_list->mutex); + _fenced_buffer_list_check_free(fenced_list, wait); + pipe_mutex_unlock(fenced_list->mutex); +} + + +void +fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) +{ + pipe_mutex_lock(fenced_list->mutex); + + /* Wait on outstanding fences */ + while (fenced_list->numDelayed) { + pipe_mutex_unlock(fenced_list->mutex); +#if defined(PIPE_OS_LINUX) + sched_yield(); +#endif + _fenced_buffer_list_check_free(fenced_list, 1); + pipe_mutex_lock(fenced_list->mutex); + } + + pipe_mutex_unlock(fenced_list->mutex); + + FREE(fenced_list); +} + + diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h new file mode 100644 index 0000000000..50d5891bdb --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h @@ -0,0 +1,126 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Buffer fencing. + * + * "Fenced buffers" is actually a misnomer. They should be referred as + * "fenceable buffers", i.e, buffers that can be fenced, but I couldn't find + * the word "fenceable" in the dictionary. + * + * A "fenced buffer" is a decorator around a normal buffer, which adds two + * special properties: + * - the ability for the destruction to be delayed by a fence; + * - reference counting. + * + * Usually DMA buffers have a life-time that will extend the life-time of its + * handle. The end-of-life is dictated by the fence signalling. + * + * Between the handle's destruction, and the fence signalling, the buffer is + * stored in a fenced buffer list. 
+ * + * \author José Fonseca <jrfonseca@tungstengraphics.com> + */ + +#ifndef PB_BUFFER_FENCED_H_ +#define PB_BUFFER_FENCED_H_ + + +#include "pipe/p_debug.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct pipe_winsys; +struct pipe_buffer; +struct pipe_fence_handle; + + +/** + * List of buffers which are awaiting fence signalling. + */ +struct fenced_buffer_list; + + +/** + * The fenced buffer's virtual function table. + * + * NOTE: Made public for debugging purposes. + */ +extern const struct pb_vtbl fenced_buffer_vtbl; + + +/** + * Create a fenced buffer list. + * + * See also fenced_bufmgr_create for a more convenient way to use this. + */ +struct fenced_buffer_list * +fenced_buffer_list_create(struct pipe_winsys *winsys); + + +/** + * Walk the fenced buffer list to check and free signalled buffers. + */ +void +fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, + int wait); + +void +fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list); + + +/** + * Wrap a buffer in a fenced buffer. + * + * NOTE: this will not increase the buffer reference count. + */ +struct pb_buffer * +fenced_buffer_create(struct fenced_buffer_list *fenced, + struct pb_buffer *buffer); + + +/** + * Set a buffer's fence. + * + * NOTE: Although it takes a generic pb_buffer argument, it will fail + * on everything but buffers returned by fenced_buffer_create. + */ +void +buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence); + + +#ifdef __cplusplus +} +#endif + +#endif /*PB_BUFFER_FENCED_H_*/ diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c new file mode 100644 index 0000000000..1bf22a2ec0 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c @@ -0,0 +1,166 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Implementation of malloc-based buffers to store data that can't be processed + * by the hardware. 
+ * + * \author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + + +#include "pipe/p_debug.h" +#include "util/u_memory.h" +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +struct malloc_buffer +{ + struct pb_buffer base; + void *data; +}; + + +extern const struct pb_vtbl malloc_buffer_vtbl; + +static INLINE struct malloc_buffer * +malloc_buffer(struct pb_buffer *buf) +{ + assert(buf); + assert(buf->vtbl == &malloc_buffer_vtbl); + return (struct malloc_buffer *)buf; +} + + +static void +malloc_buffer_destroy(struct pb_buffer *buf) +{ + align_free(malloc_buffer(buf)->data); + FREE(buf); +} + + +static void * +malloc_buffer_map(struct pb_buffer *buf, + unsigned flags) +{ + return malloc_buffer(buf)->data; +} + + +static void +malloc_buffer_unmap(struct pb_buffer *buf) +{ + /* No-op */ +} + + +static void +malloc_buffer_get_base_buffer(struct pb_buffer *buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + *base_buf = buf; + *offset = 0; +} + + +const struct pb_vtbl +malloc_buffer_vtbl = { + malloc_buffer_destroy, + malloc_buffer_map, + malloc_buffer_unmap, + malloc_buffer_get_base_buffer +}; + + +struct pb_buffer * +pb_malloc_buffer_create(size_t size, + const struct pb_desc *desc) +{ + struct malloc_buffer *buf; + + /* TODO: do a single allocation */ + + buf = CALLOC_STRUCT(malloc_buffer); + if(!buf) + return NULL; + + buf->base.base.refcount = 1; + buf->base.base.alignment = desc->alignment; + buf->base.base.usage = desc->usage; + buf->base.base.size = size; + buf->base.vtbl = &malloc_buffer_vtbl; + + buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? 
sizeof(void*) : desc->alignment); + if(!buf->data) { + FREE(buf); + return NULL; + } + + return &buf->base; +} + + +static struct pb_buffer * +pb_malloc_bufmgr_create_buffer(struct pb_manager *mgr, + size_t size, + const struct pb_desc *desc) +{ + return pb_malloc_buffer_create(size, desc); +} + + +static void +pb_malloc_bufmgr_flush(struct pb_manager *mgr) +{ + /* No-op */ +} + + +static void +pb_malloc_bufmgr_destroy(struct pb_manager *mgr) +{ + /* No-op */ +} + + +static struct pb_manager +pb_malloc_bufmgr = { + pb_malloc_bufmgr_destroy, + pb_malloc_bufmgr_create_buffer, + pb_malloc_bufmgr_flush +}; + + +struct pb_manager * +pb_malloc_bufmgr_create(void) +{ + return &pb_malloc_bufmgr; +} diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h new file mode 100644 index 0000000000..cafbee045a --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -0,0 +1,198 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Buffer management. + * + * A buffer manager does only one basic thing: it creates buffers. Actually, + * "buffer factory" would probably a more accurate description. + * + * You can chain buffer managers so that you can have a finer grained memory + * management and pooling. + * + * For example, for a simple batch buffer manager you would chain: + * - the native buffer manager, which provides DMA memory from the graphics + * memory space; + * - the pool buffer manager, which keep around a pool of equally sized buffers + * to avoid latency associated with the native buffer manager; + * - the fenced buffer manager, which will delay buffer destruction until the + * the moment the card finishing processing it. + * + * \author José Fonseca <jrfonseca@tungstengraphics.com> + */ + +#ifndef PB_BUFMGR_H_ +#define PB_BUFMGR_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_error.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct pb_desc; +struct pipe_buffer; +struct pipe_winsys; + + +/** + * Abstract base class for all buffer managers. + */ +struct pb_manager +{ + void + (*destroy)( struct pb_manager *mgr ); + + struct pb_buffer * + (*create_buffer)( struct pb_manager *mgr, + size_t size, + const struct pb_desc *desc); + + /** + * Flush all temporary-held buffers. + * + * Used mostly to aid debugging memory issues or to clean up resources when + * the drivers are long lived. + */ + void + (*flush)( struct pb_manager *mgr ); +}; + + +/** + * Malloc buffer provider. + * + * Simple wrapper around pb_malloc_buffer_create for convenience. 
+ */ +struct pb_manager * +pb_malloc_bufmgr_create(void); + + +/** + * Static buffer pool sub-allocator. + * + * Manages the allocation of equally sized buffers. It does so by allocating + * a single big buffer and divide it equally sized buffers. + * + * It is meant to manage the allocation of batch buffer pools. + */ +struct pb_manager * +pool_bufmgr_create(struct pb_manager *provider, + size_t n, size_t size, + const struct pb_desc *desc); + + +/** + * Static sub-allocator based the old memory manager. + * + * It managers buffers of different sizes. It does so by allocating a buffer + * with the size of the heap, and then using the old mm memory manager to manage + * that heap. + */ +struct pb_manager * +mm_bufmgr_create(struct pb_manager *provider, + size_t size, size_t align2); + +/** + * Same as mm_bufmgr_create. + * + * Buffer will be release when the manager is destroyed. + */ +struct pb_manager * +mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, + size_t size, size_t align2); + + +/** + * Slab sub-allocator. + */ +struct pb_manager * +pb_slab_manager_create(struct pb_manager *provider, + size_t bufSize, + size_t slabSize, + const struct pb_desc *desc); + +/** + * Allow a range of buffer size, by aggregating multiple slabs sub-allocators + * with different bucket sizes. + */ +struct pb_manager * +pb_slab_range_manager_create(struct pb_manager *provider, + size_t minBufSize, + size_t maxBufSize, + size_t slabSize, + const struct pb_desc *desc); + + +/** + * Time-based buffer cache. + * + * This manager keeps a cache of destroyed buffers during a time interval. + */ +struct pb_manager * +pb_cache_manager_create(struct pb_manager *provider, + unsigned usecs); + + +/** + * Fenced buffer manager. + * + * This manager is just meant for convenience. It wraps the buffers returned + * by another manager in fenced buffers, so that + * + * NOTE: the buffer manager that provides the buffers will be destroyed + * at the same time. 
+ */ +struct pb_manager * +fenced_bufmgr_create(struct pb_manager *provider, + struct pipe_winsys *winsys); + + +struct pb_manager * +pb_alt_manager_create(struct pb_manager *provider1, + struct pb_manager *provider2); + + +/** + * Debug buffer manager to detect buffer under- and overflows. + * + * Band size should be a multiple of the largest alignment + */ +struct pb_manager * +pb_debug_manager_create(struct pb_manager *provider, size_t band_size); + + +#ifdef __cplusplus +} +#endif + +#endif /*PB_BUFMGR_H_*/ diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c new file mode 100644 index 0000000000..c956924cc7 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c @@ -0,0 +1,120 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Allocate buffers from two alternative buffer providers. + * + * \author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "util/u_memory.h" + +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +struct pb_alt_manager +{ + struct pb_manager base; + + struct pb_manager *provider1; + struct pb_manager *provider2; +}; + + +static INLINE struct pb_alt_manager * +pb_alt_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_alt_manager *)mgr; +} + + +static struct pb_buffer * +pb_alt_manager_create_buffer(struct pb_manager *_mgr, + size_t size, + const struct pb_desc *desc) +{ + struct pb_alt_manager *mgr = pb_alt_manager(_mgr); + struct pb_buffer *buf; + + buf = mgr->provider1->create_buffer(mgr->provider1, size, desc); + if(buf) + return buf; + + buf = mgr->provider2->create_buffer(mgr->provider2, size, desc); + return buf; +} + + +static void +pb_alt_manager_flush(struct pb_manager *_mgr) +{ + struct pb_alt_manager *mgr = pb_alt_manager(_mgr); + + assert(mgr->provider1->flush); + if(mgr->provider1->flush) + mgr->provider1->flush(mgr->provider1); + + assert(mgr->provider2->flush); + if(mgr->provider2->flush) + mgr->provider2->flush(mgr->provider2); +} + + +static void +pb_alt_manager_destroy(struct pb_manager *mgr) +{ + FREE(mgr); +} + + +struct pb_manager * +pb_alt_manager_create(struct pb_manager *provider1, + struct pb_manager *provider2) +{ + struct pb_alt_manager *mgr; + + if(!provider1 || !provider2) + return NULL; + + mgr = CALLOC_STRUCT(pb_alt_manager); + if (!mgr) + return NULL; + + 
mgr->base.destroy = pb_alt_manager_destroy; + mgr->base.create_buffer = pb_alt_manager_create_buffer; + mgr->base.flush = pb_alt_manager_flush; + mgr->provider1 = provider1; + mgr->provider2 = provider2; + + return &mgr->base; +} diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c new file mode 100644 index 0000000000..8f118874ec --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -0,0 +1,364 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Buffer cache. 
+ *
+ * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com>
+ * \author Thomas Hellström <thomas-at-tungstengraphics-dot-com>
+ */
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_thread.h"
+#include "util/u_memory.h"
+#include "util/u_double_list.h"
+#include "util/u_time.h"
+
+#include "pb_buffer.h"
+#include "pb_bufmgr.h"
+
+
+/**
+ * Convenience macro (type safe).
+ */
+#define SUPER(__derived) (&(__derived)->base)
+
+
+struct pb_cache_manager;
+
+
+/**
+ * Wrapper around a pipe buffer which adds delayed destruction.
+ */
+struct pb_cache_buffer
+{
+   struct pb_buffer base;
+
+   /** The real buffer obtained from the provider */
+   struct pb_buffer *buffer;
+   struct pb_cache_manager *mgr;
+
+   /** Caching time interval */
+   struct util_time start, end;
+
+   /** Link in pb_cache_manager::delayed */
+   struct list_head head;
+};
+
+
+struct pb_cache_manager
+{
+   struct pb_manager base;
+
+   struct pb_manager *provider;
+
+   /** How long (added to the release time) a released buffer stays cached */
+   unsigned usecs;
+
+   /** Protects the delayed list and its counter */
+   pipe_mutex mutex;
+
+   /** Released buffers, appended at the tail as they are released */
+   struct list_head delayed;
+
+   /** Number of buffers currently linked in the delayed list */
+   size_t numDelayed;
+};
+
+
+static INLINE struct pb_cache_buffer *
+pb_cache_buffer(struct pb_buffer *buf)
+{
+   assert(buf);
+   return (struct pb_cache_buffer *)buf;
+}
+
+
+static INLINE struct pb_cache_manager *
+pb_cache_manager(struct pb_manager *mgr)
+{
+   assert(mgr);
+   return (struct pb_cache_manager *)mgr;
+}
+
+
+/**
+ * Actually destroy the buffer.
+ *
+ * The buffer must be linked in the delayed list; the caller holds the
+ * manager mutex.
+ */
+static INLINE void
+_pb_cache_buffer_destroy(struct pb_cache_buffer *buf)
+{
+   struct pb_cache_manager *mgr = buf->mgr;
+
+   LIST_DEL(&buf->head);
+   assert(mgr->numDelayed);
+   --mgr->numDelayed;
+   assert(!buf->base.base.refcount);
+   pb_reference(&buf->buffer, NULL);
+   FREE(buf);
+}
+
+
+/**
+ * Free as many cache buffers from the list head as possible.
+ */
+static void
+_pb_cache_buffer_list_check_free(struct pb_cache_manager *mgr)
+{
+   struct list_head *curr, *next;
+   struct pb_cache_buffer *buf;
+   struct util_time now;
+
+   util_time_get(&now);
+
+   curr = mgr->delayed.next;
+   next = curr->next;
+   while(curr != &mgr->delayed) {
+      buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
+
+      /* stop at the first buffer whose caching interval has not elapsed */
+      if(!util_time_timeout(&buf->start, &buf->end, &now))
+         break;
+
+      _pb_cache_buffer_destroy(buf);
+
+      curr = next;
+      next = curr->next;
+   }
+}
+
+
+/**
+ * vtbl destroy hook.
+ *
+ * Instead of freeing the buffer, stamp it with its caching interval and
+ * append it to the manager's delayed list, after first releasing the
+ * entries whose interval already elapsed.
+ */
+static void
+pb_cache_buffer_destroy(struct pb_buffer *_buf)
+{
+   struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
+   struct pb_cache_manager *mgr = buf->mgr;
+
+   pipe_mutex_lock(mgr->mutex);
+   assert(buf->base.base.refcount == 0);
+
+   _pb_cache_buffer_list_check_free(mgr);
+
+   util_time_get(&buf->start);
+   util_time_add(&buf->start, mgr->usecs, &buf->end);
+   LIST_ADDTAIL(&buf->head, &mgr->delayed);
+   ++mgr->numDelayed;
+   pipe_mutex_unlock(mgr->mutex);
+}
+
+
+static void *
+pb_cache_buffer_map(struct pb_buffer *_buf,
+                    unsigned flags)
+{
+   struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
+   return pb_map(buf->buffer, flags);
+}
+
+
+static void
+pb_cache_buffer_unmap(struct pb_buffer *_buf)
+{
+   struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
+   pb_unmap(buf->buffer);
+}
+
+
+static void
+pb_cache_buffer_get_base_buffer(struct pb_buffer *_buf,
+                                struct pb_buffer **base_buf,
+                                unsigned *offset)
+{
+   struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
+   pb_get_base_buffer(buf->buffer, base_buf, offset);
+}
+
+
+const struct pb_vtbl
+pb_cache_buffer_vtbl = {
+      pb_cache_buffer_destroy,
+      pb_cache_buffer_map,
+      pb_cache_buffer_unmap,
+      pb_cache_buffer_get_base_buffer
+};
+
+
+/**
+ * Whether a cached buffer can satisfy a request of the given size and
+ * description: large enough, less than twice the requested size, and
+ * passing the alignment and usage checks.
+ */
+static INLINE boolean
+pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
+                          size_t size,
+                          const struct pb_desc *desc)
+{
+   if(buf->base.base.size < size)
+      return FALSE;
+
+   /* be lenient with size */
+   if(buf->base.base.size >= 2*size)
+      return FALSE;
+
+   if(!pb_check_alignment(desc->alignment, buf->base.base.alignment))
+      return FALSE;
+
+   if(!pb_check_usage(desc->usage, buf->base.base.usage))
+      return FALSE;
+
+   return TRUE;
+}
+
+
+/**
+ * Create a buffer, reusing a compatible one from the delayed list when
+ * possible and otherwise wrapping a fresh buffer from the provider.
+ *
+ * Returns NULL when allocation of the wrapper or of the provider buffer
+ * fails.
+ */
+static struct pb_buffer *
+pb_cache_manager_create_buffer(struct pb_manager *_mgr,
+                               size_t size,
+                               const struct pb_desc *desc)
+{
+   struct pb_cache_manager *mgr = pb_cache_manager(_mgr);
+   struct pb_cache_buffer *buf;
+   struct pb_cache_buffer *curr_buf;
+   struct list_head *curr, *next;
+   struct util_time now;
+
+   pipe_mutex_lock(mgr->mutex);
+
+   buf = NULL;
+   curr = mgr->delayed.next;
+   next = curr->next;
+
+   /* search in the expired buffers, freeing them in the process */
+   util_time_get(&now);
+   while(curr != &mgr->delayed) {
+      curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
+      if(!buf && pb_cache_is_buffer_compat(curr_buf, size, desc))
+         buf = curr_buf;
+      else if(util_time_timeout(&curr_buf->start, &curr_buf->end, &now))
+         _pb_cache_buffer_destroy(curr_buf);
+      else
+         /* This buffer (and all hereafter) are still hot in cache */
+         break;
+      curr = next;
+      next = curr->next;
+   }
+
+   /* keep searching in the hot buffers */
+   if(!buf) {
+      while(curr != &mgr->delayed) {
+         curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
+         if(pb_cache_is_buffer_compat(curr_buf, size, desc)) {
+            buf = curr_buf;
+            break;
+         }
+         /* no need to check the timeout here */
+         curr = next;
+         next = curr->next;
+      }
+   }
+
+   if(buf) {
+      /* The buffer leaves the delayed list, so it must stop being counted
+       * as delayed; pb_cache_buffer_destroy() counts it again when it is
+       * released the next time. */
+      LIST_DEL(&buf->head);
+      assert(mgr->numDelayed);
+      --mgr->numDelayed;
+      pipe_mutex_unlock(mgr->mutex);
+      ++buf->base.base.refcount;
+      return &buf->base;
+   }
+
+   pipe_mutex_unlock(mgr->mutex);
+
+   buf = CALLOC_STRUCT(pb_cache_buffer);
+   if(!buf)
+      return NULL;
+
+   buf->buffer = mgr->provider->create_buffer(mgr->provider, size, desc);
+   if(!buf->buffer) {
+      FREE(buf);
+      return NULL;
+   }
+
+   assert(buf->buffer->base.refcount >= 1);
+   assert(pb_check_alignment(desc->alignment, buf->buffer->base.alignment));
+   assert(pb_check_usage(desc->usage, buf->buffer->base.usage));
+   assert(buf->buffer->base.size >= size);
+
+   buf->base.base.refcount = 1;
+   buf->base.base.alignment = buf->buffer->base.alignment;
+   buf->base.base.usage = buf->buffer->base.usage;
+   buf->base.base.size = buf->buffer->base.size;
+
+   buf->base.vtbl = &pb_cache_buffer_vtbl;
+   buf->mgr = mgr;
+
+   return &buf->base;
+}
+
+
+/**
+ * Destroy every buffer on the delayed list, regardless of its timeout,
+ * then flush the provider.
+ */
+static void
+pb_cache_manager_flush(struct pb_manager *_mgr)
+{
+   struct pb_cache_manager *mgr = pb_cache_manager(_mgr);
+   struct list_head *curr, *next;
+   struct pb_cache_buffer *buf;
+
+   pipe_mutex_lock(mgr->mutex);
+   curr = mgr->delayed.next;
+   next = curr->next;
+   while(curr != &mgr->delayed) {
+      buf = LIST_ENTRY(struct pb_cache_buffer, curr, head);
+      _pb_cache_buffer_destroy(buf);
+      curr = next;
+      next = curr->next;
+   }
+   pipe_mutex_unlock(mgr->mutex);
+
+   assert(mgr->provider->flush);
+   if(mgr->provider->flush)
+      mgr->provider->flush(mgr->provider);
+}
+
+
+static void
+pb_cache_manager_destroy(struct pb_manager *mgr)
+{
+   pb_cache_manager_flush(mgr);
+   FREE(mgr);
+}
+
+
+/**
+ * Create a cache manager on top of the given provider.
+ *
+ * \param usecs  caching interval applied to each released buffer.
+ * \return the new manager, or NULL if provider is NULL or allocation fails.
+ */
+struct pb_manager *
+pb_cache_manager_create(struct pb_manager *provider,
+                        unsigned usecs)
+{
+   struct pb_cache_manager *mgr;
+
+   if(!provider)
+      return NULL;
+
+   mgr = CALLOC_STRUCT(pb_cache_manager);
+   if (!mgr)
+      return NULL;
+
+   mgr->base.destroy = pb_cache_manager_destroy;
+   mgr->base.create_buffer = pb_cache_manager_create_buffer;
+   mgr->base.flush = pb_cache_manager_flush;
+   mgr->provider = provider;
+   mgr->usecs = usecs;
+   LIST_INITHEAD(&mgr->delayed);
+   mgr->numDelayed = 0;
+   pipe_mutex_init(mgr->mutex);
+
+   return &mgr->base;
+}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
new file mode 100644
index 0000000000..1675e6e182
--- /dev/null
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
@@ -0,0 +1,367 @@
+/**************************************************************************
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Debug buffer manager to detect buffer under- and overflows.
+ *
+ * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_thread.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_double_list.h"
+#include "util/u_time.h"
+
+#include "pb_buffer.h"
+#include "pb_bufmgr.h"
+
+
+#ifdef DEBUG
+
+
+/**
+ * Convenience macro (type safe).
+ */
+#define SUPER(__derived) (&(__derived)->base)
+
+
+struct pb_debug_manager;
+
+
+/**
+ * Wrapper around a pipe buffer which surrounds the user data with two
+ * pattern-filled guard bands, used to detect buffer under- and overflows.
+ */
+struct pb_debug_buffer
+{
+   struct pb_buffer base;
+
+   /** The real (larger) buffer obtained from the provider */
+   struct pb_buffer *buffer;
+   struct pb_debug_manager *mgr;
+
+   /** Size of the guard band placed before the user data */
+   size_t underflow_size;
+   /** Size of the guard band placed after the user data */
+   size_t overflow_size;
+};
+
+
+struct pb_debug_manager
+{
+   struct pb_manager base;
+
+   struct pb_manager *provider;
+
+   /** Guard band size requested on each side of every buffer */
+   size_t band_size;
+};
+
+
+static INLINE struct pb_debug_buffer *
+pb_debug_buffer(struct pb_buffer *buf)
+{
+   assert(buf);
+   return (struct pb_debug_buffer *)buf;
+}
+
+
+static INLINE struct pb_debug_manager *
+pb_debug_manager(struct pb_manager *mgr)
+{
+   assert(mgr);
+   return (struct pb_debug_manager *)mgr;
+}
+
+
+static const uint8_t random_pattern[32] = {
+   0xaf, 0xcf, 0xa5, 0xa2, 0xc2, 0x63, 0x15, 0x1a,
+   0x7e, 0xe2, 0x7e, 0x84, 0x15, 0x49, 0xa2, 0x1e,
+   0x49, 0x63, 0xf5, 0x52, 0x74, 0x66, 0x9e, 0xc4,
+   0x6d, 0xcf, 0x2c, 0x4a, 0x74, 0xe6, 0xfd, 0x94
+};
+
+
+/**
+ * Fill size bytes at dst with random_pattern, repeated as needed.
+ */
+static INLINE void
+fill_random_pattern(uint8_t *dst, size_t size)
+{
+   size_t i = 0;
+   while(size--) {
+      *dst++ = random_pattern[i++];
+      /* wrap around; relies on the pattern length being a power of two */
+      i &= sizeof(random_pattern) - 1;
+   }
+}
+
+
+/**
+ * Compare size bytes at dst against the repeated random_pattern.
+ *
+ * On mismatch *min_ofs / *max_ofs receive the first / last differing
+ * offset; when everything matches they are left at size / 0.
+ *
+ * \return TRUE if the whole range still holds the pattern.
+ */
+static INLINE boolean
+check_random_pattern(const uint8_t *dst, size_t size,
+                     size_t *min_ofs, size_t *max_ofs)
+{
+   boolean result = TRUE;
+   size_t i;
+   *min_ofs = size;
+   *max_ofs = 0;
+   for(i = 0; i < size; ++i) {
+      if(*dst++ != random_pattern[i % sizeof(random_pattern)]) {
+         *min_ofs = MIN2(*min_ofs, i);
+         *max_ofs = MAX2(*max_ofs, i);
+         result = FALSE;
+      }
+   }
+   return result;
+}
+
+
+/**
+ * Write the pattern into both guard bands of the buffer.
+ */
+static void
+pb_debug_buffer_fill(struct pb_debug_buffer *buf)
+{
+   uint8_t *map;
+
+   map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
+   assert(map);
+   if(map) {
+      fill_random_pattern(map, buf->underflow_size);
+      fill_random_pattern(map + buf->underflow_size + buf->base.base.size,
+                          buf->overflow_size);
+      pb_unmap(buf->buffer);
+   }
+}
+
+
+/**
+ * Check for under/over flows.
+ *
+ * Should be called with the buffer unmapped.
+ */
+static void
+pb_debug_buffer_check(struct pb_debug_buffer *buf)
+{
+   uint8_t *map;
+
+   map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_READ);
+   assert(map);
+   if(map) {
+      boolean underflow, overflow;
+      size_t min_ofs, max_ofs;
+
+      underflow = !check_random_pattern(map, buf->underflow_size,
+                                        &min_ofs, &max_ofs);
+      if(underflow) {
+         /* %u expects unsigned: the size_t values are cast explicitly so
+          * the variadic arguments match the format on every ABI */
+         debug_printf("buffer underflow (offset -%u%s to -%u bytes) detected\n",
+                      (unsigned)(buf->underflow_size - min_ofs),
+                      min_ofs == 0 ? "+" : "",
+                      (unsigned)(buf->underflow_size - max_ofs));
+      }
+
+      overflow = !check_random_pattern(map + buf->underflow_size + buf->base.base.size,
+                                       buf->overflow_size,
+                                       &min_ofs, &max_ofs);
+      if(overflow) {
+         debug_printf("buffer overflow (size %u plus offset %u to %u%s bytes) detected\n",
+                      (unsigned)buf->base.base.size,
+                      (unsigned)min_ofs,
+                      (unsigned)max_ofs,
+                      max_ofs == buf->overflow_size - 1 ? "+" : "");
+      }
+
+      debug_assert(!underflow && !overflow);
+
+      /* re-fill if not aborted */
+      if(underflow)
+         fill_random_pattern(map, buf->underflow_size);
+      if(overflow)
+         fill_random_pattern(map + buf->underflow_size + buf->base.base.size,
+                             buf->overflow_size);
+
+      pb_unmap(buf->buffer);
+   }
+}
+
+
+static void
+pb_debug_buffer_destroy(struct pb_buffer *_buf)
+{
+   struct pb_debug_buffer *buf = pb_debug_buffer(_buf);
+
+   assert(!buf->base.base.refcount);
+
+   pb_debug_buffer_check(buf);
+
+   pb_reference(&buf->buffer, NULL);
+   FREE(buf);
+}
+
+
+/**
+ * Map the buffer; the returned pointer skips the leading guard band.
+ */
+static void *
+pb_debug_buffer_map(struct pb_buffer *_buf,
+                    unsigned flags)
+{
+   struct pb_debug_buffer *buf = pb_debug_buffer(_buf);
+   void *map;
+
+   pb_debug_buffer_check(buf);
+
+   map = pb_map(buf->buffer, flags);
+   if(!map)
+      return NULL;
+
+   return (uint8_t *)map + buf->underflow_size;
+}
+
+
+static void
+pb_debug_buffer_unmap(struct pb_buffer *_buf)
+{
+   struct pb_debug_buffer *buf = pb_debug_buffer(_buf);
+   pb_unmap(buf->buffer);
+
+   pb_debug_buffer_check(buf);
+}
+
+
+static void
+pb_debug_buffer_get_base_buffer(struct pb_buffer *_buf,
+                                struct pb_buffer **base_buf,
+                                unsigned *offset)
+{
+   struct pb_debug_buffer *buf = pb_debug_buffer(_buf);
+   pb_get_base_buffer(buf->buffer, base_buf, offset);
+   *offset += buf->underflow_size;
+}
+
+
+const struct pb_vtbl
+pb_debug_buffer_vtbl = {
+      pb_debug_buffer_destroy,
+      pb_debug_buffer_map,
+      pb_debug_buffer_unmap,
+      pb_debug_buffer_get_base_buffer
+};
+
+
+/**
+ * Create a buffer of the requested size surrounded by two guard bands of
+ * mgr->band_size bytes; CPU read/write usage is added to the provider
+ * request so the bands can be filled and checked.
+ */
+static struct pb_buffer *
+pb_debug_manager_create_buffer(struct pb_manager *_mgr,
+                               size_t size,
+                               const struct pb_desc *desc)
+{
+   struct pb_debug_manager *mgr = pb_debug_manager(_mgr);
+   struct pb_debug_buffer *buf;
+   struct pb_desc real_desc;
+   size_t real_size;
+
+   buf = CALLOC_STRUCT(pb_debug_buffer);
+   if(!buf)
+      return NULL;
+
+   real_size = size + 2*mgr->band_size;
+   real_desc = *desc;
+   real_desc.usage |= PIPE_BUFFER_USAGE_CPU_WRITE;
+   real_desc.usage |= PIPE_BUFFER_USAGE_CPU_READ;
+
+   buf->buffer = mgr->provider->create_buffer(mgr->provider,
+                                              real_size,
+                                              &real_desc);
+   if(!buf->buffer) {
+      FREE(buf);
+      return NULL;
+   }
+
+   assert(buf->buffer->base.refcount >= 1);
+   assert(pb_check_alignment(real_desc.alignment, buf->buffer->base.alignment));
+   assert(pb_check_usage(real_desc.usage, buf->buffer->base.usage));
+   assert(buf->buffer->base.size >= real_size);
+
+   buf->base.base.refcount = 1;
+   buf->base.base.alignment = desc->alignment;
+   buf->base.base.usage = desc->usage;
+   buf->base.base.size = size;
+
+   buf->base.vtbl = &pb_debug_buffer_vtbl;
+   buf->mgr = mgr;
+
+   /* everything the provider returned beyond band + size is guarded too */
+   buf->underflow_size = mgr->band_size;
+   buf->overflow_size = buf->buffer->base.size - buf->underflow_size - size;
+
+   pb_debug_buffer_fill(buf);
+
+   return &buf->base;
+}
+
+
+static void
+pb_debug_manager_flush(struct pb_manager *_mgr)
+{
+   struct pb_debug_manager *mgr = pb_debug_manager(_mgr);
+   assert(mgr->provider->flush);
+   if(mgr->provider->flush)
+      mgr->provider->flush(mgr->provider);
+}
+
+
+static void
+pb_debug_manager_destroy(struct pb_manager *_mgr)
+{
+   struct pb_debug_manager *mgr = pb_debug_manager(_mgr);
+   mgr->provider->destroy(mgr->provider);
+   FREE(mgr);
+}
+
+
+struct pb_manager *
+pb_debug_manager_create(struct pb_manager *provider, size_t band_size)
+{
+   struct pb_debug_manager *mgr;
+
+   if(!provider)
+      return NULL;
+
+   mgr = CALLOC_STRUCT(pb_debug_manager);
+   if (!mgr)
+      return NULL;
+
+   mgr->base.destroy = pb_debug_manager_destroy;
+   mgr->base.create_buffer = pb_debug_manager_create_buffer;
+   mgr->base.flush = pb_debug_manager_flush;
+   mgr->provider = provider;
+   mgr->band_size = band_size;
+
+   return &mgr->base;
+}
+
+
+#else /* !DEBUG */
+
+
+/* Without DEBUG no wrapping takes place: the provider is returned as is. */
+struct pb_manager *
+pb_debug_manager_create(struct pb_manager *provider, size_t band_size)
+{
+   return provider;
+}
+
+
+#endif /* !DEBUG */
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
new file mode 100644
index 0000000000..633ee70a75
--- /dev/null
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
@@ -0,0 +1,149 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * A buffer manager that wraps buffers in fenced buffers.
+ *
+ * \author José Fonseca <jrfonseca@tungstengraphics.dot.com>
+ */
+
+
+#include "pipe/p_debug.h"
+#include "util/u_memory.h"
+
+#include "pb_buffer.h"
+#include "pb_buffer_fenced.h"
+#include "pb_bufmgr.h"
+
+
+struct fenced_pb_manager
+{
+   struct pb_manager base;
+
+   /** From where the wrapped buffers are obtained */
+   struct pb_manager *provider;
+
+   struct fenced_buffer_list *fenced_list;
+};
+
+
+static INLINE struct fenced_pb_manager *
+fenced_pb_manager(struct pb_manager *mgr)
+{
+   assert(mgr);
+   return (struct fenced_pb_manager *)mgr;
+}
+
+
+/**
+ * Create a buffer from the provider and wrap it in a fenced buffer.
+ *
+ * If the provider fails, the fenced list is checked again with its second
+ * argument set and the allocation retried once.
+ *
+ * \return the fenced buffer, or NULL on failure.
+ */
+static struct pb_buffer *
+fenced_bufmgr_create_buffer(struct pb_manager *mgr,
+                            size_t size,
+                            const struct pb_desc *desc)
+{
+   struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr);
+   struct pb_buffer *buf;
+   struct pb_buffer *fenced_buf;
+
+   /* check for free buffers before allocating new ones */
+   fenced_buffer_list_check_free(fenced_mgr->fenced_list, 0);
+
+   buf = fenced_mgr->provider->create_buffer(fenced_mgr->provider, size, desc);
+   if(!buf) {
+      /* try harder to get a buffer */
+      fenced_buffer_list_check_free(fenced_mgr->fenced_list, 1);
+
+      buf = fenced_mgr->provider->create_buffer(fenced_mgr->provider, size, desc);
+      if(!buf) {
+         /* give up */
+         return NULL;
+      }
+   }
+
+   fenced_buf = fenced_buffer_create(fenced_mgr->fenced_list, buf);
+   if(!fenced_buf) {
+      /* We still hold the only reference. Drop it through pb_reference()
+       * rather than calling the destroy hook directly with a non-zero
+       * refcount. */
+      assert(buf->base.refcount == 1);
+      pb_reference(&buf, NULL);
+   }
+
+   return fenced_buf;
+}
+
+
+static void
+fenced_bufmgr_flush(struct pb_manager *mgr)
+{
+   struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr);
+
+   fenced_buffer_list_check_free(fenced_mgr->fenced_list, TRUE);
+
+   assert(fenced_mgr->provider->flush);
+   if(fenced_mgr->provider->flush)
+      fenced_mgr->provider->flush(fenced_mgr->provider);
+}
+
+
+/**
+ * Destroy the fenced list, the provider (if any) and the manager itself.
+ */
+static void
+fenced_bufmgr_destroy(struct pb_manager *mgr)
+{
+   struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr);
+
+   fenced_buffer_list_destroy(fenced_mgr->fenced_list);
+
+   if(fenced_mgr->provider)
+      fenced_mgr->provider->destroy(fenced_mgr->provider);
+
+   FREE(fenced_mgr);
+}
+
+
+/**
+ * Create a fenced buffer manager on top of the given provider.
+ *
+ * \return the new manager, or NULL if provider is NULL or an allocation
+ * fails (the provider is left untouched in that case).
+ */
+struct pb_manager *
+fenced_bufmgr_create(struct pb_manager *provider,
+                     struct pipe_winsys *winsys)
+{
+   struct fenced_pb_manager *fenced_mgr;
+
+   if(!provider)
+      return NULL;
+
+   fenced_mgr = CALLOC_STRUCT(fenced_pb_manager);
+   if (!fenced_mgr)
+      return NULL;
+
+   fenced_mgr->base.destroy = fenced_bufmgr_destroy;
+   fenced_mgr->base.create_buffer = fenced_bufmgr_create_buffer;
+   fenced_mgr->base.flush = fenced_bufmgr_flush;
+
+   fenced_mgr->provider = provider;
+   fenced_mgr->fenced_list = fenced_buffer_list_create(winsys);
+   if(!fenced_mgr->fenced_list) {
+      FREE(fenced_mgr);
+      return NULL;
+   }
+
+   return &fenced_mgr->base;
+}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
new file mode 100644
index 0000000000..fe80ca30ee
--- /dev/null
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
@@ -0,0 +1,299 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Buffer manager using the old texture memory manager.
+ *
+ * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+
+#include "pipe/p_defines.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_thread.h"
+#include "util/u_memory.h"
+#include "util/u_double_list.h"
+#include "util/u_mm.h"
+#include "pb_buffer.h"
+#include "pb_bufmgr.h"
+
+
+/**
+ * Convenience macro (type safe).
+ */
+#define SUPER(__derived) (&(__derived)->base)
+
+
+struct mm_pb_manager
+{
+   struct pb_manager base;
+
+   pipe_mutex mutex;
+
+   /** Size of the managed range */
+   size_t size;
+   struct mem_block *heap;
+
+   /** log2 of the alignment used for every heap allocation */
+   size_t align2;
+
+   /** The buffer being sub-allocated, and its permanent CPU mapping */
+   struct pb_buffer *buffer;
+   void *map;
+};
+
+
+static INLINE struct mm_pb_manager *
+mm_pb_manager(struct pb_manager *mgr)
+{
+   assert(mgr);
+   return (struct mm_pb_manager *)mgr;
+}
+
+
+struct mm_buffer
+{
+   struct pb_buffer base;
+
+   struct mm_pb_manager *mgr;
+
+   /** Heap block backing this buffer */
+   struct mem_block *block;
+};
+
+
+static INLINE struct mm_buffer *
+mm_buffer(struct pb_buffer *buf)
+{
+   assert(buf);
+   return (struct mm_buffer *)buf;
+}
+
+
+static void
+mm_buffer_destroy(struct pb_buffer *buf)
+{
+   struct mm_buffer *mm_buf = mm_buffer(buf);
+   struct mm_pb_manager *mm = mm_buf->mgr;
+
+   assert(buf->base.refcount == 0);
+
+   pipe_mutex_lock(mm->mutex);
+   mmFreeMem(mm_buf->block);
+   FREE(buf);
+   pipe_mutex_unlock(mm->mutex);
+}
+
+
+/**
+ * Return the address of the buffer inside the manager's permanent mapping;
+ * flags is not used.
+ */
+static void *
+mm_buffer_map(struct pb_buffer *buf,
+              unsigned flags)
+{
+   struct mm_buffer *mm_buf = mm_buffer(buf);
+   struct mm_pb_manager *mm = mm_buf->mgr;
+
+   return (unsigned char *) mm->map + mm_buf->block->ofs;
+}
+
+
+static void
+mm_buffer_unmap(struct pb_buffer *buf)
+{
+   /* No-op */
+}
+
+
+static void
+mm_buffer_get_base_buffer(struct pb_buffer *buf,
+                          struct pb_buffer **base_buf,
+                          unsigned *offset)
+{
+   struct mm_buffer *mm_buf = mm_buffer(buf);
+   struct mm_pb_manager *mm = mm_buf->mgr;
+   pb_get_base_buffer(mm->buffer, base_buf, offset);
+   *offset += mm_buf->block->ofs;
+}
+
+
+static const struct pb_vtbl
+mm_buffer_vtbl = {
+      mm_buffer_destroy,
+      mm_buffer_map,
+      mm_buffer_unmap,
+      mm_buffer_get_base_buffer
+};
+
+
+/**
+ * Sub-allocate a buffer from the heap.
+ *
+ * Every block is allocated with the manager's own alignment, so a request
+ * is accepted only when its alignment is zero or divides that alignment.
+ *
+ * \return the new buffer, or NULL on unsupported alignment, out of memory
+ * or heap exhaustion.
+ */
+static struct pb_buffer *
+mm_bufmgr_create_buffer(struct pb_manager *mgr,
+                        size_t size,
+                        const struct pb_desc *desc)
+{
+   struct mm_pb_manager *mm = mm_pb_manager(mgr);
+   struct mm_buffer *mm_buf;
+   const size_t heap_align = (size_t)1 << mm->align2;
+
+   /* We don't handle alignments larger than the one initially setup */
+   assert(!desc->alignment ||
+          (desc->alignment <= heap_align &&
+           heap_align % desc->alignment == 0));
+   if(desc->alignment &&
+      (desc->alignment > heap_align || heap_align % desc->alignment))
+      return NULL;
+
+   pipe_mutex_lock(mm->mutex);
+
+   mm_buf = CALLOC_STRUCT(mm_buffer);
+   if (!mm_buf) {
+      pipe_mutex_unlock(mm->mutex);
+      return NULL;
+   }
+
+   mm_buf->base.base.refcount = 1;
+   mm_buf->base.base.alignment = desc->alignment;
+   mm_buf->base.base.usage = desc->usage;
+   mm_buf->base.base.size = size;
+
+   mm_buf->base.vtbl = &mm_buffer_vtbl;
+
+   mm_buf->mgr = mm;
+
+   mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0);
+   if(!mm_buf->block) {
+      debug_printf("warning: heap full\n");
+#if 0
+      mmDumpMemInfo(mm->heap);
+#endif
+
+      /* NOTE(review): this retry repeats the identical request with nothing
+       * released in between -- presumably a leftover; confirm and drop. */
+      mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0);
+      if(!mm_buf->block) {
+         FREE(mm_buf);
+         pipe_mutex_unlock(mm->mutex);
+         return NULL;
+      }
+   }
+
+   /* Some sanity checks */
+   assert(0 <= (unsigned)mm_buf->block->ofs && (unsigned)mm_buf->block->ofs < mm->size);
+   assert(size <= (unsigned)mm_buf->block->size && (unsigned)mm_buf->block->ofs + (unsigned)mm_buf->block->size <= mm->size);
+
+   pipe_mutex_unlock(mm->mutex);
+   return SUPER(mm_buf);
+}
+
+
+static void
+mm_bufmgr_flush(struct pb_manager *mgr)
+{
+   /* No-op */
+}
+
+
+static void
+mm_bufmgr_destroy(struct pb_manager *mgr)
+{
+   struct mm_pb_manager *mm = mm_pb_manager(mgr);
+
+   pipe_mutex_lock(mm->mutex);
+
+   mmDestroy(mm->heap);
+
+   pb_unmap(mm->buffer);
+   pb_reference(&mm->buffer, NULL);
+
+   pipe_mutex_unlock(mm->mutex);
+
+   FREE(mgr);
+}
+
+
+/**
+ * Create a manager that sub-allocates the given buffer.
+ *
+ * The buffer is mapped for CPU read/write for the manager's lifetime.
+ *
+ * \param align2  log2 of the alignment applied to every sub-allocation.
+ * \return the new manager, or NULL on failure; the caller keeps its
+ * reference to buffer in that case.
+ */
+struct pb_manager *
+mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
+                             size_t size, size_t align2)
+{
+   struct mm_pb_manager *mm;
+
+   if(!buffer)
+      return NULL;
+
+   mm = CALLOC_STRUCT(mm_pb_manager);
+   if (!mm)
+      return NULL;
+
+   mm->base.destroy = mm_bufmgr_destroy;
+   mm->base.create_buffer = mm_bufmgr_create_buffer;
+   mm->base.flush = mm_bufmgr_flush;
+
+   mm->size = size;
+   mm->align2 = align2; /* log2 of the allocation alignment */
+
+   pipe_mutex_init(mm->mutex);
+
+   mm->buffer = buffer;
+
+   mm->map = pb_map(mm->buffer,
+                    PIPE_BUFFER_USAGE_CPU_READ |
+                    PIPE_BUFFER_USAGE_CPU_WRITE);
+   if(!mm->map)
+      goto failure;
+
+   mm->heap = mmInit(0, size);
+   if (!mm->heap)
+      goto failure;
+
+   return SUPER(mm);
+
+failure:
+   /* mm is known to be non-NULL here; fields not yet set are still zero */
+   if(mm->heap)
+      mmDestroy(mm->heap);
+   if(mm->map)
+      pb_unmap(mm->buffer);
+   FREE(mm);
+   return NULL;
+}
+
+
+/**
+ * Allocate a buffer of the given size from the provider and create a
+ * sub-allocating manager on top of it.
+ */
+struct pb_manager *
+mm_bufmgr_create(struct pb_manager *provider,
+                 size_t size, size_t align2)
+{
+   struct pb_buffer *buffer;
+   struct pb_manager *mgr;
+   struct pb_desc desc;
+
+   if(!provider)
+      return NULL;
+
+   memset(&desc, 0, sizeof(desc));
+   desc.alignment = 1 << align2;
+
+   buffer = provider->create_buffer(provider, size, &desc);
+   if (!buffer)
+      return NULL;
+
+   mgr = mm_bufmgr_create_from_buffer(buffer, size, align2);
+   if (!mgr) {
+      pb_reference(&buffer, NULL);
+      return NULL;
+   }
+
+   return mgr;
+}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
new file mode 100644
index 0000000000..61ac291ed7
--- /dev/null
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
@@ -0,0 +1,298 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+
+/**
+ * \file
+ * Batch buffer pool management.
+ *
+ * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com>
+ * \author Thomas Hellström <thomas-at-tungstengraphics-dot-com>
+ */
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_thread.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "util/u_double_list.h"
+
+#include "pb_buffer.h"
+#include "pb_bufmgr.h"
+
+
+/**
+ * Convenience macro (type safe).
+ */
+#define SUPER(__derived) (&(__derived)->base)
+
+
+struct pool_pb_manager
+{
+   struct pb_manager base;
+
+   pipe_mutex mutex;
+
+   /** Size and alignment shared by every buffer of the pool */
+   size_t bufSize;
+   size_t bufAlign;
+
+   size_t numFree;
+   size_t numTot;
+
+   /** Buffers currently available for pool_bufmgr_create_buffer() */
+   struct list_head free;
+
+   /** The backing buffer and its permanent CPU mapping */
+   struct pb_buffer *buffer;
+   void *map;
+
+   /** Array of numTot buffer descriptors */
+   struct pool_buffer *bufs;
+};
+
+
+static INLINE struct pool_pb_manager *
+pool_pb_manager(struct pb_manager *mgr)
+{
+   assert(mgr);
+   return (struct pool_pb_manager *)mgr;
+}
+
+
+struct pool_buffer
+{
+   struct pb_buffer base;
+
+   struct pool_pb_manager *mgr;
+
+   struct list_head head;
+
+   /** Offset of this buffer inside the backing buffer */
+   size_t start;
+};
+
+
+static INLINE struct pool_buffer *
+pool_buffer(struct pb_buffer *buf)
+{
+   assert(buf);
+   return (struct pool_buffer *)buf;
+}
+
+
+
+/**
+ * vtbl destroy hook: the descriptor is not freed, it goes back to the
+ * pool's free list.
+ */
+static void
+pool_buffer_destroy(struct pb_buffer *buf)
+{
+   struct pool_buffer *pool_buf = pool_buffer(buf);
+   struct pool_pb_manager *pool = pool_buf->mgr;
+
+   assert(pool_buf->base.base.refcount == 0);
+
+   pipe_mutex_lock(pool->mutex);
+   LIST_ADD(&pool_buf->head, &pool->free);
+   pool->numFree++;
+   pipe_mutex_unlock(pool->mutex);
+}
+
+
+static void *
+pool_buffer_map(struct pb_buffer *buf, unsigned flags)
+{
+   struct pool_buffer *pool_buf = pool_buffer(buf);
+   struct pool_pb_manager *pool = pool_buf->mgr;
+   void *map;
+
+   pipe_mutex_lock(pool->mutex);
+   map = (unsigned char *) pool->map + pool_buf->start;
+   pipe_mutex_unlock(pool->mutex);
+   return map;
+}
+
+
+static void
+pool_buffer_unmap(struct pb_buffer *buf)
+{
+   /* No-op */
+}
+
+
+static void
+pool_buffer_get_base_buffer(struct pb_buffer *buf,
+                            struct pb_buffer **base_buf,
+                            unsigned *offset)
+{
+   struct pool_buffer *pool_buf = pool_buffer(buf);
+   struct pool_pb_manager *pool = pool_buf->mgr;
+   pb_get_base_buffer(pool->buffer, base_buf, offset);
+   *offset += pool_buf->start;
+}
+
+
+static const struct pb_vtbl
+pool_buffer_vtbl = {
+      pool_buffer_destroy,
+      pool_buffer_map,
+      pool_buffer_unmap,
+      pool_buffer_get_base_buffer
+};
+
+
+/**
+ * Hand out one of the pool's fixed-size buffers.
+ *
+ * \return a buffer with refcount 1, or NULL when the request is larger
+ * than the pool's buffer size or no free buffer is left.
+ */
+static struct pb_buffer *
+pool_bufmgr_create_buffer(struct pb_manager *mgr,
+                          size_t size,
+                          const struct pb_desc *desc)
+{
+   struct pool_pb_manager *pool = pool_pb_manager(mgr);
+   struct pool_buffer *pool_buf;
+   struct list_head *item;
+
+   assert(size == pool->bufSize);
+   /* a zero alignment means no requirement; never use it as a divisor */
+   assert(!desc->alignment || pool->bufAlign % desc->alignment == 0);
+
+   /* The asserts vanish in release builds: never hand out a buffer that
+    * is smaller than what the caller asked for. */
+   if(size > pool->bufSize)
+      return NULL;
+
+   pipe_mutex_lock(pool->mutex);
+
+   if (pool->numFree == 0) {
+      pipe_mutex_unlock(pool->mutex);
+      debug_printf("warning: out of fixed size buffer objects\n");
+      return NULL;
+   }
+
+   item = pool->free.next;
+
+   if (item == &pool->free) {
+      pipe_mutex_unlock(pool->mutex);
+      debug_printf("error: fixed size buffer pool corruption\n");
+      return NULL;
+   }
+
+   LIST_DEL(item);
+   --pool->numFree;
+
+   pipe_mutex_unlock(pool->mutex);
+
+   pool_buf = LIST_ENTRY(struct pool_buffer, item, head);
+   assert(pool_buf->base.base.refcount == 0);
+   pool_buf->base.base.refcount = 1;
+   pool_buf->base.base.alignment = desc->alignment;
+   pool_buf->base.base.usage = desc->usage;
+
+   return SUPER(pool_buf);
+}
+
+
+static void
+pool_bufmgr_flush(struct pb_manager *mgr)
+{
+   /* No-op */
+}
+
+
+static void
+pool_bufmgr_destroy(struct pb_manager *mgr)
+{
+   struct pool_pb_manager *pool = pool_pb_manager(mgr);
+   pipe_mutex_lock(pool->mutex);
+
+   FREE(pool->bufs);
+
+   pb_unmap(pool->buffer);
+   pb_reference(&pool->buffer, NULL);
+
+   pipe_mutex_unlock(pool->mutex);
+
+   FREE(mgr);
+}
+
+
+/**
+ * Create a pool of numBufs buffers of bufSize bytes each, carved out of a
+ * single buffer obtained from the provider with the given description.
+ *
+ * \return the new manager, or NULL if provider is NULL, the total size
+ * overflows size_t, or any allocation or the mapping fails.
+ */
+struct pb_manager *
+pool_bufmgr_create(struct pb_manager *provider,
+                   size_t numBufs,
+                   size_t bufSize,
+                   const struct pb_desc *desc)
+{
+   struct pool_pb_manager *pool;
+   struct pool_buffer *pool_buf;
+   size_t i;
+
+   if(!provider)
+      return NULL;
+
+   /* numBufs*bufSize below must not wrap around */
+   if(bufSize && numBufs > ((size_t)-1) / bufSize)
+      return NULL;
+
+   pool = CALLOC_STRUCT(pool_pb_manager);
+   if (!pool)
+      return NULL;
+
+   pool->base.destroy = pool_bufmgr_destroy;
+   pool->base.create_buffer = pool_bufmgr_create_buffer;
+   pool->base.flush = pool_bufmgr_flush;
+
+   LIST_INITHEAD(&pool->free);
+
+   pool->numTot = numBufs;
+   pool->numFree = numBufs;
+   pool->bufSize = bufSize;
+   pool->bufAlign = desc->alignment;
+
+   pipe_mutex_init(pool->mutex);
+
+   pool->buffer = provider->create_buffer(provider, numBufs*bufSize, desc);
+   if (!pool->buffer)
+      goto failure;
+
+   pool->map = pb_map(pool->buffer,
+                      PIPE_BUFFER_USAGE_CPU_READ |
+                      PIPE_BUFFER_USAGE_CPU_WRITE);
+   if(!pool->map)
+      goto failure;
+
+   pool->bufs = (struct pool_buffer *)CALLOC(numBufs, sizeof(*pool->bufs));
+   if (!pool->bufs)
+      goto failure;
+
+   /* every descriptor starts out unreferenced and on the free list */
+   pool_buf = pool->bufs;
+   for (i = 0; i < numBufs; ++i) {
+      pool_buf->base.base.refcount = 0;
+      pool_buf->base.base.alignment = 0;
+      pool_buf->base.base.usage = 0;
+      pool_buf->base.base.size = bufSize;
+      pool_buf->base.vtbl = &pool_buffer_vtbl;
+      pool_buf->mgr = pool;
+      pool_buf->start = i * bufSize;
+      LIST_ADDTAIL(&pool_buf->head, &pool->free);
+      pool_buf++;
+   }
+
+   return SUPER(pool);
+
+failure:
+   if(pool->bufs)
+      FREE(pool->bufs);
+   if(pool->map)
+      pb_unmap(pool->buffer);
+   if(pool->buffer)
+      pb_reference(&pool->buffer, NULL);
+   if(pool)
+      FREE(pool);
+   return NULL;
+}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
new file mode 100644
index 0000000000..2a80154920
--- /dev/null
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
@@ -0,0 +1,563 @@
+/**************************************************************************
+ *
+ * Copyright 2006-2008 Tungsten Graphics, Inc., Cedar Park, TX., USA
+ * All Rights Reserved.
+ * + * Permission is hereby granted, FREE of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/** + * @file + * S-lab pool implementation. + * + * @sa http://en.wikipedia.org/wiki/Slab_allocation + * + * @author Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_error.h" +#include "pipe/p_debug.h" +#include "pipe/p_thread.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "util/u_double_list.h" +#include "util/u_time.h" + +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +struct pb_slab; + + +/** + * Buffer in a slab. + * + * Sub-allocation of a contiguous buffer. 
+ */ +struct pb_slab_buffer +{ + struct pb_buffer base; + + struct pb_slab *slab; /**< Slab this buffer was carved from */ + + struct list_head head; /**< Link in the slab's freeBuffers list while the buffer is not handed out */ + + unsigned mapCount; /**< Map calls not yet matched by an unmap */ + + /** Offset relative to the start of the slab buffer. */ + size_t start; + + /** Use when validating, to signal that all mappings are finished */ + /* TODO: Actually validation does not reach this stage yet */ + pipe_condvar event; +}; + + +/** + * Slab -- a contiguous piece of memory. + */ +struct pb_slab +{ + struct list_head head; /**< Link in the manager's list of partial slabs */ + struct list_head freeBuffers; /**< Buffers of this slab that are not handed out */ + size_t numBuffers; /**< Number of entries in the buffers array */ + size_t numFree; /**< Number of buffers currently on freeBuffers */ + + struct pb_slab_buffer *buffers; /**< Array of numBuffers sub-buffers */ + struct pb_slab_manager *mgr; /**< Manager that created this slab */ + + /** Buffer from the provider */ + struct pb_buffer *bo; + + void *virtual; /**< CPU address of bo, as returned by pb_map() when the slab was created */ +}; + + +/** + * It adds/removes slabs as needed in order to meet the allocation/destruction + * of individual buffers. + */ +struct pb_slab_manager +{ + struct pb_manager base; + + /** From where we get our buffers */ + struct pb_manager *provider; + + /** Size of the buffers we hand on downstream */ + size_t bufSize; + + /** Size of the buffers we request upstream */ + size_t slabSize; + + /** + * Alignment, usage to be used to allocate the slab buffers. + * + * We can only provide buffers which are consistent (in alignment, usage) + * with this description. + */ + struct pb_desc desc; + + /** + * Partial slabs + * + * Full slabs are not stored in any list. Empty slabs are destroyed + * immediately. + */ + struct list_head slabs; + + pipe_mutex mutex; /**< Held while buffers are taken from or returned to the slabs */ +}; + + +/** + * Wrapper around several slabs, therefore capable of handling buffers of + * multiple sizes. + * + * This buffer manager just dispatches buffer allocations to the appropriate slab + * manager, according to the requested buffer size, or by passes the slab + * managers altogether for even greater sizes. + * + * The data of this structure remains constant after + * initialization and thus needs no mutex protection. 
+ */ +struct pb_slab_range_manager +{ + struct pb_manager base; + + struct pb_manager *provider; /**< Upstream manager; also serves requests too large for any bucket */ + + size_t minBufSize; /**< Buffer size of the first bucket; each following bucket doubles it */ + size_t maxBufSize; + + /** @sa pb_slab_manager::desc */ + struct pb_desc desc; + + unsigned numBuckets; /**< Number of entries in buckets */ + size_t *bucketSizes; /**< NOTE(review): freed on destroy, but no allocation is visible in this file -- confirm */ + + /** Array of pb_slab_manager, one for each bucket size */ + struct pb_manager **buckets; +}; + + +static INLINE struct pb_slab_buffer * +pb_slab_buffer(struct pb_buffer *buf) +{ + assert(buf); + return (struct pb_slab_buffer *)buf; +} + + +static INLINE struct pb_slab_manager * +pb_slab_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_slab_manager *)mgr; +} + + +static INLINE struct pb_slab_range_manager * +pb_slab_range_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_slab_range_manager *)mgr; +} + + +/** + * Return a buffer to the free list of its slab. + * + * A slab that had no free buffers left is put back on the manager's list of + * partial slabs; a slab whose buffers are all free again is released together + * with its provider buffer. Runs with the manager mutex held. + */ +static void +pb_slab_buffer_destroy(struct pb_buffer *_buf) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + struct pb_slab *slab = buf->slab; + struct pb_slab_manager *mgr = slab->mgr; + struct list_head *list = &buf->head; + + pipe_mutex_lock(mgr->mutex); + + assert(buf->base.base.refcount == 0); + + buf->mapCount = 0; + + LIST_DEL(list); + LIST_ADDTAIL(list, &slab->freeBuffers); + slab->numFree++; + + if (slab->head.next == &slab->head) /* slab was full, hence on no list */ + LIST_ADDTAIL(&slab->head, &mgr->slabs); + + /* If the slab becomes totally empty, free it */ + if (slab->numFree == slab->numBuffers) { + list = &slab->head; + LIST_DELINIT(list); + pb_reference(&slab->bo, NULL); + FREE(slab->buffers); + FREE(slab); + } + + pipe_mutex_unlock(mgr->mutex); +} + + +static void * +pb_slab_buffer_map(struct pb_buffer *_buf, + unsigned flags) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + + ++buf->mapCount; /* note: updated without taking the manager mutex */ + return (void *) ((uint8_t *) buf->slab->virtual + buf->start); +} + + +static void +pb_slab_buffer_unmap(struct pb_buffer *_buf) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + + 
--buf->mapCount; + if (buf->mapCount == 0) + pipe_condvar_broadcast(buf->event); +} + + +static void +pb_slab_buffer_get_base_buffer(struct pb_buffer *_buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + pb_get_base_buffer(buf->slab->bo, base_buf, offset); + *offset += buf->start; +} + + +static const struct pb_vtbl +pb_slab_buffer_vtbl = { + pb_slab_buffer_destroy, + pb_slab_buffer_map, + pb_slab_buffer_unmap, + pb_slab_buffer_get_base_buffer +}; + + +/** + * Create a new slab. + * + * Called when we ran out of free slabs. + */ +static enum pipe_error +pb_slab_create(struct pb_slab_manager *mgr) +{ + struct pb_slab *slab; + struct pb_slab_buffer *buf; + unsigned numBuffers; + unsigned i; + enum pipe_error ret; + + slab = CALLOC_STRUCT(pb_slab); + if (!slab) + return PIPE_ERROR_OUT_OF_MEMORY; + + slab->bo = mgr->provider->create_buffer(mgr->provider, mgr->slabSize, &mgr->desc); + if(!slab->bo) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out_err0; + } + + /* Note down the slab virtual address. All mappings are accessed directly + * through this address so it is required that the buffer is pinned. 
*/ + slab->virtual = pb_map(slab->bo, + PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE); + if(!slab->virtual) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out_err1; + } + pb_unmap(slab->bo); + + numBuffers = slab->bo->base.size / mgr->bufSize; + + slab->buffers = CALLOC(numBuffers, sizeof(*slab->buffers)); + if (!slab->buffers) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out_err1; + } + + LIST_INITHEAD(&slab->head); + LIST_INITHEAD(&slab->freeBuffers); + slab->numBuffers = numBuffers; + slab->numFree = 0; + slab->mgr = mgr; + + buf = slab->buffers; + for (i=0; i < numBuffers; ++i) { + buf->base.base.refcount = 0; + buf->base.base.size = mgr->bufSize; + buf->base.base.alignment = 0; + buf->base.base.usage = 0; + buf->base.vtbl = &pb_slab_buffer_vtbl; + buf->slab = slab; + buf->start = i* mgr->bufSize; + buf->mapCount = 0; + pipe_condvar_init(buf->event); + LIST_ADDTAIL(&buf->head, &slab->freeBuffers); + slab->numFree++; + buf++; + } + + /* Add this slab to the list of partial slabs */ + LIST_ADDTAIL(&slab->head, &mgr->slabs); + + return PIPE_OK; + +out_err1: + pb_reference(&slab->bo, NULL); +out_err0: + FREE(slab); + return ret; +} + + +/** + * Hand out one sub-buffer from a partial slab, creating a slab if none is + * available. + * + * Returns NULL when size exceeds mgr->bufSize, when the requested alignment + * or usage is not compatible with the manager's description, or when no slab + * could be created. + */ +static struct pb_buffer * +pb_slab_manager_create_buffer(struct pb_manager *_mgr, + size_t size, + const struct pb_desc *desc) +{ + struct pb_slab_manager *mgr = pb_slab_manager(_mgr); + /* Plain automatic variable: it is assigned after the manager mutex has + * been released, so it must not be storage shared between callers. + */ + struct pb_slab_buffer *buf; + struct pb_slab *slab; + struct list_head *list; + + /* check size */ + assert(size <= mgr->bufSize); + if(size > mgr->bufSize) + return NULL; + + /* check if we can provide the requested alignment */ + assert(pb_check_alignment(desc->alignment, mgr->desc.alignment)); + if(!pb_check_alignment(desc->alignment, mgr->desc.alignment)) + return NULL; + assert(pb_check_alignment(desc->alignment, mgr->bufSize)); + if(!pb_check_alignment(desc->alignment, mgr->bufSize)) + return NULL; + + assert(pb_check_usage(desc->usage, mgr->desc.usage)); + if(!pb_check_usage(desc->usage, mgr->desc.usage)) + return NULL; + + 
pipe_mutex_lock(mgr->mutex); + + /* Create a new slab, if we run out of partial slabs */ + if (mgr->slabs.next == &mgr->slabs) { + (void) pb_slab_create(mgr); + if (mgr->slabs.next == &mgr->slabs) { + pipe_mutex_unlock(mgr->mutex); + return NULL; + } + } + + /* Allocate the buffer from a partial (or just created) slab */ + list = mgr->slabs.next; + slab = LIST_ENTRY(struct pb_slab, list, head); + + /* If totally full remove from the partial slab list */ + if (--slab->numFree == 0) + LIST_DELINIT(list); + + list = slab->freeBuffers.next; + LIST_DELINIT(list); + + pipe_mutex_unlock(mgr->mutex); + buf = LIST_ENTRY(struct pb_slab_buffer, list, head); + + ++buf->base.base.refcount; + buf->base.base.alignment = desc->alignment; + buf->base.base.usage = desc->usage; + + return &buf->base; +} + + +static void +pb_slab_manager_flush(struct pb_manager *_mgr) +{ + struct pb_slab_manager *mgr = pb_slab_manager(_mgr); + + assert(mgr->provider->flush); + if(mgr->provider->flush) + mgr->provider->flush(mgr->provider); +} + + +static void +pb_slab_manager_destroy(struct pb_manager *_mgr) +{ + struct pb_slab_manager *mgr = pb_slab_manager(_mgr); + + /* TODO: cleanup all allocated buffers */ + FREE(mgr); +} + + +struct pb_manager * +pb_slab_manager_create(struct pb_manager *provider, + size_t bufSize, + size_t slabSize, + const struct pb_desc *desc) +{ + struct pb_slab_manager *mgr; + + mgr = CALLOC_STRUCT(pb_slab_manager); + if (!mgr) + return NULL; + + mgr->base.destroy = pb_slab_manager_destroy; + mgr->base.create_buffer = pb_slab_manager_create_buffer; + mgr->base.flush = pb_slab_manager_flush; + + mgr->provider = provider; + mgr->bufSize = bufSize; + mgr->slabSize = slabSize; + mgr->desc = *desc; + + LIST_INITHEAD(&mgr->slabs); + + pipe_mutex_init(mgr->mutex); + + return &mgr->base; +} + + +static struct pb_buffer * +pb_slab_range_manager_create_buffer(struct pb_manager *_mgr, + size_t size, + const struct pb_desc *desc) +{ + struct pb_slab_range_manager *mgr = 
pb_slab_range_manager(_mgr); + size_t bufSize; + unsigned i; + + bufSize = mgr->minBufSize; + for (i = 0; i < mgr->numBuckets; ++i) { + if(bufSize >= size) + return mgr->buckets[i]->create_buffer(mgr->buckets[i], size, desc); + bufSize *= 2; + } + + /* Fall back to allocate a buffer object directly from the provider. */ + return mgr->provider->create_buffer(mgr->provider, size, desc); +} + + +static void +pb_slab_range_manager_flush(struct pb_manager *_mgr) +{ + struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr); + + /* Individual slabs don't hold any temporary buffers so no need to call them */ + + assert(mgr->provider->flush); + if(mgr->provider->flush) + mgr->provider->flush(mgr->provider); +} + + +static void +pb_slab_range_manager_destroy(struct pb_manager *_mgr) +{ + struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr); + unsigned i; + + for (i = 0; i < mgr->numBuckets; ++i) + mgr->buckets[i]->destroy(mgr->buckets[i]); + FREE(mgr->buckets); + FREE(mgr->bucketSizes); + FREE(mgr); +} + + +/** + * Create a manager made of one slab manager per bucket size. + * + * Bucket sizes start at minBufSize and double until one is >= maxBufSize. + * Returns NULL on invalid arguments or when an allocation fails. + */ +struct pb_manager * +pb_slab_range_manager_create(struct pb_manager *provider, + size_t minBufSize, + size_t maxBufSize, + size_t slabSize, + const struct pb_desc *desc) +{ + struct pb_slab_range_manager *mgr; + size_t bufSize; + unsigned i; + + /* Reject arguments that would crash or hang below: desc is dereferenced + * by pb_slab_manager_create(), and a zero minBufSize either makes the + * bucket-counting loop spin forever or yields zero-sized buckets. + */ + if(!provider || !desc || !minBufSize) + return NULL; + + mgr = CALLOC_STRUCT(pb_slab_range_manager); + if (!mgr) + goto out_err0; + + mgr->base.destroy = pb_slab_range_manager_destroy; + mgr->base.create_buffer = pb_slab_range_manager_create_buffer; + mgr->base.flush = pb_slab_range_manager_flush; + + mgr->provider = provider; + mgr->minBufSize = minBufSize; + mgr->maxBufSize = maxBufSize; + mgr->desc = *desc; + + mgr->numBuckets = 1; + bufSize = minBufSize; + while(bufSize < maxBufSize) { + bufSize *= 2; + ++mgr->numBuckets; + } + + mgr->buckets = CALLOC(mgr->numBuckets, sizeof(*mgr->buckets)); + if (!mgr->buckets) + goto out_err1; + + bufSize = minBufSize; + for (i = 0; i < mgr->numBuckets; ++i) { + mgr->buckets[i] = pb_slab_manager_create(provider, 
bufSize, slabSize, desc); + if(!mgr->buckets[i]) + goto out_err2; + bufSize *= 2; + } + + return &mgr->base; + +out_err2: + for (i = 0; i < mgr->numBuckets; ++i) + if(mgr->buckets[i]) + mgr->buckets[i]->destroy(mgr->buckets[i]); + FREE(mgr->buckets); +out_err1: + FREE(mgr); +out_err0: + return NULL; +} diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c new file mode 100644 index 0000000000..1e54fc39d4 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -0,0 +1,153 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Buffer validation. 
+ * + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_error.h" +#include "util/u_memory.h" +#include "pipe/p_debug.h" + +#include "pb_buffer.h" +#include "pb_buffer_fenced.h" +#include "pb_validate.h" + + +#define PB_VALIDATE_INITIAL_SIZE 1 /* 512 */ + + +struct pb_validate +{ + struct pb_buffer **buffers; /**< Table of referenced buffers */ + unsigned used; /**< Number of leading entries of buffers in use */ + unsigned size; /**< Allocated capacity of buffers, in entries */ +}; + + +enum pipe_error +pb_validate_add_buffer(struct pb_validate *vl, + struct pb_buffer *buf) +{ + assert(buf); + if(!buf) + return PIPE_ERROR; + + /* We only need to store one reference for each buffer, so avoid storing + * consecutive references for the same buffer. It might not be the most + * common pattern, but it is easy to implement. + */ + if(vl->used && vl->buffers[vl->used - 1] == buf) { + return PIPE_OK; + } + + /* Grow the table */ + if(vl->used == vl->size) { + unsigned new_size; + struct pb_buffer **new_buffers; + + new_size = vl->size * 2; + if(!new_size) /* current size is zero, or doubling wrapped around */ + return PIPE_ERROR_OUT_OF_MEMORY; + + new_buffers = (struct pb_buffer **)REALLOC(vl->buffers, + vl->size*sizeof(struct pb_buffer *), + new_size*sizeof(struct pb_buffer *)); + if(!new_buffers) + return PIPE_ERROR_OUT_OF_MEMORY; + + memset(new_buffers + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_buffer *)); /* clear only the newly added slots */ + + vl->size = new_size; + vl->buffers = new_buffers; + } + + assert(!vl->buffers[vl->used]); + pb_reference(&vl->buffers[vl->used], buf); + ++vl->used; + + return PIPE_OK; +} + + +enum pipe_error +pb_validate_validate(struct pb_validate *vl) +{ + /* FIXME: go through each buffer, ensure it's not mapped and its address is + * available -- requires a new pb_buffer interface */ + return PIPE_OK; +} + + +void +pb_validate_fence(struct pb_validate *vl, + struct pipe_fence_handle *fence) +{ + unsigned i; + for(i = 0; i < vl->used; ++i) { + buffer_fence(vl->buffers[i], fence); + pb_reference(&vl->buffers[i], NULL); /* drop the reference taken by pb_validate_add_buffer() */ + } + vl->used = 0; +} + + +void +pb_validate_destroy(struct pb_validate *vl) +{ 
+ unsigned i; + for(i = 0; i < vl->used; ++i) + pb_reference(&vl->buffers[i], NULL); + FREE(vl->buffers); + FREE(vl); +} + + +/** + * Create an empty validation list with room for PB_VALIDATE_INITIAL_SIZE + * buffers. + * + * Returns NULL when either allocation fails. + */ +struct pb_validate * +pb_validate_create(void) +{ + struct pb_validate *vl; + + vl = CALLOC_STRUCT(pb_validate); + if(!vl) + return NULL; + + vl->size = PB_VALIDATE_INITIAL_SIZE; + vl->buffers = (struct pb_buffer **)CALLOC(vl->size, sizeof(struct pb_buffer *)); + if(!vl->buffers) { + FREE(vl); + return NULL; + } + + return vl; +} + diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.h b/src/gallium/auxiliary/pipebuffer/pb_validate.h new file mode 100644 index 0000000000..3db1d5330b --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.h @@ -0,0 +1,91 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Buffer validation. + * + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + +#ifndef PB_VALIDATE_H_ +#define PB_VALIDATE_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_error.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +struct pb_buffer; +struct pipe_fence_handle; + + +/** + * Buffer validation list. + * + * It holds a list of buffers to be validated and fenced when flushing. + */ +struct pb_validate; + + +enum pipe_error +pb_validate_add_buffer(struct pb_validate *vl, + struct pb_buffer *buf); + +/** + * Validate all buffers for hardware access. + * + * Should be called right before issuing commands to the hardware. + */ +enum pipe_error +pb_validate_validate(struct pb_validate *vl); + +/** + * Fence all buffers and clear the list. + * + * Should be called right before issuing commands to the hardware. + */ +void +pb_validate_fence(struct pb_validate *vl, + struct pipe_fence_handle *fence); + +struct pb_validate * +pb_validate_create(void); + +void +pb_validate_destroy(struct pb_validate *vl); + + +#ifdef __cplusplus +} +#endif + +#endif /*PB_VALIDATE_H_*/ diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.c b/src/gallium/auxiliary/pipebuffer/pb_winsys.c new file mode 100644 index 0000000000..28d137dbc4 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_winsys.c @@ -0,0 +1,170 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Implementation of client buffer (also designated as "user buffers"), which + * are just state-tracker owned data masqueraded as buffers. + * + * \author José Fonseca <jrfonseca@tungstengraphics.com> + */ + + +#include "pipe/p_winsys.h" +#include "util/u_memory.h" + +#include "pb_buffer.h" + + +/** + * User buffers are special buffers that initially reference memory + * held by the user but which may if necessary copy that memory into + * device memory behind the scenes, for submission to hardware. + * + * These are particularly useful when the referenced data is never + * submitted to hardware at all, in the particular case of software + * vertex processing. 
+ */ +struct pb_user_buffer +{ + struct pb_buffer base; + void *data; +}; + + +extern const struct pb_vtbl pb_user_buffer_vtbl; + + +static INLINE struct pb_user_buffer * +pb_user_buffer(struct pb_buffer *buf) +{ + assert(buf); + assert(buf->vtbl == &pb_user_buffer_vtbl); + return (struct pb_user_buffer *)buf; +} + + +static void +pb_user_buffer_destroy(struct pb_buffer *buf) +{ + assert(buf); + FREE(buf); +} + + +static void * +pb_user_buffer_map(struct pb_buffer *buf, + unsigned flags) +{ + return pb_user_buffer(buf)->data; +} + + +static void +pb_user_buffer_unmap(struct pb_buffer *buf) +{ + /* No-op */ +} + + +static void +pb_user_buffer_get_base_buffer(struct pb_buffer *buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + *base_buf = buf; + *offset = 0; +} + + +const struct pb_vtbl +pb_user_buffer_vtbl = { + pb_user_buffer_destroy, + pb_user_buffer_map, + pb_user_buffer_unmap, + pb_user_buffer_get_base_buffer +}; + + +static struct pipe_buffer * +pb_winsys_user_buffer_create(struct pipe_winsys *winsys, + void *data, + unsigned bytes) +{ + struct pb_user_buffer *buf = CALLOC_STRUCT(pb_user_buffer); + + if(!buf) + return NULL; + + buf->base.base.refcount = 1; + buf->base.base.size = bytes; + buf->base.base.alignment = 0; + buf->base.base.usage = 0; + + buf->base.vtbl = &pb_user_buffer_vtbl; + buf->data = data; + + return &buf->base.base; +} + + +static void * +pb_winsys_buffer_map(struct pipe_winsys *winsys, + struct pipe_buffer *buf, + unsigned flags) +{ + (void)winsys; + return pb_map(pb_buffer(buf), flags); +} + + +static void +pb_winsys_buffer_unmap(struct pipe_winsys *winsys, + struct pipe_buffer *buf) +{ + (void)winsys; + pb_unmap(pb_buffer(buf)); +} + + +static void +pb_winsys_buffer_destroy(struct pipe_winsys *winsys, + struct pipe_buffer *buf) +{ + (void)winsys; + pb_destroy(pb_buffer(buf)); +} + + +void +pb_init_winsys(struct pipe_winsys *winsys) +{ + winsys->user_buffer_create = pb_winsys_user_buffer_create; + winsys->buffer_map = 
pb_winsys_buffer_map; + winsys->buffer_unmap = pb_winsys_buffer_unmap; + winsys->buffer_destroy = pb_winsys_buffer_destroy; +} diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile new file mode 100644 index 0000000000..252dc5274a --- /dev/null +++ b/src/gallium/auxiliary/rtasm/Makefile @@ -0,0 +1,16 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = rtasm + +C_SOURCES = \ + rtasm_cpu.c \ + rtasm_execmem.c \ + rtasm_x86sse.c \ + rtasm_ppc.c \ + rtasm_ppc_spe.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript new file mode 100644 index 0000000000..eb48368acc --- /dev/null +++ b/src/gallium/auxiliary/rtasm/SConscript @@ -0,0 +1,13 @@ +Import('*') + +rtasm = env.ConvenienceLibrary( + target = 'rtasm', + source = [ + 'rtasm_cpu.c', + 'rtasm_execmem.c', + 'rtasm_x86sse.c', + 'rtasm_ppc.c', + 'rtasm_ppc_spe.c', + ]) + +auxiliaries.insert(0, rtasm) diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c new file mode 100644 index 0000000000..5499018b21 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c @@ -0,0 +1,65 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "pipe/p_debug.h" +#include "rtasm_cpu.h" + + +static boolean rtasm_sse_enabled(void) +{ + static boolean firsttime = 1; + static boolean enabled; + + /* This gets called quite often at the moment: + */ + if (firsttime) { + enabled = !debug_get_bool_option("GALLIUM_NOSSE", FALSE); + firsttime = FALSE; + } + return enabled; +} + +int rtasm_cpu_has_sse(void) +{ + /* FIXME: actually detect this at run-time */ +#if defined(PIPE_ARCH_X86) + return rtasm_sse_enabled(); +#else + return 0; +#endif +} + +int rtasm_cpu_has_sse2(void) +{ + /* FIXME: actually detect this at run-time */ +#if defined(PIPE_ARCH_X86) + return rtasm_sse_enabled(); +#else + return 0; +#endif +} diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.h b/src/gallium/auxiliary/rtasm/rtasm_cpu.h new file mode 100644 index 0000000000..ebc71634fd --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Runtime detection of CPU capabilities. + */ + +#ifndef _RTASM_CPU_H_ +#define _RTASM_CPU_H_ + + +int rtasm_cpu_has_sse(void); + +int rtasm_cpu_has_sse2(void); + + +#endif /* _RTASM_CPU_H_ */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c new file mode 100644 index 0000000000..19087589a8 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -0,0 +1,136 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * \file rtasm_execmem.c + * Functions for allocating executable memory. + * + * \author Keith Whitwell + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_thread.h" +#include "util/u_memory.h" + +#include "rtasm_execmem.h" + + +#if defined(__linux__) + +/* + * Allocate a large block of memory which can hold code then dole it out + * in pieces by means of the generic memory manager code. +*/ + +#include <unistd.h> +#include <sys/mman.h> +#include "pipe/p_thread.h" +#include "util/u_mm.h" + +#define EXEC_HEAP_SIZE (10*1024*1024) + +pipe_static_mutex(exec_mutex); + +static struct mem_block *exec_heap = NULL; +static unsigned char *exec_mem = NULL; + + +/** + * Lazily create the bookkeeping heap and the executable mapping. + * + * Either step may fail; exec_heap / exec_mem then stay NULL so that the + * next call tries again. Called with exec_mutex held. + */ +static void +init_heap(void) +{ + if (!exec_heap) + exec_heap = mmInit( 0, EXEC_HEAP_SIZE ); + + if (!exec_mem) { + void *mem = mmap(0, EXEC_HEAP_SIZE, + PROT_EXEC | PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + /* mmap() reports failure with MAP_FAILED, not NULL */ + if (mem != MAP_FAILED) + exec_mem = (unsigned char *) mem; + } +} + + +/** + * Allocate size bytes (rounded up to a multiple of 32) of executable memory. + * + * \return address inside the executable mapping, or NULL on failure. + */ +void * +rtasm_exec_malloc(size_t size) +{ + struct mem_block *block = NULL; + void *addr = NULL; + + pipe_mutex_lock(exec_mutex); + + init_heap(); + + /* Only hand out blocks when the mapping backing them really exists */ + if (exec_heap && exec_mem) { + size = (size + 31) & ~31; + block = mmAllocMem( exec_heap, size, 32, 0 ); + } + + if (block) + addr = exec_mem + block->ofs; + else + debug_printf("rtasm_exec_malloc failed\n"); + + pipe_mutex_unlock(exec_mutex); + + return addr; +} + + +/** + * Release memory obtained from rtasm_exec_malloc(). + * + * Addresses that do not correspond to a known block are ignored. + */ +void +rtasm_exec_free(void *addr) +{ + pipe_mutex_lock(exec_mutex); + + if (exec_heap && exec_mem) { + struct mem_block *block = mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem); + + if (block) + mmFreeMem(block); + } + + pipe_mutex_unlock(exec_mutex); +} + + +#else + +/* + * Just use regular memory. 
+ */ + +void * +rtasm_exec_malloc(size_t size) +{ + return MALLOC( size ); +} + + +void +rtasm_exec_free(void *addr) +{ + FREE(addr); +} + + +#endif diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.h b/src/gallium/auxiliary/rtasm/rtasm_execmem.h new file mode 100644 index 0000000000..155c6d34e0 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file exemem.c + * Functions for allocating executable memory. 
+ * + * \author Keith Whitwell + */ + +#ifndef _RTASM_EXECMEM_H_ +#define _RTASM_EXECMEM_H_ + +#include "pipe/p_compiler.h" + + +extern void * +rtasm_exec_malloc( size_t size ); + + +extern void +rtasm_exec_free( void *addr ); + + +#endif diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c new file mode 100644 index 0000000000..7dd8263749 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -0,0 +1,924 @@ +/************************************************************************** + * + * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * PPC code generation. 
+ * For reference, see http://www.power.org/resources/reading/PowerISA_V2.05.pdf + * ABI info: http://www.cs.utsa.edu/~whaley/teach/cs6463FHPO/LEC/lec12_ho.pdf + * + * Other PPC refs: + * http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF778525699600719DF2 + * http://www.ibm.com/developerworks/eserver/library/es-archguide-v2.html + * http://www.freescale.com/files/product/doc/MPCFPE32B.pdf + * + * \author Brian Paul + */ + + +#include <stdio.h> +#include "util/u_memory.h" +#include "pipe/p_debug.h" +#include "rtasm_ppc.h" + + +void +ppc_init_func(struct ppc_function *p, unsigned max_inst) +{ + uint i; + + p->store = align_malloc(max_inst * PPC_INST_SIZE, 16); + p->num_inst = 0; + p->max_inst = max_inst; + p->reg_used = 0x0; + p->fp_used = 0x0; + p->vec_used = 0x0; + + /* only allow using gp registers 3..12 for now */ + for (i = 0; i < 3; i++) + ppc_reserve_register(p, i); + for (i = 12; i < PPC_NUM_REGS; i++) + ppc_reserve_register(p, i); +} + + +void +ppc_release_func(struct ppc_function *p) +{ + assert(p->num_inst <= p->max_inst); + if (p->store != NULL) { + align_free(p->store); + } + p->store = NULL; +} + + +void (*ppc_get_func(struct ppc_function *p))(void) +{ +#if 0 + DUMP_END(); + if (DISASSEM && p->store) + debug_printf("disassemble %p %p\n", p->store, p->csr); + + if (p->store == p->error_overflow) + return (void (*)(void)) NULL; + else +#endif + return (void (*)(void)) p->store; +} + + +void +ppc_dump_func(const struct ppc_function *p) +{ + uint i; + for (i = 0; i < p->num_inst; i++) { + debug_printf("%3u: 0x%08x\n", i, p->store[i]); + } +} + + +/** + * Mark a register as being unavailable. + */ +int +ppc_reserve_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_REGS); + p->reg_used |= (1 << reg); + return reg; +} + + +/** + * Allocate a general purpose register. + * \return register index or -1 if none left. 
+ */ +int +ppc_allocate_register(struct ppc_function *p) +{ + unsigned i; + for (i = 0; i < PPC_NUM_REGS; i++) { + const uint64_t mask = 1 << i; + if ((p->reg_used & mask) == 0) { + p->reg_used |= mask; + return i; + } + } + return -1; +} + + +/** + * Mark the given general purpose register as "unallocated". + */ +void +ppc_release_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_REGS); + assert(p->reg_used & (1 << reg)); + p->reg_used &= ~(1 << reg); +} + + +/** + * Allocate a floating point register. + * \return register index or -1 if none left. + */ +int +ppc_allocate_fp_register(struct ppc_function *p) +{ + unsigned i; + for (i = 0; i < PPC_NUM_FP_REGS; i++) { + const uint64_t mask = 1 << i; + if ((p->fp_used & mask) == 0) { + p->fp_used |= mask; + return i; + } + } + return -1; +} + + +/** + * Mark the given floating point register as "unallocated". + */ +void +ppc_release_fp_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_FP_REGS); + assert(p->fp_used & (1 << reg)); + p->fp_used &= ~(1 << reg); +} + + +/** + * Allocate a vector register. + * \return register index or -1 if none left. + */ +int +ppc_allocate_vec_register(struct ppc_function *p) +{ + unsigned i; + for (i = 0; i < PPC_NUM_VEC_REGS; i++) { + const uint64_t mask = 1 << i; + if ((p->vec_used & mask) == 0) { + p->vec_used |= mask; + return i; + } + } + return -1; +} + + +/** + * Mark the given vector register as "unallocated". 
+ */ +void +ppc_release_vec_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_VEC_REGS); + assert(p->vec_used & (1 << reg)); + p->vec_used &= ~(1 << reg); +} + + + +union vx_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned vD:5; + unsigned vA:5; + unsigned vB:5; + unsigned op2:11; + } inst; +}; + +static inline void +emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +{ + union vx_inst inst; + inst.inst.op = 4; + inst.inst.vD = vD; + inst.inst.vA = vA; + inst.inst.vB = vB; + inst.inst.op2 = op2; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + + +union vxr_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned vD:5; + unsigned vA:5; + unsigned vB:5; + unsigned rC:1; + unsigned op2:10; + } inst; +}; + +static inline void +emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +{ + union vxr_inst inst; + inst.inst.op = 4; + inst.inst.vD = vD; + inst.inst.vA = vA; + inst.inst.vB = vB; + inst.inst.rC = 0; + inst.inst.op2 = op2; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + + +union va_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned vD:5; + unsigned vA:5; + unsigned vB:5; + unsigned vC:5; + unsigned op2:6; + } inst; +}; + +static inline void +emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) +{ + union va_inst inst; + inst.inst.op = 4; + inst.inst.vD = vD; + inst.inst.vA = vA; + inst.inst.vB = vB; + inst.inst.vC = vC; + inst.inst.op2 = op2; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + + +union i_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned li:24; + unsigned aa:1; + unsigned lk:1; + } inst; +}; + +static INLINE void +emit_i(struct ppc_function *p, uint op, uint li, uint aa, uint lk) +{ + union i_inst inst; + inst.inst.op = op; + inst.inst.li = li; + inst.inst.aa = aa; + inst.inst.lk = lk; + p->store[p->num_inst++] = inst.bits; + 
assert(p->num_inst <= p->max_inst); +} + + +union xl_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned bo:5; + unsigned bi:5; + unsigned unused:3; + unsigned bh:2; + unsigned op2:10; + unsigned lk:1; + } inst; +}; + +static INLINE void +emit_xl(struct ppc_function *p, uint op, uint bo, uint bi, uint bh, + uint op2, uint lk) +{ + union xl_inst inst; + inst.inst.op = op; + inst.inst.bo = bo; + inst.inst.bi = bi; + inst.inst.unused = 0x0; + inst.inst.bh = bh; + inst.inst.op2 = op2; + inst.inst.lk = lk; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +} + +static INLINE void +dump_xl(const char *name, uint inst) +{ + union xl_inst i; + + i.bits = inst; + debug_printf("%s = 0x%08x\n", name, inst); + debug_printf(" op: %d 0x%x\n", i.inst.op, i.inst.op); + debug_printf(" bo: %d 0x%x\n", i.inst.bo, i.inst.bo); + debug_printf(" bi: %d 0x%x\n", i.inst.bi, i.inst.bi); + debug_printf(" unused: %d 0x%x\n", i.inst.unused, i.inst.unused); + debug_printf(" bh: %d 0x%x\n", i.inst.bh, i.inst.bh); + debug_printf(" op2: %d 0x%x\n", i.inst.op2, i.inst.op2); + debug_printf(" lk: %d 0x%x\n", i.inst.lk, i.inst.lk); +} + + +union x_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned vrs:5; + unsigned ra:5; + unsigned rb:5; + unsigned op2:10; + unsigned unused:1; + } inst; +}; + +static INLINE void +emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2) +{ + union x_inst inst; + inst.inst.op = op; + inst.inst.vrs = vrs; + inst.inst.ra = ra; + inst.inst.rb = rb; + inst.inst.op2 = op2; + inst.inst.unused = 0x0; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +} + + +union d_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned rt:5; + unsigned ra:5; + unsigned si:16; + } inst; +}; + +static inline void +emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si) +{ + union d_inst inst; + assert(si >= -32768); + assert(si <= 32767); + inst.inst.op = op; + inst.inst.rt = rt; + 
inst.inst.ra = ra; + inst.inst.si = (unsigned) (si & 0xffff); + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + + +union a_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned frt:5; + unsigned fra:5; + unsigned frb:5; + unsigned unused:5; + unsigned op2:5; + unsigned rc:1; + } inst; +}; + +static inline void +emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2, + uint rc) +{ + union a_inst inst; + inst.inst.op = op; + inst.inst.frt = frt; + inst.inst.fra = fra; + inst.inst.frb = frb; + inst.inst.unused = 0x0; + inst.inst.op2 = op2; + inst.inst.rc = rc; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + + +union xo_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned rt:5; + unsigned ra:5; + unsigned rb:5; + unsigned oe:1; + unsigned op2:9; + unsigned rc:1; + } inst; +}; + +static INLINE void +emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe, + uint op2, uint rc) +{ + union xo_inst inst; + inst.inst.op = op; + inst.inst.rt = rt; + inst.inst.ra = ra; + inst.inst.rb = rb; + inst.inst.oe = oe; + inst.inst.op2 = op2; + inst.inst.rc = rc; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +} + + + + + +/** + ** float vector arithmetic + **/ + +/** vector float add */ +void +ppc_vaddfp(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 10, vD, vA, vB); +} + +/** vector float substract */ +void +ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 74, vD, vA, vB); +} + +/** vector float min */ +void +ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1098, vD, vA, vB); +} + +/** vector float max */ +void +ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1034, vD, vA, vB); +} + +/** vector float mult add: vD = vA * vB + vC */ +void +ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) +{ + 
emit_va(p, 46, vD, vA, vC, vB); /* note arg order */ +} + +/** vector float compare greater than */ +void +ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vxr(p, 710, vD, vA, vB); +} + +/** vector float compare greater than or equal to */ +void +ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vxr(p, 454, vD, vA, vB); +} + +/** vector float compare equal */ +void +ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vxr(p, 198, vD, vA, vB); +} + +/** vector float 2^x */ +void +ppc_vexptefp(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 394, vD, 0, vB); +} + +/** vector float log2(x) */ +void +ppc_vlogefp(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 458, vD, 0, vB); +} + +/** vector float reciprocol */ +void +ppc_vrefp(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 266, vD, 0, vB); +} + +/** vector float reciprocol sqrt estimate */ +void +ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 330, vD, 0, vB); +} + +/** vector float round to negative infinity */ +void +ppc_vrfim(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 714, vD, 0, vB); +} + +/** vector float round to positive infinity */ +void +ppc_vrfip(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 650, vD, 0, vB); +} + +/** vector float round to nearest int */ +void +ppc_vrfin(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 522, vD, 0, vB); +} + +/** vector float round to int toward zero */ +void +ppc_vrfiz(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 586, vD, 0, vB); +} + +/** vector store: store vR at mem[vA+vB] */ +void +ppc_stvx(struct ppc_function *p, uint vR, uint vA, uint vB) +{ + emit_x(p, 31, vR, vA, vB, 231); +} + +/** vector load: vR = mem[vA+vB] */ +void +ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB) +{ + emit_x(p, 31, vR, vA, vB, 103); +} + +/** load vector element word: vR = mem_word[ra+rb] */ +void 
+ppc_lvewx(struct ppc_function *p, uint vr, uint ra, uint rb) +{ + emit_x(p, 31, vr, ra, rb, 71); +} + + + + +/** + ** vector bitwise operations + **/ + +/** vector and */ +void +ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1028, vD, vA, vB); +} + +/** vector and complement */ +void +ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1092, vD, vA, vB); +} + +/** vector or */ +void +ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1156, vD, vA, vB); +} + +/** vector nor */ +void +ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1284, vD, vA, vB); +} + +/** vector xor */ +void +ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1220, vD, vA, vB); +} + +/** Pseudo-instruction: vector move */ +void +ppc_vmove(struct ppc_function *p, uint vD, uint vA) +{ + ppc_vor(p, vD, vA, vA); +} + +/** Set vector register to {0,0,0,0} */ +void +ppc_vzero(struct ppc_function *p, uint vr) +{ + ppc_vxor(p, vr, vr, vr); +} + + + + +/** + ** Vector shuffle / select / splat / etc + **/ + +/** vector permute */ +void +ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) +{ + emit_va(p, 43, vD, vA, vB, vC); +} + +/** vector select */ +void +ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) +{ + emit_va(p, 42, vD, vA, vB, vC); +} + +/** vector splat byte */ +void +ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm) +{ + emit_vx(p, 42, vD, imm, vB); +} + +/** vector splat half word */ +void +ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm) +{ + emit_vx(p, 588, vD, imm, vB); +} + +/** vector splat word */ +void +ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm) +{ + emit_vx(p, 652, vD, imm, vB); +} + +/** vector splat signed immediate word */ +void +ppc_vspltisw(struct ppc_function *p, uint vD, int imm) +{ + assert(imm >= -16); + assert(imm < 15); + emit_vx(p, 908, vD, imm, 0); +} + +/** 
vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */ +void +ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 388, vD, vA, vB); +} + + + + +/** + ** integer arithmetic + **/ + +/** rt = ra + imm */ +void +ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 14, rt, ra, imm); +} + +/** rt = ra + (imm << 16) */ +void +ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 15, rt, ra, imm); +} + +/** rt = ra + rb */ +void +ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_xo(p, 31, rt, ra, rb, 0, 266, 0); +} + +/** rt = ra AND ra */ +void +ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_x(p, 31, ra, rt, rb, 28); /* note argument order */ +} + +/** rt = ra AND imm */ +void +ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 28, ra, rt, imm); /* note argument order */ +} + +/** rt = ra OR ra */ +void +ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_x(p, 31, ra, rt, rb, 444); /* note argument order */ +} + +/** rt = ra OR imm */ +void +ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 24, ra, rt, imm); /* note argument order */ +} + +/** rt = ra XOR ra */ +void +ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_x(p, 31, ra, rt, rb, 316); /* note argument order */ +} + +/** rt = ra XOR imm */ +void +ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 26, ra, rt, imm); /* note argument order */ +} + +/** pseudo instruction: move: rt = ra */ +void +ppc_mr(struct ppc_function *p, uint rt, uint ra) +{ + ppc_or(p, rt, ra, ra); +} + +/** pseudo instruction: load immediate: rt = imm */ +void +ppc_li(struct ppc_function *p, uint rt, int imm) +{ + ppc_addi(p, rt, 0, imm); +} + +/** rt = imm << 16 */ +void +ppc_lis(struct ppc_function *p, uint rt, int imm) +{ + ppc_addis(p, rt, 0, imm); +} + +/** rt = imm */ +void +ppc_load_int(struct ppc_function *p, 
uint rt, int imm) +{ + ppc_lis(p, rt, (imm >> 16)); /* rt = imm >> 16 */ + ppc_ori(p, rt, rt, (imm & 0xffff)); /* rt = rt | (imm & 0xffff) */ +} + + + + +/** + ** integer load/store + **/ + +/** store rs at memory[(ra)+d], + * then update ra = (ra)+d + */ +void +ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d) +{ + emit_d(p, 37, rs, ra, d); +} + +/** store rs at memory[(ra)+d] */ +void +ppc_stw(struct ppc_function *p, uint rs, uint ra, int d) +{ + emit_d(p, 36, rs, ra, d); +} + +/** Load rt = mem[(ra)+d]; then zero set high 32 bits to zero. */ +void +ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d) +{ + emit_d(p, 32, rt, ra, d); +} + + + +/** + ** Float (non-vector) arithmetic + **/ + +/** add: frt = fra + frb */ +void +ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb) +{ + emit_a(p, 63, frt, fra, frb, 21, 0); +} + +/** sub: frt = fra - frb */ +void +ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb) +{ + emit_a(p, 63, frt, fra, frb, 20, 0); +} + +/** convert to int: rt = (int) ra */ +void +ppc_fctiwz(struct ppc_function *p, uint rt, uint fra) +{ + emit_x(p, 63, rt, 0, fra, 15); +} + +/** store frs at mem[(ra)+offset] */ +void +ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset) +{ + emit_d(p, 52, frs, ra, offset); +} + +/** store frs at mem[(ra)+(rb)] */ +void +ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb) +{ + emit_x(p, 31, frs, ra, rb, 983); +} + +/** load frt = mem[(ra)+offset] */ +void +ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset) +{ + emit_d(p, 48, frt, ra, offset); +} + + + + + +/** + ** branch instructions + **/ + +/** BLR: Branch to link register (p. 35) */ +void +ppc_blr(struct ppc_function *p) +{ + emit_i(p, 18, 0, 0, 1); +} + +/** Branch Conditional to Link Register (p. 
36) */ +void +ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg) +{ + emit_xl(p, 19, condOp, condReg, branchHint, 16, 0); +} + +/** Pseudo instruction: return from subroutine */ +void +ppc_return(struct ppc_function *p) +{ + ppc_bclr(p, BRANCH_COND_ALWAYS, BRANCH_HINT_SUB_RETURN, 0); +} diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h new file mode 100644 index 0000000000..f938d8d759 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -0,0 +1,324 @@ +/************************************************************************** + * + * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * PPC code generation. 
+ * \author Brian Paul + */ + + +#ifndef RTASM_PPC_H +#define RTASM_PPC_H + + +#include "pipe/p_compiler.h" + + +#define PPC_INST_SIZE 4 /**< 4 bytes / instruction */ + +#define PPC_NUM_REGS 32 +#define PPC_NUM_FP_REGS 32 +#define PPC_NUM_VEC_REGS 32 + +/** Stack pointer register */ +#define PPC_REG_SP 1 + +/** Branch conditions */ +#define BRANCH_COND_ALWAYS 0x14 /* binary 1z1zz (z=ignored) */ + +/** Branch hints */ +#define BRANCH_HINT_SUB_RETURN 0x0 /* binary 00 */ + + +struct ppc_function +{ + uint32_t *store; /**< instruction buffer */ + uint num_inst; + uint max_inst; + uint32_t reg_used; /** used/free general-purpose registers bitmask */ + uint32_t fp_used; /** used/free floating point registers bitmask */ + uint32_t vec_used; /** used/free vector registers bitmask */ +}; + + + +extern void ppc_init_func(struct ppc_function *p, unsigned max_inst); +extern void ppc_release_func(struct ppc_function *p); +extern void (*ppc_get_func( struct ppc_function *p ))( void ); +extern void ppc_dump_func(const struct ppc_function *p); + +extern int ppc_reserve_register(struct ppc_function *p, int reg); +extern int ppc_allocate_register(struct ppc_function *p); +extern void ppc_release_register(struct ppc_function *p, int reg); +extern int ppc_allocate_fp_register(struct ppc_function *p); +extern void ppc_release_fp_register(struct ppc_function *p, int reg); +extern int ppc_allocate_vec_register(struct ppc_function *p); +extern void ppc_release_vec_register(struct ppc_function *p, int reg); + + + +/** + ** float vector arithmetic + **/ + +/** vector float add */ +extern void +ppc_vaddfp(struct ppc_function *p,uint vD, uint vA, uint vB); + +/** vector float substract */ +extern void +ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float min */ +extern void +ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float max */ +extern void +ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float mult 
add */ +extern void +ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); + +/** vector float compare greater than */ +extern void +ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float compare greater than or equal to */ +extern void +ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float compare equal */ +extern void +ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float 2^x */ +extern void +ppc_vexptefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float log2(x) */ +extern void +ppc_vlogefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float reciprocol */ +extern void +ppc_vrefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float reciprocol sqrt estimate */ +extern void +ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to negative infinity */ +extern void +ppc_vrfim(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to positive infinity */ +extern void +ppc_vrfip(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to nearest int */ +extern void +ppc_vrfin(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to int toward zero */ +extern void +ppc_vrfiz(struct ppc_function *p, uint vD, uint vB); + + +/** vector store: store vR at mem[vA+vB] */ +extern void +ppc_stvx(struct ppc_function *p, uint vR, uint vA, uint vB); + +/** vector load: vR = mem[vA+vB] */ +extern void +ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB); + +/** load vector element word: vR = mem_word[vA+vB] */ +extern void +ppc_lvewx(struct ppc_function *p, uint vR, uint vA, uint vB); + + + +/** + ** vector bitwise operations + **/ + + +/** vector and */ +extern void +ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector and complement */ +extern void +ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector or */ +extern 
void +ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector nor */ +extern void +ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector xor */ +extern void +ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** Pseudo-instruction: vector move */ +extern void +ppc_vmove(struct ppc_function *p, uint vD, uint vA); + +/** Set vector register to {0,0,0,0} */ +extern void +ppc_vzero(struct ppc_function *p, uint vr); + + + +/** + ** Vector shuffle / select / splat / etc + **/ + +/** vector permute */ +extern void +ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); + +/** vector select */ +extern void +ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); + +/** vector splat byte */ +extern void +ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm); + +/** vector splat half word */ +extern void +ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm); + +/** vector splat word */ +extern void +ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm); + +/** vector splat signed immediate word */ +extern void +ppc_vspltisw(struct ppc_function *p, uint vD, int imm); + +/** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */ +extern void +ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB); + + + +/** + ** scalar arithmetic + **/ + +extern void +ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void 
+ppc_mr(struct ppc_function *p, uint rt, uint ra); + +extern void +ppc_li(struct ppc_function *p, uint rt, int imm); + +extern void +ppc_lis(struct ppc_function *p, uint rt, int imm); + +extern void +ppc_load_int(struct ppc_function *p, uint rt, int imm); + + + +/** + ** scalar load/store + **/ + +extern void +ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d); + +extern void +ppc_stw(struct ppc_function *p, uint rs, uint ra, int d); + +extern void +ppc_lwz(struct ppc_function *p, uint rs, uint ra, int d); + + + +/** + ** Float (non-vector) arithmetic + **/ + +extern void +ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb); + +extern void +ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb); + +extern void +ppc_fctiwz(struct ppc_function *p, uint rt, uint ra); + +extern void +ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset); + +extern void +ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb); + + + +/** + ** branch instructions + **/ + +extern void +ppc_blr(struct ppc_function *p); + +void +ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg); + +extern void +ppc_return(struct ppc_function *p); + + +#endif /* RTASM_PPC_H */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c new file mode 100644 index 0000000000..dea1aed032 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -0,0 +1,1035 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file + * Real-time assembly generation interface for Cell B.E. SPEs. + * + * \author Ian Romanick <idr@us.ibm.com> + * \author Brian Paul + */ + + +#include <stdio.h> +#include "pipe/p_compiler.h" +#include "util/u_memory.h" +#include "rtasm_ppc_spe.h" + + +#ifdef GALLIUM_CELL +/** + * SPE instruction types + * + * There are 6 primary instruction encodings used on the Cell's SPEs. Each of + * the following unions encodes one type. + * + * \bug + * If, at some point, we start generating SPE code from a little-endian host + * these unions will not work. 
+ */ +/*@{*/ +/** + * Encode one output register with two input registers + */ +union spe_inst_RR { + uint32_t bits; + struct { + unsigned op:11; + unsigned rB:7; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with three input registers + */ +union spe_inst_RRR { + uint32_t bits; + struct { + unsigned op:4; + unsigned rT:7; + unsigned rB:7; + unsigned rA:7; + unsigned rC:7; + } inst; +}; + + +/** + * Encode one output register with one input reg. and a 7-bit signed immed + */ +union spe_inst_RI7 { + uint32_t bits; + struct { + unsigned op:11; + unsigned i7:7; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with one input reg. and an 8-bit signed immed + */ +union spe_inst_RI8 { + uint32_t bits; + struct { + unsigned op:10; + unsigned i8:8; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with one input reg. and a 10-bit signed immed + */ +union spe_inst_RI10 { + uint32_t bits; + struct { + unsigned op:8; + unsigned i10:10; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with a 16-bit signed immediate + */ +union spe_inst_RI16 { + uint32_t bits; + struct { + unsigned op:9; + unsigned i16:16; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with a 18-bit signed immediate + */ +union spe_inst_RI18 { + uint32_t bits; + struct { + unsigned op:7; + unsigned i18:18; + unsigned rT:7; + } inst; +}; +/*@}*/ + + +static void +indent(const struct spe_function *p) +{ + int i; + for (i = 0; i < p->indent; i++) { + putchar(' '); + } +} + + +static const char * +rem_prefix(const char *longname) +{ + return longname + 4; +} + + +static const char * +reg_name(int reg) +{ + switch (reg) { + case SPE_REG_SP: + return "$sp"; + case SPE_REG_RA: + return "$lr"; + default: + { + /* cycle through four buffers to handle multiple calls per printf */ + static char buf[4][10]; + static int b = 0; + b = (b + 1) % 4; 
+ sprintf(buf[b], "$%d", reg); + return buf[b]; + } + } +} + + +static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, unsigned rB, const char *name) +{ + union spe_inst_RR inst; + inst.inst.op = op; + inst.inst.rB = rB; + inst.inst.rA = rA; + inst.inst.rT = rT; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\t%s, %s, %s\n", + rem_prefix(name), reg_name(rT), reg_name(rA), reg_name(rB)); + } +} + + +static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, unsigned rB, unsigned rC, const char *name) +{ + union spe_inst_RRR inst; + inst.inst.op = op; + inst.inst.rT = rT; + inst.inst.rB = rB; + inst.inst.rA = rA; + inst.inst.rC = rC; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\t%s, %s, %s, %s\n", rem_prefix(name), reg_name(rT), + reg_name(rA), reg_name(rB), reg_name(rC)); + } +} + + +static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, int imm, const char *name) +{ + union spe_inst_RI7 inst; + inst.inst.op = op; + inst.inst.i7 = imm; + inst.inst.rA = rA; + inst.inst.rT = rT; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\t%s, %s, 0x%x\n", + rem_prefix(name), reg_name(rT), reg_name(rA), imm); + } +} + + + +static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, int imm, const char *name) +{ + union spe_inst_RI8 inst; + inst.inst.op = op; + inst.inst.i8 = imm; + inst.inst.rA = rA; + inst.inst.rT = rT; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\t%s, %s, 0x%x\n", + rem_prefix(name), reg_name(rT), reg_name(rA), imm); + } +} + + + +static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, int imm, const char *name) +{ + union 
spe_inst_RI10 inst; + inst.inst.op = op; + inst.inst.i10 = imm; + inst.inst.rA = rA; + inst.inst.rT = rT; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\t%s, %s, 0x%x\n", + rem_prefix(name), reg_name(rT), reg_name(rA), imm); + } +} + + +/** As above, but do range checking on signed immediate value */ +static void emit_RI10s(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, int imm, const char *name) +{ + assert(imm <= 511); + assert(imm >= -512); + emit_RI10(p, op, rT, rA, imm, name); +} + + +static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, + int imm, const char *name) +{ + union spe_inst_RI16 inst; + inst.inst.op = op; + inst.inst.i16 = imm; + inst.inst.rT = rT; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm); + } +} + + +static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, + int imm, const char *name) +{ + union spe_inst_RI18 inst; + inst.inst.op = op; + inst.inst.i18 = imm; + inst.inst.rT = rT; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm); + } +} + + + + +#define EMIT_(_name, _op) \ +void _name (struct spe_function *p, unsigned rT) \ +{ \ + emit_RR(p, _op, rT, 0, 0, __FUNCTION__); \ +} + +#define EMIT_R(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA) \ +{ \ + emit_RR(p, _op, rT, rA, 0, __FUNCTION__); \ +} + +#define EMIT_RR(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) \ +{ \ + emit_RR(p, _op, rT, rA, rB, __FUNCTION__); \ +} + +#define EMIT_RRR(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB, unsigned rC) \ +{ \ + emit_RRR(p, _op, rT, rA, rB, rC, __FUNCTION__); \ +} + 
+#define EMIT_RI7(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +{ \ + emit_RI7(p, _op, rT, rA, imm, __FUNCTION__); \ +} + +#define EMIT_RI8(_name, _op, bias) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +{ \ + emit_RI8(p, _op, rT, rA, bias - imm, __FUNCTION__); \ +} + +#define EMIT_RI10(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +{ \ + emit_RI10(p, _op, rT, rA, imm, __FUNCTION__); \ +} + +#define EMIT_RI10s(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +{ \ + emit_RI10s(p, _op, rT, rA, imm, __FUNCTION__); \ +} + +#define EMIT_RI16(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, int imm) \ +{ \ + emit_RI16(p, _op, rT, imm, __FUNCTION__); \ +} + +#define EMIT_RI18(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, int imm) \ +{ \ + emit_RI18(p, _op, rT, imm, __FUNCTION__); \ +} + +#define EMIT_I16(_name, _op) \ +void _name (struct spe_function *p, int imm) \ +{ \ + emit_RI16(p, _op, 0, imm, __FUNCTION__); \ +} + +#include "rtasm_ppc_spe.h" + + + +/** + * Initialize an spe_function. + * \param code_size size of instruction buffer to allocate, in bytes. + */ +void spe_init_func(struct spe_function *p, unsigned code_size) +{ + unsigned int i; + + p->store = align_malloc(code_size, 16); + p->num_inst = 0; + p->max_inst = code_size / SPE_INST_SIZE; + + p->set_count = 0; + memset(p->regs, 0, SPE_NUM_REGS * sizeof(p->regs[0])); + + /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile. + */ + p->regs[0] = p->regs[1] = p->regs[2] = 1; + for (i = 80; i <= 127; i++) { + p->regs[i] = 1; + } + + p->print = false; + p->indent = 0; +} + + +void spe_release_func(struct spe_function *p) +{ + assert(p->num_inst <= p->max_inst); + if (p->store != NULL) { + align_free(p->store); + } + p->store = NULL; +} + + +/** Return current code size in bytes. 
*/ +unsigned spe_code_size(const struct spe_function *p) +{ + return p->num_inst * SPE_INST_SIZE; +} + + +/** + * Allocate a SPE register. + * \return register index or -1 if none left. + */ +int spe_allocate_available_register(struct spe_function *p) +{ + unsigned i; + for (i = 0; i < SPE_NUM_REGS; i++) { + if (p->regs[i] == 0) { + p->regs[i] = 1; + return i; + } + } + + return -1; +} + + +/** + * Mark the given SPE register as "allocated". + */ +int spe_allocate_register(struct spe_function *p, int reg) +{ + assert(reg < SPE_NUM_REGS); + assert(p->regs[reg] == 0); + p->regs[reg] = 1; + return reg; +} + + +/** + * Mark the given SPE register as "unallocated". Note that this should + * only be used on registers allocated in the current register set; an + * assertion will fail if an attempt is made to deallocate a register + * allocated in an earlier register set. + */ +void spe_release_register(struct spe_function *p, int reg) +{ + assert(reg < SPE_NUM_REGS); + assert(p->regs[reg] == 1); + + p->regs[reg] = 0; +} + +/** + * Start a new set of registers. This can be called if + * it will be difficult later to determine exactly what + * registers were actually allocated during a code generation + * sequence, and you really just want to deallocate all of them. + */ +void spe_allocate_register_set(struct spe_function *p) +{ + unsigned int i; + + /* Keep track of the set count. If it ever wraps around to 0, + * we're in trouble. + */ + p->set_count++; + assert(p->set_count > 0); + + /* Increment the allocation count of all registers currently + * allocated. Then any registers that are allocated in this set + * will be the only ones with a count of 1; they'll all be released + * when the register set is released. + */ + for (i = 0; i < SPE_NUM_REGS; i++) { + if (p->regs[i] > 0) + p->regs[i]++; + } +} + +void spe_release_register_set(struct spe_function *p) +{ + unsigned int i; + + /* If the set count drops below zero, we're in trouble. 
*/ + assert(p->set_count > 0); + p->set_count--; + + /* Drop the allocation level of all registers. Any allocated + * during this register set will drop to 0 and then become + * available. + */ + for (i = 0; i < SPE_NUM_REGS; i++) { + if (p->regs[i] > 0) + p->regs[i]--; + } +} + + +unsigned +spe_get_registers_used(const struct spe_function *p, ubyte used[]) +{ + unsigned i, num = 0; + /* only count registers in the range available to callers */ + for (i = 2; i < 80; i++) { + if (p->regs[i]) { + used[num++] = i; + } + } + return num; +} + + +void +spe_print_code(struct spe_function *p, boolean enable) +{ + p->print = enable; +} + + +void +spe_indent(struct spe_function *p, int spaces) +{ + p->indent += spaces; +} + + +void +spe_comment(struct spe_function *p, int rel_indent, const char *s) +{ + if (p->print) { + p->indent += rel_indent; + indent(p); + p->indent -= rel_indent; + printf("# %s\n", s); + } +} + + +/** + * Load quad word. + * NOTE: offset is in bytes and the least significant 4 bits must be zero! + */ +void spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset) +{ + const boolean pSave = p->print; + + /* offset must be a multiple of 16 */ + assert(offset % 16 == 0); + /* offset must fit in 10-bit signed int field, after shifting */ + assert((offset >> 4) <= 511); + assert((offset >> 4) >= -512); + + p->print = FALSE; + emit_RI10(p, 0x034, rT, rA, offset >> 4, "spe_lqd"); + p->print = pSave; + + if (p->print) { + indent(p); + printf("lqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA)); + } +} + + +/** + * Store quad word. + * NOTE: offset is in bytes and the least significant 4 bits must be zero! 
+ */ +void spe_stqd(struct spe_function *p, unsigned rT, unsigned rA, int offset) +{ + const boolean pSave = p->print; + + /* offset must be a multiple of 16 */ + assert(offset % 16 == 0); + /* offset must fit in 10-bit signed int field, after shifting */ + assert((offset >> 4) <= 511); + assert((offset >> 4) >= -512); + + p->print = FALSE; + emit_RI10(p, 0x024, rT, rA, offset >> 4, "spe_stqd"); + p->print = pSave; + + if (p->print) { + indent(p); + printf("stqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA)); + } +} + + +/** + * For branch instructions: + * \param d if 1, disable interupts if branch is taken + * \param e if 1, enable interupts if branch is taken + * If d and e are both zero, don't change interupt status (right?) + */ + +/** Branch Indirect to address in rA */ +void spe_bi(struct spe_function *p, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4), __FUNCTION__); +} + +/** Interupt Return */ +void spe_iret(struct spe_function *p, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4), __FUNCTION__); +} + +/** Branch indirect and set link on external data */ +void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d, + int e) +{ + emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4), __FUNCTION__); +} + +/** Branch indirect and set link. Save PC in rT, jump to rA. */ +void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d, + int e) +{ + emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4), __FUNCTION__); +} + +/** Branch indirect if zero word. If rT.word[0]==0, jump to rA. */ +void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4), __FUNCTION__); +} + +/** Branch indirect if non-zero word. If rT.word[0]!=0, jump to rA. 
*/ +void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4), __FUNCTION__); +} + +/** Branch indirect if zero halfword. If rT.halfword[1]==0, jump to rA. */ +void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4), __FUNCTION__); +} + +/** Branch indirect if non-zero halfword. If rT.halfword[1]!=0, jump to rA. */ +void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4), __FUNCTION__); +} + + +/* Hint-for-branch instructions + */ +#if 0 +hbr; +hbra; +hbrr; +#endif + + +/* Control instructions + */ +#if 0 +stop; +EMIT_RR (spe_stopd, 0x140); +EMIT_ (spe_lnop, 0x001); +EMIT_ (spe_nop, 0x201); +sync; +EMIT_ (spe_dsync, 0x003); +EMIT_R (spe_mfspr, 0x00c); +EMIT_R (spe_mtspr, 0x10c); +#endif + + +/** + ** Helper / "macro" instructions. + ** Use somewhat verbose names as a reminder that these aren't native + ** SPE instructions. + **/ + + +void +spe_load_float(struct spe_function *p, unsigned rT, float x) +{ + if (x == 0.0f) { + spe_il(p, rT, 0x0); + } + else if (x == 0.5f) { + spe_ilhu(p, rT, 0x3f00); + } + else if (x == 1.0f) { + spe_ilhu(p, rT, 0x3f80); + } + else if (x == -1.0f) { + spe_ilhu(p, rT, 0xbf80); + } + else { + union { + float f; + unsigned u; + } bits; + bits.f = x; + spe_ilhu(p, rT, bits.u >> 16); + spe_iohl(p, rT, bits.u & 0xffff); + } +} + + +void +spe_load_int(struct spe_function *p, unsigned rT, int i) +{ + if (-32768 <= i && i <= 32767) { + spe_il(p, rT, i); + } + else { + spe_ilhu(p, rT, i >> 16); + if (i & 0xffff) + spe_iohl(p, rT, i & 0xffff); + } +} + +void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) +{ + /* If the whole value is in the lower 18 bits, use ila, which + * doesn't sign-extend. Otherwise, if the two halfwords of + * the constant are identical, use ilh. 
Otherwise, if every byte of + * the desired value is 0x00 or 0xff, we can use Form Select Mask for + * Bytes Immediate (fsmbi) to load the value in a single instruction. + * Otherwise, in the general case, we have to use ilhu followed by iohl. + */ + if ((ui & 0x0003ffff) == ui) { + spe_ila(p, rT, ui); + } + else if ((ui >> 16) == (ui & 0xffff)) { + spe_ilh(p, rT, ui & 0xffff); + } + else if ( + ((ui & 0x000000ff) == 0 || (ui & 0x000000ff) == 0x000000ff) && + ((ui & 0x0000ff00) == 0 || (ui & 0x0000ff00) == 0x0000ff00) && + ((ui & 0x00ff0000) == 0 || (ui & 0x00ff0000) == 0x00ff0000) && + ((ui & 0xff000000) == 0 || (ui & 0xff000000) == 0xff000000) + ) { + unsigned int mask = 0; + /* fsmbi duplicates each bit in the given mask eight times, + * using a 16-bit value to initialize a 16-byte quadword. + * Each 4-bit nybble of the mask corresponds to a full word + * of the result; look at the value and figure out the mask + * (replicated for each word in the quadword), and then + * form the "select mask" to get the value. + */ + if ((ui & 0x000000ff) == 0x000000ff) mask |= 0x1111; + if ((ui & 0x0000ff00) == 0x0000ff00) mask |= 0x2222; + if ((ui & 0x00ff0000) == 0x00ff0000) mask |= 0x4444; + if ((ui & 0xff000000) == 0xff000000) mask |= 0x8888; + spe_fsmbi(p, rT, mask); + } + else { + /* The general case: this usually uses two instructions, but + * may use only one if the low-order 16 bits of each word are 0. + */ + spe_ilhu(p, rT, ui >> 16); + if (ui & 0xffff) + spe_iohl(p, rT, ui & 0xffff); + } +} + +/** + * This function is constructed identically to spe_xor_uint() below. + * Changes to one should be made in the other. 
+ */ +void +spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +{ + /* If we can, emit a single instruction, either And Byte Immediate + * (which uses the same constant across each byte), And Halfword Immediate + * (which sign-extends a 10-bit immediate to 16 bits and uses that + * across each halfword), or And Word Immediate (which sign-extends + * a 10-bit immediate to 32 bits). + * + * Otherwise, we'll need to use a temporary register. + */ + unsigned int tmp; + + /* If the upper 23 bits are all 0s or all 1s, sign extension + * will work and we can use And Word Immediate + */ + tmp = ui & 0xfffffe00; + if (tmp == 0xfffffe00 || tmp == 0) { + spe_andi(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric along halfword boundaries and + * the upper 7 bits of each halfword are all 0s or 1s, we + * can use And Halfword Immediate + */ + tmp = ui & 0xfe00fe00; + if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) { + spe_andhi(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric in each byte, then we can use + * the And Byte Immediate instruction. + */ + tmp = ui & 0x000000ff; + if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) { + spe_andbi(p, rT, rA, tmp); + return; + } + + /* Otherwise, we'll have to use a temporary register. */ + unsigned int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_and(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); +} + + +/** + * This function is constructed identically to spe_and_uint() above. + * Changes to one should be made in the other. 
+ */ +void +spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +{ + /* If we can, emit a single instruction, either Exclusive Or Byte + * Immediate (which uses the same constant across each byte), Exclusive + * Or Halfword Immediate (which sign-extends a 10-bit immediate to + * 16 bits and uses that across each halfword), or Exclusive Or Word + * Immediate (which sign-extends a 10-bit immediate to 32 bits). + * + * Otherwise, we'll need to use a temporary register. + */ + unsigned int tmp; + + /* If the upper 23 bits are all 0s or all 1s, sign extension + * will work and we can use Exclusive Or Word Immediate + */ + tmp = ui & 0xfffffe00; + if (tmp == 0xfffffe00 || tmp == 0) { + spe_xori(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric along halfword boundaries and + * the upper 7 bits of each halfword are all 0s or 1s, we + * can use Exclusive Or Halfword Immediate + */ + tmp = ui & 0xfe00fe00; + if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) { + spe_xorhi(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric in each byte, then we can use + * the Exclusive Or Byte Immediate instruction. + */ + tmp = ui & 0x000000ff; + if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) { + spe_xorbi(p, rT, rA, tmp); + return; + } + + /* Otherwise, we'll have to use a temporary register. */ + unsigned int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_xor(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); +} + +void +spe_compare_equal_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +{ + /* If the comparison value is 9 bits or less, it fits inside a + * Compare Equal Word Immediate instruction. + */ + if ((ui & 0x000001ff) == ui) { + spe_ceqi(p, rT, rA, ui); + } + /* Otherwise, we're going to have to load a word first. 
*/ + else { + unsigned int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_ceq(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); + } +} + +void +spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +{ + /* If the comparison value is 10 bits or less, it fits inside a + * Compare Logical Greater Than Word Immediate instruction. + */ + if ((ui & 0x000003ff) == ui) { + spe_clgti(p, rT, rA, ui); + } + /* Otherwise, we're going to have to load a word first. */ + else { + unsigned int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_clgt(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); + } +} + +void +spe_splat(struct spe_function *p, unsigned rT, unsigned rA) +{ + /* Duplicate bytes 0, 1, 2, and 3 across the whole register */ + spe_ila(p, rT, 0x00010203); + spe_shufb(p, rT, rA, rA, rT); +} + + +void +spe_complement(struct spe_function *p, unsigned rT, unsigned rA) +{ + spe_nor(p, rT, rA, rA); +} + + +void +spe_move(struct spe_function *p, unsigned rT, unsigned rA) +{ + /* Use different instructions depending on the instruction address + * to take advantage of the dual pipelines. 
+ */ + if (p->num_inst & 1) + spe_shlqbyi(p, rT, rA, 0); /* odd pipe */ + else + spe_ori(p, rT, rA, 0); /* even pipe */ +} + + +void +spe_zero(struct spe_function *p, unsigned rT) +{ + spe_xor(p, rT, rT, rT); +} + + +void +spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word) +{ + assert(word >= 0); + assert(word <= 3); + + if (word == 0) { + int tmp1 = rT; + spe_ila(p, tmp1, 66051); + spe_shufb(p, rT, rA, rA, tmp1); + } + else { + /* XXX review this, we may not need the rotqbyi instruction */ + int tmp1 = rT; + int tmp2 = spe_allocate_available_register(p); + + spe_ila(p, tmp1, 66051); + spe_rotqbyi(p, tmp2, rA, 4 * word); + spe_shufb(p, rT, tmp2, tmp2, tmp1); + + spe_release_register(p, tmp2); + } +} + +/** + * For each 32-bit float element of rA and rB, choose the smaller of the + * two, compositing them into the rT register. + * + * The Float Compare Greater Than (fcgt) instruction will put 1s into + * compare_reg where rA > rB, and 0s where rA <= rB. + * + * Then the Select Bits (selb) instruction will take bits from rA where + * compare_reg is 0, and from rB where compare_reg is 1; i.e., from rA + * where rA <= rB and from rB where rB > rA, which is exactly the + * "min" operation. + * + * The compare_reg could in many cases be the same as rT, unless + * rT == rA || rt == rB. But since this is common in constructions + * like "x = min(x, a)", we always allocate a new register to be safe. + */ +void +spe_float_min(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) +{ + unsigned int compare_reg = spe_allocate_available_register(p); + spe_fcgt(p, compare_reg, rA, rB); + spe_selb(p, rT, rA, rB, compare_reg); + spe_release_register(p, compare_reg); +} + +/** + * For each 32-bit float element of rA and rB, choose the greater of the + * two, compositing them into the rT register. 
+ * + * The logic is similar to that of spe_float_min() above; the only + * difference is that the registers on spe_selb() have been reversed, + * so that the larger of the two is selected instead of the smaller. + */ +void +spe_float_max(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) +{ + unsigned int compare_reg = spe_allocate_available_register(p); + spe_fcgt(p, compare_reg, rA, rB); + spe_selb(p, rT, rB, rA, compare_reg); + spe_release_register(p, compare_reg); +} + +#endif /* GALLIUM_CELL */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h new file mode 100644 index 0000000000..d6a3c02f20 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -0,0 +1,433 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/** + * \file + * Real-time assembly generation interface for Cell B.E. SPEs. + * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf + * + * \author Ian Romanick <idr@us.ibm.com> + * \author Brian Paul + */ + +#ifndef RTASM_PPC_SPE_H +#define RTASM_PPC_SPE_H + +/** 4 bytes per instruction */ +#define SPE_INST_SIZE 4 + +/** number of general-purpose SIMD registers */ +#define SPE_NUM_REGS 128 + +/** Return Address register (aka $lr / Link Register) */ +#define SPE_REG_RA 0 + +/** Stack Pointer register (aka $sp) */ +#define SPE_REG_SP 1 + + +struct spe_function +{ + uint32_t *store; /**< instruction buffer */ + uint num_inst; + uint max_inst; + + /** + * The "set count" reflects the number of nested register sets + * are allowed. In the unlikely case that we exceed the set count, + * register allocation will start to be confused, which is critical + * enough that we check for it. + */ + unsigned char set_count; + + /** + * Flags for used and unused registers. Each byte corresponds to a + * register; a 0 in that byte means that the register is available. + * A value of 1 means that the register was allocated in the current + * register set. Any other value N means that the register was allocated + * N register sets ago. + * + * \sa + * spe_allocate_register, spe_allocate_available_register, + * spe_allocate_register_set, spe_release_register_set, spe_release_register, + */ + unsigned char regs[SPE_NUM_REGS]; + + boolean print; /**< print/dump instructions as they're emitted? 
*/ + int indent; /**< number of spaces to indent */ +}; + + +extern void spe_init_func(struct spe_function *p, unsigned code_size); +extern void spe_release_func(struct spe_function *p); +extern unsigned spe_code_size(const struct spe_function *p); + +extern int spe_allocate_available_register(struct spe_function *p); +extern int spe_allocate_register(struct spe_function *p, int reg); +extern void spe_release_register(struct spe_function *p, int reg); +extern void spe_allocate_register_set(struct spe_function *p); +extern void spe_release_register_set(struct spe_function *p); + +extern unsigned +spe_get_registers_used(const struct spe_function *p, ubyte used[]); + +extern void spe_print_code(struct spe_function *p, boolean enable); +extern void spe_indent(struct spe_function *p, int spaces); +extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); + + +#endif /* RTASM_PPC_SPE_H */ + +#ifndef EMIT_ +#define EMIT_(name, _op) \ + extern void _name (struct spe_function *p, unsigned rT) +#define EMIT_R(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA) +#define EMIT_RR(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + unsigned rB) +#define EMIT_RRR(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + unsigned rB, unsigned rC) +#define EMIT_RI7(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + int imm) +#define EMIT_RI8(_name, _op, bias) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + int imm) +#define EMIT_RI10(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + int imm) +#define EMIT_RI10s(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + int imm) +#define EMIT_RI16(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, int imm) +#define EMIT_RI18(_name, _op) \ + extern void _name 
(struct spe_function *p, unsigned rT, int imm) +#define EMIT_I16(_name, _op) \ + extern void _name (struct spe_function *p, int imm) +#define UNDEF_EMIT_MACROS +#endif /* EMIT_ */ + + +/* Memory load / store instructions + */ +EMIT_RR (spe_lqx, 0x1c4); +EMIT_RI16(spe_lqa, 0x061); +EMIT_RI16(spe_lqr, 0x067); +EMIT_RR (spe_stqx, 0x144); +EMIT_RI16(spe_stqa, 0x041); +EMIT_RI16(spe_stqr, 0x047); +EMIT_RI7 (spe_cbd, 0x1f4); +EMIT_RR (spe_cbx, 0x1d4); +EMIT_RI7 (spe_chd, 0x1f5); +EMIT_RI7 (spe_chx, 0x1d5); +EMIT_RI7 (spe_cwd, 0x1f6); +EMIT_RI7 (spe_cwx, 0x1d6); +EMIT_RI7 (spe_cdd, 0x1f7); +EMIT_RI7 (spe_cdx, 0x1d7); + + +/* Constant formation instructions + */ +EMIT_RI16(spe_ilh, 0x083); +EMIT_RI16(spe_ilhu, 0x082); +EMIT_RI16(spe_il, 0x081); +EMIT_RI18(spe_ila, 0x021); +EMIT_RI16(spe_iohl, 0x0c1); +EMIT_RI16(spe_fsmbi, 0x065); + + + +/* Integer and logical instructions + */ +EMIT_RR (spe_ah, 0x0c8); +EMIT_RI10(spe_ahi, 0x01d); +EMIT_RR (spe_a, 0x0c0); +EMIT_RI10s(spe_ai, 0x01c); +EMIT_RR (spe_sfh, 0x048); +EMIT_RI10(spe_sfhi, 0x00d); +EMIT_RR (spe_sf, 0x040); +EMIT_RI10(spe_sfi, 0x00c); +EMIT_RR (spe_addx, 0x340); +EMIT_RR (spe_cg, 0x0c2); +EMIT_RR (spe_cgx, 0x342); +EMIT_RR (spe_sfx, 0x341); +EMIT_RR (spe_bg, 0x042); +EMIT_RR (spe_bgx, 0x343); +EMIT_RR (spe_mpy, 0x3c4); +EMIT_RR (spe_mpyu, 0x3cc); +EMIT_RI10(spe_mpyi, 0x074); +EMIT_RI10(spe_mpyui, 0x075); +EMIT_RRR (spe_mpya, 0x00c); +EMIT_RR (spe_mpyh, 0x3c5); +EMIT_RR (spe_mpys, 0x3c7); +EMIT_RR (spe_mpyhh, 0x3c6); +EMIT_RR (spe_mpyhha, 0x346); +EMIT_RR (spe_mpyhhu, 0x3ce); +EMIT_RR (spe_mpyhhau, 0x34e); +EMIT_R (spe_clz, 0x2a5); +EMIT_R (spe_cntb, 0x2b4); +EMIT_R (spe_fsmb, 0x1b6); +EMIT_R (spe_fsmh, 0x1b5); +EMIT_R (spe_fsm, 0x1b4); +EMIT_R (spe_gbb, 0x1b2); +EMIT_R (spe_gbh, 0x1b1); +EMIT_R (spe_gb, 0x1b0); +EMIT_RR (spe_avgb, 0x0d3); +EMIT_RR (spe_absdb, 0x053); +EMIT_RR (spe_sumb, 0x253); +EMIT_R (spe_xsbh, 0x2b6); +EMIT_R (spe_xshw, 0x2ae); +EMIT_R (spe_xswd, 0x2a6); +EMIT_RR (spe_and, 0x0c1); +EMIT_RR 
(spe_andc, 0x2c1); +EMIT_RI10s(spe_andbi, 0x016); +EMIT_RI10s(spe_andhi, 0x015); +EMIT_RI10s(spe_andi, 0x014); +EMIT_RR (spe_or, 0x041); +EMIT_RR (spe_orc, 0x2c9); +EMIT_RI10s(spe_orbi, 0x006); +EMIT_RI10s(spe_orhi, 0x005); +EMIT_RI10s(spe_ori, 0x004); +EMIT_R (spe_orx, 0x1f0); +EMIT_RR (spe_xor, 0x241); +EMIT_RI10s(spe_xorbi, 0x026); +EMIT_RI10s(spe_xorhi, 0x025); +EMIT_RI10s(spe_xori, 0x024); +EMIT_RR (spe_nand, 0x0c9); +EMIT_RR (spe_nor, 0x049); +EMIT_RR (spe_eqv, 0x249); +EMIT_RRR (spe_selb, 0x008); +EMIT_RRR (spe_shufb, 0x00b); + + +/* Shift and rotate instructions + */ +EMIT_RR (spe_shlh, 0x05f); +EMIT_RI7 (spe_shlhi, 0x07f); +EMIT_RR (spe_shl, 0x05b); +EMIT_RI7 (spe_shli, 0x07b); +EMIT_RR (spe_shlqbi, 0x1db); +EMIT_RI7 (spe_shlqbii, 0x1fb); +EMIT_RR (spe_shlqby, 0x1df); +EMIT_RI7 (spe_shlqbyi, 0x1ff); +EMIT_RR (spe_shlqbybi, 0x1cf); +EMIT_RR (spe_roth, 0x05c); +EMIT_RI7 (spe_rothi, 0x07c); +EMIT_RR (spe_rot, 0x058); +EMIT_RI7 (spe_roti, 0x078); +EMIT_RR (spe_rotqby, 0x1dc); +EMIT_RI7 (spe_rotqbyi, 0x1fc); +EMIT_RR (spe_rotqbybi, 0x1cc); +EMIT_RR (spe_rotqbi, 0x1d8); +EMIT_RI7 (spe_rotqbii, 0x1f8); +EMIT_RR (spe_rothm, 0x05d); +EMIT_RI7 (spe_rothmi, 0x07d); +EMIT_RR (spe_rotm, 0x059); +EMIT_RI7 (spe_rotmi, 0x079); +EMIT_RR (spe_rotqmby, 0x1dd); +EMIT_RI7 (spe_rotqmbyi, 0x1fd); +EMIT_RR (spe_rotqmbybi, 0x1cd); +EMIT_RR (spe_rotqmbi, 0x1c9); +EMIT_RI7 (spe_rotqmbii, 0x1f9); +EMIT_RR (spe_rotmah, 0x05e); +EMIT_RI7 (spe_rotmahi, 0x07e); +EMIT_RR (spe_rotma, 0x05a); +EMIT_RI7 (spe_rotmai, 0x07a); + + +/* Compare, branch, and halt instructions + */ +EMIT_RR (spe_heq, 0x3d8); +EMIT_RI10(spe_heqi, 0x07f); +EMIT_RR (spe_hgt, 0x258); +EMIT_RI10(spe_hgti, 0x04f); +EMIT_RR (spe_hlgt, 0x2d8); +EMIT_RI10(spe_hlgti, 0x05f); +EMIT_RR (spe_ceqb, 0x3d0); +EMIT_RI10(spe_ceqbi, 0x07e); +EMIT_RR (spe_ceqh, 0x3c8); +EMIT_RI10(spe_ceqhi, 0x07d); +EMIT_RR (spe_ceq, 0x3c0); +EMIT_RI10(spe_ceqi, 0x07c); +EMIT_RR (spe_cgtb, 0x250); +EMIT_RI10(spe_cgtbi, 0x04e); +EMIT_RR (spe_cgth, 
0x248); +EMIT_RI10(spe_cgthi, 0x04d); +EMIT_RR (spe_cgt, 0x240); +EMIT_RI10(spe_cgti, 0x04c); +EMIT_RR (spe_clgtb, 0x2d0); +EMIT_RI10(spe_clgtbi, 0x05e); +EMIT_RR (spe_clgth, 0x2c8); +EMIT_RI10(spe_clgthi, 0x05d); +EMIT_RR (spe_clgt, 0x2c0); +EMIT_RI10(spe_clgti, 0x05c); +EMIT_I16 (spe_br, 0x064); +EMIT_I16 (spe_bra, 0x060); +EMIT_RI16(spe_brsl, 0x066); +EMIT_RI16(spe_brasl, 0x062); +EMIT_RI16(spe_brnz, 0x042); +EMIT_RI16(spe_brz, 0x040); +EMIT_RI16(spe_brhnz, 0x046); +EMIT_RI16(spe_brhz, 0x044); + +extern void +spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset); + +extern void +spe_stqd(struct spe_function *p, unsigned rT, unsigned rA, int offset); + +extern void spe_bi(struct spe_function *p, unsigned rA, int d, int e); +extern void spe_iret(struct spe_function *p, unsigned rA, int d, int e); +extern void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); + + +/** Load/splat immediate float into rT. */ +extern void +spe_load_float(struct spe_function *p, unsigned rT, float x); + +/** Load/splat immediate int into rT. */ +extern void +spe_load_int(struct spe_function *p, unsigned rT, int i); + +/** Load/splat immediate unsigned int into rT. */ +extern void +spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui); + +/** And immediate value into rT. */ +extern void +spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); + +/** Xor immediate value into rT. 
*/ +extern void +spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); + +/** Compare equal with immediate value. */ +extern void +spe_compare_equal_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); + +/** Compare greater with immediate value. */ +extern void +spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); + +/** Replicate word 0 of rA across rT. */ +extern void +spe_splat(struct spe_function *p, unsigned rT, unsigned rA); + +/** rT = complement_all_bits(rA). */ +extern void +spe_complement(struct spe_function *p, unsigned rT, unsigned rA); + +/** rT = rA. */ +extern void +spe_move(struct spe_function *p, unsigned rT, unsigned rA); + +/** rT = {0,0,0,0}. */ +extern void +spe_zero(struct spe_function *p, unsigned rT); + +/** rT = splat(rA, word) */ +extern void +spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word); + +/** rT = float min(rA, rB) */ +extern void +spe_float_min(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB); + +/** rT = float max(rA, rB) */ +extern void +spe_float_max(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB); + + +/* Floating-point instructions + */ +EMIT_RR (spe_fa, 0x2c4); +EMIT_RR (spe_dfa, 0x2cc); +EMIT_RR (spe_fs, 0x2c5); +EMIT_RR (spe_dfs, 0x2cd); +EMIT_RR (spe_fm, 0x2c6); +EMIT_RR (spe_dfm, 0x2ce); +EMIT_RRR (spe_fma, 0x00e); +EMIT_RR (spe_dfma, 0x35c); +EMIT_RRR (spe_fnms, 0x00d); +EMIT_RR (spe_dfnms, 0x35e); +EMIT_RRR (spe_fms, 0x00f); +EMIT_RR (spe_dfms, 0x35d); +EMIT_RR (spe_dfnma, 0x35f); +EMIT_R (spe_frest, 0x1b8); +EMIT_R (spe_frsqest, 0x1b9); +EMIT_RR (spe_fi, 0x3d4); +EMIT_RI8 (spe_csflt, 0x1da, 155); +EMIT_RI8 (spe_cflts, 0x1d8, 173); +EMIT_RI8 (spe_cuflt, 0x1db, 155); +EMIT_RI8 (spe_cfltu, 0x1d9, 173); +EMIT_R (spe_frds, 0x3b9); +EMIT_R (spe_fesd, 0x3b8); +EMIT_RR (spe_dfceq, 0x3c3); +EMIT_RR (spe_dfcmeq, 0x3cb); +EMIT_RR (spe_dfcgt, 0x2c3); +EMIT_RR (spe_dfcmgt, 0x2cb); +EMIT_RI7 
(spe_dftsv, 0x3bf); +EMIT_RR (spe_fceq, 0x3c2); +EMIT_RR (spe_fcmeq, 0x3ca); +EMIT_RR (spe_fcgt, 0x2c2); +EMIT_RR (spe_fcmgt, 0x2ca); +EMIT_R (spe_fscrwr, 0x3ba); +EMIT_ (spe_fscrrd, 0x398); + + +/* Channel instructions + */ +EMIT_R (spe_rdch, 0x00d); +EMIT_R (spe_rdchcnt, 0x00f); +EMIT_R (spe_wrch, 0x10d); + + +#ifdef UNDEF_EMIT_MACROS +#undef EMIT_ +#undef EMIT_R +#undef EMIT_RR +#undef EMIT_RRR +#undef EMIT_RI7 +#undef EMIT_RI8 +#undef EMIT_RI10 +#undef EMIT_RI10s +#undef EMIT_RI16 +#undef EMIT_RI18 +#undef EMIT_I16 +#undef UNDEF_EMIT_MACROS +#endif /* EMIT_ */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c new file mode 100644 index 0000000000..99ee74cf14 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -0,0 +1,1748 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_X86) + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "util/u_pointer.h" + +#include "rtasm_execmem.h" +#include "rtasm_x86sse.h" + +#define DISASSEM 0 +#define X86_TWOB 0x0f + + +#define DUMP_SSE 0 + + +void x86_print_reg( struct x86_reg reg ) +{ + if (reg.mod != mod_REG) + debug_printf( "[" ); + + switch( reg.file ) { + case file_REG32: + switch( reg.idx ) { + case reg_AX: debug_printf( "EAX" ); break; + case reg_CX: debug_printf( "ECX" ); break; + case reg_DX: debug_printf( "EDX" ); break; + case reg_BX: debug_printf( "EBX" ); break; + case reg_SP: debug_printf( "ESP" ); break; + case reg_BP: debug_printf( "EBP" ); break; + case reg_SI: debug_printf( "ESI" ); break; + case reg_DI: debug_printf( "EDI" ); break; + } + break; + case file_MMX: + debug_printf( "MMX%u", reg.idx ); + break; + case file_XMM: + debug_printf( "XMM%u", reg.idx ); + break; + case file_x87: + debug_printf( "fp%u", reg.idx ); + break; + } + + if (reg.mod == mod_DISP8 || + reg.mod == mod_DISP32) + debug_printf("+%d", reg.disp); + + if (reg.mod != mod_REG) + debug_printf( "]" ); +} + +#if DUMP_SSE + +#define DUMP_START() debug_printf( "\n" ) +#define DUMP_END() debug_printf( "\n" ) + +#define DUMP() do { \ + const char *foo = __FUNCTION__; \ + while (*foo && *foo != '_') \ + foo++; \ + if (*foo) \ + foo++; \ + debug_printf( "\n% 4x% 15s ", p->csr - p->store, foo ); \ +} while (0) + +#define DUMP_I( I ) do { \ + DUMP(); \ + debug_printf( "%u", I ); \ +} while( 0 ) + +#define DUMP_R( R0 ) do { \ + DUMP(); \ + x86_print_reg( R0 ); \ +} while( 0 ) + +#define DUMP_RR( R0, R1 ) do { \ + DUMP(); \ + 
x86_print_reg( R0 ); \ + debug_printf( ", " ); \ + x86_print_reg( R1 ); \ +} while( 0 ) + +#define DUMP_RI( R0, I ) do { \ + DUMP(); \ + x86_print_reg( R0 ); \ + debug_printf( ", %u", I ); \ +} while( 0 ) + +#define DUMP_RRI( R0, R1, I ) do { \ + DUMP(); \ + x86_print_reg( R0 ); \ + debug_printf( ", " ); \ + x86_print_reg( R1 ); \ + debug_printf( ", %u", I ); \ +} while( 0 ) + +#else + +#define DUMP_START() +#define DUMP_END() +#define DUMP( ) +#define DUMP_I( I ) +#define DUMP_R( R0 ) +#define DUMP_RR( R0, R1 ) +#define DUMP_RI( R0, I ) +#define DUMP_RRI( R0, R1, I ) + +#endif + + +static void do_realloc( struct x86_function *p ) +{ + if (p->store == p->error_overflow) { + p->csr = p->store; + } + else if (p->size == 0) { + p->size = 1024; + p->store = rtasm_exec_malloc(p->size); + p->csr = p->store; + } + else { + uintptr_t used = pointer_to_uintptr( p->csr ) - pointer_to_uintptr( p->store ); + unsigned char *tmp = p->store; + p->size *= 2; + p->store = rtasm_exec_malloc(p->size); + + if (p->store) { + memcpy(p->store, tmp, used); + p->csr = p->store + used; + } + else { + p->csr = p->store; + } + + rtasm_exec_free(tmp); + } + + if (p->store == NULL) { + p->store = p->csr = p->error_overflow; + p->size = sizeof(p->error_overflow); + } +} + +/* Emit bytes to the instruction stream: + */ +static unsigned char *reserve( struct x86_function *p, int bytes ) +{ + if (p->csr + bytes - p->store > (int) p->size) + do_realloc(p); + + { + unsigned char *csr = p->csr; + p->csr += bytes; + return csr; + } +} + + + +static void emit_1b( struct x86_function *p, char b0 ) +{ + char *csr = (char *)reserve(p, 1); + *csr = b0; +} + +static void emit_1i( struct x86_function *p, int i0 ) +{ + int *icsr = (int *)reserve(p, sizeof(i0)); + *icsr = i0; +} + +static void emit_1ub( struct x86_function *p, unsigned char b0 ) +{ + unsigned char *csr = reserve(p, 1); + *csr++ = b0; +} + +static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 ) +{ + unsigned char *csr 
= reserve(p, 2); + *csr++ = b0; + *csr++ = b1; +} + +static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 ) +{ + unsigned char *csr = reserve(p, 3); + *csr++ = b0; + *csr++ = b1; + *csr++ = b2; +} + + +/* Build a modRM byte + possible displacement. No treatment of SIB + * indexing. BZZT - no way to encode an absolute address. + * + * This is the "/r" field in the x86 manuals... + */ +static void emit_modrm( struct x86_function *p, + struct x86_reg reg, + struct x86_reg regmem ) +{ + unsigned char val = 0; + + assert(reg.mod == mod_REG); + + val |= regmem.mod << 6; /* mod field */ + val |= reg.idx << 3; /* reg field */ + val |= regmem.idx; /* r/m field */ + + emit_1ub(p, val); + + /* Oh-oh we've stumbled into the SIB thing. + */ + if (regmem.file == file_REG32 && + regmem.idx == reg_SP && + regmem.mod != mod_REG) { + emit_1ub(p, 0x24); /* simplistic! */ + } + + switch (regmem.mod) { + case mod_REG: + case mod_INDIRECT: + break; + case mod_DISP8: + emit_1b(p, (char) regmem.disp); + break; + case mod_DISP32: + emit_1i(p, regmem.disp); + break; + default: + assert(0); + break; + } +} + +/* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes. + */ +static void emit_modrm_noreg( struct x86_function *p, + unsigned op, + struct x86_reg regmem ) +{ + struct x86_reg dummy = x86_make_reg(file_REG32, op); + emit_modrm(p, dummy, regmem); +} + +/* Many x86 instructions have two opcodes to cope with the situations + * where the destination is a register or memory reference + * respectively. This function selects the correct opcode based on + * the arguments presented. 
+ */ +static void emit_op_modrm( struct x86_function *p, + unsigned char op_dst_is_reg, + unsigned char op_dst_is_mem, + struct x86_reg dst, + struct x86_reg src ) +{ + switch (dst.mod) { + case mod_REG: + emit_1ub(p, op_dst_is_reg); + emit_modrm(p, dst, src); + break; + case mod_INDIRECT: + case mod_DISP32: + case mod_DISP8: + assert(src.mod == mod_REG); + emit_1ub(p, op_dst_is_mem); + emit_modrm(p, src, dst); + break; + default: + assert(0); + break; + } +} + + + + + + + +/* Create and manipulate registers and regmem values: + */ +struct x86_reg x86_make_reg( enum x86_reg_file file, + enum x86_reg_name idx ) +{ + struct x86_reg reg; + + reg.file = file; + reg.idx = idx; + reg.mod = mod_REG; + reg.disp = 0; + + return reg; +} + +struct x86_reg x86_make_disp( struct x86_reg reg, + int disp ) +{ + assert(reg.file == file_REG32); + + if (reg.mod == mod_REG) + reg.disp = disp; + else + reg.disp += disp; + + if (reg.disp == 0 && reg.idx != reg_BP) + reg.mod = mod_INDIRECT; + else if (reg.disp <= 127 && reg.disp >= -128) + reg.mod = mod_DISP8; + else + reg.mod = mod_DISP32; + + return reg; +} + +struct x86_reg x86_deref( struct x86_reg reg ) +{ + return x86_make_disp(reg, 0); +} + +struct x86_reg x86_get_base_reg( struct x86_reg reg ) +{ + return x86_make_reg( reg.file, reg.idx ); +} + +int x86_get_label( struct x86_function *p ) +{ + return p->csr - p->store; +} + + + +/*********************************************************************** + * x86 instructions + */ + + +void x86_jcc( struct x86_function *p, + enum x86_cc cc, + int label ) +{ + int offset = label - (x86_get_label(p) + 2); + DUMP_I(cc); + + if (offset < 0) { + /*assert(p->csr - p->store > -offset);*/ + if (p->csr - p->store <= -offset) { + /* probably out of memory (using the error_overflow buffer) */ + return; + } + } + + if (offset <= 127 && offset >= -128) { + emit_1ub(p, 0x70 + cc); + emit_1b(p, (char) offset); + } + else { + offset = label - (x86_get_label(p) + 6); + emit_2ub(p, 0x0f, 0x80 + cc); + 
emit_1i(p, offset); + } +} + +/* Always use a 32bit offset for forward jumps: + */ +int x86_jcc_forward( struct x86_function *p, + enum x86_cc cc ) +{ + DUMP_I(cc); + emit_2ub(p, 0x0f, 0x80 + cc); + emit_1i(p, 0); + return x86_get_label(p); +} + +int x86_jmp_forward( struct x86_function *p) +{ + DUMP(); + emit_1ub(p, 0xe9); + emit_1i(p, 0); + return x86_get_label(p); +} + +int x86_call_forward( struct x86_function *p) +{ + DUMP(); + + emit_1ub(p, 0xe8); + emit_1i(p, 0); + return x86_get_label(p); +} + +/* Fixup offset from forward jump: + */ +void x86_fixup_fwd_jump( struct x86_function *p, + int fixup ) +{ + *(int *)(p->store + fixup - 4) = x86_get_label(p) - fixup; +} + +void x86_jmp( struct x86_function *p, int label) +{ + DUMP_I( label ); + emit_1ub(p, 0xe9); + emit_1i(p, label - x86_get_label(p) - 4); +} + +void x86_call( struct x86_function *p, struct x86_reg reg) +{ + DUMP_R( reg ); + emit_1ub(p, 0xff); + emit_modrm_noreg(p, 2, reg); +} + + +void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + assert(dst.file == file_REG32); + assert(dst.mod == mod_REG); + emit_1ub(p, 0xb8 + dst.idx); + emit_1i(p, imm); +} + +/** + * Immediate group 1 instructions. 
+ */ +static INLINE void +x86_group1_imm( struct x86_function *p, + unsigned op, struct x86_reg dst, int imm ) +{ + assert(dst.file == file_REG32); + assert(dst.mod == mod_REG); + if(-0x80 <= imm && imm < 0x80) { + emit_1ub(p, 0x83); + emit_modrm_noreg(p, op, dst); + emit_1b(p, (char)imm); + } + else { + emit_1ub(p, 0x81); + emit_modrm_noreg(p, op, dst); + emit_1i(p, imm); + } +} + +void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 0, dst, imm); +} + +void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 1, dst, imm); +} + +void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 4, dst, imm); +} + +void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 5, dst, imm); +} + +void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 6, dst, imm); +} + +void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 7, dst, imm); +} + + +void x86_push( struct x86_function *p, + struct x86_reg reg ) +{ + DUMP_R( reg ); + if (reg.mod == mod_REG) + emit_1ub(p, 0x50 + reg.idx); + else + { + emit_1ub(p, 0xff); + emit_modrm_noreg(p, 6, reg); + } + + + p->stack_offset += 4; +} + +void x86_push_imm32( struct x86_function *p, + int imm32 ) +{ + DUMP_I( imm32 ); + emit_1ub(p, 0x68); + emit_1i(p, imm32); + + p->stack_offset += 4; +} + + +void x86_pop( struct x86_function *p, + struct x86_reg reg ) +{ + DUMP_R( reg ); + assert(reg.mod == mod_REG); + emit_1ub(p, 0x58 + reg.idx); + p->stack_offset -= 4; +} + +void x86_inc( struct x86_function *p, + struct x86_reg reg ) +{ + DUMP_R( reg ); + assert(reg.mod == mod_REG); + emit_1ub(p, 0x40 + reg.idx); +} + +void x86_dec( struct x86_function *p, + struct x86_reg reg ) +{ + 
DUMP_R( reg ); + assert(reg.mod == mod_REG); + emit_1ub(p, 0x48 + reg.idx); +} + +void x86_ret( struct x86_function *p ) +{ + DUMP(); + assert(p->stack_offset == 0); + emit_1ub(p, 0xc3); +} + +void x86_retw( struct x86_function *p, unsigned short imm ) +{ + DUMP(); + emit_3ub(p, 0xc2, imm & 0xff, (imm >> 8) & 0xff); +} + +void x86_sahf( struct x86_function *p ) +{ + DUMP(); + emit_1ub(p, 0x9e); +} + +void x86_mov( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_op_modrm( p, 0x8b, 0x89, dst, src ); +} + +void x86_xor( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_op_modrm( p, 0x33, 0x31, dst, src ); +} + +void x86_cmp( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_op_modrm( p, 0x3b, 0x39, dst, src ); +} + +void x86_lea( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_1ub(p, 0x8d); + emit_modrm( p, dst, src ); +} + +void x86_test( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_1ub(p, 0x85); + emit_modrm( p, dst, src ); +} + +void x86_add( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_op_modrm(p, 0x03, 0x01, dst, src ); +} + +/* Calculate EAX * src, results in EDX:EAX. 
+ */ +void x86_mul( struct x86_function *p, + struct x86_reg src ) +{ + DUMP_R( src ); + emit_1ub(p, 0xf7); + emit_modrm_noreg(p, 4, src ); +} + + +void x86_imul( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0xAF); + emit_modrm(p, dst, src); +} + + +void x86_sub( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_op_modrm(p, 0x2b, 0x29, dst, src ); +} + +void x86_or( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_op_modrm( p, 0x0b, 0x09, dst, src ); +} + +void x86_and( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_op_modrm( p, 0x23, 0x21, dst, src ); +} + + + +/*********************************************************************** + * SSE instructions + */ + +void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 0, ptr); +} + +void sse_prefetch0( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 1, ptr); +} + +void sse_prefetch1( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 2, ptr); +} + +void sse_movntps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src) +{ + DUMP_RR( dst, src ); + + assert(dst.mod != mod_REG); + assert(src.mod == mod_REG); + emit_2ub(p, 0x0f, 0x2b); + emit_modrm(p, src, dst); +} + + + + +void sse_movss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, 0xF3, X86_TWOB); + emit_op_modrm( p, 0x10, 0x11, dst, src ); +} + +void sse_movaps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_1ub(p, 
X86_TWOB); + emit_op_modrm( p, 0x28, 0x29, dst, src ); +} + +void sse_movups( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x10, 0x11, dst, src ); +} + +void sse_movhps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + assert(dst.mod != mod_REG || src.mod != mod_REG); + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ +} + +void sse_movlps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + assert(dst.mod != mod_REG || src.mod != mod_REG); + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ +} + +void sse_maxps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x5F); + emit_modrm( p, dst, src ); +} + +void sse_maxss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0xF3, X86_TWOB, 0x5F); + emit_modrm( p, dst, src ); +} + +void sse_divss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0xF3, X86_TWOB, 0x5E); + emit_modrm( p, dst, src ); +} + +void sse_minps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x5D); + emit_modrm( p, dst, src ); +} + +void sse_subps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x5C); + emit_modrm( p, dst, src ); +} + +void sse_mulps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x59); + emit_modrm( p, dst, src ); +} + +void sse_mulss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0xF3, X86_TWOB, 0x59); + emit_modrm( p, 
dst, src ); +} + +void sse_addps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x58); + emit_modrm( p, dst, src ); +} + +void sse_addss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0xF3, X86_TWOB, 0x58); + emit_modrm( p, dst, src ); +} + +void sse_andnps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x55); + emit_modrm( p, dst, src ); +} + +void sse_andps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x54); + emit_modrm( p, dst, src ); +} + +void sse_rsqrtps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x52); + emit_modrm( p, dst, src ); +} + +void sse_rsqrtss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0xF3, X86_TWOB, 0x52); + emit_modrm( p, dst, src ); + +} + +void sse_movhlps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + assert(dst.mod == mod_REG && src.mod == mod_REG); + emit_2ub(p, X86_TWOB, 0x12); + emit_modrm( p, dst, src ); +} + +void sse_movlhps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + assert(dst.mod == mod_REG && src.mod == mod_REG); + emit_2ub(p, X86_TWOB, 0x16); + emit_modrm( p, dst, src ); +} + +void sse_orps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x56); + emit_modrm( p, dst, src ); +} + +void sse_xorps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x57); + emit_modrm( p, dst, src ); +} + +void sse_cvtps2pi( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src 
) +{ + DUMP_RR( dst, src ); + assert(dst.file == file_MMX && + (src.file == file_XMM || src.mod != mod_REG)); + + p->need_emms = 1; + + emit_2ub(p, X86_TWOB, 0x2d); + emit_modrm( p, dst, src ); +} + +void sse2_cvtdq2ps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x5b); + emit_modrm( p, dst, src ); +} + + +/* Shufps can also be used to implement a reduced swizzle when dest == + * arg0. + */ +void sse_shufps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src, + unsigned char shuf) +{ + DUMP_RRI( dst, src, shuf ); + emit_2ub(p, X86_TWOB, 0xC6); + emit_modrm(p, dst, src); + emit_1ub(p, shuf); +} + +void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub( p, X86_TWOB, 0x15 ); + emit_modrm( p, dst, src ); +} + +void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub( p, X86_TWOB, 0x14 ); + emit_modrm( p, dst, src ); +} + +void sse_cmpps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src, + enum sse_cc cc) +{ + DUMP_RRI( dst, src, cc ); + emit_2ub(p, X86_TWOB, 0xC2); + emit_modrm(p, dst, src); + emit_1ub(p, cc); +} + +void sse_pmovmskb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0x66, X86_TWOB, 0xD7); + emit_modrm(p, dst, src); +} + +/*********************************************************************** + * SSE2 instructions + */ + +/** + * Perform a reduced swizzle: + */ +void sse2_pshufd( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src, + unsigned char shuf) +{ + DUMP_RRI( dst, src, shuf ); + emit_3ub(p, 0x66, X86_TWOB, 0x70); + emit_modrm(p, dst, src); + emit_1ub(p, shuf); +} + +void sse2_cvttps2dq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); + emit_modrm( p, 
dst, src ); +} + +void sse2_cvtps2dq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0x66, X86_TWOB, 0x5B); + emit_modrm( p, dst, src ); +} + +void sse2_packssdw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0x66, X86_TWOB, 0x6B); + emit_modrm( p, dst, src ); +} + +void sse2_packsswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0x66, X86_TWOB, 0x63); + emit_modrm( p, dst, src ); +} + +void sse2_packuswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0x66, X86_TWOB, 0x67); + emit_modrm( p, dst, src ); +} + +void sse2_punpcklbw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0x66, X86_TWOB, 0x60); + emit_modrm( p, dst, src ); +} + + +void sse2_rcpps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, X86_TWOB, 0x53); + emit_modrm( p, dst, src ); +} + +void sse2_rcpss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_3ub(p, 0xF3, X86_TWOB, 0x53); + emit_modrm( p, dst, src ); +} + +void sse2_movd( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub(p, 0x66, X86_TWOB); + emit_op_modrm( p, 0x6e, 0x7e, dst, src ); +} + + + + +/*********************************************************************** + * x87 instructions + */ +static void note_x87_pop( struct x86_function *p ) +{ + p->x87_stack--; + assert(p->x87_stack >= 0); +} + +static void note_x87_push( struct x86_function *p ) +{ + p->x87_stack++; + assert(p->x87_stack <= 7); +} + +void x87_assert_stack_empty( struct x86_function *p ) +{ + assert (p->x87_stack == 0); +} + + +void x87_fist( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( 
dst ); + emit_1ub(p, 0xdb); + emit_modrm_noreg(p, 2, dst); +} + +void x87_fistp( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( dst ); + emit_1ub(p, 0xdb); + emit_modrm_noreg(p, 3, dst); + note_x87_pop(p); +} + +void x87_fild( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + emit_1ub(p, 0xdf); + emit_modrm_noreg(p, 0, arg); + note_x87_push(p); +} + +void x87_fldz( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xee); + note_x87_push(p); +} + + +void x87_fldcw( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_REG32); + assert(arg.mod != mod_REG); + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 5, arg); +} + +void x87_fld1( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xe8); + note_x87_push(p); +} + +void x87_fldl2e( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xea); + note_x87_push(p); +} + +void x87_fldln2( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xed); + note_x87_push(p); +} + +void x87_fwait( struct x86_function *p ) +{ + DUMP(); + emit_1ub(p, 0x9b); +} + +void x87_fnclex( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xdb, 0xe2); +} + +void x87_fclex( struct x86_function *p ) +{ + x87_fwait(p); + x87_fnclex(p); +} + +void x87_fcmovb( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xda, 0xc0+arg.idx); +} + +void x87_fcmove( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xda, 0xc8+arg.idx); +} + +void x87_fcmovbe( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xda, 0xd0+arg.idx); +} + +void x87_fcmovnb( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xdb, 0xc0+arg.idx); +} + +void x87_fcmovne( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == 
file_x87); + emit_2ub(p, 0xdb, 0xc8+arg.idx); +} + +void x87_fcmovnbe( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xdb, 0xd0+arg.idx); +} + + + +static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, + unsigned char dst0ub0, + unsigned char dst0ub1, + unsigned char arg0ub0, + unsigned char arg0ub1, + unsigned char argmem_noreg) +{ + assert(dst.file == file_x87); + + if (arg.file == file_x87) { + if (dst.idx == 0) + emit_2ub(p, dst0ub0, dst0ub1+arg.idx); + else if (arg.idx == 0) + emit_2ub(p, arg0ub0, arg0ub1+arg.idx); + else + assert(0); + } + else if (dst.idx == 0) { + assert(arg.file == file_REG32); + emit_1ub(p, 0xd8); + emit_modrm_noreg(p, argmem_noreg, arg); + } + else + assert(0); +} + +void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + x87_arith_op(p, dst, src, + 0xd8, 0xc8, + 0xdc, 0xc8, + 4); +} + +void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + x87_arith_op(p, dst, src, + 0xd8, 0xe0, + 0xdc, 0xe8, + 4); +} + +void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + x87_arith_op(p, dst, src, + 0xd8, 0xe8, + 0xdc, 0xe0, + 5); +} + +void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + x87_arith_op(p, dst, src, + 0xd8, 0xc0, + 0xdc, 0xc0, + 0); +} + +void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + x87_arith_op(p, dst, src, + 0xd8, 0xf0, + 0xdc, 0xf8, + 6); +} + +void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + x87_arith_op(p, dst, src, + 0xd8, 0xf8, + 0xdc, 0xf0, + 7); +} + +void x87_fmulp( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( dst ); + assert(dst.file == file_x87); + assert(dst.idx >= 1); + 
emit_2ub(p, 0xde, 0xc8+dst.idx); + note_x87_pop(p); +} + +void x87_fsubp( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( dst ); + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xe8+dst.idx); + note_x87_pop(p); +} + +void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( dst ); + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xe0+dst.idx); + note_x87_pop(p); +} + +void x87_faddp( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( dst ); + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xc0+dst.idx); + note_x87_pop(p); +} + +void x87_fdivp( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( dst ); + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xf8+dst.idx); + note_x87_pop(p); +} + +void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( dst ); + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xf0+dst.idx); + note_x87_pop(p); +} + +void x87_ftst( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xe4); +} + +void x87_fucom( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xdd, 0xe0+arg.idx); +} + +void x87_fucomp( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xdd, 0xe8+arg.idx); + note_x87_pop(p); +} + +void x87_fucompp( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xda, 0xe9); + note_x87_pop(p); /* pop twice */ + note_x87_pop(p); /* pop twice */ +} + +void x87_fxch( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xd9, 0xc8+arg.idx); +} + +void x87_fabs( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xe1); +} + +void x87_fchs( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xe0); +} + +void x87_fcos( struct x86_function *p ) +{ + DUMP(); + 
emit_2ub(p, 0xd9, 0xff); +} + + +void x87_fprndint( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xfc); +} + +void x87_fscale( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xfd); +} + +void x87_fsin( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xfe); +} + +void x87_fsincos( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xfb); +} + +void x87_fsqrt( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xfa); +} + +void x87_fxtract( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xf4); +} + +/* st0 = (2^st0)-1 + * + * Restrictions: -1.0 <= st0 <= 1.0 + */ +void x87_f2xm1( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xf0); +} + +/* st1 = st1 * log2(st0); + * pop_stack; + */ +void x87_fyl2x( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xf1); + note_x87_pop(p); +} + +/* st1 = st1 * log2(st0 + 1.0); + * pop_stack; + * + * A fast operation, with restrictions: -.29 < st0 < .29 + */ +void x87_fyl2xp1( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xf9); + note_x87_pop(p); +} + + +void x87_fld( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + if (arg.file == file_x87) + emit_2ub(p, 0xd9, 0xc0 + arg.idx); + else { + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 0, arg); + } + note_x87_push(p); +} + +void x87_fst( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( dst ); + if (dst.file == file_x87) + emit_2ub(p, 0xdd, 0xd0 + dst.idx); + else { + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 2, dst); + } +} + +void x87_fstp( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( dst ); + if (dst.file == file_x87) + emit_2ub(p, 0xdd, 0xd8 + dst.idx); + else { + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 3, dst); + } + note_x87_pop(p); +} + +void x87_fpop( struct x86_function *p ) +{ + x87_fstp( p, x86_make_reg( file_x87, 0 )); +} + + +void x87_fcom( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( dst ); + if (dst.file == file_x87) + emit_2ub(p, 
0xd8, 0xd0 + dst.idx); + else { + emit_1ub(p, 0xd8); + emit_modrm_noreg(p, 2, dst); + } +} + + +void x87_fcomp( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( dst ); + if (dst.file == file_x87) + emit_2ub(p, 0xd8, 0xd8 + dst.idx); + else { + emit_1ub(p, 0xd8); + emit_modrm_noreg(p, 3, dst); + } + note_x87_pop(p); +} + +void x87_fcomi( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + emit_2ub(p, 0xdb, 0xf0+arg.idx); +} + +void x87_fcomip( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + emit_2ub(p, 0xdb, 0xf0+arg.idx); + note_x87_pop(p); +} + + +void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( dst ); + assert(dst.file == file_REG32); + + if (dst.idx == reg_AX && + dst.mod == mod_REG) + emit_2ub(p, 0xdf, 0xe0); + else { + emit_1ub(p, 0xdd); + emit_modrm_noreg(p, 7, dst); + } +} + + +void x87_fnstcw( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( dst ); + assert(dst.file == file_REG32); + + emit_1ub(p, 0x9b); /* WAIT -- needed? 
*/ + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 7, dst); +} + + + + +/*********************************************************************** + * MMX instructions + */ + +void mmx_emms( struct x86_function *p ) +{ + DUMP(); + assert(p->need_emms); + emit_2ub(p, 0x0f, 0x77); + p->need_emms = 0; +} + +void mmx_packssdw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + assert(dst.file == file_MMX && + (src.file == file_MMX || src.mod != mod_REG)); + + p->need_emms = 1; + + emit_2ub(p, X86_TWOB, 0x6b); + emit_modrm( p, dst, src ); +} + +void mmx_packuswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + assert(dst.file == file_MMX && + (src.file == file_MMX || src.mod != mod_REG)); + + p->need_emms = 1; + + emit_2ub(p, X86_TWOB, 0x67); + emit_modrm( p, dst, src ); +} + +void mmx_movd( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + p->need_emms = 1; + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x6e, 0x7e, dst, src ); +} + +void mmx_movq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( dst, src ); + p->need_emms = 1; + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x6f, 0x7f, dst, src ); +} + + +/*********************************************************************** + * Helper functions + */ + + +void x86_cdecl_caller_push_regs( struct x86_function *p ) +{ + x86_push(p, x86_make_reg(file_REG32, reg_AX)); + x86_push(p, x86_make_reg(file_REG32, reg_CX)); + x86_push(p, x86_make_reg(file_REG32, reg_DX)); +} + +void x86_cdecl_caller_pop_regs( struct x86_function *p ) +{ + x86_pop(p, x86_make_reg(file_REG32, reg_DX)); + x86_pop(p, x86_make_reg(file_REG32, reg_CX)); + x86_pop(p, x86_make_reg(file_REG32, reg_AX)); +} + + +/* Retreive a reference to one of the function arguments, taking into + * account any push/pop activity: + */ +struct x86_reg x86_fn_arg( struct x86_function *p, + unsigned arg ) +{ + 
return x86_make_disp(x86_make_reg(file_REG32, reg_SP), + p->stack_offset + arg * 4); /* ??? */ +} + + +void x86_init_func( struct x86_function *p ) +{ + p->size = 0; + p->store = NULL; + p->csr = p->store; + DUMP_START(); +} + +void x86_init_func_size( struct x86_function *p, unsigned code_size ) +{ + p->size = code_size; + p->store = rtasm_exec_malloc(code_size); + if (p->store == NULL) { + p->store = p->error_overflow; + } + p->csr = p->store; + DUMP_START(); +} + +void x86_release_func( struct x86_function *p ) +{ + if (p->store && p->store != p->error_overflow) + rtasm_exec_free(p->store); + + p->store = NULL; + p->csr = NULL; + p->size = 0; +} + + +void (*x86_get_func( struct x86_function *p ))(void) +{ + DUMP_END(); + if (DISASSEM && p->store) + debug_printf("disassemble %p %p\n", p->store, p->csr); + + if (p->store == p->error_overflow) + return (void (*)(void)) NULL; + else + return (void (*)(void)) p->store; +} + +#else + +void x86sse_dummy( void ) +{ +} + +#endif diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h new file mode 100644 index 0000000000..1b5eaaca85 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -0,0 +1,319 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _RTASM_X86SSE_H_ +#define _RTASM_X86SSE_H_ + +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_X86) + +/* It is up to the caller to ensure that instructions issued are + * suitable for the host cpu. There are no checks made in this module + * for mmx/sse/sse2 support on the cpu. + */ +struct x86_reg { + unsigned file:3; + unsigned idx:3; + unsigned mod:2; /* mod_REG if this is just a register */ + int disp:24; /* only +/- 23bits of offset - should be enough... 
*/ +}; + +struct x86_function { + unsigned size; + unsigned char *store; + unsigned char *csr; + + unsigned stack_offset:16; + unsigned need_emms:8; + int x87_stack:8; + + unsigned char error_overflow[4]; +}; + +enum x86_reg_file { + file_REG32, + file_MMX, + file_XMM, + file_x87 +}; + +/* Values for mod field of modr/m byte + */ +enum x86_reg_mod { + mod_INDIRECT, + mod_DISP8, + mod_DISP32, + mod_REG +}; + +enum x86_reg_name { + reg_AX, + reg_CX, + reg_DX, + reg_BX, + reg_SP, + reg_BP, + reg_SI, + reg_DI +}; + + +enum x86_cc { + cc_O, /* overflow */ + cc_NO, /* not overflow */ + cc_NAE, /* not above or equal / carry */ + cc_AE, /* above or equal / not carry */ + cc_E, /* equal / zero */ + cc_NE /* not equal / not zero */ +}; + +enum sse_cc { + cc_Equal, + cc_LessThan, + cc_LessThanEqual, + cc_Unordered, + cc_NotEqual, + cc_NotLessThan, + cc_NotLessThanEqual, + cc_Ordered +}; + +#define cc_Z cc_E +#define cc_NZ cc_NE + +/* Begin/end/retreive function creation: + */ + + +void x86_init_func( struct x86_function *p ); +void x86_init_func_size( struct x86_function *p, unsigned code_size ); +void x86_release_func( struct x86_function *p ); +void (*x86_get_func( struct x86_function *p ))( void ); + +/* Debugging: + */ +void x86_print_reg( struct x86_reg reg ); + + +/* Create and manipulate registers and regmem values: + */ +struct x86_reg x86_make_reg( enum x86_reg_file file, + enum x86_reg_name idx ); + +struct x86_reg x86_make_disp( struct x86_reg reg, + int disp ); + +struct x86_reg x86_deref( struct x86_reg reg ); + +struct x86_reg x86_get_base_reg( struct x86_reg reg ); + + +/* Labels, jumps and fixup: + */ +int x86_get_label( struct x86_function *p ); + +void x86_jcc( struct x86_function *p, + enum x86_cc cc, + int label ); + +int x86_jcc_forward( struct x86_function *p, + enum x86_cc cc ); + +int x86_jmp_forward( struct x86_function *p); + +int x86_call_forward( struct x86_function *p); + +void x86_fixup_fwd_jump( struct x86_function *p, + int fixup ); + +void 
x86_jmp( struct x86_function *p, int label ); + +/* void x86_call( struct x86_function *p, void (*label)() ); */ +void x86_call( struct x86_function *p, struct x86_reg reg); + +void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ); + + +/* Macro for sse_shufps() and sse2_pshufd(): + */ +#define SHUF(_x,_y,_z,_w) (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6)) +#define SHUF_NOOP RSW(0,1,2,3) +#define GET_SHUF(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +void mmx_emms( struct x86_function *p ); +void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, + unsigned 
char shuf ); +void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + + +void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr); +void sse_prefetch0( struct x86_function *p, struct x86_reg ptr); +void sse_prefetch1( struct x86_function *p, struct x86_reg ptr); + +void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src); + +void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, + enum sse_cc cc ); +void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); 
+void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, + unsigned char shuf ); +void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); +void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_dec( struct x86_function *p, struct x86_reg reg ); +void x86_inc( struct x86_function *p, struct x86_reg reg ); +void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_mul( struct x86_function *p, struct x86_reg src ); +void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_pop( struct x86_function *p, struct x86_reg reg ); +void x86_push( struct x86_function *p, struct x86_reg reg ); +void x86_push_imm32( struct x86_function *p, int imm ); +void x86_ret( struct x86_function *p ); +void x86_retw( struct x86_function *p, unsigned short imm ); +void 
x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_sahf( struct x86_function *p ); + + +void x86_cdecl_caller_push_regs( struct x86_function *p ); +void x86_cdecl_caller_pop_regs( struct x86_function *p ); + +void x87_assert_stack_empty( struct x86_function *p ); + +void x87_f2xm1( struct x86_function *p ); +void x87_fabs( struct x86_function *p ); +void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_faddp( struct x86_function *p, struct x86_reg dst ); +void x87_fchs( struct x86_function *p ); +void x87_fclex( struct x86_function *p ); +void x87_fcmovb( struct x86_function *p, struct x86_reg src ); +void x87_fcmovbe( struct x86_function *p, struct x86_reg src ); +void x87_fcmove( struct x86_function *p, struct x86_reg src ); +void x87_fcmovnb( struct x86_function *p, struct x86_reg src ); +void x87_fcmovnbe( struct x86_function *p, struct x86_reg src ); +void x87_fcmovne( struct x86_function *p, struct x86_reg src ); +void x87_fcom( struct x86_function *p, struct x86_reg dst ); +void x87_fcomi( struct x86_function *p, struct x86_reg dst ); +void x87_fcomip( struct x86_function *p, struct x86_reg dst ); +void x87_fcomp( struct x86_function *p, struct x86_reg dst ); +void x87_fcos( struct x86_function *p ); +void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fdivp( struct x86_function *p, struct x86_reg dst ); +void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fdivrp( struct x86_function *p, struct x86_reg dst ); +void x87_fild( struct x86_function *p, struct x86_reg arg ); +void x87_fist( struct x86_function *p, struct x86_reg dst ); +void x87_fistp( struct x86_function *p, struct x86_reg dst ); +void x87_fld( struct x86_function *p, struct x86_reg arg 
); +void x87_fld1( struct x86_function *p ); +void x87_fldcw( struct x86_function *p, struct x86_reg arg ); +void x87_fldl2e( struct x86_function *p ); +void x87_fldln2( struct x86_function *p ); +void x87_fldz( struct x86_function *p ); +void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fmulp( struct x86_function *p, struct x86_reg dst ); +void x87_fnclex( struct x86_function *p ); +void x87_fprndint( struct x86_function *p ); +void x87_fpop( struct x86_function *p ); +void x87_fscale( struct x86_function *p ); +void x87_fsin( struct x86_function *p ); +void x87_fsincos( struct x86_function *p ); +void x87_fsqrt( struct x86_function *p ); +void x87_fst( struct x86_function *p, struct x86_reg dst ); +void x87_fstp( struct x86_function *p, struct x86_reg dst ); +void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fsubp( struct x86_function *p, struct x86_reg dst ); +void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fsubrp( struct x86_function *p, struct x86_reg dst ); +void x87_ftst( struct x86_function *p ); +void x87_fxch( struct x86_function *p, struct x86_reg dst ); +void x87_fxtract( struct x86_function *p ); +void x87_fyl2x( struct x86_function *p ); +void x87_fyl2xp1( struct x86_function *p ); +void x87_fwait( struct x86_function *p ); +void x87_fnstcw( struct x86_function *p, struct x86_reg dst ); +void x87_fnstsw( struct x86_function *p, struct x86_reg dst ); +void x87_fucompp( struct x86_function *p ); +void x87_fucomp( struct x86_function *p, struct x86_reg arg ); +void x87_fucom( struct x86_function *p, struct x86_reg arg ); + + + +/* Retreive a reference to one of the function arguments, taking into + * account any push/pop activity. Note - doesn't track explict + * manipulation of ESP by other instructions. 
+ */ +struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg ); + +#endif +#endif diff --git a/src/gallium/auxiliary/sct/Makefile b/src/gallium/auxiliary/sct/Makefile new file mode 100644 index 0000000000..516d1756cf --- /dev/null +++ b/src/gallium/auxiliary/sct/Makefile @@ -0,0 +1,12 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = sct + +C_SOURCES = \ + sct.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/sct/SConscript b/src/gallium/auxiliary/sct/SConscript new file mode 100644 index 0000000000..76927d973f --- /dev/null +++ b/src/gallium/auxiliary/sct/SConscript @@ -0,0 +1,9 @@ +Import('*') + +sct = env.ConvenienceLibrary( + target = 'sct', + source = [ + 'sct.c' + ]) + +auxiliaries.insert(0, sct) diff --git a/src/gallium/auxiliary/sct/sct.c b/src/gallium/auxiliary/sct/sct.c new file mode 100644 index 0000000000..49bb7ea92e --- /dev/null +++ b/src/gallium/auxiliary/sct/sct.c @@ -0,0 +1,454 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" +#include "sct.h" + + +struct texture_list +{ + struct pipe_texture *texture; + struct texture_list *next; +}; + + + +#define MAX_SURFACES ((PIPE_MAX_COLOR_BUFS) + 1) + +struct sct_context +{ + const struct pipe_context *context; + + /** surfaces the context is drawing into */ + struct pipe_surface *surfaces[MAX_SURFACES]; + + /** currently bound textures */ + struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; + + /** previously bound textures, used but not flushed */ + struct texture_list *textures_used; + + boolean needs_flush; + + struct sct_context *next; +}; + + + +struct sct_surface +{ + const struct pipe_surface *surface; + + /** list of contexts drawing to this surface */ + struct sct_context_list *contexts; + + struct sct_surface *next; +}; + + + +/** + * Find the surface_info for the given pipe_surface + */ +static struct sct_surface * +find_surface_info(struct surface_context_tracker *sct, + const struct pipe_surface *surface) +{ + struct sct_surface *si; + for (si = sct->surfaces; si; si = si->next) + if (si->surface == surface) + return si; + return NULL; +} + + +/** + * As above, but create new surface_info if surface is new. 
+ */ +static struct sct_surface * +find_create_surface_info(struct surface_context_tracker *sct, + const struct pipe_surface *surface) +{ + struct sct_surface *si = find_surface_info(sct, surface); + if (si) + return si; + + /* alloc new */ + si = CALLOC_STRUCT(sct_surface); + if (si) { + si->surface = surface; + + /* insert at head */ + si->next = sct->surfaces; + sct->surfaces = si; + } + + return si; +} + + +/** + * Find a context_info for the given context. + */ +static struct sct_context * +find_context_info(struct surface_context_tracker *sct, + const struct pipe_context *context) +{ + struct sct_context *ci; + for (ci = sct->contexts; ci; ci = ci->next) + if (ci->context == context) + return ci; + return NULL; +} + + +/** + * As above, but create new context_info if context is new. + */ +static struct sct_context * +find_create_context_info(struct surface_context_tracker *sct, + const struct pipe_context *context) +{ + struct sct_context *ci = find_context_info(sct, context); + if (ci) + return ci; + + /* alloc new */ + ci = CALLOC_STRUCT(sct_context); + if (ci) { + ci->context = context; + + /* insert at head */ + ci->next = sct->contexts; + sct->contexts = ci; + } + + return ci; +} + + +/** + * Is the context already bound to the surface? + */ +static boolean +find_surface_context(const struct sct_surface *si, + const struct pipe_context *context) +{ + const struct sct_context_list *cl; + for (cl = si->contexts; cl; cl = cl->next) { + if (cl->context == context) { + return TRUE; + } + } + return FALSE; +} + + +/** + * Add a context to the list of contexts associated with a surface. 
+ */ +static void +add_context_to_surface(struct sct_surface *si, + const struct pipe_context *context) +{ + struct sct_context_list *cl = CALLOC_STRUCT(sct_context_list); + if (cl) { + cl->context = context; + /* insert at head of list of contexts */ + cl->next = si->contexts; + si->contexts = cl; + } +} + + +/** + * Remove a context from the list of contexts associated with a surface. + */ +static void +remove_context_from_surface(struct sct_surface *si, + const struct pipe_context *context) +{ + struct sct_context_list *prev = NULL, *curr, *next; + + for (curr = si->contexts; curr; curr = next) { + if (curr->context == context) { + /* remove */ + if (prev) + prev->next = curr->next; + else + si->contexts = curr->next; + next = curr->next; + FREE(curr); + } + else { + prev = curr; + next = curr->next; + } + } +} + + +/** + * Unbind context from surface. + */ +static void +unbind_context_surface(struct surface_context_tracker *sct, + struct pipe_context *context, + struct pipe_surface *surface) +{ + struct sct_surface *si = find_surface_info(sct, surface); + if (si) { + remove_context_from_surface(si, context); + } +} + + +/** + * Bind context to a set of surfaces (color + Z). + * Like MakeCurrent(). + */ +void +sct_bind_surfaces(struct surface_context_tracker *sct, + struct pipe_context *context, + uint num_surf, + struct pipe_surface **surfaces) +{ + struct sct_context *ci = find_create_context_info(sct, context); + uint i; + + if (!ci) { + return; /* out of memory */ + } + + /* unbind currently bound surfaces */ + for (i = 0; i < MAX_SURFACES; i++) { + if (ci->surfaces[i]) { + unbind_context_surface(sct, context, ci->surfaces[i]); + } + } + + /* bind new surfaces */ + for (i = 0; i < num_surf; i++) { + struct sct_surface *si = find_create_surface_info(sct, surfaces[i]); + if (!find_surface_context(si, context)) { + add_context_to_surface(si, context); + } + } +} + + +/** + * Return list of contexts bound to a surface. 
+ */ +const struct sct_context_list * +sct_get_surface_contexts(struct surface_context_tracker *sct, + const struct pipe_surface *surface) +{ + const struct sct_surface *si = find_surface_info(sct, surface); + return si->contexts; +} + + + +static boolean +find_texture(const struct sct_context *ci, + const struct pipe_texture *texture) +{ + const struct texture_list *tl; + + for (tl = ci->textures_used; tl; tl = tl->next) { + if (tl->texture == texture) { + return TRUE; + } + } + return FALSE; +} + + +/** + * Add the given texture to the context's list of used textures. + */ +static void +add_texture_used(struct sct_context *ci, + struct pipe_texture *texture) +{ + if (!find_texture(ci, texture)) { + /* add to list */ + struct texture_list *tl = CALLOC_STRUCT(texture_list); + if (tl) { + pipe_texture_reference(&tl->texture, texture); + /* insert at head */ + tl->next = ci->textures_used; + ci->textures_used = tl; + } + } +} + + +/** + * Bind a texture to a rendering context. + */ +void +sct_bind_texture(struct surface_context_tracker *sct, + struct pipe_context *context, + uint unit, + struct pipe_texture *tex) +{ + struct sct_context *ci = find_context_info(sct, context); + + if (ci->textures[unit] != tex) { + /* put texture on the 'used' list */ + add_texture_used(ci, tex); + /* bind new */ + pipe_texture_reference(&ci->textures[unit], tex); + } +} + + +/** + * Check if the given texture has been used by the rendering context + * since the last call to sct_flush_textures(). + */ +boolean +sct_is_texture_used(struct surface_context_tracker *sct, + const struct pipe_context *context, + const struct pipe_texture *texture) +{ + const struct sct_context *ci = find_context_info(sct, context); + return find_texture(ci, texture); +} + + +/** + * To be called when the image contents of a texture are changed, such + * as for gl[Copy]TexSubImage(). 
+ * XXX this may not be needed + */ +void +sct_update_texture(struct pipe_texture *tex) +{ + +} + + +/** + * When a scene is flushed/rendered we can release the list of + * used textures. + */ +void +sct_flush_textures(struct surface_context_tracker *sct, + struct pipe_context *context) +{ + struct sct_context *ci = find_context_info(sct, context); + struct texture_list *tl, *next; + uint i; + + for (tl = ci->textures_used; tl; tl = next) { + next = tl->next; + pipe_texture_release(&tl->texture); + FREE(tl); + } + ci->textures_used = NULL; + + /* put the currently bound textures on the 'used' list */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + add_texture_used(ci, ci->textures[i]); + } +} + + + +void +sct_destroy_context(struct surface_context_tracker *sct, + struct pipe_context *context) +{ + /* XXX should we require an unbinding first? */ + { + struct sct_surface *si; + for (si = sct->surfaces; si; si = si->next) { + remove_context_from_surface(si, context); + } + } + + /* remove context from context_info list */ + { + struct sct_context *ci, *next, *prev = NULL; + for (ci = sct->contexts; ci; ci = next) { + next = ci->next; + if (ci->context == context) { + if (prev) + prev->next = ci->next; + else + sct->contexts = ci->next; + FREE(ci); + } + else { + prev = ci; + } + } + } + +} + + +void +sct_destroy_surface(struct surface_context_tracker *sct, + struct pipe_surface *surface) +{ + if (1) { + /* debug/sanity: no context should be bound to surface */ + struct sct_context *ci; + uint i; + for (ci = sct->contexts; ci; ci = ci->next) { + for (i = 0; i < MAX_SURFACES; i++) { + assert(ci->surfaces[i] != surface); + } + } + } + + /* remove surface from sct_surface list */ + { + struct sct_surface *si, *next, *prev = NULL; + for (si = sct->surfaces; si; si = next) { + next = si->next; + if (si->surface == surface) { + /* unlink */ + if (prev) + prev->next = si->next; + else + sct->surfaces = si->next; + FREE(si); + } + else { + prev = si; + } + } + } +} diff --git 
a/src/gallium/auxiliary/sct/sct.h b/src/gallium/auxiliary/sct/sct.h new file mode 100644 index 0000000000..cf7c4d3bdf --- /dev/null +++ b/src/gallium/auxiliary/sct/sct.h @@ -0,0 +1,123 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Surface/Context Tracking + * + * For some drivers, we need to monitor the binding between contexts and + * surfaces/textures. + * This code may evolve quite a bit... 
+ */ + + +#ifndef SCT_H +#define SCT_H + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct pipe_context; +struct pipe_surface; + +struct sct_context; +struct sct_surface; + + +/** + * Per-device info, basically + */ +struct surface_context_tracker +{ + struct sct_context *contexts; + struct sct_surface *surfaces; +}; + + + +/** + * Simple linked list of contexts + */ +struct sct_context_list +{ + const struct pipe_context *context; + struct sct_context_list *next; +}; + + + +extern void +sct_bind_surfaces(struct surface_context_tracker *sct, + struct pipe_context *context, + uint num_surf, + struct pipe_surface **surfaces); + + +extern void +sct_bind_texture(struct surface_context_tracker *sct, + struct pipe_context *context, + uint unit, + struct pipe_texture *texture); + + +extern void +sct_update_texture(struct pipe_texture *tex); + + +extern boolean +sct_is_texture_used(struct surface_context_tracker *sct, + const struct pipe_context *context, + const struct pipe_texture *texture); + +extern void +sct_flush_textures(struct surface_context_tracker *sct, + struct pipe_context *context); + + +extern const struct sct_context_list * +sct_get_surface_contexts(struct surface_context_tracker *sct, + const struct pipe_surface *surf); + + +extern void +sct_destroy_context(struct surface_context_tracker *sct, + struct pipe_context *context); + + +extern void +sct_destroy_surface(struct surface_context_tracker *sct, + struct pipe_surface *surface); + + + +#ifdef __cplusplus +} +#endif + +#endif /* SCT_H */ diff --git a/src/gallium/auxiliary/sct/usage.c b/src/gallium/auxiliary/sct/usage.c new file mode 100644 index 0000000000..6227f19962 --- /dev/null +++ b/src/gallium/auxiliary/sct/usage.c @@ -0,0 +1,61 @@ +/* surface / context tracking */ + + +/* + +context A: + render to texture T + +context B: + texture from T + +----------------------- + +flush surface: + which contexts are bound to the surface? 
+ +----------------------- + +glTexSubImage(): + which contexts need to be flushed? + + */ + + +/* + +in MakeCurrent(): + + call sct_bind_surfaces(context, list of surfaces) to update the + dependencies between context and surfaces + + +in SurfaceFlush(), or whatever it is in D3D: + + call sct_get_surface_contexts(surface) to get a list of contexts + which are currently bound to the surface. + + + +in BindTexture(): + + call sct_bind_texture(context, texture) to indicate that the texture + is used in the scene. + + +in glTexSubImage() or RenderToTexture(): + + call sct_is_texture_used(context, texture) to determine if the texture + has been used in the scene, but the scene's not flushed. If TRUE is + returned it means the scene has to be rendered/flushed before the contents + of the texture can be changed. + + +in psb_scene_flush/terminate(): + + call sct_flush_textures(context) to tell the SCT that the textures which + were used in the scene can be released. + + + +*/ diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile new file mode 100644 index 0000000000..d7df9490cf --- /dev/null +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -0,0 +1,24 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = tgsi + +C_SOURCES = \ + tgsi_sanity.c \ + tgsi_build.c \ + tgsi_dump.c \ + tgsi_exec.c \ + tgsi_info.c \ + tgsi_iterate.c \ + tgsi_parse.c \ + tgsi_ppc.c \ + tgsi_scan.c \ + tgsi_sse2.c \ + tgsi_text.c \ + tgsi_transform.c \ + tgsi_util.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript new file mode 100644 index 0000000000..8200cce42f --- /dev/null +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -0,0 +1,22 @@ +Import('*') + +tgsi = env.ConvenienceLibrary( + target = 'tgsi', + source = [ + 'tgsi_build.c', + 'tgsi_dump.c', + 'tgsi_dump_c.c', + 'tgsi_exec.c', + 'tgsi_info.c', + 'tgsi_iterate.c', + 'tgsi_parse.c', + 'tgsi_sanity.c', + 'tgsi_scan.c', + 'tgsi_ppc.c', + 'tgsi_sse2.c', + 'tgsi_text.c', + 'tgsi_transform.c', + 'tgsi_util.c', + ]) + +auxiliaries.insert(0, tgsi) diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c new file mode 100644 index 0000000000..38fcaf8829 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -0,0 +1,1327 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi_build.h" +#include "tgsi_parse.h" + +/* + * version + */ + +struct tgsi_version +tgsi_build_version( void ) +{ + struct tgsi_version version; + + version.MajorVersion = 1; + version.MinorVersion = 1; + version.Padding = 0; + + return version; +} + +/* + * header + */ + +struct tgsi_header +tgsi_build_header( void ) +{ + struct tgsi_header header; + + header.HeaderSize = 1; + header.BodySize = 0; + + return header; +} + +static void +header_headersize_grow( struct tgsi_header *header ) +{ + assert( header->HeaderSize < 0xFF ); + assert( header->BodySize == 0 ); + + header->HeaderSize++; +} + +static void +header_bodysize_grow( struct tgsi_header *header ) +{ + assert( header->BodySize < 0xFFFFFF ); + + header->BodySize++; +} + +struct tgsi_processor +tgsi_default_processor( void ) +{ + struct tgsi_processor processor; + + processor.Processor = TGSI_PROCESSOR_FRAGMENT; + processor.Padding = 0; + + return processor; +} + +struct tgsi_processor +tgsi_build_processor( + unsigned type, + struct tgsi_header *header ) +{ + struct tgsi_processor processor; + + processor = tgsi_default_processor(); + processor.Processor = type; + + header_headersize_grow( header ); + + return processor; +} + +/* + * declaration + */ + +struct tgsi_declaration +tgsi_default_declaration( void ) +{ + struct tgsi_declaration declaration; + + 
declaration.Type = TGSI_TOKEN_TYPE_DECLARATION; + declaration.Size = 1; + declaration.File = TGSI_FILE_NULL; + declaration.UsageMask = TGSI_WRITEMASK_XYZW; + declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT; + declaration.Semantic = 0; + declaration.Padding = 0; + declaration.Extended = 0; + + return declaration; +} + +struct tgsi_declaration +tgsi_build_declaration( + unsigned file, + unsigned usage_mask, + unsigned interpolate, + unsigned semantic, + struct tgsi_header *header ) +{ + struct tgsi_declaration declaration; + + assert( file <= TGSI_FILE_IMMEDIATE ); + assert( interpolate <= TGSI_INTERPOLATE_PERSPECTIVE ); + + declaration = tgsi_default_declaration(); + declaration.File = file; + declaration.UsageMask = usage_mask; + declaration.Interpolate = interpolate; + declaration.Semantic = semantic; + + header_bodysize_grow( header ); + + return declaration; +} + +static void +declaration_grow( + struct tgsi_declaration *declaration, + struct tgsi_header *header ) +{ + assert( declaration->Size < 0xFF ); + + declaration->Size++; + + header_bodysize_grow( header ); +} + +struct tgsi_full_declaration +tgsi_default_full_declaration( void ) +{ + struct tgsi_full_declaration full_declaration; + + full_declaration.Declaration = tgsi_default_declaration(); + full_declaration.DeclarationRange = tgsi_default_declaration_range(); + full_declaration.Semantic = tgsi_default_declaration_semantic(); + + return full_declaration; +} + +unsigned +tgsi_build_full_declaration( + const struct tgsi_full_declaration *full_decl, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ) +{ + unsigned size = 0; + struct tgsi_declaration *declaration; + struct tgsi_declaration_range *dr; + + if( maxsize <= size ) + return 0; + declaration = (struct tgsi_declaration *) &tokens[size]; + size++; + + *declaration = tgsi_build_declaration( + full_decl->Declaration.File, + full_decl->Declaration.UsageMask, + full_decl->Declaration.Interpolate, + 
full_decl->Declaration.Semantic, + header ); + + if (maxsize <= size) + return 0; + dr = (struct tgsi_declaration_range *) &tokens[size]; + size++; + + *dr = tgsi_build_declaration_range( + full_decl->DeclarationRange.First, + full_decl->DeclarationRange.Last, + declaration, + header ); + + if( full_decl->Declaration.Semantic ) { + struct tgsi_declaration_semantic *ds; + + if( maxsize <= size ) + return 0; + ds = (struct tgsi_declaration_semantic *) &tokens[size]; + size++; + + *ds = tgsi_build_declaration_semantic( + full_decl->Semantic.SemanticName, + full_decl->Semantic.SemanticIndex, + declaration, + header ); + } + + return size; +} + +struct tgsi_declaration_range +tgsi_default_declaration_range( void ) +{ + struct tgsi_declaration_range dr; + + dr.First = 0; + dr.Last = 0; + + return dr; +} + +struct tgsi_declaration_range +tgsi_build_declaration_range( + unsigned first, + unsigned last, + struct tgsi_declaration *declaration, + struct tgsi_header *header ) +{ + struct tgsi_declaration_range declaration_range; + + assert( last >= first ); + assert( last <= 0xFFFF ); + + declaration_range = tgsi_default_declaration_range(); + declaration_range.First = first; + declaration_range.Last = last; + + declaration_grow( declaration, header ); + + return declaration_range; +} + +struct tgsi_declaration_semantic +tgsi_default_declaration_semantic( void ) +{ + struct tgsi_declaration_semantic ds; + + ds.SemanticName = TGSI_SEMANTIC_POSITION; + ds.SemanticIndex = 0; + ds.Padding = 0; + + return ds; +} + +struct tgsi_declaration_semantic +tgsi_build_declaration_semantic( + unsigned semantic_name, + unsigned semantic_index, + struct tgsi_declaration *declaration, + struct tgsi_header *header ) +{ + struct tgsi_declaration_semantic ds; + + assert( semantic_name <= TGSI_SEMANTIC_COUNT ); + assert( semantic_index <= 0xFFFF ); + + ds = tgsi_default_declaration_semantic(); + ds.SemanticName = semantic_name; + ds.SemanticIndex = semantic_index; + + declaration_grow( declaration, 
header ); + + return ds; +} + +/* + * immediate + */ + +struct tgsi_immediate +tgsi_default_immediate( void ) +{ + struct tgsi_immediate immediate; + + immediate.Type = TGSI_TOKEN_TYPE_IMMEDIATE; + immediate.Size = 1; + immediate.DataType = TGSI_IMM_FLOAT32; + immediate.Padding = 0; + immediate.Extended = 0; + + return immediate; +} + +struct tgsi_immediate +tgsi_build_immediate( + struct tgsi_header *header ) +{ + struct tgsi_immediate immediate; + + immediate = tgsi_default_immediate(); + + header_bodysize_grow( header ); + + return immediate; +} + +struct tgsi_full_immediate +tgsi_default_full_immediate( void ) +{ + struct tgsi_full_immediate fullimm; + + fullimm.Immediate = tgsi_default_immediate(); + fullimm.u.Pointer = (void *) 0; + + return fullimm; +} + +static void +immediate_grow( + struct tgsi_immediate *immediate, + struct tgsi_header *header ) +{ + assert( immediate->Size < 0xFF ); + + immediate->Size++; + + header_bodysize_grow( header ); +} + +struct tgsi_immediate_float32 +tgsi_build_immediate_float32( + float value, + struct tgsi_immediate *immediate, + struct tgsi_header *header ) +{ + struct tgsi_immediate_float32 immediate_float32; + + immediate_float32.Float = value; + + immediate_grow( immediate, header ); + + return immediate_float32; +} + +unsigned +tgsi_build_full_immediate( + const struct tgsi_full_immediate *full_imm, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ) +{ + unsigned size = 0, i; + struct tgsi_immediate *immediate; + + if( maxsize <= size ) + return 0; + immediate = (struct tgsi_immediate *) &tokens[size]; + size++; + + *immediate = tgsi_build_immediate( header ); + + for( i = 0; i < full_imm->Immediate.Size - 1; i++ ) { + struct tgsi_immediate_float32 *if32; + + if( maxsize <= size ) + return 0; + if32 = (struct tgsi_immediate_float32 *) &tokens[size]; + size++; + + *if32 = tgsi_build_immediate_float32( + full_imm->u.ImmediateFloat32[i].Float, + immediate, + header ); + } + + return size; +} + 
+/* + * instruction + */ + +struct tgsi_instruction +tgsi_default_instruction( void ) +{ + struct tgsi_instruction instruction; + + instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION; + instruction.Size = 1; + instruction.Opcode = TGSI_OPCODE_MOV; + instruction.Saturate = TGSI_SAT_NONE; + instruction.NumDstRegs = 1; + instruction.NumSrcRegs = 1; + instruction.Padding = 0; + instruction.Extended = 0; + + return instruction; +} + +struct tgsi_instruction +tgsi_build_instruction( + unsigned opcode, + unsigned saturate, + unsigned num_dst_regs, + unsigned num_src_regs, + struct tgsi_header *header ) +{ + struct tgsi_instruction instruction; + + assert (opcode <= TGSI_OPCODE_LAST); + assert (saturate <= TGSI_SAT_MINUS_PLUS_ONE); + assert (num_dst_regs <= 3); + assert (num_src_regs <= 15); + + instruction = tgsi_default_instruction(); + instruction.Opcode = opcode; + instruction.Saturate = saturate; + instruction.NumDstRegs = num_dst_regs; + instruction.NumSrcRegs = num_src_regs; + + header_bodysize_grow( header ); + + return instruction; +} + +static void +instruction_grow( + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + assert (instruction->Size < 0xFF); + + instruction->Size++; + + header_bodysize_grow( header ); +} + +struct tgsi_full_instruction +tgsi_default_full_instruction( void ) +{ + struct tgsi_full_instruction full_instruction; + unsigned i; + + full_instruction.Instruction = tgsi_default_instruction(); + full_instruction.InstructionExtNv = tgsi_default_instruction_ext_nv(); + full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label(); + full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture(); + for( i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) { + full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register(); + } + for( i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++ ) { + full_instruction.FullSrcRegisters[i] = tgsi_default_full_src_register(); + } + + return full_instruction; +} + 
+unsigned +tgsi_build_full_instruction( + const struct tgsi_full_instruction *full_inst, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ) +{ + unsigned size = 0; + unsigned i; + struct tgsi_instruction *instruction; + struct tgsi_token *prev_token; + + if( maxsize <= size ) + return 0; + instruction = (struct tgsi_instruction *) &tokens[size]; + size++; + + *instruction = tgsi_build_instruction( + full_inst->Instruction.Opcode, + full_inst->Instruction.Saturate, + full_inst->Instruction.NumDstRegs, + full_inst->Instruction.NumSrcRegs, + header ); + prev_token = (struct tgsi_token *) instruction; + + if( tgsi_compare_instruction_ext_nv( + full_inst->InstructionExtNv, + tgsi_default_instruction_ext_nv() ) ) { + struct tgsi_instruction_ext_nv *instruction_ext_nv; + + if( maxsize <= size ) + return 0; + instruction_ext_nv = + (struct tgsi_instruction_ext_nv *) &tokens[size]; + size++; + + *instruction_ext_nv = tgsi_build_instruction_ext_nv( + full_inst->InstructionExtNv.Precision, + full_inst->InstructionExtNv.CondDstIndex, + full_inst->InstructionExtNv.CondFlowIndex, + full_inst->InstructionExtNv.CondMask, + full_inst->InstructionExtNv.CondSwizzleX, + full_inst->InstructionExtNv.CondSwizzleY, + full_inst->InstructionExtNv.CondSwizzleZ, + full_inst->InstructionExtNv.CondSwizzleW, + full_inst->InstructionExtNv.CondDstUpdate, + full_inst->InstructionExtNv.CondFlowEnable, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) instruction_ext_nv; + } + + if( tgsi_compare_instruction_ext_label( + full_inst->InstructionExtLabel, + tgsi_default_instruction_ext_label() ) ) { + struct tgsi_instruction_ext_label *instruction_ext_label; + + if( maxsize <= size ) + return 0; + instruction_ext_label = + (struct tgsi_instruction_ext_label *) &tokens[size]; + size++; + + *instruction_ext_label = tgsi_build_instruction_ext_label( + full_inst->InstructionExtLabel.Label, + prev_token, + instruction, + header ); + prev_token = (struct 
tgsi_token *) instruction_ext_label; + } + + if( tgsi_compare_instruction_ext_texture( + full_inst->InstructionExtTexture, + tgsi_default_instruction_ext_texture() ) ) { + struct tgsi_instruction_ext_texture *instruction_ext_texture; + + if( maxsize <= size ) + return 0; + instruction_ext_texture = + (struct tgsi_instruction_ext_texture *) &tokens[size]; + size++; + + *instruction_ext_texture = tgsi_build_instruction_ext_texture( + full_inst->InstructionExtTexture.Texture, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) instruction_ext_texture; + } + + for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) { + const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i]; + struct tgsi_dst_register *dst_register; + struct tgsi_token *prev_token; + + if( maxsize <= size ) + return 0; + dst_register = (struct tgsi_dst_register *) &tokens[size]; + size++; + + *dst_register = tgsi_build_dst_register( + reg->DstRegister.File, + reg->DstRegister.WriteMask, + reg->DstRegister.Index, + instruction, + header ); + prev_token = (struct tgsi_token *) dst_register; + + if( tgsi_compare_dst_register_ext_concode( + reg->DstRegisterExtConcode, + tgsi_default_dst_register_ext_concode() ) ) { + struct tgsi_dst_register_ext_concode *dst_register_ext_concode; + + if( maxsize <= size ) + return 0; + dst_register_ext_concode = + (struct tgsi_dst_register_ext_concode *) &tokens[size]; + size++; + + *dst_register_ext_concode = tgsi_build_dst_register_ext_concode( + reg->DstRegisterExtConcode.CondMask, + reg->DstRegisterExtConcode.CondSwizzleX, + reg->DstRegisterExtConcode.CondSwizzleY, + reg->DstRegisterExtConcode.CondSwizzleZ, + reg->DstRegisterExtConcode.CondSwizzleW, + reg->DstRegisterExtConcode.CondSrcIndex, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) dst_register_ext_concode; + } + + if( tgsi_compare_dst_register_ext_modulate( + reg->DstRegisterExtModulate, + tgsi_default_dst_register_ext_modulate() ) ) { 
+ struct tgsi_dst_register_ext_modulate *dst_register_ext_modulate; + + if( maxsize <= size ) + return 0; + dst_register_ext_modulate = + (struct tgsi_dst_register_ext_modulate *) &tokens[size]; + size++; + + *dst_register_ext_modulate = tgsi_build_dst_register_ext_modulate( + reg->DstRegisterExtModulate.Modulate, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) dst_register_ext_modulate; + } + } + + for( i = 0; i < full_inst->Instruction.NumSrcRegs; i++ ) { + const struct tgsi_full_src_register *reg = &full_inst->FullSrcRegisters[i]; + struct tgsi_src_register *src_register; + struct tgsi_token *prev_token; + + if( maxsize <= size ) + return 0; + src_register = (struct tgsi_src_register *) &tokens[size]; + size++; + + *src_register = tgsi_build_src_register( + reg->SrcRegister.File, + reg->SrcRegister.SwizzleX, + reg->SrcRegister.SwizzleY, + reg->SrcRegister.SwizzleZ, + reg->SrcRegister.SwizzleW, + reg->SrcRegister.Negate, + reg->SrcRegister.Indirect, + reg->SrcRegister.Dimension, + reg->SrcRegister.Index, + instruction, + header ); + prev_token = (struct tgsi_token *) src_register; + + if( tgsi_compare_src_register_ext_swz( + reg->SrcRegisterExtSwz, + tgsi_default_src_register_ext_swz() ) ) { + struct tgsi_src_register_ext_swz *src_register_ext_swz; + + /* Use of the extended swizzle requires the simple swizzle to be identity. 
+ */ + assert( reg->SrcRegister.SwizzleX == TGSI_SWIZZLE_X ); + assert( reg->SrcRegister.SwizzleY == TGSI_SWIZZLE_Y ); + assert( reg->SrcRegister.SwizzleZ == TGSI_SWIZZLE_Z ); + assert( reg->SrcRegister.SwizzleW == TGSI_SWIZZLE_W ); + assert( reg->SrcRegister.Negate == FALSE ); + + if( maxsize <= size ) + return 0; + src_register_ext_swz = + (struct tgsi_src_register_ext_swz *) &tokens[size]; + size++; + + *src_register_ext_swz = tgsi_build_src_register_ext_swz( + reg->SrcRegisterExtSwz.ExtSwizzleX, + reg->SrcRegisterExtSwz.ExtSwizzleY, + reg->SrcRegisterExtSwz.ExtSwizzleZ, + reg->SrcRegisterExtSwz.ExtSwizzleW, + reg->SrcRegisterExtSwz.NegateX, + reg->SrcRegisterExtSwz.NegateY, + reg->SrcRegisterExtSwz.NegateZ, + reg->SrcRegisterExtSwz.NegateW, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) src_register_ext_swz; + } + + if( tgsi_compare_src_register_ext_mod( + reg->SrcRegisterExtMod, + tgsi_default_src_register_ext_mod() ) ) { + struct tgsi_src_register_ext_mod *src_register_ext_mod; + + if( maxsize <= size ) + return 0; + src_register_ext_mod = + (struct tgsi_src_register_ext_mod *) &tokens[size]; + size++; + + *src_register_ext_mod = tgsi_build_src_register_ext_mod( + reg->SrcRegisterExtMod.Complement, + reg->SrcRegisterExtMod.Bias, + reg->SrcRegisterExtMod.Scale2X, + reg->SrcRegisterExtMod.Absolute, + reg->SrcRegisterExtMod.Negate, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) src_register_ext_mod; + } + + if( reg->SrcRegister.Indirect ) { + struct tgsi_src_register *ind; + + if( maxsize <= size ) + return 0; + ind = (struct tgsi_src_register *) &tokens[size]; + size++; + + *ind = tgsi_build_src_register( + reg->SrcRegisterInd.File, + reg->SrcRegisterInd.SwizzleX, + reg->SrcRegisterInd.SwizzleY, + reg->SrcRegisterInd.SwizzleZ, + reg->SrcRegisterInd.SwizzleW, + reg->SrcRegisterInd.Negate, + reg->SrcRegisterInd.Indirect, + reg->SrcRegisterInd.Dimension, + reg->SrcRegisterInd.Index, + instruction, + 
header ); + } + + if( reg->SrcRegister.Dimension ) { + struct tgsi_dimension *dim; + + assert( !reg->SrcRegisterDim.Dimension ); + + if( maxsize <= size ) + return 0; + dim = (struct tgsi_dimension *) &tokens[size]; + size++; + + *dim = tgsi_build_dimension( + reg->SrcRegisterDim.Indirect, + reg->SrcRegisterDim.Index, + instruction, + header ); + + if( reg->SrcRegisterDim.Indirect ) { + struct tgsi_src_register *ind; + + if( maxsize <= size ) + return 0; + ind = (struct tgsi_src_register *) &tokens[size]; + size++; + + *ind = tgsi_build_src_register( + reg->SrcRegisterDimInd.File, + reg->SrcRegisterDimInd.SwizzleX, + reg->SrcRegisterDimInd.SwizzleY, + reg->SrcRegisterDimInd.SwizzleZ, + reg->SrcRegisterDimInd.SwizzleW, + reg->SrcRegisterDimInd.Negate, + reg->SrcRegisterDimInd.Indirect, + reg->SrcRegisterDimInd.Dimension, + reg->SrcRegisterDimInd.Index, + instruction, + header ); + } + } + } + + return size; +} + +struct tgsi_instruction_ext_nv +tgsi_default_instruction_ext_nv( void ) +{ + struct tgsi_instruction_ext_nv instruction_ext_nv; + + instruction_ext_nv.Type = TGSI_INSTRUCTION_EXT_TYPE_NV; + instruction_ext_nv.Precision = TGSI_PRECISION_DEFAULT; + instruction_ext_nv.CondDstIndex = 0; + instruction_ext_nv.CondFlowIndex = 0; + instruction_ext_nv.CondMask = TGSI_CC_TR; + instruction_ext_nv.CondSwizzleX = TGSI_SWIZZLE_X; + instruction_ext_nv.CondSwizzleY = TGSI_SWIZZLE_Y; + instruction_ext_nv.CondSwizzleZ = TGSI_SWIZZLE_Z; + instruction_ext_nv.CondSwizzleW = TGSI_SWIZZLE_W; + instruction_ext_nv.CondDstUpdate = 0; + instruction_ext_nv.CondFlowEnable = 0; + instruction_ext_nv.Padding = 0; + instruction_ext_nv.Extended = 0; + + return instruction_ext_nv; +} + + +/** test for inequality of 32-bit values pointed to by a and b */ +static INLINE boolean +compare32(const void *a, const void *b) +{ + return *((uint32_t *) a) != *((uint32_t *) b); +} + + +unsigned +tgsi_compare_instruction_ext_nv( + struct tgsi_instruction_ext_nv a, + struct tgsi_instruction_ext_nv b ) +{ 
+ a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return compare32(&a, &b); +} + +struct tgsi_instruction_ext_nv +tgsi_build_instruction_ext_nv( + unsigned precision, + unsigned cond_dst_index, + unsigned cond_flow_index, + unsigned cond_mask, + unsigned cond_swizzle_x, + unsigned cond_swizzle_y, + unsigned cond_swizzle_z, + unsigned cond_swizzle_w, + unsigned cond_dst_update, + unsigned cond_flow_enable, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_instruction_ext_nv instruction_ext_nv; + + instruction_ext_nv = tgsi_default_instruction_ext_nv(); + instruction_ext_nv.Precision = precision; + instruction_ext_nv.CondDstIndex = cond_dst_index; + instruction_ext_nv.CondFlowIndex = cond_flow_index; + instruction_ext_nv.CondMask = cond_mask; + instruction_ext_nv.CondSwizzleX = cond_swizzle_x; + instruction_ext_nv.CondSwizzleY = cond_swizzle_y; + instruction_ext_nv.CondSwizzleZ = cond_swizzle_z; + instruction_ext_nv.CondSwizzleW = cond_swizzle_w; + instruction_ext_nv.CondDstUpdate = cond_dst_update; + instruction_ext_nv.CondFlowEnable = cond_flow_enable; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return instruction_ext_nv; +} + +struct tgsi_instruction_ext_label +tgsi_default_instruction_ext_label( void ) +{ + struct tgsi_instruction_ext_label instruction_ext_label; + + instruction_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL; + instruction_ext_label.Label = 0; + instruction_ext_label.Padding = 0; + instruction_ext_label.Extended = 0; + + return instruction_ext_label; +} + +unsigned +tgsi_compare_instruction_ext_label( + struct tgsi_instruction_ext_label a, + struct tgsi_instruction_ext_label b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return compare32(&a, &b); +} + +struct tgsi_instruction_ext_label +tgsi_build_instruction_ext_label( + unsigned label, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, 
+ struct tgsi_header *header ) +{ + struct tgsi_instruction_ext_label instruction_ext_label; + + instruction_ext_label = tgsi_default_instruction_ext_label(); + instruction_ext_label.Label = label; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return instruction_ext_label; +} + +struct tgsi_instruction_ext_texture +tgsi_default_instruction_ext_texture( void ) +{ + struct tgsi_instruction_ext_texture instruction_ext_texture; + + instruction_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE; + instruction_ext_texture.Texture = TGSI_TEXTURE_UNKNOWN; + instruction_ext_texture.Padding = 0; + instruction_ext_texture.Extended = 0; + + return instruction_ext_texture; +} + +unsigned +tgsi_compare_instruction_ext_texture( + struct tgsi_instruction_ext_texture a, + struct tgsi_instruction_ext_texture b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return compare32(&a, &b); +} + +struct tgsi_instruction_ext_texture +tgsi_build_instruction_ext_texture( + unsigned texture, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_instruction_ext_texture instruction_ext_texture; + + instruction_ext_texture = tgsi_default_instruction_ext_texture(); + instruction_ext_texture.Texture = texture; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return instruction_ext_texture; +} + +struct tgsi_src_register +tgsi_default_src_register( void ) +{ + struct tgsi_src_register src_register; + + src_register.File = TGSI_FILE_NULL; + src_register.SwizzleX = TGSI_SWIZZLE_X; + src_register.SwizzleY = TGSI_SWIZZLE_Y; + src_register.SwizzleZ = TGSI_SWIZZLE_Z; + src_register.SwizzleW = TGSI_SWIZZLE_W; + src_register.Negate = 0; + src_register.Indirect = 0; + src_register.Dimension = 0; + src_register.Index = 0; + src_register.Extended = 0; + + return src_register; +} + +struct tgsi_src_register +tgsi_build_src_register( + unsigned file, + unsigned 
swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w, + unsigned negate, + unsigned indirect, + unsigned dimension, + int index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_src_register src_register; + + assert( file <= TGSI_FILE_IMMEDIATE ); + assert( swizzle_x <= TGSI_SWIZZLE_W ); + assert( swizzle_y <= TGSI_SWIZZLE_W ); + assert( swizzle_z <= TGSI_SWIZZLE_W ); + assert( swizzle_w <= TGSI_SWIZZLE_W ); + assert( negate <= 1 ); + assert( index >= -0x8000 && index <= 0x7FFF ); + + src_register = tgsi_default_src_register(); + src_register.File = file; + src_register.SwizzleX = swizzle_x; + src_register.SwizzleY = swizzle_y; + src_register.SwizzleZ = swizzle_z; + src_register.SwizzleW = swizzle_w; + src_register.Negate = negate; + src_register.Indirect = indirect; + src_register.Dimension = dimension; + src_register.Index = index; + + instruction_grow( instruction, header ); + + return src_register; +} + +struct tgsi_full_src_register +tgsi_default_full_src_register( void ) +{ + struct tgsi_full_src_register full_src_register; + + full_src_register.SrcRegister = tgsi_default_src_register(); + full_src_register.SrcRegisterExtSwz = tgsi_default_src_register_ext_swz(); + full_src_register.SrcRegisterExtMod = tgsi_default_src_register_ext_mod(); + full_src_register.SrcRegisterInd = tgsi_default_src_register(); + full_src_register.SrcRegisterDim = tgsi_default_dimension(); + full_src_register.SrcRegisterDimInd = tgsi_default_src_register(); + + return full_src_register; +} + +struct tgsi_src_register_ext_swz +tgsi_default_src_register_ext_swz( void ) +{ + struct tgsi_src_register_ext_swz src_register_ext_swz; + + src_register_ext_swz.Type = TGSI_SRC_REGISTER_EXT_TYPE_SWZ; + src_register_ext_swz.ExtSwizzleX = TGSI_EXTSWIZZLE_X; + src_register_ext_swz.ExtSwizzleY = TGSI_EXTSWIZZLE_Y; + src_register_ext_swz.ExtSwizzleZ = TGSI_EXTSWIZZLE_Z; + src_register_ext_swz.ExtSwizzleW = TGSI_EXTSWIZZLE_W; + 
src_register_ext_swz.NegateX = 0; + src_register_ext_swz.NegateY = 0; + src_register_ext_swz.NegateZ = 0; + src_register_ext_swz.NegateW = 0; + src_register_ext_swz.Padding = 0; + src_register_ext_swz.Extended = 0; + + return src_register_ext_swz; +} + +unsigned +tgsi_compare_src_register_ext_swz( + struct tgsi_src_register_ext_swz a, + struct tgsi_src_register_ext_swz b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return compare32(&a, &b); +} + +struct tgsi_src_register_ext_swz +tgsi_build_src_register_ext_swz( + unsigned ext_swizzle_x, + unsigned ext_swizzle_y, + unsigned ext_swizzle_z, + unsigned ext_swizzle_w, + unsigned negate_x, + unsigned negate_y, + unsigned negate_z, + unsigned negate_w, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_src_register_ext_swz src_register_ext_swz; + + assert( ext_swizzle_x <= TGSI_EXTSWIZZLE_ONE ); + assert( ext_swizzle_y <= TGSI_EXTSWIZZLE_ONE ); + assert( ext_swizzle_z <= TGSI_EXTSWIZZLE_ONE ); + assert( ext_swizzle_w <= TGSI_EXTSWIZZLE_ONE ); + assert( negate_x <= 1 ); + assert( negate_y <= 1 ); + assert( negate_z <= 1 ); + assert( negate_w <= 1 ); + + src_register_ext_swz = tgsi_default_src_register_ext_swz(); + src_register_ext_swz.ExtSwizzleX = ext_swizzle_x; + src_register_ext_swz.ExtSwizzleY = ext_swizzle_y; + src_register_ext_swz.ExtSwizzleZ = ext_swizzle_z; + src_register_ext_swz.ExtSwizzleW = ext_swizzle_w; + src_register_ext_swz.NegateX = negate_x; + src_register_ext_swz.NegateY = negate_y; + src_register_ext_swz.NegateZ = negate_z; + src_register_ext_swz.NegateW = negate_w; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return src_register_ext_swz; +} + +struct tgsi_src_register_ext_mod +tgsi_default_src_register_ext_mod( void ) +{ + struct tgsi_src_register_ext_mod src_register_ext_mod; + + src_register_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD; + src_register_ext_mod.Complement = 0; + 
src_register_ext_mod.Bias = 0; + src_register_ext_mod.Scale2X = 0; + src_register_ext_mod.Absolute = 0; + src_register_ext_mod.Negate = 0; + src_register_ext_mod.Padding = 0; + src_register_ext_mod.Extended = 0; + + return src_register_ext_mod; +} + +unsigned +tgsi_compare_src_register_ext_mod( + struct tgsi_src_register_ext_mod a, + struct tgsi_src_register_ext_mod b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return compare32(&a, &b); +} + +struct tgsi_src_register_ext_mod +tgsi_build_src_register_ext_mod( + unsigned complement, + unsigned bias, + unsigned scale_2x, + unsigned absolute, + unsigned negate, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_src_register_ext_mod src_register_ext_mod; + + assert( complement <= 1 ); + assert( bias <= 1 ); + assert( scale_2x <= 1 ); + assert( absolute <= 1 ); + assert( negate <= 1 ); + + src_register_ext_mod = tgsi_default_src_register_ext_mod(); + src_register_ext_mod.Complement = complement; + src_register_ext_mod.Bias = bias; + src_register_ext_mod.Scale2X = scale_2x; + src_register_ext_mod.Absolute = absolute; + src_register_ext_mod.Negate = negate; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return src_register_ext_mod; +} + +struct tgsi_dimension +tgsi_default_dimension( void ) +{ + struct tgsi_dimension dimension; + + dimension.Indirect = 0; + dimension.Dimension = 0; + dimension.Padding = 0; + dimension.Index = 0; + dimension.Extended = 0; + + return dimension; +} + +struct tgsi_dimension +tgsi_build_dimension( + unsigned indirect, + unsigned index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_dimension dimension; + + dimension = tgsi_default_dimension(); + dimension.Indirect = indirect; + dimension.Index = index; + + instruction_grow( instruction, header ); + + return dimension; +} + +struct tgsi_dst_register +tgsi_default_dst_register( void ) 
+{ + struct tgsi_dst_register dst_register; + + dst_register.File = TGSI_FILE_NULL; + dst_register.WriteMask = TGSI_WRITEMASK_XYZW; + dst_register.Indirect = 0; + dst_register.Dimension = 0; + dst_register.Index = 0; + dst_register.Padding = 0; + dst_register.Extended = 0; + + return dst_register; +} + +struct tgsi_dst_register +tgsi_build_dst_register( + unsigned file, + unsigned mask, + int index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_dst_register dst_register; + + assert( file <= TGSI_FILE_IMMEDIATE ); + assert( mask <= TGSI_WRITEMASK_XYZW ); + assert( index >= -32768 && index <= 32767 ); + + dst_register = tgsi_default_dst_register(); + dst_register.File = file; + dst_register.WriteMask = mask; + dst_register.Index = index; + + instruction_grow( instruction, header ); + + return dst_register; +} + +struct tgsi_full_dst_register +tgsi_default_full_dst_register( void ) +{ + struct tgsi_full_dst_register full_dst_register; + + full_dst_register.DstRegister = tgsi_default_dst_register(); + full_dst_register.DstRegisterExtConcode = + tgsi_default_dst_register_ext_concode(); + full_dst_register.DstRegisterExtModulate = + tgsi_default_dst_register_ext_modulate(); + + return full_dst_register; +} + +struct tgsi_dst_register_ext_concode +tgsi_default_dst_register_ext_concode( void ) +{ + struct tgsi_dst_register_ext_concode dst_register_ext_concode; + + dst_register_ext_concode.Type = TGSI_DST_REGISTER_EXT_TYPE_CONDCODE; + dst_register_ext_concode.CondMask = TGSI_CC_TR; + dst_register_ext_concode.CondSwizzleX = TGSI_SWIZZLE_X; + dst_register_ext_concode.CondSwizzleY = TGSI_SWIZZLE_Y; + dst_register_ext_concode.CondSwizzleZ = TGSI_SWIZZLE_Z; + dst_register_ext_concode.CondSwizzleW = TGSI_SWIZZLE_W; + dst_register_ext_concode.CondSrcIndex = 0; + dst_register_ext_concode.Padding = 0; + dst_register_ext_concode.Extended = 0; + + return dst_register_ext_concode; +} + +unsigned +tgsi_compare_dst_register_ext_concode( + struct 
tgsi_dst_register_ext_concode a, + struct tgsi_dst_register_ext_concode b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return compare32(&a, &b); +} + +struct tgsi_dst_register_ext_concode +tgsi_build_dst_register_ext_concode( + unsigned cc, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w, + int index, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_dst_register_ext_concode dst_register_ext_concode; + + assert( cc <= TGSI_CC_FL ); + assert( swizzle_x <= TGSI_SWIZZLE_W ); + assert( swizzle_y <= TGSI_SWIZZLE_W ); + assert( swizzle_z <= TGSI_SWIZZLE_W ); + assert( swizzle_w <= TGSI_SWIZZLE_W ); + assert( index >= -32768 && index <= 32767 ); + + dst_register_ext_concode = tgsi_default_dst_register_ext_concode(); + dst_register_ext_concode.CondMask = cc; + dst_register_ext_concode.CondSwizzleX = swizzle_x; + dst_register_ext_concode.CondSwizzleY = swizzle_y; + dst_register_ext_concode.CondSwizzleZ = swizzle_z; + dst_register_ext_concode.CondSwizzleW = swizzle_w; + dst_register_ext_concode.CondSrcIndex = index; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return dst_register_ext_concode; +} + +struct tgsi_dst_register_ext_modulate +tgsi_default_dst_register_ext_modulate( void ) +{ + struct tgsi_dst_register_ext_modulate dst_register_ext_modulate; + + dst_register_ext_modulate.Type = TGSI_DST_REGISTER_EXT_TYPE_MODULATE; + dst_register_ext_modulate.Modulate = TGSI_MODULATE_1X; + dst_register_ext_modulate.Padding = 0; + dst_register_ext_modulate.Extended = 0; + + return dst_register_ext_modulate; +} + +unsigned +tgsi_compare_dst_register_ext_modulate( + struct tgsi_dst_register_ext_modulate a, + struct tgsi_dst_register_ext_modulate b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return compare32(&a, &b); +} + +struct tgsi_dst_register_ext_modulate +tgsi_build_dst_register_ext_modulate( + 
unsigned modulate, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_dst_register_ext_modulate dst_register_ext_modulate; + + assert( modulate <= TGSI_MODULATE_EIGHTH ); + + dst_register_ext_modulate = tgsi_default_dst_register_ext_modulate(); + dst_register_ext_modulate.Modulate = modulate; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return dst_register_ext_modulate; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h new file mode 100644 index 0000000000..7d6234746a --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -0,0 +1,336 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_BUILD_H +#define TGSI_BUILD_H + + +struct tgsi_token; + + +#if defined __cplusplus +extern "C" { +#endif + +/* + * version + */ + +struct tgsi_version +tgsi_build_version( void ); + +/* + * header + */ + +struct tgsi_header +tgsi_build_header( void ); + +struct tgsi_processor +tgsi_default_processor( void ); + +struct tgsi_processor +tgsi_build_processor( + unsigned processor, + struct tgsi_header *header ); + +/* + * declaration + */ + +struct tgsi_declaration +tgsi_default_declaration( void ); + +struct tgsi_declaration +tgsi_build_declaration( + unsigned file, + unsigned usage_mask, + unsigned interpolate, + unsigned semantic, + struct tgsi_header *header ); + +struct tgsi_full_declaration +tgsi_default_full_declaration( void ); + +unsigned +tgsi_build_full_declaration( + const struct tgsi_full_declaration *full_decl, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ); + +struct tgsi_declaration_range +tgsi_default_declaration_range( void ); + +struct tgsi_declaration_range +tgsi_build_declaration_range( + unsigned first, + unsigned last, + struct tgsi_declaration *declaration, + struct tgsi_header *header ); + +struct tgsi_declaration_semantic +tgsi_default_declaration_semantic( void ); + +struct tgsi_declaration_semantic +tgsi_build_declaration_semantic( + unsigned semantic_name, + unsigned semantic_index, + struct tgsi_declaration *declaration, + struct tgsi_header *header ); + +/* + * immediate + */ + +struct tgsi_immediate +tgsi_default_immediate( void ); + +struct tgsi_immediate +tgsi_build_immediate( + struct tgsi_header *header ); + +struct 
tgsi_full_immediate +tgsi_default_full_immediate( void ); + +struct tgsi_immediate_float32 +tgsi_build_immediate_float32( + float value, + struct tgsi_immediate *immediate, + struct tgsi_header *header ); + +unsigned +tgsi_build_full_immediate( + const struct tgsi_full_immediate *full_imm, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ); + +/* + * instruction + */ + +struct tgsi_instruction +tgsi_default_instruction( void ); + +struct tgsi_instruction +tgsi_build_instruction( + unsigned opcode, + unsigned saturate, + unsigned num_dst_regs, + unsigned num_src_regs, + struct tgsi_header *header ); + +struct tgsi_full_instruction +tgsi_default_full_instruction( void ); + +unsigned +tgsi_build_full_instruction( + const struct tgsi_full_instruction *full_inst, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ); + +struct tgsi_instruction_ext_nv +tgsi_default_instruction_ext_nv( void ); + +unsigned +tgsi_compare_instruction_ext_nv( + struct tgsi_instruction_ext_nv a, + struct tgsi_instruction_ext_nv b ); + +struct tgsi_instruction_ext_nv +tgsi_build_instruction_ext_nv( + unsigned precision, + unsigned cond_dst_index, + unsigned cond_flow_index, + unsigned cond_mask, + unsigned cond_swizzle_x, + unsigned cond_swizzle_y, + unsigned cond_swizzle_z, + unsigned cond_swizzle_w, + unsigned cond_dst_update, + unsigned cond_flow_enable, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_instruction_ext_label +tgsi_default_instruction_ext_label( void ); + +unsigned +tgsi_compare_instruction_ext_label( + struct tgsi_instruction_ext_label a, + struct tgsi_instruction_ext_label b ); + +struct tgsi_instruction_ext_label +tgsi_build_instruction_ext_label( + unsigned label, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_instruction_ext_texture +tgsi_default_instruction_ext_texture( void 
); + +unsigned +tgsi_compare_instruction_ext_texture( + struct tgsi_instruction_ext_texture a, + struct tgsi_instruction_ext_texture b ); + +struct tgsi_instruction_ext_texture +tgsi_build_instruction_ext_texture( + unsigned texture, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_src_register +tgsi_default_src_register( void ); + +struct tgsi_src_register +tgsi_build_src_register( + unsigned file, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w, + unsigned negate, + unsigned indirect, + unsigned dimension, + int index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_full_src_register +tgsi_default_full_src_register( void ); + +struct tgsi_src_register_ext_swz +tgsi_default_src_register_ext_swz( void ); + +unsigned +tgsi_compare_src_register_ext_swz( + struct tgsi_src_register_ext_swz a, + struct tgsi_src_register_ext_swz b ); + +struct tgsi_src_register_ext_swz +tgsi_build_src_register_ext_swz( + unsigned ext_swizzle_x, + unsigned ext_swizzle_y, + unsigned ext_swizzle_z, + unsigned ext_swizzle_w, + unsigned negate_x, + unsigned negate_y, + unsigned negate_z, + unsigned negate_w, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_src_register_ext_mod +tgsi_default_src_register_ext_mod( void ); + +unsigned +tgsi_compare_src_register_ext_mod( + struct tgsi_src_register_ext_mod a, + struct tgsi_src_register_ext_mod b ); + +struct tgsi_src_register_ext_mod +tgsi_build_src_register_ext_mod( + unsigned complement, + unsigned bias, + unsigned scale_2x, + unsigned absolute, + unsigned negate, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_dimension +tgsi_default_dimension( void ); + +struct tgsi_dimension +tgsi_build_dimension( + unsigned indirect, + unsigned index, + struct 
tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_dst_register +tgsi_default_dst_register( void ); + +struct tgsi_dst_register +tgsi_build_dst_register( + unsigned file, + unsigned mask, + int index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_full_dst_register +tgsi_default_full_dst_register( void ); + +struct tgsi_dst_register_ext_concode +tgsi_default_dst_register_ext_concode( void ); + +unsigned +tgsi_compare_dst_register_ext_concode( + struct tgsi_dst_register_ext_concode a, + struct tgsi_dst_register_ext_concode b ); + +struct tgsi_dst_register_ext_concode +tgsi_build_dst_register_ext_concode( + unsigned cc, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w, + int index, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_dst_register_ext_modulate +tgsi_default_dst_register_ext_modulate( void ); + +unsigned +tgsi_compare_dst_register_ext_modulate( + struct tgsi_dst_register_ext_modulate a, + struct tgsi_dst_register_ext_modulate b ); + +struct tgsi_dst_register_ext_modulate +tgsi_build_dst_register_ext_modulate( + unsigned modulate, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_BUILD_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c new file mode 100644 index 0000000000..3177f54952 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -0,0 +1,547 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "util/u_string.h" +#include "tgsi_dump.h" +#include "tgsi_info.h" +#include "tgsi_iterate.h" + +struct dump_ctx +{ + struct tgsi_iterate_context iter; + + uint instno; + + void (*printf)(struct dump_ctx *ctx, const char *format, ...); +}; + +static void +dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...) 
+{ + va_list ap; + (void)ctx; + va_start(ap, format); + debug_vprintf(format, ap); + va_end(ap); +} + +static void +dump_enum( + struct dump_ctx *ctx, + uint e, + const char **enums, + uint enum_count ) +{ + if (e >= enum_count) + ctx->printf( ctx, "%u", e ); + else + ctx->printf( ctx, "%s", enums[e] ); +} + +#define EOL() ctx->printf( ctx, "\n" ) +#define TXT(S) ctx->printf( ctx, "%s", S ) +#define CHR(C) ctx->printf( ctx, "%c", C ) +#define UIX(I) ctx->printf( ctx, "0x%x", I ) +#define UID(I) ctx->printf( ctx, "%u", I ) +#define INSTID(I) ctx->printf( ctx, "% 3u", I ) +#define SID(I) ctx->printf( ctx, "%d", I ) +#define FLT(F) ctx->printf( ctx, "%10.4f", F ) +#define ENM(E,ENUMS) dump_enum( ctx, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) + +static const char *processor_type_names[] = +{ + "FRAG", + "VERT", + "GEOM" +}; + +static const char *file_names[] = +{ + "NULL", + "CONST", + "IN", + "OUT", + "TEMP", + "SAMP", + "ADDR", + "IMM" +}; + +static const char *interpolate_names[] = +{ + "CONSTANT", + "LINEAR", + "PERSPECTIVE" +}; + +static const char *semantic_names[] = +{ + "POSITION", + "COLOR", + "BCOLOR", + "FOG", + "PSIZE", + "GENERIC", + "NORMAL" +}; + +static const char *immediate_type_names[] = +{ + "FLT32" +}; + +static const char *swizzle_names[] = +{ + "x", + "y", + "z", + "w" +}; + +static const char *texture_names[] = +{ + "UNKNOWN", + "1D", + "2D", + "3D", + "CUBE", + "RECT", + "SHADOW1D", + "SHADOW2D", + "SHADOWRECT" +}; + +static const char *extswizzle_names[] = +{ + "x", + "y", + "z", + "w", + "0", + "1" +}; + +static const char *modulate_names[TGSI_MODULATE_COUNT] = +{ + "", + "_2X", + "_4X", + "_8X", + "_D2", + "_D4", + "_D8" +}; + +static void +_dump_register( + struct dump_ctx *ctx, + uint file, + int first, + int last ) +{ + ENM( file, file_names ); + CHR( '[' ); + SID( first ); + if (first != last) { + TXT( ".." 
); + SID( last ); + } + CHR( ']' ); +} + +static void +_dump_register_ind( + struct dump_ctx *ctx, + uint file, + int index, + uint ind_file, + int ind_index ) +{ + ENM( file, file_names ); + CHR( '[' ); + ENM( ind_file, file_names ); + CHR( '[' ); + SID( ind_index ); + CHR( ']' ); + if (index != 0) { + if (index > 0) + CHR( '+' ); + SID( index ); + } + CHR( ']' ); +} + +static void +_dump_writemask( + struct dump_ctx *ctx, + uint writemask ) +{ + if (writemask != TGSI_WRITEMASK_XYZW) { + CHR( '.' ); + if (writemask & TGSI_WRITEMASK_X) + CHR( 'x' ); + if (writemask & TGSI_WRITEMASK_Y) + CHR( 'y' ); + if (writemask & TGSI_WRITEMASK_Z) + CHR( 'z' ); + if (writemask & TGSI_WRITEMASK_W) + CHR( 'w' ); + } +} + +static boolean +iter_declaration( + struct tgsi_iterate_context *iter, + struct tgsi_full_declaration *decl ) +{ + struct dump_ctx *ctx = (struct dump_ctx *)iter; + + TXT( "DCL " ); + + _dump_register( + ctx, + decl->Declaration.File, + decl->DeclarationRange.First, + decl->DeclarationRange.Last ); + _dump_writemask( + ctx, + decl->Declaration.UsageMask ); + + if (decl->Declaration.Semantic) { + TXT( ", " ); + ENM( decl->Semantic.SemanticName, semantic_names ); + if (decl->Semantic.SemanticIndex != 0 || + decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC) { + CHR( '[' ); + UID( decl->Semantic.SemanticIndex ); + CHR( ']' ); + } + } + + TXT( ", " ); + ENM( decl->Declaration.Interpolate, interpolate_names ); + + EOL(); + + return TRUE; +} + +void +tgsi_dump_declaration( + const struct tgsi_full_declaration *decl ) +{ + struct dump_ctx ctx; + + ctx.printf = dump_ctx_printf; + + iter_declaration( &ctx.iter, (struct tgsi_full_declaration *)decl ); +} + +static boolean +iter_immediate( + struct tgsi_iterate_context *iter, + struct tgsi_full_immediate *imm ) +{ + struct dump_ctx *ctx = (struct dump_ctx *) iter; + + uint i; + + TXT( "IMM " ); + ENM( imm->Immediate.DataType, immediate_type_names ); + + TXT( " { " ); + for (i = 0; i < imm->Immediate.Size - 1; i++) { + 
switch (imm->Immediate.DataType) { + case TGSI_IMM_FLOAT32: + FLT( imm->u.ImmediateFloat32[i].Float ); + break; + default: + assert( 0 ); + } + + if (i < imm->Immediate.Size - 2) + TXT( ", " ); + } + TXT( " }" ); + + EOL(); + + return TRUE; +} + +void +tgsi_dump_immediate( + const struct tgsi_full_immediate *imm ) +{ + struct dump_ctx ctx; + + ctx.printf = dump_ctx_printf; + + iter_immediate( &ctx.iter, (struct tgsi_full_immediate *)imm ); +} + +static boolean +iter_instruction( + struct tgsi_iterate_context *iter, + struct tgsi_full_instruction *inst ) +{ + struct dump_ctx *ctx = (struct dump_ctx *) iter; + uint instno = ctx->instno++; + + uint i; + boolean first_reg = TRUE; + + INSTID( instno ); + TXT( ": " ); + TXT( tgsi_get_opcode_info( inst->Instruction.Opcode )->mnemonic ); + + switch (inst->Instruction.Saturate) { + case TGSI_SAT_NONE: + break; + case TGSI_SAT_ZERO_ONE: + TXT( "_SAT" ); + break; + case TGSI_SAT_MINUS_PLUS_ONE: + TXT( "_SATNV" ); + break; + default: + assert( 0 ); + } + + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + + if (!first_reg) + CHR( ',' ); + CHR( ' ' ); + + _dump_register( + ctx, + dst->DstRegister.File, + dst->DstRegister.Index, + dst->DstRegister.Index ); + ENM( dst->DstRegisterExtModulate.Modulate, modulate_names ); + _dump_writemask( ctx, dst->DstRegister.WriteMask ); + + first_reg = FALSE; + } + + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + + if (!first_reg) + CHR( ',' ); + CHR( ' ' ); + + if (src->SrcRegisterExtMod.Negate) + TXT( "-(" ); + if (src->SrcRegisterExtMod.Absolute) + CHR( '|' ); + if (src->SrcRegisterExtMod.Scale2X) + TXT( "2*(" ); + if (src->SrcRegisterExtMod.Bias) + CHR( '(' ); + if (src->SrcRegisterExtMod.Complement) + TXT( "1-(" ); + if (src->SrcRegister.Negate) + CHR( '-' ); + + if (src->SrcRegister.Indirect) { + _dump_register_ind( + ctx, + 
src->SrcRegister.File, + src->SrcRegister.Index, + src->SrcRegisterInd.File, + src->SrcRegisterInd.Index ); + } + else { + _dump_register( + ctx, + src->SrcRegister.File, + src->SrcRegister.Index, + src->SrcRegister.Index ); + } + + if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || + src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || + src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || + src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W) { + CHR( '.' ); + ENM( src->SrcRegister.SwizzleX, swizzle_names ); + ENM( src->SrcRegister.SwizzleY, swizzle_names ); + ENM( src->SrcRegister.SwizzleZ, swizzle_names ); + ENM( src->SrcRegister.SwizzleW, swizzle_names ); + } + if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || + src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || + src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || + src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) { + CHR( '.' ); + if (src->SrcRegisterExtSwz.NegateX) + TXT("-"); + ENM( src->SrcRegisterExtSwz.ExtSwizzleX, extswizzle_names ); + if (src->SrcRegisterExtSwz.NegateY) + TXT("-"); + ENM( src->SrcRegisterExtSwz.ExtSwizzleY, extswizzle_names ); + if (src->SrcRegisterExtSwz.NegateZ) + TXT("-"); + ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, extswizzle_names ); + if (src->SrcRegisterExtSwz.NegateW) + TXT("-"); + ENM( src->SrcRegisterExtSwz.ExtSwizzleW, extswizzle_names ); + } + + if (src->SrcRegisterExtMod.Complement) + CHR( ')' ); + if (src->SrcRegisterExtMod.Bias) + TXT( ")-.5" ); + if (src->SrcRegisterExtMod.Scale2X) + CHR( ')' ); + if (src->SrcRegisterExtMod.Absolute) + CHR( '|' ); + if (src->SrcRegisterExtMod.Negate) + CHR( ')' ); + + first_reg = FALSE; + } + + if (inst->InstructionExtTexture.Texture != TGSI_TEXTURE_UNKNOWN) { + TXT( ", " ); + ENM( inst->InstructionExtTexture.Texture, texture_names ); + } + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_IF: + case TGSI_OPCODE_ELSE: + case TGSI_OPCODE_BGNLOOP2: + case TGSI_OPCODE_ENDLOOP2: + case TGSI_OPCODE_CAL: + TXT( " :" ); 
+ UID( inst->InstructionExtLabel.Label ); + break; + } + + EOL(); + + return TRUE; +} + +void +tgsi_dump_instruction( + const struct tgsi_full_instruction *inst, + uint instno ) +{ + struct dump_ctx ctx; + + ctx.instno = instno; + ctx.printf = dump_ctx_printf; + + iter_instruction( &ctx.iter, (struct tgsi_full_instruction *)inst ); +} + +static boolean +prolog( + struct tgsi_iterate_context *iter ) +{ + struct dump_ctx *ctx = (struct dump_ctx *) iter; + ENM( iter->processor.Processor, processor_type_names ); + UID( iter->version.MajorVersion ); + CHR( '.' ); + UID( iter->version.MinorVersion ); + EOL(); + return TRUE; +} + +void +tgsi_dump( + const struct tgsi_token *tokens, + uint flags ) +{ + struct dump_ctx ctx; + + ctx.iter.prolog = prolog; + ctx.iter.iterate_instruction = iter_instruction; + ctx.iter.iterate_declaration = iter_declaration; + ctx.iter.iterate_immediate = iter_immediate; + ctx.iter.epilog = NULL; + + ctx.instno = 0; + ctx.printf = dump_ctx_printf; + + tgsi_iterate_shader( tokens, &ctx.iter ); +} + +struct str_dump_ctx +{ + struct dump_ctx base; + char *str; + char *ptr; + size_t left; +}; + +static void +str_dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...) 
+{ + struct str_dump_ctx *sctx = (struct str_dump_ctx *)ctx; + + if(sctx->left > 1) { + size_t written; + va_list ap; + va_start(ap, format); + written = util_vsnprintf(sctx->ptr, sctx->left, format, ap); + va_end(ap); + sctx->ptr += written; + sctx->left -= written; + } +} + +void +tgsi_dump_str( + const struct tgsi_token *tokens, + uint flags, + char *str, + size_t size) +{ + struct str_dump_ctx ctx; + + ctx.base.iter.prolog = prolog; + ctx.base.iter.iterate_instruction = iter_instruction; + ctx.base.iter.iterate_declaration = iter_declaration; + ctx.base.iter.iterate_immediate = iter_immediate; + ctx.base.iter.epilog = NULL; + + ctx.base.instno = 0; + ctx.base.printf = &str_dump_ctx_printf; + + ctx.str = str; + ctx.str[0] = 0; + ctx.ptr = str; + ctx.left = size; + + tgsi_iterate_shader( tokens, &ctx.base.iter ); +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h new file mode 100644 index 0000000000..ad1e647ec9 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h @@ -0,0 +1,70 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_DUMP_H +#define TGSI_DUMP_H + +#include "pipe/p_shader_tokens.h" + +#if defined __cplusplus +extern "C" { +#endif + +void +tgsi_dump_str( + const struct tgsi_token *tokens, + uint flags, + char *str, + size_t size); + +void +tgsi_dump( + const struct tgsi_token *tokens, + uint flags ); + +struct tgsi_full_immediate; +struct tgsi_full_instruction; +struct tgsi_full_declaration; + +void +tgsi_dump_immediate( + const struct tgsi_full_immediate *imm ); + +void +tgsi_dump_instruction( + const struct tgsi_full_instruction *inst, + uint instno ); + +void +tgsi_dump_declaration( + const struct tgsi_full_declaration *decl ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_DUMP_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c new file mode 100644 index 0000000000..be25cb45a0 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -0,0 +1,719 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "util/u_string.h" +#include "tgsi_dump_c.h" +#include "tgsi_build.h" +#include "tgsi_info.h" +#include "tgsi_parse.h" + +static void +dump_enum( + const unsigned e, + const char **enums, + const unsigned enums_count ) +{ + if (e >= enums_count) { + debug_printf( "%u", e ); + } + else { + debug_printf( "%s", enums[e] ); + } +} + +#define EOL() debug_printf( "\n" ) +#define TXT(S) debug_printf( "%s", S ) +#define CHR(C) debug_printf( "%c", C ) +#define UIX(I) debug_printf( "0x%x", I ) +#define UID(I) debug_printf( "%u", I ) +#define SID(I) debug_printf( "%d", I ) +#define FLT(F) debug_printf( "%10.4f", F ) +#define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) + +static const char *TGSI_PROCESSOR_TYPES[] = +{ + "PROCESSOR_FRAGMENT", + "PROCESSOR_VERTEX", + "PROCESSOR_GEOMETRY" +}; + +static const char *TGSI_TOKEN_TYPES[] = +{ + "TOKEN_TYPE_DECLARATION", + "TOKEN_TYPE_IMMEDIATE", + "TOKEN_TYPE_INSTRUCTION" +}; + +static const char *TGSI_FILES[] = +{ + "FILE_NULL", + "FILE_CONSTANT", + "FILE_INPUT", + "FILE_OUTPUT", + "FILE_TEMPORARY", + "FILE_SAMPLER", + "FILE_ADDRESS", + "FILE_IMMEDIATE" +}; + +static const char *TGSI_INTERPOLATES[] = +{ + "INTERPOLATE_CONSTANT", + "INTERPOLATE_LINEAR", + "INTERPOLATE_PERSPECTIVE" +}; + +static const char *TGSI_SEMANTICS[] = +{ + "SEMANTIC_POSITION", + "SEMANTIC_COLOR", + "SEMANTIC_BCOLOR", + "SEMANTIC_FOG", + "SEMANTIC_PSIZE", + "SEMANTIC_GENERIC", + "SEMANTIC_NORMAL" +}; + +static const char *TGSI_IMMS[] = +{ + "IMM_FLOAT32" +}; + +static const char *TGSI_SATS[] = +{ + "SAT_NONE", + "SAT_ZERO_ONE", + "SAT_MINUS_PLUS_ONE" +}; + +static const char *TGSI_INSTRUCTION_EXTS[] = +{ + "INSTRUCTION_EXT_TYPE_NV", + "INSTRUCTION_EXT_TYPE_LABEL", + "INSTRUCTION_EXT_TYPE_TEXTURE" +}; + +static const char *TGSI_PRECISIONS[] = +{ + "PRECISION_DEFAULT", + "PRECISION_FLOAT32", + "PRECISION_FLOAT16", + 
"PRECISION_FIXED12" +}; + +static const char *TGSI_CCS[] = +{ + "CC_GT", + "CC_EQ", + "CC_LT", + "CC_UN", + "CC_GE", + "CC_LE", + "CC_NE", + "CC_TR", + "CC_FL" +}; + +static const char *TGSI_SWIZZLES[] = +{ + "SWIZZLE_X", + "SWIZZLE_Y", + "SWIZZLE_Z", + "SWIZZLE_W" +}; + +static const char *TGSI_TEXTURES[] = +{ + "TEXTURE_UNKNOWN", + "TEXTURE_1D", + "TEXTURE_2D", + "TEXTURE_3D", + "TEXTURE_CUBE", + "TEXTURE_RECT", + "TEXTURE_SHADOW1D", + "TEXTURE_SHADOW2D", + "TEXTURE_SHADOWRECT" +}; + +static const char *TGSI_SRC_REGISTER_EXTS[] = +{ + "SRC_REGISTER_EXT_TYPE_SWZ", + "SRC_REGISTER_EXT_TYPE_MOD" +}; + +static const char *TGSI_EXTSWIZZLES[] = +{ + "EXTSWIZZLE_X", + "EXTSWIZZLE_Y", + "EXTSWIZZLE_Z", + "EXTSWIZZLE_W", + "EXTSWIZZLE_ZERO", + "EXTSWIZZLE_ONE" +}; + +static const char *TGSI_WRITEMASKS[] = +{ + "0", + "WRITEMASK_X", + "WRITEMASK_Y", + "WRITEMASK_XY", + "WRITEMASK_Z", + "WRITEMASK_XZ", + "WRITEMASK_YZ", + "WRITEMASK_XYZ", + "WRITEMASK_W", + "WRITEMASK_XW", + "WRITEMASK_YW", + "WRITEMASK_XYW", + "WRITEMASK_ZW", + "WRITEMASK_XZW", + "WRITEMASK_YZW", + "WRITEMASK_XYZW" +}; + +static const char *TGSI_DST_REGISTER_EXTS[] = +{ + "DST_REGISTER_EXT_TYPE_CONDCODE", + "DST_REGISTER_EXT_TYPE_MODULATE" +}; + +static const char *TGSI_MODULATES[] = +{ + "MODULATE_1X", + "MODULATE_2X", + "MODULATE_4X", + "MODULATE_8X", + "MODULATE_HALF", + "MODULATE_QUARTER", + "MODULATE_EIGHTH" +}; + +static void +dump_declaration_verbose( + struct tgsi_full_declaration *decl, + unsigned ignored, + unsigned deflt, + struct tgsi_full_declaration *fd ) +{ + TXT( "\nFile : " ); + ENM( decl->Declaration.File, TGSI_FILES ); + if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) { + TXT( "\nUsageMask : " ); + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) { + CHR( 'X' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) { + CHR( 'Y' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) { + CHR( 'Z' ); + } + if( decl->Declaration.UsageMask & 
TGSI_WRITEMASK_W ) { + CHR( 'W' ); + } + } + if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) { + TXT( "\nInterpolate: " ); + ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES ); + } + if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) { + TXT( "\nSemantic : " ); + UID( decl->Declaration.Semantic ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( decl->Declaration.Padding ); + } + + EOL(); + TXT( "\nFirst: " ); + UID( decl->DeclarationRange.First ); + TXT( "\nLast : " ); + UID( decl->DeclarationRange.Last ); + + if( decl->Declaration.Semantic ) { + EOL(); + TXT( "\nSemanticName : " ); + ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS ); + TXT( "\nSemanticIndex: " ); + UID( decl->Semantic.SemanticIndex ); + if( ignored ) { + TXT( "\nPadding : " ); + UIX( decl->Semantic.Padding ); + } + } +} + +static void +dump_immediate_verbose( + struct tgsi_full_immediate *imm, + unsigned ignored ) +{ + unsigned i; + + TXT( "\nDataType : " ); + ENM( imm->Immediate.DataType, TGSI_IMMS ); + if( ignored ) { + TXT( "\nPadding : " ); + UIX( imm->Immediate.Padding ); + } + + for( i = 0; i < imm->Immediate.Size - 1; i++ ) { + EOL(); + switch( imm->Immediate.DataType ) { + case TGSI_IMM_FLOAT32: + TXT( "\nFloat: " ); + FLT( imm->u.ImmediateFloat32[i].Float ); + break; + + default: + assert( 0 ); + } + } +} + +static void +dump_instruction_verbose( + struct tgsi_full_instruction *inst, + unsigned ignored, + unsigned deflt, + struct tgsi_full_instruction *fi ) +{ + unsigned i; + + TXT( "\nOpcode : OPCODE_" ); + TXT( tgsi_get_opcode_info( inst->Instruction.Opcode )->mnemonic ); + if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) { + TXT( "\nSaturate : " ); + ENM( inst->Instruction.Saturate, TGSI_SATS ); + } + if( deflt || fi->Instruction.NumDstRegs != inst->Instruction.NumDstRegs ) { + TXT( "\nNumDstRegs : " ); + UID( inst->Instruction.NumDstRegs ); + } + if( deflt || fi->Instruction.NumSrcRegs != 
inst->Instruction.NumSrcRegs ) { + TXT( "\nNumSrcRegs : " ); + UID( inst->Instruction.NumSrcRegs ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->Instruction.Padding ); + } + + if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) { + EOL(); + TXT( "\nType : " ); + ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS ); + if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) { + TXT( "\nPrecision : " ); + ENM( inst->InstructionExtNv.Precision, TGSI_PRECISIONS ); + } + if( deflt || fi->InstructionExtNv.CondDstIndex != inst->InstructionExtNv.CondDstIndex ) { + TXT( "\nCondDstIndex : " ); + UID( inst->InstructionExtNv.CondDstIndex ); + } + if( deflt || fi->InstructionExtNv.CondFlowIndex != inst->InstructionExtNv.CondFlowIndex ) { + TXT( "\nCondFlowIndex : " ); + UID( inst->InstructionExtNv.CondFlowIndex ); + } + if( deflt || fi->InstructionExtNv.CondMask != inst->InstructionExtNv.CondMask ) { + TXT( "\nCondMask : " ); + ENM( inst->InstructionExtNv.CondMask, TGSI_CCS ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleX != inst->InstructionExtNv.CondSwizzleX ) { + TXT( "\nCondSwizzleX : " ); + ENM( inst->InstructionExtNv.CondSwizzleX, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleY != inst->InstructionExtNv.CondSwizzleY ) { + TXT( "\nCondSwizzleY : " ); + ENM( inst->InstructionExtNv.CondSwizzleY, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleZ != inst->InstructionExtNv.CondSwizzleZ ) { + TXT( "\nCondSwizzleZ : " ); + ENM( inst->InstructionExtNv.CondSwizzleZ, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleW != inst->InstructionExtNv.CondSwizzleW ) { + TXT( "\nCondSwizzleW : " ); + ENM( inst->InstructionExtNv.CondSwizzleW, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondDstUpdate != inst->InstructionExtNv.CondDstUpdate ) { + TXT( "\nCondDstUpdate : " ); + UID( inst->InstructionExtNv.CondDstUpdate ); + } + 
if( deflt || fi->InstructionExtNv.CondFlowEnable != inst->InstructionExtNv.CondFlowEnable ) { + TXT( "\nCondFlowEnable: " ); + UID( inst->InstructionExtNv.CondFlowEnable ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->InstructionExtNv.Padding ); + if( deflt || fi->InstructionExtNv.Extended != inst->InstructionExtNv.Extended ) { + TXT( "\nExtended : " ); + UID( inst->InstructionExtNv.Extended ); + } + } + } + + if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) { + EOL(); + TXT( "\nType : " ); + ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS ); + if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) { + TXT( "\nLabel : " ); + UID( inst->InstructionExtLabel.Label ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->InstructionExtLabel.Padding ); + if( deflt || fi->InstructionExtLabel.Extended != inst->InstructionExtLabel.Extended ) { + TXT( "\nExtended: " ); + UID( inst->InstructionExtLabel.Extended ); + } + } + } + + if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) { + EOL(); + TXT( "\nType : " ); + ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS ); + if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) { + TXT( "\nTexture : " ); + ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->InstructionExtTexture.Padding ); + if( deflt || fi->InstructionExtTexture.Extended != inst->InstructionExtTexture.Extended ) { + TXT( "\nExtended: " ); + UID( inst->InstructionExtTexture.Extended ); + } + } + } + + for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i]; + + EOL(); + TXT( "\nFile : " ); + ENM( dst->DstRegister.File, TGSI_FILES ); + if( deflt || 
fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) { + TXT( "\nWriteMask: " ); + ENM( dst->DstRegister.WriteMask, TGSI_WRITEMASKS ); + } + if( ignored ) { + if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) { + TXT( "\nIndirect : " ); + UID( dst->DstRegister.Indirect ); + } + if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) { + TXT( "\nDimension: " ); + UID( dst->DstRegister.Dimension ); + } + } + if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) { + TXT( "\nIndex : " ); + SID( dst->DstRegister.Index ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( dst->DstRegister.Padding ); + if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) { + TXT( "\nExtended : " ); + UID( dst->DstRegister.Extended ); + } + } + + if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) { + EOL(); + TXT( "\nType : " ); + ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS ); + if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) { + TXT( "\nCondMask : " ); + ENM( dst->DstRegisterExtConcode.CondMask, TGSI_CCS ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleX != dst->DstRegisterExtConcode.CondSwizzleX ) { + TXT( "\nCondSwizzleX: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleX, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleY != dst->DstRegisterExtConcode.CondSwizzleY ) { + TXT( "\nCondSwizzleY: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleY, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleZ != dst->DstRegisterExtConcode.CondSwizzleZ ) { + TXT( "\nCondSwizzleZ: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleZ, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleW != dst->DstRegisterExtConcode.CondSwizzleW ) { + TXT( "\nCondSwizzleW: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleW, TGSI_SWIZZLES ); + } + if( deflt || 
fd->DstRegisterExtConcode.CondSrcIndex != dst->DstRegisterExtConcode.CondSrcIndex ) { + TXT( "\nCondSrcIndex: " ); + UID( dst->DstRegisterExtConcode.CondSrcIndex ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( dst->DstRegisterExtConcode.Padding ); + if( deflt || fd->DstRegisterExtConcode.Extended != dst->DstRegisterExtConcode.Extended ) { + TXT( "\nExtended : " ); + UID( dst->DstRegisterExtConcode.Extended ); + } + } + } + + if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) { + EOL(); + TXT( "\nType : " ); + ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS ); + if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) { + TXT( "\nModulate: " ); + ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( dst->DstRegisterExtModulate.Padding ); + if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) { + TXT( "\nExtended: " ); + UID( dst->DstRegisterExtModulate.Extended ); + } + } + } + } + + for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i]; + + EOL(); + TXT( "\nFile : "); + ENM( src->SrcRegister.File, TGSI_FILES ); + if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) { + TXT( "\nSwizzleX : " ); + ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) { + TXT( "\nSwizzleY : " ); + ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) { + TXT( "\nSwizzleZ : " ); + ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) { + TXT( "\nSwizzleW : " ); + ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES ); + } + if( 
deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) { + TXT( "\nNegate : " ); + UID( src->SrcRegister.Negate ); + } + if( ignored ) { + if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) { + TXT( "\nIndirect : " ); + UID( src->SrcRegister.Indirect ); + } + if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) { + TXT( "\nDimension: " ); + UID( src->SrcRegister.Dimension ); + } + } + if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) { + TXT( "\nIndex : " ); + SID( src->SrcRegister.Index ); + } + if( ignored ) { + if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) { + TXT( "\nExtended : " ); + UID( src->SrcRegister.Extended ); + } + } + + if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) { + EOL(); + TXT( "\nType : " ); + ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS ); + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) { + TXT( "\nExtSwizzleX: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleY != src->SrcRegisterExtSwz.ExtSwizzleY ) { + TXT( "\nExtSwizzleY: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleZ != src->SrcRegisterExtSwz.ExtSwizzleZ ) { + TXT( "\nExtSwizzleZ: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleW != src->SrcRegisterExtSwz.ExtSwizzleW ) { + TXT( "\nExtSwizzleW: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) { + TXT( "\nNegateX : " ); + UID( src->SrcRegisterExtSwz.NegateX ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateY != src->SrcRegisterExtSwz.NegateY ) { + TXT( "\nNegateY : " ); + UID( src->SrcRegisterExtSwz.NegateY ); + } + if( deflt || 
fs->SrcRegisterExtSwz.NegateZ != src->SrcRegisterExtSwz.NegateZ ) { + TXT( "\nNegateZ : " ); + UID( src->SrcRegisterExtSwz.NegateZ ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateW != src->SrcRegisterExtSwz.NegateW ) { + TXT( "\nNegateW : " ); + UID( src->SrcRegisterExtSwz.NegateW ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( src->SrcRegisterExtSwz.Padding ); + if( deflt || fs->SrcRegisterExtSwz.Extended != src->SrcRegisterExtSwz.Extended ) { + TXT( "\nExtended : " ); + UID( src->SrcRegisterExtSwz.Extended ); + } + } + } + + if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) { + EOL(); + TXT( "\nType : " ); + ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS ); + if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) { + TXT( "\nComplement: " ); + UID( src->SrcRegisterExtMod.Complement ); + } + if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) { + TXT( "\nBias : " ); + UID( src->SrcRegisterExtMod.Bias ); + } + if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) { + TXT( "\nScale2X : " ); + UID( src->SrcRegisterExtMod.Scale2X ); + } + if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) { + TXT( "\nAbsolute : " ); + UID( src->SrcRegisterExtMod.Absolute ); + } + if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) { + TXT( "\nNegate : " ); + UID( src->SrcRegisterExtMod.Negate ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( src->SrcRegisterExtMod.Padding ); + if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) { + TXT( "\nExtended : " ); + UID( src->SrcRegisterExtMod.Extended ); + } + } + } + } +} + +void +tgsi_dump_c( + const struct tgsi_token *tokens, + uint flags ) +{ + struct tgsi_parse_context parse; + struct tgsi_full_instruction fi; + struct tgsi_full_declaration fd; + uint ignored = flags & TGSI_DUMP_C_IGNORED; + uint deflt = 
flags & TGSI_DUMP_C_DEFAULT; + uint instno = 0; + + tgsi_parse_init( &parse, tokens ); + + TXT( "tgsi-dump begin -----------------" ); + + TXT( "\nMajorVersion: " ); + UID( parse.FullVersion.Version.MajorVersion ); + TXT( "\nMinorVersion: " ); + UID( parse.FullVersion.Version.MinorVersion ); + EOL(); + + TXT( "\nHeaderSize: " ); + UID( parse.FullHeader.Header.HeaderSize ); + TXT( "\nBodySize : " ); + UID( parse.FullHeader.Header.BodySize ); + TXT( "\nProcessor : " ); + ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES ); + EOL(); + + fi = tgsi_default_full_instruction(); + fd = tgsi_default_full_declaration(); + + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + + TXT( "\nType : " ); + ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES ); + if( ignored ) { + TXT( "\nSize : " ); + UID( parse.FullToken.Token.Size ); + if( deflt || parse.FullToken.Token.Extended ) { + TXT( "\nExtended : " ); + UID( parse.FullToken.Token.Extended ); + } + } + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + dump_declaration_verbose( + &parse.FullToken.FullDeclaration, + ignored, + deflt, + &fd ); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + dump_immediate_verbose( + &parse.FullToken.FullImmediate, + ignored ); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + dump_instruction_verbose( + &parse.FullToken.FullInstruction, + ignored, + deflt, + &fi ); + break; + + default: + assert( 0 ); + } + + EOL(); + } + + TXT( "\ntgsi-dump end -------------------\n" ); + + tgsi_parse_free( &parse ); +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.h b/src/gallium/auxiliary/tgsi/tgsi_dump_c.h new file mode 100644 index 0000000000..d91cd35b3b --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.h @@ -0,0 +1,49 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_DUMP_C_H +#define TGSI_DUMP_C_H + +#include "pipe/p_shader_tokens.h" + +#if defined __cplusplus +extern "C" { +#endif + +#define TGSI_DUMP_C_IGNORED 1 +#define TGSI_DUMP_C_DEFAULT 2 + +void +tgsi_dump_c( + const struct tgsi_token *tokens, + uint flags ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_DUMP_C_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c new file mode 100644 index 0000000000..1a5294eabc --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -0,0 +1,2768 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI interpreter/executor. + * + * Flow control information: + * + * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) + * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special + * care since a condition may be true for some quad components but false + * for other components. + * + * We basically execute all statements (even if they're in the part of + * an IF/ELSE clause that's "not taken") and use a special mask to + * control writing to destination registers. This is the ExecMask. + * See store_dest(). + * + * The ExecMask is computed from three other masks (CondMask, LoopMask and + * ContMask) which are controlled by the flow control instructions (namely: + * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 
+ * + * + * Authors: + * Michal Krol + * Brian Paul + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi_exec.h" +#include "util/u_memory.h" +#include "util/u_math.h" + +#define FAST_MATH 1 + +#define TILE_TOP_LEFT 0 +#define TILE_TOP_RIGHT 1 +#define TILE_BOTTOM_LEFT 2 +#define TILE_BOTTOM_RIGHT 3 + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + +/* + * Shorthand locations of various utility registers (_I = Index, _C = Channel) + */ +#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I +#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C +#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I +#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C +#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I +#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C +#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I +#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C +#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I +#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C +#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I +#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C +#define TEMP_128_I TGSI_EXEC_TEMP_128_I +#define TEMP_128_C TGSI_EXEC_TEMP_128_C +#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I +#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C +#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I +#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C +#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I +#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C +#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I +#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C +#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I +#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C +#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I +#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C +#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I +#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C +#define TEMP_R0 TGSI_EXEC_TEMP_R0 + +#define IS_CHANNEL_ENABLED(INST, CHAN)\ + ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define 
IS_CHANNEL_ENABLED2(INST, CHAN)\ + ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + +#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ + for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ + if (IS_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ + for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ + if (IS_CHANNEL_ENABLED2( INST, CHAN )) + + +/** The execution mask depends on the conditional mask and the loop mask */ +#define UPDATE_EXEC_MASK(MACH) \ + MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask + +/** + * Initialize machine state by expanding tokens to full instructions, + * allocating temporary storage, setting up constants, etc. + * After this, we can call tgsi_exec_machine_run() many times. + */ +void +tgsi_exec_machine_bind_shader( + struct tgsi_exec_machine *mach, + const struct tgsi_token *tokens, + uint numSamplers, + struct tgsi_sampler *samplers) +{ + uint k; + struct tgsi_parse_context parse; + struct tgsi_exec_labels *labels = &mach->Labels; + struct tgsi_full_instruction *instructions; + struct tgsi_full_declaration *declarations; + uint maxInstructions = 10, numInstructions = 0; + uint maxDeclarations = 10, numDeclarations = 0; + uint instno = 0; + +#if 0 + tgsi_dump(tokens, 0); +#endif + + util_init_math(); + + mach->Tokens = tokens; + mach->Samplers = samplers; + + k = tgsi_parse_init (&parse, mach->Tokens); + if (k != TGSI_PARSE_OK) { + debug_printf( "Problem parsing!\n" ); + return; + } + + mach->Processor = parse.FullHeader.Processor.Processor; + mach->ImmLimit = 0; + labels->count = 0; + + declarations = (struct tgsi_full_declaration *) + MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); + + if (!declarations) { + return; + } + + instructions = (struct tgsi_full_instruction *) + MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); + + if (!instructions) { + FREE( declarations ); + return; + } + + while( !tgsi_parse_end_of_tokens( &parse ) ) { 
+ uint pointer = parse.Position; + uint i; + + tgsi_parse_token( &parse ); + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* save expanded declaration */ + if (numDeclarations == maxDeclarations) { + declarations = REALLOC(declarations, + maxDeclarations + * sizeof(struct tgsi_full_declaration), + (maxDeclarations + 10) + * sizeof(struct tgsi_full_declaration)); + maxDeclarations += 10; + } + memcpy(declarations + numDeclarations, + &parse.FullToken.FullDeclaration, + sizeof(declarations[0])); + numDeclarations++; + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + assert( size % 4 == 0 ); + assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); + + for( i = 0; i < size; i++ ) { + mach->Imms[mach->ImmLimit + i / 4][i % 4] = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + } + mach->ImmLimit += size / 4; + } + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + assert( labels->count < MAX_LABELS ); + + labels->labels[labels->count][0] = instno; + labels->labels[labels->count][1] = pointer; + labels->count++; + + /* save expanded instruction */ + if (numInstructions == maxInstructions) { + instructions = REALLOC(instructions, + maxInstructions + * sizeof(struct tgsi_full_instruction), + (maxInstructions + 10) + * sizeof(struct tgsi_full_instruction)); + maxInstructions += 10; + } + memcpy(instructions + numInstructions, + &parse.FullToken.FullInstruction, + sizeof(instructions[0])); + numInstructions++; + break; + + default: + assert( 0 ); + } + } + tgsi_parse_free (&parse); + + if (mach->Declarations) { + FREE( mach->Declarations ); + } + mach->Declarations = declarations; + mach->NumDeclarations = numDeclarations; + + if (mach->Instructions) { + FREE( mach->Instructions ); + } + mach->Instructions = instructions; + mach->NumInstructions = numInstructions; +} + + +void +tgsi_exec_machine_init( + struct tgsi_exec_machine *mach ) +{ + uint i; + + 
mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps); + mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; + + /* Setup constants. */ + for( i = 0; i < 4; i++ ) { + mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; + mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; + mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; + mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; + mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; + mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; + mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; + mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; + mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; + mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; + } +} + + +void +tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach) +{ + if (mach->Instructions) { + FREE(mach->Instructions); + mach->Instructions = NULL; + mach->NumInstructions = 0; + } + if (mach->Declarations) { + FREE(mach->Declarations); + mach->Declarations = NULL; + mach->NumDeclarations = 0; + } +} + + +static void +micro_abs( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = fabsf( src->f[0] ); + dst->f[1] = fabsf( src->f[1] ); + dst->f[2] = fabsf( src->f[2] ); + dst->f[3] = fabsf( src->f[3] ); +} + +static void +micro_add( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] + src1->f[0]; + dst->f[1] = src0->f[1] + src1->f[1]; + dst->f[2] = src0->f[2] + src1->f[2]; + dst->f[3] = src0->f[3] + src1->f[3]; +} + +static void +micro_iadd( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] + src1->i[0]; + dst->i[1] = src0->i[1] + src1->i[1]; + dst->i[2] = src0->i[2] + src1->i[2]; + dst->i[3] = src0->i[3] + src1->i[3]; +} + +static void +micro_and( + union tgsi_exec_channel *dst, + const union 
tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] & src1->u[0]; + dst->u[1] = src0->u[1] & src1->u[1]; + dst->u[2] = src0->u[2] & src1->u[2]; + dst->u[3] = src0->u[3] & src1->u[3]; +} + +static void +micro_ceil( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = ceilf( src->f[0] ); + dst->f[1] = ceilf( src->f[1] ); + dst->f[2] = ceilf( src->f[2] ); + dst->f[3] = ceilf( src->f[3] ); +} + +static void +micro_cos( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = cosf( src->f[0] ); + dst->f[1] = cosf( src->f[1] ); + dst->f[2] = cosf( src->f[2] ); + dst->f[3] = cosf( src->f[3] ); +} + +static void +micro_ddx( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; +} + +static void +micro_ddy( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; +} + +static void +micro_div( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + if (src1->f[0] != 0) { + dst->f[0] = src0->f[0] / src1->f[0]; + } + if (src1->f[1] != 0) { + dst->f[1] = src0->f[1] / src1->f[1]; + } + if (src1->f[2] != 0) { + dst->f[2] = src0->f[2] / src1->f[2]; + } + if (src1->f[3] != 0) { + dst->f[3] = src0->f[3] / src1->f[3]; + } +} + +static void +micro_udiv( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] / src1->u[0]; + dst->u[1] = src0->u[1] / src1->u[1]; + dst->u[2] = src0->u[2] / src1->u[2]; + dst->u[3] = src0->u[3] / src1->u[3]; +} + +static void +micro_eq( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union 
tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; +} + +static void +micro_ieq( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; + dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; + dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; + dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; +} + +static void +micro_exp2( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ +#if FAST_MATH + dst->f[0] = util_fast_exp2( src->f[0] ); + dst->f[1] = util_fast_exp2( src->f[1] ); + dst->f[2] = util_fast_exp2( src->f[2] ); + dst->f[3] = util_fast_exp2( src->f[3] ); +#else + dst->f[0] = powf( 2.0f, src->f[0] ); + dst->f[1] = powf( 2.0f, src->f[1] ); + dst->f[2] = powf( 2.0f, src->f[2] ); + dst->f[3] = powf( 2.0f, src->f[3] ); +#endif +} + +static void +micro_f2it( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->i[0] = (int) src->f[0]; + dst->i[1] = (int) src->f[1]; + dst->i[2] = (int) src->f[2]; + dst->i[3] = (int) src->f[3]; +} + +static void +micro_f2ut( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->u[0] = (uint) src->f[0]; + dst->u[1] = (uint) src->f[1]; + dst->u[2] = (uint) src->f[2]; + dst->u[3] = (uint) src->f[3]; +} + +static void +micro_flr( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = floorf( src->f[0] ); + dst->f[1] = floorf( src->f[1] ); + dst->f[2] = floorf( src->f[2] ); + dst->f[3] = floorf( src->f[3] ); +} + +static 
void +micro_frc( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = src->f[0] - floorf( src->f[0] ); + dst->f[1] = src->f[1] - floorf( src->f[1] ); + dst->f[2] = src->f[2] - floorf( src->f[2] ); + dst->f[3] = src->f[3] - floorf( src->f[3] ); +} + +static void +micro_ge( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3]; +} + +static void +micro_i2f( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (float) src->i[0]; + dst->f[1] = (float) src->i[1]; + dst->f[2] = (float) src->i[2]; + dst->f[3] = (float) src->i[3]; +} + +static void +micro_lg2( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ +#if FAST_MATH + dst->f[0] = util_fast_log2( src->f[0] ); + dst->f[1] = util_fast_log2( src->f[1] ); + dst->f[2] = util_fast_log2( src->f[2] ); + dst->f[3] = util_fast_log2( src->f[3] ); +#else + dst->f[0] = logf( src->f[0] ) * 1.442695f; + dst->f[1] = logf( src->f[1] ) * 1.442695f; + dst->f[2] = logf( src->f[2] ) * 1.442695f; + dst->f[3] = logf( src->f[3] ) * 1.442695f; +#endif +} + +static void +micro_le( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] <= src1->f[3] ? 
src2->f[3] : src3->f[3]; +} + +static void +micro_lt( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; +} + +static void +micro_ilt( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; + dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; + dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; + dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; +} + +static void +micro_ult( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; + dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; + dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; + dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; +} + +static void +micro_max( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; + dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; + dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; + dst->f[3] = src0->f[3] > src1->f[3] ? 
src0->f[3] : src1->f[3]; +} + +static void +micro_imax( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; + dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; + dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; + dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; +} + +static void +micro_umax( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; + dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; + dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; + dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; +} + +static void +micro_min( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; + dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; + dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; + dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; +} + +static void +micro_imin( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; + dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; + dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; + dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; +} + +static void +micro_umin( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; + dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; + dst->u[2] = src0->u[2] < src1->u[2] ? 
src0->u[2] : src1->u[2]; + dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; +} + +static void +micro_umod( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] % src1->u[0]; + dst->u[1] = src0->u[1] % src1->u[1]; + dst->u[2] = src0->u[2] % src1->u[2]; + dst->u[3] = src0->u[3] % src1->u[3]; +} + +static void +micro_mul( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] * src1->f[0]; + dst->f[1] = src0->f[1] * src1->f[1]; + dst->f[2] = src0->f[2] * src1->f[2]; + dst->f[3] = src0->f[3] * src1->f[3]; +} + +static void +micro_imul( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] * src1->i[0]; + dst->i[1] = src0->i[1] * src1->i[1]; + dst->i[2] = src0->i[2] * src1->i[2]; + dst->i[3] = src0->i[3] * src1->i[3]; +} + +static void +micro_imul64( + union tgsi_exec_channel *dst0, + union tgsi_exec_channel *dst1, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst1->i[0] = src0->i[0] * src1->i[0]; + dst1->i[1] = src0->i[1] * src1->i[1]; + dst1->i[2] = src0->i[2] * src1->i[2]; + dst1->i[3] = src0->i[3] * src1->i[3]; + dst0->i[0] = 0; + dst0->i[1] = 0; + dst0->i[2] = 0; + dst0->i[3] = 0; +} + +static void +micro_umul64( + union tgsi_exec_channel *dst0, + union tgsi_exec_channel *dst1, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst1->u[0] = src0->u[0] * src1->u[0]; + dst1->u[1] = src0->u[1] * src1->u[1]; + dst1->u[2] = src0->u[2] * src1->u[2]; + dst1->u[3] = src0->u[3] * src1->u[3]; + dst0->u[0] = 0; + dst0->u[1] = 0; + dst0->u[2] = 0; + dst0->u[3] = 0; +} + +static void +micro_movc( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2 ) 
+{ + dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; + dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; + dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; + dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; +} + +static void +micro_neg( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = -src->f[0]; + dst->f[1] = -src->f[1]; + dst->f[2] = -src->f[2]; + dst->f[3] = -src->f[3]; +} + +static void +micro_ineg( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->i[0] = -src->i[0]; + dst->i[1] = -src->i[1]; + dst->i[2] = -src->i[2]; + dst->i[3] = -src->i[3]; +} + +static void +micro_not( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->u[0] = ~src->u[0]; + dst->u[1] = ~src->u[1]; + dst->u[2] = ~src->u[2]; + dst->u[3] = ~src->u[3]; +} + +static void +micro_or( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] | src1->u[0]; + dst->u[1] = src0->u[1] | src1->u[1]; + dst->u[2] = src0->u[2] | src1->u[2]; + dst->u[3] = src0->u[3] | src1->u[3]; +} + +static void +micro_pow( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ +#if FAST_MATH + dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); + dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); + dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); + dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); +#else + dst->f[0] = powf( src0->f[0], src1->f[0] ); + dst->f[1] = powf( src0->f[1], src1->f[1] ); + dst->f[2] = powf( src0->f[2], src1->f[2] ); + dst->f[3] = powf( src0->f[3], src1->f[3] ); +#endif +} + +static void +micro_rnd( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = floorf( src->f[0] + 0.5f ); + dst->f[1] = floorf( src->f[1] + 0.5f ); + dst->f[2] = floorf( src->f[2] + 0.5f ); + dst->f[3] = floorf( src->f[3] + 0.5f ); +} + +static 
void +micro_shl( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] << src1->i[0]; + dst->i[1] = src0->i[1] << src1->i[1]; + dst->i[2] = src0->i[2] << src1->i[2]; + dst->i[3] = src0->i[3] << src1->i[3]; +} + +static void +micro_ishr( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] >> src1->i[0]; + dst->i[1] = src0->i[1] >> src1->i[1]; + dst->i[2] = src0->i[2] >> src1->i[2]; + dst->i[3] = src0->i[3] >> src1->i[3]; +} + +static void +micro_trunc( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0 ) +{ + dst->f[0] = (float) (int) src0->f[0]; + dst->f[1] = (float) (int) src0->f[1]; + dst->f[2] = (float) (int) src0->f[2]; + dst->f[3] = (float) (int) src0->f[3]; +} + +static void +micro_ushr( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] >> src1->u[0]; + dst->u[1] = src0->u[1] >> src1->u[1]; + dst->u[2] = src0->u[2] >> src1->u[2]; + dst->u[3] = src0->u[3] >> src1->u[3]; +} + +static void +micro_sin( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = sinf( src->f[0] ); + dst->f[1] = sinf( src->f[1] ); + dst->f[2] = sinf( src->f[2] ); + dst->f[3] = sinf( src->f[3] ); +} + +static void +micro_sqrt( union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = sqrtf( src->f[0] ); + dst->f[1] = sqrtf( src->f[1] ); + dst->f[2] = sqrtf( src->f[2] ); + dst->f[3] = sqrtf( src->f[3] ); +} + +static void +micro_sub( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] - src1->f[0]; + dst->f[1] = src0->f[1] - src1->f[1]; + dst->f[2] = src0->f[2] - src1->f[2]; + dst->f[3] = src0->f[3] - src1->f[3]; +} + +static void +micro_u2f( + union tgsi_exec_channel 
*dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (float) src->u[0]; + dst->f[1] = (float) src->u[1]; + dst->f[2] = (float) src->u[2]; + dst->f[3] = (float) src->u[3]; +} + +static void +micro_xor( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] ^ src1->u[0]; + dst->u[1] = src0->u[1] ^ src1->u[1]; + dst->u[2] = src0->u[2] ^ src1->u[2]; + dst->u[3] = src0->u[3] ^ src1->u[3]; +} + +static void +fetch_src_file_channel( + const struct tgsi_exec_machine *mach, + const uint file, + const uint swizzle, + const union tgsi_exec_channel *index, + union tgsi_exec_channel *chan ) +{ + switch( swizzle ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch( file ) { + case TGSI_FILE_CONSTANT: + assert(mach->Consts); + chan->f[0] = mach->Consts[index->i[0]][swizzle]; + chan->f[1] = mach->Consts[index->i[1]][swizzle]; + chan->f[2] = mach->Consts[index->i[2]][swizzle]; + chan->f[3] = mach->Consts[index->i[3]][swizzle]; + break; + + case TGSI_FILE_INPUT: + chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_TEMPORARY: + assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); + chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_IMMEDIATE: + assert( index->i[0] < (int) mach->ImmLimit ); + chan->f[0] = mach->Imms[index->i[0]][swizzle]; + assert( index->i[1] < (int) mach->ImmLimit ); + chan->f[1] = mach->Imms[index->i[1]][swizzle]; + assert( index->i[2] < (int) mach->ImmLimit ); + chan->f[2] = 
mach->Imms[index->i[2]][swizzle]; + assert( index->i[3] < (int) mach->ImmLimit ); + chan->f[3] = mach->Imms[index->i[3]][swizzle]; + break; + + case TGSI_FILE_ADDRESS: + chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_OUTPUT: + /* vertex/fragment output vars can be read too */ + chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + default: + assert( 0 ); + } + break; + + case TGSI_EXTSWIZZLE_ZERO: + *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; + break; + + case TGSI_EXTSWIZZLE_ONE: + *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; + break; + + default: + assert( 0 ); + } +} + +static void +fetch_source( + const struct tgsi_exec_machine *mach, + union tgsi_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const uint chan_index ) +{ + union tgsi_exec_channel index; + uint swizzle; + + index.i[0] = + index.i[1] = + index.i[2] = + index.i[3] = reg->SrcRegister.Index; + + if (reg->SrcRegister.Indirect) { + union tgsi_exec_channel index2; + union tgsi_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); + fetch_src_file_channel( + mach, + reg->SrcRegisterInd.File, + swizzle, + &index2, + &indir_index ); + + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; + } + + if( reg->SrcRegister.Dimension ) { + switch( reg->SrcRegister.File ) { + case TGSI_FILE_INPUT: + index.i[0] *= 17; + index.i[1] *= 17; + index.i[2] *= 17; 
+ index.i[3] *= 17; + break; + case TGSI_FILE_CONSTANT: + index.i[0] *= 4096; + index.i[1] *= 4096; + index.i[2] *= 4096; + index.i[3] *= 4096; + break; + default: + assert( 0 ); + } + + index.i[0] += reg->SrcRegisterDim.Index; + index.i[1] += reg->SrcRegisterDim.Index; + index.i[2] += reg->SrcRegisterDim.Index; + index.i[3] += reg->SrcRegisterDim.Index; + + if (reg->SrcRegisterDim.Indirect) { + union tgsi_exec_channel index2; + union tgsi_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterDimInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); + fetch_src_file_channel( + mach, + reg->SrcRegisterDimInd.File, + swizzle, + &index2, + &indir_index ); + + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; + } + } + + swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + fetch_src_file_channel( + mach, + reg->SrcRegister.File, + swizzle, + &index, + chan ); + + switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { + case TGSI_UTIL_SIGN_CLEAR: + micro_abs( chan, chan ); + break; + + case TGSI_UTIL_SIGN_SET: + micro_abs( chan, chan ); + micro_neg( chan, chan ); + break; + + case TGSI_UTIL_SIGN_TOGGLE: + micro_neg( chan, chan ); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + } + + if (reg->SrcRegisterExtMod.Complement) { + micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); + } +} + +static void +store_dest( + struct tgsi_exec_machine *mach, + const union tgsi_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + uint chan_index ) +{ + uint i; + union tgsi_exec_channel null; + union tgsi_exec_channel *dst; + uint execmask = mach->ExecMask; + + switch (reg->DstRegister.File) { + case TGSI_FILE_NULL: + dst = &null; + break; + + case TGSI_FILE_OUTPUT: + dst = 
&mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] + + reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_TEMPORARY: + assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS ); + dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_ADDRESS: + dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; + break; + + default: + assert( 0 ); + return; + } + + if (inst->InstructionExtNv.CondFlowEnable) { + union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; + uint swizzle; + uint shift; + uint mask; + uint test; + + /* Only CC0 supported. + */ + assert( inst->InstructionExtNv.CondFlowIndex < 1 ); + + switch (chan_index) { + case CHAN_X: + swizzle = inst->InstructionExtNv.CondSwizzleX; + break; + case CHAN_Y: + swizzle = inst->InstructionExtNv.CondSwizzleY; + break; + case CHAN_Z: + swizzle = inst->InstructionExtNv.CondSwizzleZ; + break; + case CHAN_W: + swizzle = inst->InstructionExtNv.CondSwizzleW; + break; + default: + assert( 0 ); + return; + } + + switch (swizzle) { + case TGSI_SWIZZLE_X: + shift = TGSI_EXEC_CC_X_SHIFT; + mask = TGSI_EXEC_CC_X_MASK; + break; + case TGSI_SWIZZLE_Y: + shift = TGSI_EXEC_CC_Y_SHIFT; + mask = TGSI_EXEC_CC_Y_MASK; + break; + case TGSI_SWIZZLE_Z: + shift = TGSI_EXEC_CC_Z_SHIFT; + mask = TGSI_EXEC_CC_Z_MASK; + break; + case TGSI_SWIZZLE_W: + shift = TGSI_EXEC_CC_W_SHIFT; + mask = TGSI_EXEC_CC_W_MASK; + break; + default: + assert( 0 ); + return; + } + + switch (inst->InstructionExtNv.CondMask) { + case TGSI_CC_GT: + test = ~(TGSI_EXEC_CC_GT << shift) & mask; + for (i = 0; i < QUAD_SIZE; i++) + if (cc->u[i] & test) + execmask &= ~(1 << i); + break; + + case TGSI_CC_EQ: + test = ~(TGSI_EXEC_CC_EQ << shift) & mask; + for (i = 0; i < QUAD_SIZE; i++) + if (cc->u[i] & test) + execmask &= ~(1 << i); + break; + + case TGSI_CC_LT: + test = ~(TGSI_EXEC_CC_LT << shift) & mask; + for (i = 0; i < QUAD_SIZE; i++) + if (cc->u[i] & test) + execmask &= ~(1 << i); + 
break; + + case TGSI_CC_GE: + test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask; + for (i = 0; i < QUAD_SIZE; i++) + if (cc->u[i] & test) + execmask &= ~(1 << i); + break; + + case TGSI_CC_LE: + test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask; + for (i = 0; i < QUAD_SIZE; i++) + if (cc->u[i] & test) + execmask &= ~(1 << i); + break; + + case TGSI_CC_NE: + test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask; + for (i = 0; i < QUAD_SIZE; i++) + if (cc->u[i] & test) + execmask &= ~(1 << i); + break; + + case TGSI_CC_TR: + break; + + case TGSI_CC_FL: + for (i = 0; i < QUAD_SIZE; i++) + execmask &= ~(1 << i); + break; + + default: + assert( 0 ); + return; + } + } + + switch (inst->Instruction.Saturate) { + case TGSI_SAT_NONE: + for (i = 0; i < QUAD_SIZE; i++) + if (execmask & (1 << i)) + dst->i[i] = chan->i[i]; + break; + + case TGSI_SAT_ZERO_ONE: + for (i = 0; i < QUAD_SIZE; i++) + if (execmask & (1 << i)) { + if (chan->f[i] < 0.0f) + dst->f[i] = 0.0f; + else if (chan->f[i] > 1.0f) + dst->f[i] = 1.0f; + else + dst->i[i] = chan->i[i]; + } + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + for (i = 0; i < QUAD_SIZE; i++) + if (execmask & (1 << i)) { + if (chan->f[i] < -1.0f) + dst->f[i] = -1.0f; + else if (chan->f[i] > 1.0f) + dst->f[i] = 1.0f; + else + dst->i[i] = chan->i[i]; + } + break; + + default: + assert( 0 ); + } + + if (inst->InstructionExtNv.CondDstUpdate) { + union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; + uint shift; + uint mask; + + /* Only CC0 supported. 
+ */ + assert( inst->InstructionExtNv.CondDstIndex < 1 ); + + switch (chan_index) { + case CHAN_X: + shift = TGSI_EXEC_CC_X_SHIFT; + mask = ~TGSI_EXEC_CC_X_MASK; + break; + case CHAN_Y: + shift = TGSI_EXEC_CC_Y_SHIFT; + mask = ~TGSI_EXEC_CC_Y_MASK; + break; + case CHAN_Z: + shift = TGSI_EXEC_CC_Z_SHIFT; + mask = ~TGSI_EXEC_CC_Z_MASK; + break; + case CHAN_W: + shift = TGSI_EXEC_CC_W_SHIFT; + mask = ~TGSI_EXEC_CC_W_MASK; + break; + default: + assert( 0 ); + return; + } + + for (i = 0; i < QUAD_SIZE; i++) + if (execmask & (1 << i)) { + cc->u[i] &= mask; + if (dst->f[i] < 0.0f) + cc->u[i] |= TGSI_EXEC_CC_LT << shift; + else if (dst->f[i] > 0.0f) + cc->u[i] |= TGSI_EXEC_CC_GT << shift; + else if (dst->f[i] == 0.0f) + cc->u[i] |= TGSI_EXEC_CC_EQ << shift; + else + cc->u[i] |= TGSI_EXEC_CC_UN << shift; + } + } +} + +#define FETCH(VAL,INDEX,CHAN)\ + fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + +#define STORE(VAL,INDEX,CHAN)\ + store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) + + +/** + * Execute ARB-style KIL which is predicated by a src register. + * Kill fragment if any of the four values is less than zero. + */ +static void +exec_kil(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint uniquemask; + uint chan_index; + uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + union tgsi_exec_channel r[1]; + + /* This mask stores component bits that were already tested. Note that + * we test if the value is less than zero, so 1.0 and 0.0 need not to be + * tested. 
*/ + uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + + for (chan_index = 0; chan_index < 4; chan_index++) + { + uint swizzle; + uint i; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_extswizzle ( + &inst->FullSrcRegisters[0], + chan_index); + + /* check if the component has not been already tested */ + if (uniquemask & (1 << swizzle)) + continue; + uniquemask |= 1 << swizzle; + + FETCH(&r[0], 0, chan_index); + for (i = 0; i < 4; i++) + if (r[0].f[i] < 0.0f) + kilmask |= 1 << i; + } + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} + +/** + * Execute NVIDIA-style KIL which is predicated by a condition code. + * Kill fragment if the condition code is TRUE. + */ +static void +exec_kilp(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + + if (inst->InstructionExtNv.CondFlowEnable) { + uint swizzle[4]; + uint chan_index; + + kilmask = 0x0; + + swizzle[0] = inst->InstructionExtNv.CondSwizzleX; + swizzle[1] = inst->InstructionExtNv.CondSwizzleY; + swizzle[2] = inst->InstructionExtNv.CondSwizzleZ; + swizzle[3] = inst->InstructionExtNv.CondSwizzleW; + + for (chan_index = 0; chan_index < 4; chan_index++) + { + uint i; + + for (i = 0; i < 4; i++) { + /* TODO: evaluate the condition code */ + if (0) + kilmask |= 1 << i; + } + } + } + else { + /* "unconditional" kil */ + kilmask = mach->ExecMask; + } + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} + + +/* + * Fetch a texel using STR texture coordinates. 
+ */ +static void +fetch_texel( struct tgsi_sampler *sampler, + const union tgsi_exec_channel *s, + const union tgsi_exec_channel *t, + const union tgsi_exec_channel *p, + float lodbias, /* XXX should be float[4] */ + union tgsi_exec_channel *r, + union tgsi_exec_channel *g, + union tgsi_exec_channel *b, + union tgsi_exec_channel *a ) +{ + uint j; + float rgba[NUM_CHANNELS][QUAD_SIZE]; + + sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); + + for (j = 0; j < 4; j++) { + r->f[j] = rgba[0][j]; + g->f[j] = rgba[1][j]; + b->f[j] = rgba[2][j]; + a->f[j] = rgba[3][j]; + } +} + + +static void +exec_tex(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + boolean biasLod, + boolean projected) +{ + const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + union tgsi_exec_channel r[8]; + uint chan_index; + float lodBias; + + /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ + + switch (inst->InstructionExtTexture.Texture) { + case TGSI_TEXTURE_1D: + + FETCH(&r[0], 0, CHAN_X); + + if (projected) { + FETCH(&r[1], 0, CHAN_W); + micro_div( &r[0], &r[0], &r[1] ); + } + + if (biasLod) { + FETCH(&r[1], 0, CHAN_W); + lodBias = r[2].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (projected) { + FETCH(&r[3], 0, CHAN_W); + micro_div( &r[0], &r[0], &r[3] ); + micro_div( &r[1], &r[1], &r[3] ); + micro_div( &r[2], &r[2], &r[3] ); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, /* inputs */ + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, 
CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (projected) { + FETCH(&r[3], 0, CHAN_W); + micro_div( &r[0], &r[0], &r[3] ); + micro_div( &r[1], &r[1], &r[3] ); + micro_div( &r[2], &r[2], &r[3] ); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert (0); + } + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[chan_index], 0, chan_index ); + } +} + + +/** + * Evaluate a constant-valued coefficient at the position of the + * current quad. + */ +static void +eval_constant_coef( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; + } +} + +/** + * Evaluate a linear-valued coefficient at the position of the + * current quad. + */ +static void +eval_linear_coef( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + mach->Inputs[attrib].xyzw[chan].f[0] = a0; + mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; + mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; + mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; +} + +/** + * Evaluate a perspective-valued coefficient at the position of the + * current quad. 
+ */ +static void +eval_perspective_coef( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + const float *w = mach->QuadPos.xyzw[3].f; + /* divide by W here */ + mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; + mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; + mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; + mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; +} + + +typedef void (* eval_coef_func)( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ); + +static void +exec_declaration( + struct tgsi_exec_machine *mach, + const struct tgsi_full_declaration *decl ) +{ + if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + eval_coef_func eval; + + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + switch( decl->Declaration.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + eval = eval_constant_coef; + break; + + case TGSI_INTERPOLATE_LINEAR: + eval = eval_linear_coef; + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + eval = eval_perspective_coef; + break; + + default: + eval = NULL; + assert( 0 ); + } + + if( mask == TGSI_WRITEMASK_XYZW ) { + unsigned i, j; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + eval( mach, i, j ); + } + } + } + else { + unsigned i, j; + + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + for( i = first; i <= last; i++ ) { + eval( mach, i, j ); + } + } + } + } + } + } +} + +static void +exec_instruction( + struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + 
int *pc ) +{ + uint chan_index; + union tgsi_exec_channel r[8]; + + (*pc)++; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_f2it( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_X ); + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[1], 0, CHAN_Y ); + micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + + FETCH( &r[2], 0, CHAN_W ); + micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); + micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); + micro_pow( &r[1], &r[1], &r[2] ); + micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, CHAN_Z ); + } + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + FETCH( &r[0], 0, CHAN_X ); + micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + FETCH( &r[0], 0, CHAN_X ); + micro_sqrt( &r[0], &r[0] ); + micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 
0, chan_index ); + } + break; + + case TGSI_OPCODE_EXP: + FETCH( &r[0], 0, CHAN_X ); + micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ + STORE( &r[2], 0, CHAN_X ); /* store r2 */ + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ + STORE( &r[2], 0, CHAN_Y ); /* store r2 */ + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ + STORE( &r[2], 0, CHAN_Z ); /* store r2 */ + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_LOG: + FETCH( &r[0], 0, CHAN_X ); + micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ + micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ + micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &r[0], 0, CHAN_X ); + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ + micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ + STORE( &r[0], 0, CHAN_Y ); + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( &r[1], 0, CHAN_Z ); + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) + { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + micro_mul( &r[0], &r[0], &r[1] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_add( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + 
micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH( &r[1], 0, CHAN_Z ); + FETCH( &r[2], 1, CHAN_Z ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_W); + FETCH(&r[2], 1, CHAN_W); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DST: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + FETCH( &r[0], 0, CHAN_Y ); + FETCH( &r[1], 1, CHAN_Y); + micro_mul( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_Z ); + STORE( &r[0], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + FETCH( &r[0], 1, CHAN_W ); + STORE( &r[0], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + /* XXX use micro_min()?? */ + micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + /* XXX use micro_max()?? 
*/ + micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); + + STORE(&r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_mul( &r[0], &r[0], &r[1] ); + FETCH( &r[1], 2, chan_index ); + micro_add( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + micro_sub( &r[0], &r[0], &r[1] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_LERP: + /* TGSI_OPCODE_LRP */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + micro_sub( &r[1], &r[1], &r[2] ); + micro_mul( &r[0], &r[0], &r[1] ); + micro_add( &r[0], &r[0], &r[2] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_CND: + assert (0); + break; + + case TGSI_OPCODE_CND0: + assert (0); + break; + + case TGSI_OPCODE_DOT2ADD: + /* TGSI_OPCODE_DP2A */ + assert (0); + break; + + case TGSI_OPCODE_INDEX: + assert (0); + break; + + case TGSI_OPCODE_NEGATE: + assert (0); + break; + + case TGSI_OPCODE_FRAC: + /* TGSI_OPCODE_FRC */ + 
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_frc( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CLAMP: + assert (0); + break; + + case TGSI_OPCODE_FLOOR: + /* TGSI_OPCODE_FLR */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_flr( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_ROUND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_rnd( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXPBASE2: + /* TGSI_OPCODE_EX2 */ + FETCH(&r[0], 0, CHAN_X); + +#if FAST_MATH + micro_exp2( &r[0], &r[0] ); +#else + micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); +#endif + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LOGBASE2: + /* TGSI_OPCODE_LG2 */ + FETCH( &r[0], 0, CHAN_X ); + micro_lg2( &r[0], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_POWER: + /* TGSI_OPCODE_POW */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + micro_pow( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CROSSPRODUCT: + /* TGSI_OPCODE_XPD */ + FETCH(&r[0], 0, CHAN_Y); + FETCH(&r[1], 1, CHAN_Z); + + micro_mul( &r[2], &r[0], &r[1] ); + + FETCH(&r[3], 0, CHAN_Z); + FETCH(&r[4], 1, CHAN_Y); + + micro_mul( &r[5], &r[3], &r[4] ); + micro_sub( &r[2], &r[2], &r[5] ); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &r[2], 0, CHAN_X ); + } + + FETCH(&r[2], 1, CHAN_X); + + micro_mul( &r[3], &r[3], &r[2] ); + + FETCH(&r[5], 0, CHAN_X); + + micro_mul( &r[1], &r[1], &r[5] ); + micro_sub( &r[3], &r[3], &r[1] ); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + STORE( &r[3], 0, CHAN_Y ); + } + + micro_mul( &r[5], 
&r[5], &r[4] ); + micro_mul( &r[0], &r[0], &r[2] ); + micro_sub( &r[5], &r[5], &r[0] ); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( &r[5], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MULTIPLYMATRIX: + assert (0); + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + + micro_abs( &r[0], &r[0] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_RCC: + assert (0); + break; + + case TGSI_OPCODE_DPH: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 1, CHAN_W); + + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_COS: + FETCH(&r[0], 0, CHAN_X); + + micro_cos( &r[0], &r[0] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_ddx( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDY: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_ddy( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_KILP: + exec_kilp (mach, inst); + break; + + case TGSI_OPCODE_KIL: + exec_kil (mach, inst); + break; + + case TGSI_OPCODE_PK2H: + assert (0); + break; + + case TGSI_OPCODE_PK2US: + assert (0); + break; + + case TGSI_OPCODE_PK4B: + assert (0); + break; + + case TGSI_OPCODE_PK4UB: + assert (0); + break; 
+ + case TGSI_OPCODE_RFL: + assert (0); + break; + + case TGSI_OPCODE_SEQ: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_eq( &r[0], &r[0], &r[1], + &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], + &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SFL: + assert (0); + break; + + case TGSI_OPCODE_SGT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SIN: + FETCH( &r[0], 0, CHAN_X ); + micro_sin( &r[0], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SNE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_STR: + assert (0); + break; + + case TGSI_OPCODE_TEX: + /* simple texture lookup */ + /* src[0] = texcoord */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, FALSE, FALSE); + break; + + case TGSI_OPCODE_TXB: + /* Texture lookup with lod bias */ + /* src[0] = texcoord (src[0].w = LOD bias) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE, FALSE); + break; + + case TGSI_OPCODE_TXD: + /* Texture lookup with explict partial derivatives */ + /* src[0] = texcoord */ + /* 
src[1] = d[strq]/dx */ + /* src[2] = d[strq]/dy */ + /* src[3] = sampler unit */ + assert (0); + break; + + case TGSI_OPCODE_TXL: + /* Texture lookup with explit LOD */ + /* src[0] = texcoord (src[0].w = LOD) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE, FALSE); + break; + + case TGSI_OPCODE_TXP: + /* Texture lookup with projection */ + /* src[0] = texcoord (src[0].w = projection) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, FALSE, TRUE); + break; + + case TGSI_OPCODE_UP2H: + assert (0); + break; + + case TGSI_OPCODE_UP2US: + assert (0); + break; + + case TGSI_OPCODE_UP4B: + assert (0); + break; + + case TGSI_OPCODE_UP4UB: + assert (0); + break; + + case TGSI_OPCODE_X2D: + assert (0); + break; + + case TGSI_OPCODE_ARA: + assert (0); + break; + + case TGSI_OPCODE_ARR: + assert (0); + break; + + case TGSI_OPCODE_BRA: + assert (0); + break; + + case TGSI_OPCODE_CAL: + /* skip the call if no execution channels are enabled */ + if (mach->ExecMask) { + /* do the call */ + + /* push the Cond, Loop, Cont stacks */ + assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + + assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; + + /* note that PC was already incremented above */ + mach->CallStack[mach->CallStackTop++] = *pc; + *pc = inst->InstructionExtLabel.Label; + } + break; + + case TGSI_OPCODE_RET: + mach->FuncMask &= ~mach->ExecMask; + UPDATE_EXEC_MASK(mach); + + if (mach->FuncMask == 0x0) { + /* really return now (otherwise, keep executing */ + + if (mach->CallStackTop == 0) { + /* returning from main() */ + *pc = -1; + return; + } + *pc = mach->CallStack[--mach->CallStackTop]; + + /* pop 
the Cond, Loop, Cont stacks */ + assert(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->FuncStackTop > 0); + mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; + + UPDATE_EXEC_MASK(mach); + } + break; + + case TGSI_OPCODE_SSG: + assert (0); + break; + + case TGSI_OPCODE_CMP: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_SCS: + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( &r[0], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { + micro_cos( &r[1], &r[0] ); + STORE( &r[1], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + micro_sin( &r[1], &r[0] ); + STORE( &r[1], 0, CHAN_Y ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_NRM: + assert (0); + break; + + case TGSI_OPCODE_DIV: + assert( 0 ); + break; + + case TGSI_OPCODE_DP2: + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_IF: + /* push CondMask */ + assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + FETCH( 
&r[0], 0, CHAN_X ); + /* update CondMask */ + if( ! r[0].u[0] ) { + mach->CondMask &= ~0x1; + } + if( ! r[0].u[1] ) { + mach->CondMask &= ~0x2; + } + if( ! r[0].u[2] ) { + mach->CondMask &= ~0x4; + } + if( ! r[0].u[3] ) { + mach->CondMask &= ~0x8; + } + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ELSE */ + break; + + case TGSI_OPCODE_ELSE: + /* invert CondMask wrt previous mask */ + { + uint prevMask; + assert(mach->CondStackTop > 0); + prevMask = mach->CondStack[mach->CondStackTop - 1]; + mach->CondMask = ~mach->CondMask & prevMask; + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ENDIF */ + } + break; + + case TGSI_OPCODE_ENDIF: + /* pop CondMask */ + assert(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_END: + /* halt execution */ + *pc = -1; + break; + + case TGSI_OPCODE_REP: + assert (0); + break; + + case TGSI_OPCODE_ENDREP: + assert (0); + break; + + case TGSI_OPCODE_PUSHA: + assert (0); + break; + + case TGSI_OPCODE_POPA: + assert (0); + break; + + case TGSI_OPCODE_CEIL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_ceil( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_I2F: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_i2f( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_NOT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_not( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_TRUNC: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_trunc( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_shl( &r[0], &r[0], 
&r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_ishr( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_AND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_and( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_OR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_or( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOD: + assert (0); + break; + + case TGSI_OPCODE_XOR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_xor( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SAD: + assert (0); + break; + + case TGSI_OPCODE_TXF: + assert (0); + break; + + case TGSI_OPCODE_TXQ: + assert (0); + break; + + case TGSI_OPCODE_EMIT: + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + break; + + case TGSI_OPCODE_ENDPRIM: + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; + break; + + case TGSI_OPCODE_LOOP: + /* fall-through (for now) */ + case TGSI_OPCODE_BGNLOOP2: + /* push LoopMask and ContMasks */ + assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + break; + + case TGSI_OPCODE_ENDLOOP: + /* fall-through (for now at least) */ + case TGSI_OPCODE_ENDLOOP2: + /* Restore ContMask, 
but don't pop */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; + UPDATE_EXEC_MASK(mach); + if (mach->ExecMask) { + /* repeat loop: jump to instruction just past BGNLOOP */ + *pc = inst->InstructionExtLabel.Label + 1; + } + else { + /* exit loop: pop LoopMask */ + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + /* pop ContMask */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + } + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BRK: + /* turn off loop channels for each enabled exec channel */ + mach->LoopMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_CONT: + /* turn off cont channels for each enabled exec channel */ + mach->ContMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BGNSUB: + /* no-op */ + break; + + case TGSI_OPCODE_ENDSUB: + /* no-op */ + break; + + case TGSI_OPCODE_NOISE1: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE2: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE3: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE4: + assert( 0 ); + break; + + case TGSI_OPCODE_NOP: + break; + + default: + assert( 0 ); + } +} + + +/** + * Run TGSI interpreter. 
+ * \return bitmask of "alive" quad components + */ +uint +tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) +{ + uint i; + int pc = 0; + + mach->CondMask = 0xf; + mach->LoopMask = 0xf; + mach->ContMask = 0xf; + mach->FuncMask = 0xf; + mach->ExecMask = 0xf; + + mach->CondStackTop = 0; /* temporarily subvert this assertion */ + assert(mach->CondStackTop == 0); + assert(mach->LoopStackTop == 0); + assert(mach->ContStackTop == 0); + assert(mach->CallStackTop == 0); + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; + + if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; + mach->Primitives[0] = 0; + } + + for (i = 0; i < QUAD_SIZE; i++) { + mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = + (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | + (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | + (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | + (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); + } + + /* execute declarations (interpolants) */ + for (i = 0; i < mach->NumDeclarations; i++) { + exec_declaration( mach, mach->Declarations+i ); + } + + /* execute instructions, until pc is set to -1 */ + while (pc != -1) { + assert(pc < (int) mach->NumInstructions); + exec_instruction( mach, mach->Instructions + pc, &pc ); + } + +#if 0 + /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + /* + * Scale back depth component. 
+ */ + for (i = 0; i < 4; i++) + mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; + } +#endif + + return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; +} + + diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h new file mode 100644 index 0000000000..c4e649e69c --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -0,0 +1,272 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef TGSI_EXEC_H +#define TGSI_EXEC_H + +#include "pipe/p_compiler.h" + +#if defined __cplusplus +extern "C" { +#endif + +#define MAX_LABELS 1024 + +#define NUM_CHANNELS 4 /* R,G,B,A */ +#define QUAD_SIZE 4 /* 4 pixel/quad */ + +/** + * Registers may be treated as float, signed int or unsigned int. + */ +union tgsi_exec_channel +{ + float f[QUAD_SIZE]; + int i[QUAD_SIZE]; + unsigned u[QUAD_SIZE]; +}; + +/** + * A vector[RGBA] of channels[4 pixels] + */ +struct tgsi_exec_vector +{ + union tgsi_exec_channel xyzw[NUM_CHANNELS]; +}; + +/** + * For fragment programs, information for computing fragment input + * values from plane equation of the triangle/line. + */ +struct tgsi_interp_coef +{ + float a0[NUM_CHANNELS]; /* in an xyzw layout */ + float dadx[NUM_CHANNELS]; + float dady[NUM_CHANNELS]; +}; + + +struct softpipe_tile_cache; /**< Opaque to TGSI */ + +/** + * Information for sampling textures, which must be implemented + * by code outside the TGSI executor. + */ +struct tgsi_sampler +{ + const struct pipe_sampler_state *state; + struct pipe_texture *texture; + /** Get samples for four fragments in a quad */ + void (*get_samples)(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); + void *pipe; /*XXX temporary*/ + struct softpipe_tile_cache *cache; +}; + +/** + * For branching/calling subroutines. 
+ */ +struct tgsi_exec_labels +{ + unsigned labels[MAX_LABELS][2]; + unsigned count; +}; + + +#define TGSI_EXEC_NUM_TEMPS 128 +#define TGSI_EXEC_NUM_TEMP_EXTRAS 6 +#define TGSI_EXEC_NUM_IMMEDIATES 256 + +/* + * Locations of various utility registers (_I = Index, _C = Channel) + */ +#define TGSI_EXEC_TEMP_00000000_I (TGSI_EXEC_NUM_TEMPS + 0) +#define TGSI_EXEC_TEMP_00000000_C 0 + +#define TGSI_EXEC_TEMP_7FFFFFFF_I (TGSI_EXEC_NUM_TEMPS + 0) +#define TGSI_EXEC_TEMP_7FFFFFFF_C 1 + +#define TGSI_EXEC_TEMP_80000000_I (TGSI_EXEC_NUM_TEMPS + 0) +#define TGSI_EXEC_TEMP_80000000_C 2 + +#define TGSI_EXEC_TEMP_FFFFFFFF_I (TGSI_EXEC_NUM_TEMPS + 0) +#define TGSI_EXEC_TEMP_FFFFFFFF_C 3 + +#define TGSI_EXEC_TEMP_ONE_I (TGSI_EXEC_NUM_TEMPS + 1) +#define TGSI_EXEC_TEMP_ONE_C 0 + +#define TGSI_EXEC_TEMP_TWO_I (TGSI_EXEC_NUM_TEMPS + 1) +#define TGSI_EXEC_TEMP_TWO_C 1 + +#define TGSI_EXEC_TEMP_128_I (TGSI_EXEC_NUM_TEMPS + 1) +#define TGSI_EXEC_TEMP_128_C 2 + +#define TGSI_EXEC_TEMP_MINUS_128_I (TGSI_EXEC_NUM_TEMPS + 1) +#define TGSI_EXEC_TEMP_MINUS_128_C 3 + +#define TGSI_EXEC_TEMP_KILMASK_I (TGSI_EXEC_NUM_TEMPS + 2) +#define TGSI_EXEC_TEMP_KILMASK_C 0 + +#define TGSI_EXEC_TEMP_OUTPUT_I (TGSI_EXEC_NUM_TEMPS + 2) +#define TGSI_EXEC_TEMP_OUTPUT_C 1 + +#define TGSI_EXEC_TEMP_PRIMITIVE_I (TGSI_EXEC_NUM_TEMPS + 2) +#define TGSI_EXEC_TEMP_PRIMITIVE_C 2 + +/* NVIDIA condition code (CC) vector + */ +#define TGSI_EXEC_CC_GT 0x01 +#define TGSI_EXEC_CC_EQ 0x02 +#define TGSI_EXEC_CC_LT 0x04 +#define TGSI_EXEC_CC_UN 0x08 + +#define TGSI_EXEC_CC_X_MASK 0x000000ff +#define TGSI_EXEC_CC_X_SHIFT 0 +#define TGSI_EXEC_CC_Y_MASK 0x0000ff00 +#define TGSI_EXEC_CC_Y_SHIFT 8 +#define TGSI_EXEC_CC_Z_MASK 0x00ff0000 +#define TGSI_EXEC_CC_Z_SHIFT 16 +#define TGSI_EXEC_CC_W_MASK 0xff000000 +#define TGSI_EXEC_CC_W_SHIFT 24 + +#define TGSI_EXEC_TEMP_CC_I (TGSI_EXEC_NUM_TEMPS + 2) +#define TGSI_EXEC_TEMP_CC_C 3 + +#define TGSI_EXEC_TEMP_THREE_I (TGSI_EXEC_NUM_TEMPS + 3) +#define TGSI_EXEC_TEMP_THREE_C 0 + +#define 
TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3) +#define TGSI_EXEC_TEMP_HALF_C 1 + +#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4) + +#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 5) + + +#define TGSI_EXEC_MAX_COND_NESTING 20 +#define TGSI_EXEC_MAX_LOOP_NESTING 20 +#define TGSI_EXEC_MAX_CALL_NESTING 20 + +/** + * Run-time virtual machine state for executing TGSI shader. + */ +struct tgsi_exec_machine +{ + /* Total = program temporaries + internal temporaries + * + 1 padding to align to 16 bytes + */ + struct tgsi_exec_vector _Temps[TGSI_EXEC_NUM_TEMPS + + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]; + + /* + * This will point to _Temps after aligning to 16B boundary. + */ + struct tgsi_exec_vector *Temps; + struct tgsi_exec_vector *Addrs; + + struct tgsi_sampler *Samplers; + + float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; + unsigned ImmLimit; + const float (*Consts)[4]; + struct tgsi_exec_vector *Inputs; + struct tgsi_exec_vector *Outputs; + const struct tgsi_token *Tokens; + unsigned Processor; + + /* GEOMETRY processor only. */ + unsigned *Primitives; + + /* FRAGMENT processor only. 
*/ + const struct tgsi_interp_coef *InterpCoefs; + struct tgsi_exec_vector QuadPos; + + /* Conditional execution masks */ + uint CondMask; /**< For IF/ELSE/ENDIF */ + uint LoopMask; /**< For BGNLOOP/ENDLOOP */ + uint ContMask; /**< For loop CONT statements */ + uint FuncMask; /**< For function calls */ + uint ExecMask; /**< = CondMask & LoopMask */ + + /** Condition mask stack (for nested conditionals) */ + uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; + int CondStackTop; + + /** Loop mask stack (for nested loops) */ + uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopStackTop; + + /** Loop continue mask stack (see comments in tgsi_exec.c) */ + uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int ContStackTop; + + /** Function execution mask stack (for executing subroutine code) */ + uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; + int FuncStackTop; + + /** Function call stack for saving/restoring the program counter */ + uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; + int CallStackTop; + + struct tgsi_full_instruction *Instructions; + uint NumInstructions; + + struct tgsi_full_declaration *Declarations; + uint NumDeclarations; + + struct tgsi_exec_labels Labels; +}; + +void +tgsi_exec_machine_init( + struct tgsi_exec_machine *mach ); + + +void +tgsi_exec_machine_bind_shader( + struct tgsi_exec_machine *mach, + const struct tgsi_token *tokens, + uint numSamplers, + struct tgsi_sampler *samplers); + +uint +tgsi_exec_machine_run( + struct tgsi_exec_machine *mach ); + + +void +tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach); + + +#if defined __cplusplus +} /* extern "C" */ +#endif + +#endif /* TGSI_EXEC_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c new file mode 100644 index 0000000000..68c7a6b7f5 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -0,0 +1,161 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, 
Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "tgsi_info.h" + +static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = +{ + { 1, 1, 0, 0, "ARL" }, + { 1, 1, 0, 0, "MOV" }, + { 1, 1, 0, 0, "LIT" }, + { 1, 1, 0, 0, "RCP" }, + { 1, 1, 0, 0, "RSQ" }, + { 1, 1, 0, 0, "EXP" }, + { 1, 1, 0, 0, "LOG" }, + { 1, 2, 0, 0, "MUL" }, + { 1, 2, 0, 0, "ADD" }, + { 1, 2, 0, 0, "DP3" }, + { 1, 2, 0, 0, "DP4" }, + { 1, 2, 0, 0, "DST" }, + { 1, 2, 0, 0, "MIN" }, + { 1, 2, 0, 0, "MAX" }, + { 1, 2, 0, 0, "SLT" }, + { 1, 2, 0, 0, "SGE" }, + { 1, 3, 0, 0, "MAD" }, + { 1, 2, 0, 0, "SUB" }, + { 1, 3, 0, 0, "LERP" }, + { 1, 3, 0, 0, "CND" }, + { 1, 3, 0, 0, "CND0" }, + { 1, 3, 0, 0, "DOT2ADD" }, + { 1, 2, 0, 0, "INDEX" }, + { 1, 1, 0, 0, "NEGATE" }, + { 1, 1, 0, 0, "FRAC" }, + { 1, 3, 0, 0, "CLAMP" }, + { 1, 1, 0, 0, "FLOOR" }, + { 1, 1, 0, 0, "ROUND" }, + { 1, 1, 0, 0, "EXPBASE2" }, + { 1, 1, 0, 0, "LOGBASE2" }, + { 1, 2, 0, 0, "POWER" }, + { 1, 2, 0, 0, "CROSSPRODUCT" }, + { 1, 2, 0, 0, "MULTIPLYMATRIX" }, + { 1, 1, 0, 0, "ABS" }, + { 1, 1, 0, 0, "RCC" }, + { 1, 2, 0, 0, "DPH" }, + { 1, 1, 0, 0, "COS" }, + { 1, 1, 0, 0, "DDX" }, + { 1, 1, 0, 0, "DDY" }, + { 0, 0, 0, 0, "KILP" }, + { 1, 1, 0, 0, "PK2H" }, + { 1, 1, 0, 0, "PK2US" }, + { 1, 1, 0, 0, "PK4B" }, + { 1, 1, 0, 0, "PK4UB" }, + { 1, 2, 0, 0, "RFL" }, + { 1, 2, 0, 0, "SEQ" }, + { 1, 2, 0, 0, "SFL" }, + { 1, 2, 0, 0, "SGT" }, + { 1, 1, 0, 0, "SIN" }, + { 1, 2, 0, 0, "SLE" }, + { 1, 2, 0, 0, "SNE" }, + { 1, 2, 0, 0, "STR" }, + { 1, 2, 1, 0, "TEX" }, + { 1, 4, 1, 0, "TXD" }, + { 1, 2, 1, 0, "TXP" }, + { 1, 1, 0, 0, "UP2H" }, + { 1, 1, 0, 0, "UP2US" }, + { 1, 1, 0, 0, "UP4B" }, + { 1, 1, 0, 0, "UP4UB" }, + { 1, 3, 0, 0, "X2D" }, + { 1, 1, 0, 0, "ARA" }, + { 1, 1, 0, 0, "ARR" }, + { 0, 1, 0, 0, "BRA" }, + { 0, 0, 0, 1, "CAL" }, + { 0, 0, 0, 0, "RET" }, + { 1, 1, 0, 0, "SSG" }, + { 1, 3, 0, 0, "CMP" }, + { 1, 1, 0, 0, "SCS" }, + { 1, 2, 1, 0, "TXB" }, + 
{ 1, 1, 0, 0, "NRM" }, + { 1, 2, 0, 0, "DIV" }, + { 1, 2, 0, 0, "DP2" }, + { 1, 2, 1, 0, "TXL" }, + { 0, 0, 0, 0, "BRK" }, + { 0, 1, 0, 1, "IF" }, + { 0, 0, 0, 0, "LOOP" }, + { 0, 1, 0, 0, "REP" }, + { 0, 0, 0, 1, "ELSE" }, + { 0, 0, 0, 0, "ENDIF" }, + { 0, 0, 0, 0, "ENDLOOP" }, + { 0, 0, 0, 0, "ENDREP" }, + { 0, 1, 0, 0, "PUSHA" }, + { 1, 0, 0, 0, "POPA" }, + { 1, 1, 0, 0, "CEIL" }, + { 1, 1, 0, 0, "I2F" }, + { 1, 1, 0, 0, "NOT" }, + { 1, 1, 0, 0, "TRUNC" }, + { 1, 2, 0, 0, "SHL" }, + { 1, 2, 0, 0, "SHR" }, + { 1, 2, 0, 0, "AND" }, + { 1, 2, 0, 0, "OR" }, + { 1, 2, 0, 0, "MOD" }, + { 1, 2, 0, 0, "XOR" }, + { 1, 3, 0, 0, "SAD" }, + { 1, 2, 1, 0, "TXF" }, + { 1, 2, 1, 0, "TXQ" }, + { 0, 0, 0, 0, "CONT" }, + { 0, 0, 0, 0, "EMIT" }, + { 0, 0, 0, 0, "ENDPRIM" }, + { 0, 0, 0, 1, "BGNLOOP2" }, + { 0, 0, 0, 0, "BGNSUB" }, + { 0, 0, 0, 1, "ENDLOOP2" }, + { 0, 0, 0, 0, "ENDSUB" }, + { 1, 1, 0, 0, "NOISE1" }, + { 1, 1, 0, 0, "NOISE2" }, + { 1, 1, 0, 0, "NOISE3" }, + { 1, 1, 0, 0, "NOISE4" }, + { 0, 0, 0, 0, "NOP" }, + { 1, 2, 0, 0, "M4X3" }, + { 1, 2, 0, 0, "M3X4" }, + { 1, 2, 0, 0, "M3X3" }, + { 1, 2, 0, 0, "M3X2" }, + { 1, 1, 0, 0, "NRM4" }, + { 0, 1, 0, 0, "CALLNZ" }, + { 0, 1, 0, 0, "IFC" }, + { 0, 1, 0, 0, "BREAKC" }, + { 0, 1, 0, 0, "KIL" }, + { 0, 0, 0, 0, "END" }, + { 1, 1, 0, 0, "SWZ" } +}; + +const struct tgsi_opcode_info * +tgsi_get_opcode_info( uint opcode ) +{ + if (opcode < TGSI_OPCODE_LAST) + return &opcode_info[opcode]; + assert( 0 ); + return NULL; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h new file mode 100644 index 0000000000..7230bdaae3 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_info.h @@ -0,0 +1,53 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
#ifndef TGSI_INFO_H
#define TGSI_INFO_H

#include "pipe/p_shader_tokens.h"

#if defined __cplusplus
extern "C" {
#endif

/**
 * Static description of one TGSI opcode, as stored in the table in
 * tgsi_info.c.
 */
struct tgsi_opcode_info
{
   uint num_dst;           /* presumably the number of destination registers */
   uint num_src;           /* presumably the number of source registers */
   boolean is_tex;         /* set for the TEX/TXD/TXP/TXB/TXL/TXF/TXQ entries */
   boolean is_branch;      /* set for CAL, IF, ELSE, BGNLOOP2 and ENDLOOP2 */
   const char *mnemonic;   /* opcode name as text, e.g. "MOV" */
};

/**
 * Return the table entry for \p opcode.
 *
 * For an opcode that is not below TGSI_OPCODE_LAST the implementation
 * asserts and returns NULL.
 */
const struct tgsi_opcode_info *
tgsi_get_opcode_info( uint opcode );

#if defined __cplusplus
}
#endif

#endif /* TGSI_INFO_H */
+ * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "tgsi_iterate.h" + +boolean +tgsi_iterate_shader( + const struct tgsi_token *tokens, + struct tgsi_iterate_context *ctx ) +{ + struct tgsi_parse_context parse; + + if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) + return FALSE; + + ctx->processor = parse.FullHeader.Processor; + ctx->version = parse.FullVersion.Version; + + if (ctx->prolog) + if (!ctx->prolog( ctx )) + goto fail; + + while (!tgsi_parse_end_of_tokens( &parse )) { + tgsi_parse_token( &parse ); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (ctx->iterate_instruction) + if (!ctx->iterate_instruction( ctx, &parse.FullToken.FullInstruction )) + goto fail; + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + if (ctx->iterate_declaration) + if (!ctx->iterate_declaration( ctx, &parse.FullToken.FullDeclaration )) + goto fail; + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + if (ctx->iterate_immediate) + if (!ctx->iterate_immediate( ctx, &parse.FullToken.FullImmediate )) + goto fail; + break; + + default: + assert( 0 ); + } + } + + if (ctx->epilog) + if (!ctx->epilog( ctx )) + goto fail; + + tgsi_parse_free( &parse ); + return TRUE; + +fail: + tgsi_parse_free( &parse ); + return FALSE; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.h b/src/gallium/auxiliary/tgsi/tgsi_iterate.h new file mode 100644 index 0000000000..ec7b85bf63 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.h @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#ifndef TGSI_ITERATE_H
#define TGSI_ITERATE_H

#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"

#if defined __cplusplus
extern "C" {
#endif

/**
 * Callback table and header information used by tgsi_iterate_shader().
 *
 * Every callback is optional: tgsi_iterate_shader() tests each pointer
 * before calling it.  A callback returning FALSE stops the iteration and
 * makes tgsi_iterate_shader() return FALSE.
 */
struct tgsi_iterate_context
{
   /* Called once, after processor/version are filled in and before any token. */
   boolean
   (* prolog)(
      struct tgsi_iterate_context *ctx );

   /* Called for every instruction token. */
   boolean
   (* iterate_instruction)(
      struct tgsi_iterate_context *ctx,
      struct tgsi_full_instruction *inst );

   /* Called for every declaration token. */
   boolean
   (* iterate_declaration)(
      struct tgsi_iterate_context *ctx,
      struct tgsi_full_declaration *decl );

   /* Called for every immediate token. */
   boolean
   (* iterate_immediate)(
      struct tgsi_iterate_context *ctx,
      struct tgsi_full_immediate *imm );

   /* Called once after the last token has been visited. */
   boolean
   (* epilog)(
      struct tgsi_iterate_context *ctx );

   /* Copied from the parsed stream header by tgsi_iterate_shader(). */
   struct tgsi_processor processor;
   struct tgsi_version version;
};

/**
 * Parse \p tokens and invoke the callbacks in \p ctx for each token.
 * Returns TRUE on complete traversal, FALSE if parsing cannot be started
 * or any callback returns FALSE.
 */
boolean
tgsi_iterate_shader(
   const struct tgsi_token *tokens,
   struct tgsi_iterate_context *ctx );

#if defined __cplusplus
}
#endif

#endif /* TGSI_ITERATE_H */
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi_parse.h" +#include "tgsi_build.h" +#include "util/u_memory.h" + +void +tgsi_full_token_init( + union tgsi_full_token *full_token ) +{ + full_token->Token.Type = TGSI_TOKEN_TYPE_DECLARATION; +} + +void +tgsi_full_token_free( + union tgsi_full_token *full_token ) +{ + if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) { + FREE( (void *) full_token->FullImmediate.u.Pointer ); + } +} + +unsigned +tgsi_parse_init( + struct tgsi_parse_context *ctx, + const struct tgsi_token *tokens ) +{ + ctx->FullVersion.Version = *(struct tgsi_version *) &tokens[0]; + if( ctx->FullVersion.Version.MajorVersion > 1 ) { + return TGSI_PARSE_ERROR; + } + + ctx->FullHeader.Header = *(struct tgsi_header *) &tokens[1]; + if( ctx->FullHeader.Header.HeaderSize >= 2 ) { + ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[2]; + } + else { + ctx->FullHeader.Processor = tgsi_default_processor(); + } + + ctx->Tokens = tokens; + ctx->Position = 1 + ctx->FullHeader.Header.HeaderSize; + + tgsi_full_token_init( &ctx->FullToken ); + + return TGSI_PARSE_OK; +} + +void +tgsi_parse_free( + struct tgsi_parse_context *ctx ) +{ + tgsi_full_token_free( &ctx->FullToken ); +} + +boolean +tgsi_parse_end_of_tokens( + struct tgsi_parse_context *ctx ) +{ + return ctx->Position >= + 1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize; +} + + +/** + * This function is used to avoid and work-around type punning/aliasing + * warnings. The warnings seem harmless on x86 but on PPC they cause + * real failures. 
+ */ +static INLINE void +copy_token(void *dst, const void *src) +{ + memcpy(dst, src, 4); +} + + +/** + * Get next 4-byte token, return it at address specified by 'token' + */ +static void +next_token( + struct tgsi_parse_context *ctx, + void *token ) +{ + assert( !tgsi_parse_end_of_tokens( ctx ) ); + copy_token(token, &ctx->Tokens[ctx->Position]); + ctx->Position++; +} + + +void +tgsi_parse_token( + struct tgsi_parse_context *ctx ) +{ + struct tgsi_token token; + unsigned i; + + tgsi_full_token_free( &ctx->FullToken ); + tgsi_full_token_init( &ctx->FullToken ); + + next_token( ctx, &token ); + + switch( token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration; + + *decl = tgsi_default_full_declaration(); + copy_token(&decl->Declaration, &token); + + next_token( ctx, &decl->DeclarationRange ); + + if( decl->Declaration.Semantic ) { + next_token( ctx, &decl->Semantic ); + } + + break; + } + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate; + + *imm = tgsi_default_full_immediate(); + copy_token(&imm->Immediate, &token); + assert( !imm->Immediate.Extended ); + + switch (imm->Immediate.DataType) { + case TGSI_IMM_FLOAT32: + imm->u.Pointer = MALLOC( + sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) ); + for( i = 0; i < imm->Immediate.Size - 1; i++ ) { + next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] ); + } + break; + + default: + assert( 0 ); + } + + break; + } + + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *inst = &ctx->FullToken.FullInstruction; + unsigned extended; + + *inst = tgsi_default_full_instruction(); + copy_token(&inst->Instruction, &token); + extended = inst->Instruction.Extended; + + while( extended ) { + struct tgsi_src_register_ext token; + + next_token( ctx, &token ); + + switch( token.Type ) { + case TGSI_INSTRUCTION_EXT_TYPE_NV: + 
copy_token(&inst->InstructionExtNv, &token); + break; + + case TGSI_INSTRUCTION_EXT_TYPE_LABEL: + copy_token(&inst->InstructionExtLabel, &token); + break; + + case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE: + copy_token(&inst->InstructionExtTexture, &token); + break; + + default: + assert( 0 ); + } + + extended = token.Extended; + } + + assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS ); + + for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { + unsigned extended; + + next_token( ctx, &inst->FullDstRegisters[i].DstRegister ); + + /* + * No support for indirect or multi-dimensional addressing. + */ + assert( !inst->FullDstRegisters[i].DstRegister.Indirect ); + assert( !inst->FullDstRegisters[i].DstRegister.Dimension ); + + extended = inst->FullDstRegisters[i].DstRegister.Extended; + + while( extended ) { + struct tgsi_src_register_ext token; + + next_token( ctx, &token ); + + switch( token.Type ) { + case TGSI_DST_REGISTER_EXT_TYPE_CONDCODE: + copy_token(&inst->FullDstRegisters[i].DstRegisterExtConcode, + &token); + break; + + case TGSI_DST_REGISTER_EXT_TYPE_MODULATE: + copy_token(&inst->FullDstRegisters[i].DstRegisterExtModulate, + &token); + break; + + default: + assert( 0 ); + } + + extended = token.Extended; + } + } + + assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS ); + + for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { + unsigned extended; + + next_token( ctx, &inst->FullSrcRegisters[i].SrcRegister ); + + extended = inst->FullSrcRegisters[i].SrcRegister.Extended; + + while( extended ) { + struct tgsi_src_register_ext token; + + next_token( ctx, &token ); + + switch( token.Type ) { + case TGSI_SRC_REGISTER_EXT_TYPE_SWZ: + copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtSwz, + &token); + break; + + case TGSI_SRC_REGISTER_EXT_TYPE_MOD: + copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtMod, + &token); + break; + + default: + assert( 0 ); + } + + extended = token.Extended; + } + + if( 
inst->FullSrcRegisters[i].SrcRegister.Indirect ) { + next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterInd ); + + /* + * No support for indirect or multi-dimensional addressing. + */ + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended ); + } + + if( inst->FullSrcRegisters[i].SrcRegister.Dimension ) { + next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDim ); + + /* + * No support for multi-dimensional addressing. + */ + assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Dimension ); + assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Extended ); + + if( inst->FullSrcRegisters[i].SrcRegisterDim.Indirect ) { + next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDimInd ); + + /* + * No support for indirect or multi-dimensional addressing. + */ + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended ); + } + } + } + + break; + } + + default: + assert( 0 ); + } +} + + +unsigned +tgsi_num_tokens(const struct tgsi_token *tokens) +{ + struct tgsi_parse_context ctx; + if (tgsi_parse_init(&ctx, tokens) == TGSI_PARSE_OK) { + unsigned len = (ctx.FullHeader.Header.HeaderSize + + ctx.FullHeader.Header.BodySize + + 1); + return len; + } + return 0; +} + + +/** + * Make a new copy of a token array. 
+ */ +struct tgsi_token * +tgsi_dup_tokens(const struct tgsi_token *tokens) +{ + unsigned n = tgsi_num_tokens(tokens); + unsigned bytes = n * sizeof(struct tgsi_token); + struct tgsi_token *new_tokens = (struct tgsi_token *) MALLOC(bytes); + if (new_tokens) + memcpy(new_tokens, tokens, bytes); + return new_tokens; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h new file mode 100644 index 0000000000..054350712d --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -0,0 +1,151 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#ifndef TGSI_PARSE_H
#define TGSI_PARSE_H

#include "pipe/p_shader_tokens.h"

#if defined __cplusplus
extern "C" {
#endif

/* Decoded version token (tokens[0] of a stream). */
struct tgsi_full_version
{
   struct tgsi_version  Version;
};

/* Decoded stream header; Processor falls back to a default when the
 * header is too short to contain it. */
struct tgsi_full_header
{
   struct tgsi_header      Header;
   struct tgsi_processor   Processor;
};

/* A destination register together with its optional extension tokens. */
struct tgsi_full_dst_register
{
   struct tgsi_dst_register               DstRegister;
   struct tgsi_dst_register_ext_concode   DstRegisterExtConcode;
   struct tgsi_dst_register_ext_modulate  DstRegisterExtModulate;
};

/* A source register together with its optional extension, indirect and
 * dimension tokens. */
struct tgsi_full_src_register
{
   struct tgsi_src_register         SrcRegister;
   struct tgsi_src_register_ext_swz SrcRegisterExtSwz;
   struct tgsi_src_register_ext_mod SrcRegisterExtMod;
   struct tgsi_src_register         SrcRegisterInd;    /* read when SrcRegister.Indirect */
   struct tgsi_dimension            SrcRegisterDim;    /* read when SrcRegister.Dimension */
   struct tgsi_src_register         SrcRegisterDimInd; /* read when SrcRegisterDim.Indirect */
};

/* A declaration: header token, range token and optional semantic token. */
struct tgsi_full_declaration
{
   struct tgsi_declaration          Declaration;
   struct tgsi_declaration_range    DeclarationRange;
   struct tgsi_declaration_semantic Semantic;
};

/* An immediate: header token plus a heap-allocated value array.  The parser
 * allocates the array and tgsi_full_token_free() releases it. */
struct tgsi_full_immediate
{
   struct tgsi_immediate   Immediate;
   union
   {
      const void                          *Pointer;
      const struct tgsi_immediate_float32 *ImmediateFloat32;
   } u;
};

#define TGSI_FULL_MAX_DST_REGISTERS 2
#define TGSI_FULL_MAX_SRC_REGISTERS 4 /* TXD has 4 */

/* A fully decoded instruction with all of its extension tokens and
 * register operands. */
struct tgsi_full_instruction
{
   struct tgsi_instruction             Instruction;
   struct tgsi_instruction_ext_nv      InstructionExtNv;
   struct tgsi_instruction_ext_label   InstructionExtLabel;
   struct tgsi_instruction_ext_texture InstructionExtTexture;
   struct tgsi_full_dst_register       FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS];
   struct tgsi_full_src_register       FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS];
};

/* The current decoded token; Token.Type tells which member is valid. */
union tgsi_full_token
{
   struct tgsi_token             Token;
   struct tgsi_full_declaration  FullDeclaration;
   struct tgsi_full_immediate    FullImmediate;
   struct tgsi_full_instruction  FullInstruction;
};

/* Reset a full token to its neutral (DECLARATION) state. */
void
tgsi_full_token_init(
   union tgsi_full_token *full_token );

/* Release the value array of an IMMEDIATE token; other types own nothing. */
void
tgsi_full_token_free(
   union tgsi_full_token *full_token );

/* Parser state: the stream, the read position (in tokens) and the most
 * recently decoded token. */
struct tgsi_parse_context
{
   const struct tgsi_token    *Tokens;
   unsigned                   Position;
   struct tgsi_full_version   FullVersion;
   struct tgsi_full_header    FullHeader;
   union tgsi_full_token      FullToken;
};

/* Return codes of tgsi_parse_init(). */
#define TGSI_PARSE_OK      0
#define TGSI_PARSE_ERROR   1

/* Read version and header and position the parser on the first body token.
 * Returns TGSI_PARSE_ERROR when the major version is greater than 1. */
unsigned
tgsi_parse_init(
   struct tgsi_parse_context *ctx,
   const struct tgsi_token *tokens );

/* Release storage owned by the last decoded token. */
void
tgsi_parse_free(
   struct tgsi_parse_context *ctx );

/* Non-zero once version token, header and body have all been consumed. */
boolean
tgsi_parse_end_of_tokens(
   struct tgsi_parse_context *ctx );

/* Decode the next token into ctx->FullToken, freeing the previous one. */
void
tgsi_parse_token(
   struct tgsi_parse_context *ctx );

/* Total stream length in tokens, or 0 if the stream cannot be parsed. */
unsigned
tgsi_num_tokens(const struct tgsi_token *tokens);

/* MALLOC'ed copy of a token stream; null when the allocation fails.
 * NOTE(review): presumably the caller releases it with FREE -- confirm. */
struct tgsi_token *
tgsi_dup_tokens(const struct tgsi_token *tokens);

#if defined __cplusplus
}
#endif

#endif /* TGSI_PARSE_H */
/**
 * TGSI to PowerPC code generation.
 */

#include "pipe/p_config.h"

#if defined(PIPE_ARCH_PPC)

#include "pipe/p_debug.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
#include "util/u_memory.h"
/* NOTE(review): an SSE header in a PPC-only file looks like a leftover
 * from the SSE2 backend -- confirm whether it is needed. */
#include "util/u_sse.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_exec.h"
#include "tgsi_ppc.h"
#include "rtasm/rtasm_ppc.h"


/**
 * Since it's pretty much impossible to form PPC vector immediates, load
 * them from memory here:
 *
 * load_constant_vec() searches this array by value, so every constant the
 * code generator needs must be listed here.
 */
const float ppc_builtin_constants[] ALIGN16_ATTRIB = {
   1.0f, -128.0f, 128.0, 0.0
};


/* Loop over all channels; CHAN must be an already declared variable. */
#define FOR_EACH_CHANNEL( CHAN )\
   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)

/* Non-zero when channel CHAN is set in the write mask of destination 0. */
#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))

/* 'if' head guarding a statement by the destination-0 write mask. */
#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))

/* Loop over the channels enabled in the destination-0 write mask. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
   FOR_EACH_CHANNEL( CHAN )\
      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )

/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Short aliases for the interpreter's internal temporaries. */
#define TEMP_ONE_I         TGSI_EXEC_TEMP_ONE_I
#define TEMP_ONE_C         TGSI_EXEC_TEMP_ONE_C

#define TEMP_R0            TGSI_EXEC_TEMP_R0
#define TEMP_ADDR          TGSI_EXEC_TEMP_ADDR
/**
 * Context/state used during code gen.
 *
 * one_vec and bit31_vec are allocated lazily: a negative value means the
 * register has not been allocated and loaded yet.
 */
struct gen_context
{
   struct ppc_function *f;
   int inputs_reg;    /**< GP register pointing to input params */
   int outputs_reg;   /**< GP register pointing to output params */
   int temps_reg;     /**< GP register pointing to temporary "registers" */
   int immed_reg;     /**< GP register pointing to immediates buffer */
   int const_reg;     /**< GP register pointing to constants buffer */
   int builtins_reg;  /**< GP register pointing to built-in constants */

   int one_vec;       /**< vector register with {1.0, 1.0, 1.0, 1.0} */
   int bit31_vec;     /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */
};


/**
 * Load the given vector register with {value, value, value, value}.
 * The value must be in the ppc_builtin_constants[] array.
 * We wouldn't need this if there was a simple way to load PPC vector
 * registers with immediate values!
 *
 * \param gen      code generation context
 * \param dst_vec  vector register to load
 * \param value    constant to load; looked up by exact comparison.  An
 *                 assertion fires when it is not in the array.
 */
static void
load_constant_vec(struct gen_context *gen, int dst_vec, float value)
{
   uint pos;
   for (pos = 0; pos < Elements(ppc_builtin_constants); pos++) {
      if (ppc_builtin_constants[pos] == value) {
         /* Temporary GP register holding the byte offset of the constant. */
         int offset_reg = ppc_allocate_register(gen->f);
         int offset = pos * 4;

         ppc_li(gen->f, offset_reg, offset);
         /* Load 4-byte word into vector register.
          * The vector slot depends on the effective address we load from.
          * We know that our builtins start at a 16-byte boundary so we
          * know that 'pos % 4' tells us which vector slot will have the
          * loaded word.  The other vector slots will be undefined.
          */
         ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg);
         /* splat word[pos % 4] across the vector reg */
         ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4);
         ppc_release_register(gen->f, offset_reg);
         return;
      }
   }
   assert(0 && "Need to add new constant to ppc_builtin_constants array");
}
+ */ +static int +gen_one_vec(struct gen_context *gen) +{ + if (gen->one_vec < 0) { + gen->one_vec = ppc_allocate_vec_register(gen->f); + load_constant_vec(gen, gen->one_vec, 1.0f); + } + return gen->one_vec; +} + +/** + * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}. + */ +static int +gen_get_bit31_vec(struct gen_context *gen) +{ + if (gen->bit31_vec < 0) { + gen->bit31_vec = ppc_allocate_vec_register(gen->f); + ppc_vspltisw(gen->f, gen->bit31_vec, -1); + ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec); + } + return gen->bit31_vec; +} + + +/** + * Register fetch, put result in 'dst_vec'. + */ +static void +emit_fetch(struct gen_context *gen, + unsigned dst_vec, + const struct tgsi_full_src_register *reg, + const unsigned chan_index) +{ + uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index); + + switch (swizzle) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch (reg->SrcRegister.File) { + case TGSI_FILE_INPUT: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; + case TGSI_FILE_TEMPORARY: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; + case TGSI_FILE_IMMEDIATE: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_lvx(gen->f, dst_vec, gen->immed_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; + case TGSI_FILE_CONSTANT: + { + int offset_reg = ppc_allocate_register(gen->f); + int 
offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; + ppc_li(gen->f, offset_reg, offset); + /* Load 4-byte word into vector register. + * The vector slot depends on the effective address we load from. + * We know that our constants start at a 16-byte boundary so we + * know that 'swizzle' tells us which vector slot will have the + * loaded word. The other vector slots will be undefined. + */ + ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg); + /* splat word[swizzle] across the vector reg */ + ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); + ppc_release_register(gen->f, offset_reg); + } + break; + default: + assert( 0 ); + } + break; + case TGSI_EXTSWIZZLE_ZERO: + ppc_vzero(gen->f, dst_vec); + break; + case TGSI_EXTSWIZZLE_ONE: + { + int one_vec = gen_one_vec(gen); + ppc_vmove(gen->f, dst_vec, one_vec); + } + break; + default: + assert( 0 ); + } + + { + uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index); + if (sign_op != TGSI_UTIL_SIGN_KEEP) { + int bit31_vec = gen_get_bit31_vec(gen); + + switch (sign_op) { + case TGSI_UTIL_SIGN_CLEAR: + /* vec = vec & ~bit31 */ + ppc_vandc(gen->f, dst_vec, dst_vec, bit31_vec); + break; + case TGSI_UTIL_SIGN_SET: + /* vec = vec | bit31 */ + ppc_vor(gen->f, dst_vec, dst_vec, bit31_vec); + break; + case TGSI_UTIL_SIGN_TOGGLE: + /* vec = vec ^ bit31 */ + ppc_vxor(gen->f, dst_vec, dst_vec, bit31_vec); + break; + default: + assert(0); + } + } + } +} + +#define FETCH( GEN, INST, DST_VEC, SRC_REG, CHAN ) \ + emit_fetch( GEN, DST_VEC, &(INST).FullSrcRegisters[SRC_REG], CHAN ) + + + +/** + * Register store. Store 'src_vec' at location indicated by 'reg'. 
+ */ +static void +emit_store(struct gen_context *gen, + unsigned src_vec, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + unsigned chan_index) +{ + switch (reg->DstRegister.File) { + case TGSI_FILE_OUTPUT: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; + case TGSI_FILE_TEMPORARY: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; +#if 0 + case TGSI_FILE_ADDRESS: + emit_addrs( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; +#endif + default: + assert( 0 ); + } + +#if 0 + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + + case TGSI_SAT_ZERO_ONE: + /* assert( 0 ); */ + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + } +#endif +} + + +#define STORE( GEN, INST, XMM, INDEX, CHAN )\ + emit_store( GEN, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) + + + +static void +emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + uint chan_index; + + FETCH(gen, *inst, v0, 0, CHAN_X); + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_RSQ: + /* v1 = 1.0 / sqrt(v0) */ + ppc_vrsqrtefp(gen->f, v1, v0); + break; + case TGSI_OPCODE_RCP: + /* v1 = 1.0 / v0 */ + ppc_vrefp(gen->f, v1, v0); + break; + default: + assert(0); + } + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE(gen, *inst, v1, 0, chan_index); + } + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); +} + + +static 
void +emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + uint chan_index; + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(gen, *inst, 0, 0, chan_index); /* v0 = srcreg[0] */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + /* turn off the most significant bit of each vector float word */ + { + int v1 = ppc_allocate_vec_register(gen->f); + ppc_vspltisw(gen->f, v1, -1); /* v1 = {-1, -1, -1, -1} */ + ppc_vslw(gen->f, v1, v1, v1); /* v1 = {1<<31, 1<<31, 1<<31, 1<<31} */ + ppc_vandc(gen->f, v0, v0, v1); /* v0 = v0 & ~v1 */ + ppc_release_vec_register(gen->f, v1); + } + break; + case TGSI_OPCODE_FLOOR: + ppc_vrfim(gen->f, v0, v0); /* v0 = floor(v0) */ + break; + case TGSI_OPCODE_FRAC: + { + int v1 = ppc_allocate_vec_register(gen->f); + ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */ + ppc_vsubfp(gen->f, v0, v0, v1); /* v0 = v0 - v1 */ + ppc_release_vec_register(gen->f, v1); + } + break; + case TGSI_OPCODE_EXPBASE2: + ppc_vexptefp(gen->f, v0, v0); /* v0 = 2^v0 */ + break; + case TGSI_OPCODE_LOGBASE2: + /* XXX this may be broken! 
*/ + ppc_vlogefp(gen->f, v0, v0); /* v0 = log2(v0) */ + break; + case TGSI_OPCODE_MOV: + /* nothing */ + break; + default: + assert(0); + } + STORE(gen, *inst, v0, 0, chan_index); /* store v0 */ + } + ppc_release_vec_register(gen->f, v0); +} + + +static void +emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + int v2 = ppc_allocate_vec_register(gen->f); + uint chan_index; + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ + FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + ppc_vaddfp(gen->f, v2, v0, v1); + break; + case TGSI_OPCODE_SUB: + ppc_vsubfp(gen->f, v2, v0, v1); + break; + case TGSI_OPCODE_MUL: + ppc_vxor(gen->f, v2, v2, v2); /* v2 = {0, 0, 0, 0} */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v0 */ + break; + case TGSI_OPCODE_MIN: + ppc_vminfp(gen->f, v2, v0, v1); + break; + case TGSI_OPCODE_MAX: + ppc_vmaxfp(gen->f, v2, v0, v1); + break; + default: + assert(0); + } + STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ + } + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); + ppc_release_vec_register(gen->f, v2); +} + + +/** + * Vector comparisons, resulting in 1.0 or 0.0 values. 
+ */ +static void +emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + int v2 = ppc_allocate_vec_register(gen->f); + uint chan_index; + boolean complement = FALSE; + int one_vec = gen_one_vec(gen); + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ + FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_SNE: + complement = TRUE; + /* fall-through */ + case TGSI_OPCODE_SEQ: + ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */ + break; + + case TGSI_OPCODE_SGE: + complement = TRUE; + /* fall-through */ + case TGSI_OPCODE_SLT: + ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */ + break; + + case TGSI_OPCODE_SLE: + complement = TRUE; + /* fall-through */ + case TGSI_OPCODE_SGT: + ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? 
~0 : 0 */ + break; + default: + assert(0); + } + + /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */ + + if (complement) + ppc_vandc(gen->f, v2, one_vec, v2); /* v2 = one_vec & ~v2 */ + else + ppc_vand(gen->f, v2, one_vec, v2); /* v2 = one_vec & v2 */ + + STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ + } + + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); + ppc_release_vec_register(gen->f, v2); +} + + +static void +emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + int v2 = ppc_allocate_vec_register(gen->f); + uint chan_index; + + ppc_vxor(gen->f, v2, v2, v2); /* v2 = {0, 0, 0, 0} */ + + FETCH(gen, *inst, v0, 0, CHAN_X); /* v0 = src0.XXXX */ + FETCH(gen, *inst, v1, 1, CHAN_X); /* v1 = src1.XXXX */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + + FETCH(gen, *inst, v0, 0, CHAN_Y); /* v0 = src0.YYYY */ + FETCH(gen, *inst, v1, 1, CHAN_Y); /* v1 = src1.YYYY */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + + FETCH(gen, *inst, v0, 0, CHAN_Z); /* v0 = src0.ZZZZ */ + FETCH(gen, *inst, v1, 1, CHAN_Z); /* v1 = src1.ZZZZ */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + + if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) { + FETCH(gen, *inst, v0, 0, CHAN_W); /* v0 = src0.WWWW */ + FETCH(gen, *inst, v1, 1, CHAN_W); /* v1 = src1.WWWW */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + } + else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) { + FETCH(gen, *inst, v1, 1, CHAN_W); /* v1 = src1.WWWW */ + ppc_vaddfp(gen->f, v2, v2, v1); /* v2 = v2 + v1 */ + } + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ + } + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); + ppc_release_vec_register(gen->f, v2); +} + + +static void +emit_triop(struct gen_context *gen, struct 
tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + int v2 = ppc_allocate_vec_register(gen->f); + int v3 = ppc_allocate_vec_register(gen->f); + uint chan_index; + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ + FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ + FETCH(gen, *inst, v2, 2, chan_index); /* v2 = srcreg[2] */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_MAD: + ppc_vmaddfp(gen->f, v3, v0, v1, v2); /* v3 = v0 * v1 + v2 */ + break; + case TGSI_OPCODE_LRP: + ppc_vsubfp(gen->f, v3, v1, v2); /* v3 = v1 - v2 */ + ppc_vmaddfp(gen->f, v3, v0, v3, v2); /* v3 = v0 * v3 + v2 */ + break; + default: + assert(0); + } + STORE(gen, *inst, v3, 0, chan_index); /* store v3 */ + } + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); + ppc_release_vec_register(gen->f, v2); + ppc_release_vec_register(gen->f, v3); +} + + + +/** Approximation for vr = pow(va, vb) */ +static void +ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb) +{ + /* pow(a,b) ~= exp2(log2(a) * b) */ + int t_vec = ppc_allocate_vec_register(f); + int zero_vec = ppc_allocate_vec_register(f); + + ppc_vzero(f, zero_vec); + + ppc_vlogefp(f, t_vec, va); /* t = log2(va) */ + ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb */ + ppc_vexptefp(f, vr, t_vec); /* vr = 2^t */ + + ppc_release_vec_register(f, t_vec); + ppc_release_vec_register(f, zero_vec); +} + + +static void +emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int one_vec = gen_one_vec(gen); + + /* Compute X */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(gen, *inst, one_vec, 0, CHAN_X); + } + + /* Compute Y, Z */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + int x_vec = ppc_allocate_vec_register(gen->f); + int zero_vec = ppc_allocate_vec_register(gen->f); + + FETCH(gen, 
*inst, x_vec, 0, CHAN_X); /* x_vec = src[0].x */ + + ppc_vzero(gen->f, zero_vec); /* zero = {0,0,0,0} */ + ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */ + + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(gen, *inst, x_vec, 0, CHAN_Y); /* store Y */ + } + + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + int y_vec = ppc_allocate_vec_register(gen->f); + int z_vec = ppc_allocate_vec_register(gen->f); + int w_vec = ppc_allocate_vec_register(gen->f); + int pow_vec = ppc_allocate_vec_register(gen->f); + int pos_vec = ppc_allocate_vec_register(gen->f); + int p128_vec = ppc_allocate_vec_register(gen->f); + int n128_vec = ppc_allocate_vec_register(gen->f); + + FETCH(gen, *inst, y_vec, 0, CHAN_Y); /* y_vec = src[0].y */ + ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */ + + FETCH(gen, *inst, w_vec, 0, CHAN_W); /* w_vec = src[0].w */ + + /* clamp Y to [-128, 128] */ + load_constant_vec(gen, p128_vec, 128.0f); + load_constant_vec(gen, n128_vec, -128.0f); + ppc_vmaxfp(gen->f, y_vec, y_vec, n128_vec); /* y = max(y, -128) */ + ppc_vminfp(gen->f, y_vec, y_vec, p128_vec); /* y = min(y, 128) */ + + /* if temp.x > 0 + * z = pow(tmp.y, tmp.w) + * else + * z = 0.0 + */ + ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec); /* pow = pow(y, w) */ + ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */ + ppc_vand(gen->f, z_vec, pow_vec, pos_vec); /* z = pow & pos */ + + STORE(gen, *inst, z_vec, 0, CHAN_Z); /* store Z */ + + ppc_release_vec_register(gen->f, y_vec); + ppc_release_vec_register(gen->f, z_vec); + ppc_release_vec_register(gen->f, w_vec); + ppc_release_vec_register(gen->f, pow_vec); + ppc_release_vec_register(gen->f, pos_vec); + ppc_release_vec_register(gen->f, p128_vec); + ppc_release_vec_register(gen->f, n128_vec); + } + + ppc_release_vec_register(gen->f, x_vec); + ppc_release_vec_register(gen->f, zero_vec); + } + + /* Compute W */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { + STORE(gen, *inst, one_vec, 0, 
CHAN_W); + } +} + + +static int +emit_instruction(struct gen_context *gen, + struct tgsi_full_instruction *inst) +{ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_ABS: + case TGSI_OPCODE_FLOOR: + case TGSI_OPCODE_FRAC: + case TGSI_OPCODE_EXPBASE2: + case TGSI_OPCODE_LOGBASE2: + emit_unaryop(gen, inst); + break; + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_RCP: + emit_scalar_unaryop(gen, inst); + break; + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_SUB: + case TGSI_OPCODE_MUL: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_MAX: + emit_binop(gen, inst); + break; + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SNE: + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SGE: + emit_inequality(gen, inst); + break; + case TGSI_OPCODE_MAD: + case TGSI_OPCODE_LRP: + emit_triop(gen, inst); + break; + case TGSI_OPCODE_DP3: + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + emit_dotprod(gen, inst); + break; + case TGSI_OPCODE_LIT: + emit_lit(gen, inst); + break; + case TGSI_OPCODE_END: + /* normal end */ + return 1; + default: + return 0; + } + + + return 1; +} + +static void +emit_declaration( + struct ppc_function *func, + struct tgsi_full_declaration *decl ) +{ + if( decl->Declaration.File == TGSI_FILE_INPUT ) { +#if 0 + unsigned first, last, mask; + unsigned i, j; + + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + switch( decl->Declaration.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + emit_coef_a0( func, 0, i, j ); + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_LINEAR: + emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_coef_a0( func, 4, i, j ); + emit_mul( func, 2, 3 ); 
/* y * dady */ + emit_add( func, 0, 4 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_tempf( func, 4, 0, TGSI_SWIZZLE_W ); + emit_coef_a0( func, 5, i, j ); + emit_rcp( func, 4, 4 ); /* 1.0 / w */ + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 5 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */ + emit_inputs( func, 0, i, j ); + break; + + default: + assert( 0 ); + break; + } + } + } + } +#endif + } +} + + + +static void +emit_prologue(struct ppc_function *func) +{ + /* XXX set up stack frame */ +} + + +static void +emit_epilogue(struct ppc_function *func) +{ + ppc_return(func); + /* XXX restore prev stack frame */ +} + + + +/** + * Translate a TGSI vertex/fragment shader to PPC code. 
+ * + * \param tokens the TGSI input shader + * \param func the output PPC code/function + * \param immediates buffer to place immediates, later passed to PPC func + * \return TRUE for success, FALSE if translation failed + */ +boolean +tgsi_emit_ppc(const struct tgsi_token *tokens, + struct ppc_function *func, + float (*immediates)[4], + boolean do_swizzles ) +{ + static int use_ppc_asm = -1; + struct tgsi_parse_context parse; + /*boolean instruction_phase = FALSE;*/ + unsigned ok = 1; + uint num_immediates = 0; + struct gen_context gen; + + if (use_ppc_asm < 0) { + /* If GALLIUM_NOPPC is set, don't use PPC codegen */ + use_ppc_asm = !debug_get_bool_option("GALLIUM_NOPPC", FALSE); + } + if (!use_ppc_asm) + return FALSE; + + util_init_math(); + + gen.f = func; + gen.inputs_reg = ppc_reserve_register(func, 3); /* first function param */ + gen.outputs_reg = ppc_reserve_register(func, 4); /* second function param */ + gen.temps_reg = ppc_reserve_register(func, 5); /* ... */ + gen.immed_reg = ppc_reserve_register(func, 6); + gen.const_reg = ppc_reserve_register(func, 7); + gen.builtins_reg = ppc_reserve_register(func, 8); + gen.one_vec = -1; + gen.bit31_vec = -1; + + emit_prologue(func); + + tgsi_parse_init( &parse, tokens ); + + while (!tgsi_parse_end_of_tokens(&parse) && ok) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + emit_declaration(func, &parse.FullToken.FullDeclaration ); + } + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + ok = emit_instruction(&gen, &parse.FullToken.FullInstruction); + + if (!ok) { + debug_printf("failed to translate tgsi opcode %d to PPC (%s)\n", + parse.FullToken.FullInstruction.Instruction.Opcode, + parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? 
+ "vertex shader" : "fragment shader"); + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* splat each immediate component into a float[4] vector for SoA */ + { + const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + float *imm = (float *) immediates; + uint i; + assert(size <= 4); + assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); + for (i = 0; i < size; i++) { + const float value = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + imm[num_immediates * 4 + 0] = + imm[num_immediates * 4 + 1] = + imm[num_immediates * 4 + 2] = + imm[num_immediates * 4 + 3] = value; + num_immediates++; + } + } + break; + + default: + ok = 0; + assert( 0 ); + } + } + + emit_epilogue(func); + + tgsi_parse_free( &parse ); + + return ok; +} + +#endif /* PIPE_ARCH_PPC */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.h b/src/gallium/auxiliary/tgsi/tgsi_ppc.h new file mode 100644 index 0000000000..829ec075e7 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.h @@ -0,0 +1,51 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_PPC_H +#define TGSI_PPC_H + +#if defined __cplusplus +extern "C" { +#endif + +struct tgsi_token; +struct ppc_function; + +extern const float ppc_builtin_constants[]; + + +boolean +tgsi_emit_ppc(const struct tgsi_token *tokens, + struct ppc_function *function, + float (*immediates)[4], + boolean do_swizzles); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_PPC_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c new file mode 100644 index 0000000000..11659247c0 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -0,0 +1,360 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "tgsi_sanity.h" +#include "tgsi_info.h" +#include "tgsi_iterate.h" + +#define MAX_REGISTERS 256 + +typedef uint reg_flag; + +#define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8) + +struct sanity_check_ctx +{ + struct tgsi_iterate_context iter; + + reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG]; + reg_flag regs_used[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG]; + boolean regs_ind_used[TGSI_FILE_COUNT]; + uint num_imms; + uint num_instructions; + uint index_of_END; + + uint errors; + uint warnings; +}; + +static void +report_error( + struct sanity_check_ctx *ctx, + const char *format, + ... 
) +{ + va_list args; + + debug_printf( "Error : " ); + va_start( args, format ); + _debug_vprintf( format, args ); + va_end( args ); + debug_printf( "\n" ); + ctx->errors++; +} + +static void +report_warning( + struct sanity_check_ctx *ctx, + const char *format, + ... ) +{ + va_list args; + + debug_printf( "Warning: " ); + va_start( args, format ); + _debug_vprintf( format, args ); + va_end( args ); + debug_printf( "\n" ); + ctx->warnings++; +} + +static boolean +check_file_name( + struct sanity_check_ctx *ctx, + uint file ) +{ + if (file <= TGSI_FILE_NULL || file >= TGSI_FILE_COUNT) { + report_error( ctx, "Invalid register file name" ); + return FALSE; + } + return TRUE; +} + +static boolean +is_register_declared( + struct sanity_check_ctx *ctx, + uint file, + int index ) +{ + assert( index >= 0 && index < MAX_REGISTERS ); + + return (ctx->regs_decl[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; +} + +static boolean +is_any_register_declared( + struct sanity_check_ctx *ctx, + uint file ) +{ + uint i; + + for (i = 0; i < MAX_REGISTERS / BITS_IN_REG_FLAG; i++) + if (ctx->regs_decl[file][i]) + return TRUE; + return FALSE; +} + +static boolean +is_register_used( + struct sanity_check_ctx *ctx, + uint file, + int index ) +{ + assert( index < MAX_REGISTERS ); + + return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? 
TRUE : FALSE; +} + +static const char *file_names[] = +{ + "NULL", + "CONST", + "IN", + "OUT", + "TEMP", + "SAMP", + "ADDR", + "IMM" +}; + +static boolean +check_register_usage( + struct sanity_check_ctx *ctx, + uint file, + int index, + const char *name, + boolean indirect_access ) +{ + if (!check_file_name( ctx, file )) + return FALSE; + + if (index < 0 || index > MAX_REGISTERS) { + report_error( ctx, "%s[%i]: Invalid index %s", file_names[file], index, name ); + return FALSE; + } + + if (indirect_access) { + if (!is_any_register_declared( ctx, file )) + report_error( ctx, "%s: Undeclared %s register", file_names[file], name ); + ctx->regs_ind_used[file] = TRUE; + } + else { + if (!is_register_declared( ctx, file, index )) + report_error( ctx, "%s[%d]: Undeclared %s register", file_names[file], index, name ); + ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG)); + } + return TRUE; +} + +static boolean +iter_instruction( + struct tgsi_iterate_context *iter, + struct tgsi_full_instruction *inst ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + const struct tgsi_opcode_info *info; + uint i; + + if (inst->Instruction.Opcode == TGSI_OPCODE_END) { + if (ctx->index_of_END != ~0) { + report_error( ctx, "Too many END instructions" ); + } + ctx->index_of_END = ctx->num_instructions; + } + + info = tgsi_get_opcode_info( inst->Instruction.Opcode ); + if (info == NULL) { + report_error( ctx, "Invalid instruction opcode" ); + return TRUE; + } + + if (info->num_dst != inst->Instruction.NumDstRegs) { + report_error( ctx, "Invalid number of destination operands" ); + } + if (info->num_src != inst->Instruction.NumSrcRegs) { + report_error( ctx, "Invalid number of source operands" ); + } + + /* Check destination and source registers' validity. + * Mark the registers as used. 
+ */ + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + check_register_usage( + ctx, + inst->FullDstRegisters[i].DstRegister.File, + inst->FullDstRegisters[i].DstRegister.Index, + "destination", + FALSE ); + } + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + check_register_usage( + ctx, + inst->FullSrcRegisters[i].SrcRegister.File, + inst->FullSrcRegisters[i].SrcRegister.Index, + "source", + (boolean)inst->FullSrcRegisters[i].SrcRegister.Indirect ); + if (inst->FullSrcRegisters[i].SrcRegister.Indirect) { + uint file; + int index; + + file = inst->FullSrcRegisters[i].SrcRegisterInd.File; + index = inst->FullSrcRegisters[i].SrcRegisterInd.Index; + check_register_usage( + ctx, + file, + index, + "indirect", + FALSE ); + if (file != TGSI_FILE_ADDRESS || index != 0) + report_warning( ctx, "Indirect register not ADDR[0]" ); + } + } + + ctx->num_instructions++; + + return TRUE; +} + +static boolean +iter_declaration( + struct tgsi_iterate_context *iter, + struct tgsi_full_declaration *decl ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + uint file; + uint i; + + /* No declarations allowed after the first instruction. + */ + if (ctx->num_instructions > 0) + report_error( ctx, "Instruction expected but declaration found" ); + + /* Check registers' validity. + * Mark the registers as declared. 
+ */ + file = decl->Declaration.File; + if (!check_file_name( ctx, file )) + return TRUE; + for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) { + if (is_register_declared( ctx, file, i )) + report_error( ctx, "The same register declared twice" ); + ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG)); + } + + return TRUE; +} + +static boolean +iter_immediate( + struct tgsi_iterate_context *iter, + struct tgsi_full_immediate *imm ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + + assert( ctx->num_imms < MAX_REGISTERS ); + + /* No immediates allowed after the first instruction. + */ + if (ctx->num_instructions > 0) + report_error( ctx, "Instruction expected but immediate found" ); + + /* Mark the register as declared. + */ + ctx->regs_decl[TGSI_FILE_IMMEDIATE][ctx->num_imms / BITS_IN_REG_FLAG] |= (1 << (ctx->num_imms % BITS_IN_REG_FLAG)); + ctx->num_imms++; + + /* Check data type validity. + */ + if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) { + report_error( ctx, "Invalid immediate data type" ); + return TRUE; + } + + return TRUE; +} + +static boolean +epilog( + struct tgsi_iterate_context *iter ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + uint file; + + /* There must be an END instruction somewhere. + */ + if (ctx->index_of_END == ~0) { + report_error( ctx, "Missing END instruction" ); + } + + /* Check if all declared registers were used. + */ + for (file = TGSI_FILE_NULL; file < TGSI_FILE_COUNT; file++) { + uint i; + + for (i = 0; i < MAX_REGISTERS; i++) { + if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i ) && !ctx->regs_ind_used[file]) { + report_warning( ctx, "Register never used" ); + } + } + } + + /* Print totals, if any. 
+ */ + if (ctx->errors || ctx->warnings) + debug_printf( "%u errors, %u warnings\n", ctx->errors, ctx->warnings ); + + return TRUE; +} + +boolean +tgsi_sanity_check( + struct tgsi_token *tokens ) +{ + struct sanity_check_ctx ctx; + + ctx.iter.prolog = NULL; + ctx.iter.iterate_instruction = iter_instruction; + ctx.iter.iterate_declaration = iter_declaration; + ctx.iter.iterate_immediate = iter_immediate; + ctx.iter.epilog = epilog; + + memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) ); + memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) ); + memset( ctx.regs_ind_used, 0, sizeof( ctx.regs_ind_used ) ); + ctx.num_imms = 0; + ctx.num_instructions = 0; + ctx.index_of_END = ~0; + + ctx.errors = 0; + ctx.warnings = 0; + + if (!tgsi_iterate_shader( tokens, &ctx.iter )) + return FALSE; + + return ctx.errors == 0; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/tgsi_sanity.h new file mode 100644 index 0000000000..ca45e94c7a --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.h @@ -0,0 +1,49 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_SANITY_H +#define TGSI_SANITY_H + +#include "pipe/p_shader_tokens.h" + +#if defined __cplusplus +extern "C" { +#endif + +/* Check the given token stream for errors and common mistakes. + * Diagnostic messages are printed out to the debug output. + * Returns TRUE if there are no errors, even though there could be some warnings. + */ +boolean +tgsi_sanity_check( + struct tgsi_token *tokens ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_SANITY_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c new file mode 100644 index 0000000000..be4870a498 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -0,0 +1,226 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI program scan utility. + * Used to determine which registers and instructions are used by a shader. 
+ * + * Authors: Brian Paul + */ + + +#include "util/u_math.h" +#include "tgsi/tgsi_build.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" + + + + +/** + */ +void +tgsi_scan_shader(const struct tgsi_token *tokens, + struct tgsi_shader_info *info) +{ + uint procType, i; + struct tgsi_parse_context parse; + + memset(info, 0, sizeof(*info)); + for (i = 0; i < TGSI_FILE_COUNT; i++) + info->file_max[i] = -1; + + /** + ** Setup to begin parsing input shader + **/ + if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) { + debug_printf("tgsi_parse_init() failed in tgsi_scan_shader()!\n"); + return; + } + procType = parse.FullHeader.Processor.Processor; + assert(procType == TGSI_PROCESSOR_FRAGMENT || + procType == TGSI_PROCESSOR_VERTEX || + procType == TGSI_PROCESSOR_GEOMETRY); + + + /** + ** Loop over incoming program tokens/instructions + */ + while( !tgsi_parse_end_of_tokens( &parse ) ) { + + info->num_tokens++; + + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *fullinst + = &parse.FullToken.FullInstruction; + + assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); + info->opcode_count[fullinst->Instruction.Opcode]++; + } + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + { + struct tgsi_full_declaration *fulldecl + = &parse.FullToken.FullDeclaration; + uint file = fulldecl->Declaration.File; + uint i; + for (i = fulldecl->DeclarationRange.First; + i <= fulldecl->DeclarationRange.Last; + i++) { + + /* only first 32 regs will appear in this bitfield */ + info->file_mask[file] |= (1 << i); + info->file_count[file]++; + info->file_max[file] = MAX2(info->file_max[file], (int)i); + + if (file == TGSI_FILE_INPUT) { + info->input_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName; + info->input_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex; + info->num_inputs++; + } + + if (file == TGSI_FILE_OUTPUT) { + info->output_semantic_name[i] = 
(ubyte)fulldecl->Semantic.SemanticName; + info->output_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex; + info->num_outputs++; + } + + /* special case */ + if (procType == TGSI_PROCESSOR_FRAGMENT && + file == TGSI_FILE_OUTPUT && + fulldecl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { + info->writes_z = TRUE; + } + } + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + info->immediate_count++; + break; + + default: + assert( 0 ); + } + } + + assert( info->file_max[TGSI_FILE_INPUT] + 1 == info->num_inputs ); + assert( info->file_max[TGSI_FILE_OUTPUT] + 1 == info->num_outputs ); + + info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] || + info->opcode_count[TGSI_OPCODE_KILP]); + + tgsi_parse_free (&parse); +} + + + +/** + * Check if the given shader is a "passthrough" shader consisting of only + * MOV instructions of the form: MOV OUT[n], IN[n] + * + */ +boolean +tgsi_is_passthrough_shader(const struct tgsi_token *tokens) +{ + struct tgsi_parse_context parse; + + /** + ** Setup to begin parsing input shader + **/ + if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) { + debug_printf("tgsi_parse_init() failed in tgsi_is_passthrough_shader()!\n"); + return FALSE; + } + + /** + ** Loop over incoming program tokens/instructions + */ + while (!tgsi_parse_end_of_tokens(&parse)) { + + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *fullinst = + &parse.FullToken.FullInstruction; + const struct tgsi_full_src_register *src = + &fullinst->FullSrcRegisters[0]; + const struct tgsi_full_dst_register *dst = + &fullinst->FullDstRegisters[0]; + + /* Do a whole bunch of checks for a simple move */ + if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV || + src->SrcRegister.File != TGSI_FILE_INPUT || + dst->DstRegister.File != TGSI_FILE_OUTPUT || + src->SrcRegister.Index != dst->DstRegister.Index || + + src->SrcRegister.Negate || + src->SrcRegisterExtMod.Negate || + 
src->SrcRegisterExtMod.Absolute || + src->SrcRegisterExtMod.Scale2X || + src->SrcRegisterExtMod.Bias || + src->SrcRegisterExtMod.Complement || + + src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || + src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || + src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || + src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W || + + src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || + src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || + src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || + src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W || + + dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW) + { + tgsi_parse_free(&parse); + return FALSE; + } + } + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + /* fall-through */ + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* fall-through */ + default: + ; /* no-op */ + } + } + + tgsi_parse_free(&parse); + + /* if we get here, it's a pass-through shader */ + return TRUE; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h new file mode 100644 index 0000000000..5cb6efb343 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -0,0 +1,74 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_SCAN_H +#define TGSI_SCAN_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" + + +/** + * Shader summary info + */ +struct tgsi_shader_info +{ + uint num_tokens; + + /* XXX eventually remove the corresponding fields from pipe_shader_state: */ + ubyte num_inputs; + ubyte num_outputs; + ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ + ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ + ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; + + uint file_mask[TGSI_FILE_COUNT]; /**< bitmask of declared registers */ + uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */ + int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */ + + uint immediate_count; /**< number of immediates declared */ + + uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ + + boolean writes_z; /**< does fragment shader write Z value? */ + boolean uses_kill; /**< KIL or KILP instruction used? 
*/ +}; + + +extern void +tgsi_scan_shader(const struct tgsi_token *tokens, + struct tgsi_shader_info *info); + + +extern boolean +tgsi_is_passthrough_shader(const struct tgsi_token *tokens); + + +#endif /* TGSI_SCAN_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c new file mode 100644 index 0000000000..f79170b9d6 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -0,0 +1,2542 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE) + +#include "pipe/p_debug.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_sse.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi_exec.h" +#include "tgsi_sse2.h" + +#include "rtasm/rtasm_x86sse.h" + +/* for 1/sqrt() + * + * This costs about 100fps (close to 10%) in gears: + */ +#define HIGH_PRECISION 1 + +#define FAST_MATH 1 + + +#define FOR_EACH_CHANNEL( CHAN )\ + for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) + +#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ + ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ + if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ + FOR_EACH_CHANNEL( CHAN )\ + IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + +#define TEMP_ONE_I TGSI_EXEC_TEMP_ONE_I +#define TEMP_ONE_C TGSI_EXEC_TEMP_ONE_C + +#define TEMP_R0 TGSI_EXEC_TEMP_R0 +#define TEMP_ADDR TGSI_EXEC_TEMP_ADDR + +/** + * X86 utility functions. + */ + +static struct x86_reg +make_xmm( + unsigned xmm ) +{ + return x86_make_reg( + file_XMM, + (enum x86_reg_name) xmm ); +} + +/** + * X86 register mapping helpers. 
+ */ + +static struct x86_reg +get_const_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_CX ); +} + +static struct x86_reg +get_input_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_AX ); +} + +static struct x86_reg +get_output_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_DX ); +} + +static struct x86_reg +get_temp_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_BX ); +} + +static struct x86_reg +get_coef_base( void ) +{ + return get_output_base(); +} + +static struct x86_reg +get_immediate_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_DI ); +} + + +/** + * Data access helpers. + */ + + +static struct x86_reg +get_immediate( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_immediate_base(), + (vec * 4 + chan) * 4 ); +} + +static struct x86_reg +get_const( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_const_base(), + (vec * 4 + chan) * 4 ); +} + +static struct x86_reg +get_input( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_input_base(), + (vec * 4 + chan) * 16 ); +} + +static struct x86_reg +get_output( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_output_base(), + (vec * 4 + chan) * 16 ); +} + +static struct x86_reg +get_temp( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_temp_base(), + (vec * 4 + chan) * 16 ); +} + +static struct x86_reg +get_coef( + unsigned vec, + unsigned chan, + unsigned member ) +{ + return x86_make_disp( + get_coef_base(), + ((vec * 3 + member) * 4 + chan) * 4 ); +} + + +static void +emit_ret( + struct x86_function *func ) +{ + x86_ret( func ); +} + + +/** + * Data fetch helpers. 
+ */ + +/** + * Copy a shader constant to xmm register + * \param xmm the destination xmm register + * \param vec the src const buffer index + * \param chan src channel to fetch (X, Y, Z or W) + */ +static void +emit_const( + struct x86_function *func, + uint xmm, + int vec, + uint chan, + uint indirect, + uint indirectFile, + int indirectIndex ) +{ + if (indirect) { + struct x86_reg r0 = get_input_base(); + struct x86_reg r1 = get_output_base(); + uint i; + + assert( indirectFile == TGSI_FILE_ADDRESS ); + assert( indirectIndex == 0 ); + + x86_push( func, r0 ); + x86_push( func, r1 ); + + for (i = 0; i < QUAD_SIZE; i++) { + x86_lea( func, r0, get_const( vec, chan ) ); + x86_mov( func, r1, x86_make_disp( get_temp( TEMP_ADDR, CHAN_X ), i * 4 ) ); + + /* Quick hack to multiply by 16 -- need to add SHL to rtasm. + */ + x86_add( func, r1, r1 ); + x86_add( func, r1, r1 ); + x86_add( func, r1, r1 ); + x86_add( func, r1, r1 ); + + x86_add( func, r0, r1 ); + x86_mov( func, r1, x86_deref( r0 ) ); + x86_mov( func, x86_make_disp( get_temp( TEMP_R0, CHAN_X ), i * 4 ), r1 ); + } + + x86_pop( func, r1 ); + x86_pop( func, r0 ); + + sse_movaps( + func, + make_xmm( xmm ), + get_temp( TEMP_R0, CHAN_X ) ); + } + else { + assert( vec >= 0 ); + + sse_movss( + func, + make_xmm( xmm ), + get_const( vec, chan ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); + } +} + +static void +emit_immediate( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movss( + func, + make_xmm( xmm ), + get_immediate( vec, chan ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + + +/** + * Copy a shader input to xmm register + * \param xmm the destination xmm register + * \param vec the src input attrib + * \param chan src channel to fetch (X, Y, Z or W) + */ +static void +emit_inputf( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movups( + func, + 
make_xmm( xmm ), + get_input( vec, chan ) ); +} + +/** + * Store an xmm register to a shader output + * \param xmm the source xmm register + * \param vec the dest output attrib + * \param chan src dest channel to store (X, Y, Z or W) + */ +static void +emit_output( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movups( + func, + get_output( vec, chan ), + make_xmm( xmm ) ); +} + +/** + * Copy a shader temporary to xmm register + * \param xmm the destination xmm register + * \param vec the src temp register + * \param chan src channel to fetch (X, Y, Z or W) + */ +static void +emit_tempf( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movaps( + func, + make_xmm( xmm ), + get_temp( vec, chan ) ); +} + +/** + * Load an xmm register with an input attrib coefficient (a0, dadx or dady) + * \param xmm the destination xmm register + * \param vec the src input/attribute coefficient index + * \param chan src channel to fetch (X, Y, Z or W) + * \param member 0=a0, 1=dadx, 2=dady + */ +static void +emit_coef( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan, + unsigned member ) +{ + sse_movss( + func, + make_xmm( xmm ), + get_coef( vec, chan, member ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + +/** + * Data store helpers. + */ + +static void +emit_inputs( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movups( + func, + get_input( vec, chan ), + make_xmm( xmm ) ); +} + +static void +emit_temps( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movaps( + func, + get_temp( vec, chan ), + make_xmm( xmm ) ); +} + +static void +emit_addrs( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + assert( vec == 0 ); + + emit_temps( + func, + xmm, + vec + TGSI_EXEC_TEMP_ADDR, + chan ); +} + +/** + * Coefficent fetch helpers. 
+ */ + +static void +emit_coef_a0( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_coef( + func, + xmm, + vec, + chan, + 0 ); +} + +static void +emit_coef_dadx( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_coef( + func, + xmm, + vec, + chan, + 1 ); +} + +static void +emit_coef_dady( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_coef( + func, + xmm, + vec, + chan, + 2 ); +} + +/** + * Function call helpers. + */ + +/** + * NOTE: In gcc, if the destination uses the SSE intrinsics, then it must be + * defined with __attribute__((force_align_arg_pointer)), as we do not guarantee + * that the stack pointer is 16 byte aligned, as expected. + */ +static void +emit_func_call_dst( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst, + void (PIPE_CDECL *code)() ) +{ + struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); + unsigned i, n, xmm; + unsigned xmm_mask; + + /* Bitmask of the xmm registers to save */ + xmm_mask = (1 << xmm_save) - 1; + xmm_mask &= ~(1 << xmm_dst); + + sse_movaps( + func, + get_temp( TEMP_R0, 0 ), + make_xmm( xmm_dst ) ); + + x86_push( + func, + x86_make_reg( file_REG32, reg_AX) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_CX) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_DX) ); + + for(i = 0, n = 0; i < 8; ++i) + if(xmm_mask & (1 << i)) + ++n; + + x86_sub_imm( + func, + x86_make_reg( file_REG32, reg_SP ), + n*16); + + for(i = 0, n = 0; i < 8; ++i) + if(xmm_mask & (1 << i)) { + sse_movups( + func, + x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ), + make_xmm( xmm ) ); + ++n; + } + + x86_lea( + func, + ecx, + get_temp( TEMP_R0, 0 ) ); + + x86_push( func, ecx ); + x86_mov_reg_imm( func, ecx, (unsigned long) code ); + x86_call( func, ecx ); + x86_pop(func, ecx ); + + for(i = 0, n = 0; i < 8; ++i) + if(xmm_mask & (1 << i)) { + sse_movups( + func, + make_xmm( xmm ), + 
x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ) ); + ++n; + } + + x86_add_imm( + func, + x86_make_reg( file_REG32, reg_SP ), + n*16); + + /* Restore GP registers in a reverse order. + */ + x86_pop( + func, + x86_make_reg( file_REG32, reg_DX) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_CX) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_AX) ); + + sse_movaps( + func, + make_xmm( xmm_dst ), + get_temp( TEMP_R0, 0 ) ); +} + +static void +emit_func_call_dst_src( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst, + unsigned xmm_src, + void (PIPE_CDECL *code)() ) +{ + sse_movaps( + func, + get_temp( TEMP_R0, 1 ), + make_xmm( xmm_src ) ); + + emit_func_call_dst( + func, + xmm_save, + xmm_dst, + code ); +} + +/* + * Fast SSE2 implementation of special math functions. + */ + +#define POLY0(x, c0) _mm_set1_ps(c0) +#define POLY1(x, c0, c1) _mm_add_ps(_mm_mul_ps(POLY0(x, c1), x), _mm_set1_ps(c0)) +#define POLY2(x, c0, c1, c2) _mm_add_ps(_mm_mul_ps(POLY1(x, c1, c2), x), _mm_set1_ps(c0)) +#define POLY3(x, c0, c1, c2, c3) _mm_add_ps(_mm_mul_ps(POLY2(x, c1, c2, c3), x), _mm_set1_ps(c0)) +#define POLY4(x, c0, c1, c2, c3, c4) _mm_add_ps(_mm_mul_ps(POLY3(x, c1, c2, c3, c4), x), _mm_set1_ps(c0)) +#define POLY5(x, c0, c1, c2, c3, c4, c5) _mm_add_ps(_mm_mul_ps(POLY4(x, c1, c2, c3, c4, c5), x), _mm_set1_ps(c0)) + +#define EXP_POLY_DEGREE 3 +#define LOG_POLY_DEGREE 5 + +/** + * See http://www.devmaster.net/forums/showthread.php?p=43580 + */ +static INLINE __m128 +exp2f4(__m128 x) +{ + __m128i ipart; + __m128 fpart, expipart, expfpart; + + x = _mm_min_ps(x, _mm_set1_ps( 129.00000f)); + x = _mm_max_ps(x, _mm_set1_ps(-126.99999f)); + + /* ipart = int(x - 0.5) */ + ipart = _mm_cvtps_epi32(_mm_sub_ps(x, _mm_set1_ps(0.5f))); + + /* fpart = x - ipart */ + fpart = _mm_sub_ps(x, _mm_cvtepi32_ps(ipart)); + + /* expipart = (float) (1 << ipart) */ + expipart = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ipart, _mm_set1_epi32(127)), 23)); + + /* minimax 
polynomial fit of 2**x, in range [-0.5, 0.5[ */ +#if EXP_POLY_DEGREE == 5 + expfpart = POLY5(fpart, 9.9999994e-1f, 6.9315308e-1f, 2.4015361e-1f, 5.5826318e-2f, 8.9893397e-3f, 1.8775767e-3f); +#elif EXP_POLY_DEGREE == 4 + expfpart = POLY4(fpart, 1.0000026f, 6.9300383e-1f, 2.4144275e-1f, 5.2011464e-2f, 1.3534167e-2f); +#elif EXP_POLY_DEGREE == 3 + expfpart = POLY3(fpart, 9.9992520e-1f, 6.9583356e-1f, 2.2606716e-1f, 7.8024521e-2f); +#elif EXP_POLY_DEGREE == 2 + expfpart = POLY2(fpart, 1.0017247f, 6.5763628e-1f, 3.3718944e-1f); +#else +#error +#endif + + return _mm_mul_ps(expipart, expfpart); +} + +/** + * See http://www.devmaster.net/forums/showthread.php?p=43580 + */ +static INLINE __m128 +log2f4(__m128 x) +{ + __m128i expmask = _mm_set1_epi32(0x7f800000); + __m128i mantmask = _mm_set1_epi32(0x007fffff); + __m128 one = _mm_set1_ps(1.0f); + + __m128i i = _mm_castps_si128(x); + + /* exp = (float) exponent(x) */ + __m128 exp = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(i, expmask), 23), _mm_set1_epi32(127))); + + /* mant = (float) mantissa(x) */ + __m128 mant = _mm_or_ps(_mm_castsi128_ps(_mm_and_si128(i, mantmask)), one); + + __m128 logmant; + + /* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ + * These coefficients can be generate with + * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html + */ +#if LOG_POLY_DEGREE == 6 + logmant = POLY5(mant, 3.11578814719469302614f, -3.32419399085241980044f, 2.59883907202499966007f, -1.23152682416275988241f, 0.318212422185251071475f, -0.0344359067839062357313f); +#elif LOG_POLY_DEGREE == 5 + logmant = POLY4(mant, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f); +#elif LOG_POLY_DEGREE == 4 + logmant = POLY3(mant, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f); +#elif LOG_POLY_DEGREE == 3 + logmant = 
POLY2(mant, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f); +#else +#error +#endif + + /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ + logmant = _mm_mul_ps(logmant, _mm_sub_ps(mant, one)); + + return _mm_add_ps(logmant, exp); +} + +static INLINE __m128 +powf4(__m128 x, __m128 y) +{ + return exp2f4(_mm_mul_ps(log2f4(x), y)); +} + + +/** + * Low-level instruction translators. + */ + +static void +emit_abs( + struct x86_function *func, + unsigned xmm ) +{ + sse_andps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_7FFFFFFF_I, + TGSI_EXEC_TEMP_7FFFFFFF_C ) ); +} + +static void +emit_add( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + sse_addps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void PIPE_CDECL +cos4f( + float *store ) +{ + store[0] = cosf( store[0] ); + store[1] = cosf( store[1] ); + store[2] = cosf( store[2] ); + store[3] = cosf( store[3] ); +} + +static void +emit_cos( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_save, + xmm_dst, + cos4f ); +} + +static void PIPE_CDECL +#if defined(PIPE_CC_GCC) +__attribute__((force_align_arg_pointer)) +#endif +ex24f( + float *store ) +{ + _mm_store_ps(&store[0], exp2f4( _mm_load_ps(&store[0]) )); +} + +static void +emit_ex2( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_save, + xmm_dst, + ex24f ); +} + +static void +emit_f2it( + struct x86_function *func, + unsigned xmm ) +{ + sse2_cvttps2dq( + func, + make_xmm( xmm ), + make_xmm( xmm ) ); +} + +static void PIPE_CDECL +flr4f( + float *store ) +{ + store[0] = floorf( store[0] ); + store[1] = floorf( store[1] ); + store[2] = floorf( store[2] ); + store[3] = floorf( store[3] ); +} + +static void +emit_flr( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + 
xmm_save, + xmm_dst, + flr4f ); +} + +static void PIPE_CDECL +frc4f( + float *store ) +{ + store[0] -= floorf( store[0] ); + store[1] -= floorf( store[1] ); + store[2] -= floorf( store[2] ); + store[3] -= floorf( store[3] ); +} + +static void +emit_frc( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_save, + xmm_dst, + frc4f ); +} + +static void PIPE_CDECL +#if defined(PIPE_CC_GCC) +__attribute__((force_align_arg_pointer)) +#endif +lg24f( + float *store ) +{ + _mm_store_ps(&store[0], log2f4( _mm_load_ps(&store[0]) )); +} + +static void +emit_lg2( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_save, + xmm_dst, + lg24f ); +} + +static void +emit_MOV( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + sse_movups( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_mul (struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src) +{ + sse_mulps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_neg( + struct x86_function *func, + unsigned xmm ) +{ + sse_xorps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_80000000_I, + TGSI_EXEC_TEMP_80000000_C ) ); +} + +static void PIPE_CDECL +#if defined(PIPE_CC_GCC) +__attribute__((force_align_arg_pointer)) +#endif +pow4f( + float *store ) +{ +#if 1 + _mm_store_ps(&store[0], powf4( _mm_load_ps(&store[0]), _mm_load_ps(&store[4]) )); +#else + store[0] = powf( store[0], store[4] ); + store[1] = powf( store[1], store[5] ); + store[2] = powf( store[2], store[6] ); + store[3] = powf( store[3], store[7] ); +#endif +} + +static void +emit_pow( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst, + unsigned xmm_src ) +{ + emit_func_call_dst_src( + func, + xmm_save, + xmm_dst, + xmm_src, + pow4f ); +} + +static void +emit_rcp ( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + /* 
On Intel CPUs at least, this is only accurate to 12 bits -- not + * good enough. Need to either emit a proper divide or use the + * iterative technique described below in emit_rsqrt(). + */ + sse2_rcpps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_rsqrt( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ +#if HIGH_PRECISION + /* Although rsqrtps() and rcpps() are low precision on some/all SSE + * implementations, it is possible to improve its precision at + * fairly low cost, using a newton/raphson step, as below: + * + * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a) + * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)] + * + * See: http://softwarecommunity.intel.com/articles/eng/1818.htm + */ + { + struct x86_reg dst = make_xmm( xmm_dst ); + struct x86_reg src = make_xmm( xmm_src ); + struct x86_reg tmp0 = make_xmm( 2 ); + struct x86_reg tmp1 = make_xmm( 3 ); + + assert( xmm_dst != xmm_src ); + assert( xmm_dst != 2 && xmm_dst != 3 ); + assert( xmm_src != 2 && xmm_src != 3 ); + + sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) ); + sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) ); + sse_rsqrtps( func, tmp1, src ); + sse_mulps( func, src, tmp1 ); + sse_mulps( func, dst, tmp1 ); + sse_mulps( func, src, tmp1 ); + sse_subps( func, tmp0, src ); + sse_mulps( func, dst, tmp0 ); + } +#else + /* On Intel CPUs at least, this is only accurate to 12 bits -- not + * good enough. 
+ */ + sse_rsqrtps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +#endif +} + +static void +emit_setsign( + struct x86_function *func, + unsigned xmm ) +{ + sse_orps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_80000000_I, + TGSI_EXEC_TEMP_80000000_C ) ); +} + +static void PIPE_CDECL +sin4f( + float *store ) +{ + store[0] = sinf( store[0] ); + store[1] = sinf( store[1] ); + store[2] = sinf( store[2] ); + store[3] = sinf( store[3] ); +} + +static void +emit_sin (struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst) +{ + emit_func_call_dst( + func, + xmm_save, + xmm_dst, + sin4f ); +} + +static void +emit_sub( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + sse_subps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +/** + * Register fetch. + */ + +static void +emit_fetch( + struct x86_function *func, + unsigned xmm, + const struct tgsi_full_src_register *reg, + const unsigned chan_index ) +{ + unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + + switch (swizzle) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch (reg->SrcRegister.File) { + case TGSI_FILE_CONSTANT: + emit_const( + func, + xmm, + reg->SrcRegister.Index, + swizzle, + reg->SrcRegister.Indirect, + reg->SrcRegisterInd.File, + reg->SrcRegisterInd.Index ); + break; + + case TGSI_FILE_IMMEDIATE: + emit_immediate( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + case TGSI_FILE_INPUT: + emit_inputf( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + case TGSI_FILE_TEMPORARY: + emit_tempf( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + default: + assert( 0 ); + } + break; + + case TGSI_EXTSWIZZLE_ZERO: + emit_tempf( + func, + xmm, + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ); + break; + + case TGSI_EXTSWIZZLE_ONE: + emit_tempf( + func, + xmm, + TEMP_ONE_I, + TEMP_ONE_C ); + 
break; + + default: + assert( 0 ); + } + + switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { + case TGSI_UTIL_SIGN_CLEAR: + emit_abs( func, xmm ); + break; + + case TGSI_UTIL_SIGN_SET: + emit_setsign( func, xmm ); + break; + + case TGSI_UTIL_SIGN_TOGGLE: + emit_neg( func, xmm ); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + } +} + +#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\ + emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN ) + +/** + * Register store. + */ + +static void +emit_store( + struct x86_function *func, + unsigned xmm, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + unsigned chan_index ) +{ + switch( reg->DstRegister.File ) { + case TGSI_FILE_OUTPUT: + emit_output( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; + + case TGSI_FILE_TEMPORARY: + emit_temps( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; + + case TGSI_FILE_ADDRESS: + emit_addrs( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; + + default: + assert( 0 ); + } + + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + + case TGSI_SAT_ZERO_ONE: + /* assert( 0 ); */ + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + } +} + +#define STORE( FUNC, INST, XMM, INDEX, CHAN )\ + emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) + +/** + * High-level instruction translators. + */ + +static void +emit_kil( + struct x86_function *func, + const struct tgsi_full_src_register *reg ) +{ + unsigned uniquemask; + unsigned registers[4]; + unsigned nextregister = 0; + unsigned firstchan = ~0; + unsigned chan_index; + + /* This mask stores component bits that were already tested. Note that + * we test if the value is less than zero, so 1.0 and 0.0 need not to be + * tested. 
*/ + uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + + FOR_EACH_CHANNEL( chan_index ) { + unsigned swizzle; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_extswizzle( + reg, + chan_index ); + + /* check if the component has not been already tested */ + if( !(uniquemask & (1 << swizzle)) ) { + uniquemask |= 1 << swizzle; + + /* allocate register */ + registers[chan_index] = nextregister; + emit_fetch( + func, + nextregister, + reg, + chan_index ); + nextregister++; + + /* mark the first channel used */ + if( firstchan == ~0 ) { + firstchan = chan_index; + } + } + } + + x86_push( + func, + x86_make_reg( file_REG32, reg_AX ) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_DX ) ); + + FOR_EACH_CHANNEL( chan_index ) { + if( uniquemask & (1 << chan_index) ) { + sse_cmpps( + func, + make_xmm( registers[chan_index] ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ), + cc_LessThan ); + + if( chan_index == firstchan ) { + sse_pmovmskb( + func, + x86_make_reg( file_REG32, reg_AX ), + make_xmm( registers[chan_index] ) ); + } + else { + sse_pmovmskb( + func, + x86_make_reg( file_REG32, reg_DX ), + make_xmm( registers[chan_index] ) ); + x86_or( + func, + x86_make_reg( file_REG32, reg_AX ), + x86_make_reg( file_REG32, reg_DX ) ); + } + } + } + + x86_or( + func, + get_temp( + TGSI_EXEC_TEMP_KILMASK_I, + TGSI_EXEC_TEMP_KILMASK_C ), + x86_make_reg( file_REG32, reg_AX ) ); + + x86_pop( + func, + x86_make_reg( file_REG32, reg_DX ) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_AX ) ); +} + + +static void +emit_kilp( + struct x86_function *func ) +{ + /* XXX todo / fix me */ +} + + +static void +emit_setcc( + struct x86_function *func, + struct tgsi_full_instruction *inst, + enum sse_cc cc ) +{ + unsigned chan_index; + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + sse_cmpps( + func, + make_xmm( 0 ), + 
make_xmm( 1 ), + cc ); + sse_andps( + func, + make_xmm( 0 ), + get_temp( + TEMP_ONE_I, + TEMP_ONE_C ) ); + STORE( func, *inst, 0, 0, chan_index ); + } +} + +static void +emit_cmp( + struct x86_function *func, + struct tgsi_full_instruction *inst ) +{ + unsigned chan_index; + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + sse_cmpps( + func, + make_xmm( 0 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ), + cc_LessThan ); + sse_andps( + func, + make_xmm( 1 ), + make_xmm( 0 ) ); + sse_andnps( + func, + make_xmm( 0 ), + make_xmm( 2 ) ); + sse_orps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } +} + +static int +emit_instruction( + struct x86_function *func, + struct tgsi_full_instruction *inst ) +{ + unsigned chan_index; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_f2it( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + emit_tempf( + func, + 0, + TEMP_ONE_I, + TEMP_ONE_C); + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) { + STORE( func, *inst, 0, 0, CHAN_X ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + STORE( func, *inst, 0, 0, CHAN_W ); + } + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + sse_maxps( + func, + make_xmm( 0 ), + get_temp( + 
TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ) ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + /* XMM[1] = SrcReg[0].yyyy */ + FETCH( func, *inst, 1, 0, CHAN_Y ); + /* XMM[1] = max(XMM[1], 0) */ + sse_maxps( + func, + make_xmm( 1 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ) ); + /* XMM[2] = SrcReg[0].wwww */ + FETCH( func, *inst, 2, 0, CHAN_W ); + /* XMM[2] = min(XMM[2], 128.0) */ + sse_minps( + func, + make_xmm( 2 ), + get_temp( + TGSI_EXEC_TEMP_128_I, + TGSI_EXEC_TEMP_128_C ) ); + /* XMM[2] = max(XMM[2], -128.0) */ + sse_maxps( + func, + make_xmm( 2 ), + get_temp( + TGSI_EXEC_TEMP_MINUS_128_I, + TGSI_EXEC_TEMP_MINUS_128_C ) ); + emit_pow( func, 3, 1, 2 ); + FETCH( func, *inst, 0, 0, CHAN_X ); + sse_xorps( + func, + make_xmm( 2 ), + make_xmm( 2 ) ); + sse_cmpps( + func, + make_xmm( 2 ), + make_xmm( 0 ), + cc_LessThanEqual ); + sse_andps( + func, + make_xmm( 2 ), + make_xmm( 1 ) ); + STORE( func, *inst, 2, 0, CHAN_Z ); + } + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_rcp( func, 0, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_rsqrt( func, 1, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 1, 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXP: + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( func, *inst, 0, 0, CHAN_X ); + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { + emit_MOV( func, 1, 0 ); + emit_flr( func, 2, 1 ); + /* dst.x = ex2(floor(src.x)) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { + emit_MOV( func, 2, 1 ); + emit_ex2( func, 3, 2 ); + STORE( 
func, *inst, 2, 0, CHAN_X ); + } + /* dst.y = src.x - floor(src.x) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { + emit_MOV( func, 2, 0 ); + emit_sub( func, 2, 1 ); + STORE( func, *inst, 2, 0, CHAN_Y ); + } + } + /* dst.z = ex2(src.x) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { + emit_ex2( func, 3, 0 ); + STORE( func, *inst, 0, 0, CHAN_Z ); + } + } + /* dst.w = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) { + emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_LOG: + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_abs( func, 0 ); + emit_MOV( func, 1, 0 ); + emit_lg2( func, 2, 1 ); + /* dst.z = lg2(abs(src.x)) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( func, *inst, 1, 0, CHAN_Z ); + } + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { + emit_flr( func, 2, 1 ); + /* dst.x = floor(lg2(abs(src.x))) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( func, *inst, 1, 0, CHAN_X ); + } + /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { + emit_ex2( func, 2, 1 ); + emit_rcp( func, 1, 1 ); + emit_mul( func, 0, 1 ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + } + } + /* dst.w = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) { + emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_mul( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, 
chan_index ); + emit_add( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul(func, 1, 2 ); + emit_add(func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_W ); + FETCH( func, *inst, 2, 1, CHAN_W ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DST: + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + emit_tempf( + func, + 0, + TEMP_ONE_I, + TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + FETCH( func, *inst, 0, 0, CHAN_Y ); + FETCH( func, *inst, 1, 1, CHAN_Y ); + emit_mul( func, 0, 1 ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + FETCH( func, *inst, 0, 0, CHAN_Z ); + STORE( func, *inst, 0, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + FETCH( func, *inst, 0, 1, CHAN_W ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, 
chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + sse_minps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + sse_maxps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + emit_setcc( func, inst, cc_LessThan ); + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + emit_setcc( func, inst, cc_NotLessThan ); + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + emit_mul( func, 0, 1 ); + emit_add( func, 0, 2 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_sub( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LERP: + /* TGSI_OPCODE_LRP */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + emit_sub( func, 1, 2 ); + emit_mul( func, 0, 1 ); + emit_add( func, 0, 2 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CND: + return 0; + break; + + case TGSI_OPCODE_CND0: + return 0; + break; + + case TGSI_OPCODE_DOT2ADD: + /* TGSI_OPCODE_DP2A */ + return 0; + break; + + case TGSI_OPCODE_INDEX: + return 0; + break; + + case TGSI_OPCODE_NEGATE: + return 0; + break; + + case TGSI_OPCODE_FRAC: + /* TGSI_OPCODE_FRC */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 
0, 0, chan_index ); + emit_frc( func, 0, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CLAMP: + return 0; + break; + + case TGSI_OPCODE_FLOOR: + /* TGSI_OPCODE_FLR */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_flr( func, 0, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_ROUND: + return 0; + break; + + case TGSI_OPCODE_EXPBASE2: + /* TGSI_OPCODE_EX2 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_ex2( func, 0, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LOGBASE2: + /* TGSI_OPCODE_LG2 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_lg2( func, 0, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_POWER: + /* TGSI_OPCODE_POW */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_pow( func, 0, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CROSSPRODUCT: + /* TGSI_OPCODE_XPD */ + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( func, *inst, 1, 1, CHAN_Z ); + FETCH( func, *inst, 3, 0, CHAN_Z ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + FETCH( func, *inst, 0, 0, CHAN_Y ); + FETCH( func, *inst, 4, 1, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + emit_MOV( func, 2, 0 ); + emit_mul( func, 2, 1 ); + emit_MOV( func, 5, 3 ); + emit_mul( func, 5, 4 ); + emit_sub( func, 2, 5 ); + STORE( func, *inst, 2, 0, CHAN_X ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + FETCH( func, *inst, 2, 1, CHAN_X ); + FETCH( func, *inst, 5, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( 
*inst, CHAN_Y ) { + emit_mul( func, 3, 2 ); + emit_mul( func, 1, 5 ); + emit_sub( func, 3, 1 ); + STORE( func, *inst, 3, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + emit_mul( func, 5, 4 ); + emit_mul( func, 0, 2 ); + emit_sub( func, 5, 0 ); + STORE( func, *inst, 5, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + emit_tempf( + func, + 0, + TEMP_ONE_I, + TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MULTIPLYMATRIX: + return 0; + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_abs( func, 0) ; + + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_RCC: + return 0; + break; + + case TGSI_OPCODE_DPH: + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 1, CHAN_W ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_COS: + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_cos( func, 0, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDX: + return 0; + break; + + case TGSI_OPCODE_DDY: + return 0; + break; + + case TGSI_OPCODE_KILP: + /* predicated kill */ + emit_kilp( func ); + return 0; /* XXX fix me */ + break; + + case TGSI_OPCODE_KIL: + /* conditional kill */ + emit_kil( func, &inst->FullSrcRegisters[0] ); + break; + + case TGSI_OPCODE_PK2H: + return 0; + break; + + case TGSI_OPCODE_PK2US: + return 0; + break; + + case TGSI_OPCODE_PK4B: + return 0; + break; + + case 
TGSI_OPCODE_PK4UB: + return 0; + break; + + case TGSI_OPCODE_RFL: + return 0; + break; + + case TGSI_OPCODE_SEQ: + return 0; + break; + + case TGSI_OPCODE_SFL: + return 0; + break; + + case TGSI_OPCODE_SGT: + return 0; + break; + + case TGSI_OPCODE_SIN: + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_sin( func, 0, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLE: + return 0; + break; + + case TGSI_OPCODE_SNE: + return 0; + break; + + case TGSI_OPCODE_STR: + return 0; + break; + + case TGSI_OPCODE_TEX: + if (0) { + /* Disable dummy texture code: + */ + emit_tempf( + func, + 0, + TEMP_ONE_I, + TEMP_ONE_C ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + } + else { + return 0; + } + break; + + case TGSI_OPCODE_TXD: + return 0; + break; + + case TGSI_OPCODE_UP2H: + return 0; + break; + + case TGSI_OPCODE_UP2US: + return 0; + break; + + case TGSI_OPCODE_UP4B: + return 0; + break; + + case TGSI_OPCODE_UP4UB: + return 0; + break; + + case TGSI_OPCODE_X2D: + return 0; + break; + + case TGSI_OPCODE_ARA: + return 0; + break; + + case TGSI_OPCODE_ARR: + return 0; + break; + + case TGSI_OPCODE_BRA: + return 0; + break; + + case TGSI_OPCODE_CAL: + return 0; + break; + + case TGSI_OPCODE_RET: + emit_ret( func ); + break; + + case TGSI_OPCODE_END: + break; + + case TGSI_OPCODE_SSG: + return 0; + break; + + case TGSI_OPCODE_CMP: + emit_cmp (func, inst); + break; + + case TGSI_OPCODE_SCS: + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_cos( func, 0, 0 ); + STORE( func, *inst, 0, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_sin( func, 0, 0 ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ); + STORE( func, 
*inst, 0, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + emit_tempf( + func, + 0, + TEMP_ONE_I, + TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_TXB: + return 0; + break; + + case TGSI_OPCODE_NRM: + return 0; + break; + + case TGSI_OPCODE_DIV: + return 0; + break; + + case TGSI_OPCODE_DP2: + return 0; + break; + + case TGSI_OPCODE_TXL: + return 0; + break; + + case TGSI_OPCODE_BRK: + return 0; + break; + + case TGSI_OPCODE_IF: + return 0; + break; + + case TGSI_OPCODE_LOOP: + return 0; + break; + + case TGSI_OPCODE_REP: + return 0; + break; + + case TGSI_OPCODE_ELSE: + return 0; + break; + + case TGSI_OPCODE_ENDIF: + return 0; + break; + + case TGSI_OPCODE_ENDLOOP: + return 0; + break; + + case TGSI_OPCODE_ENDREP: + return 0; + break; + + case TGSI_OPCODE_PUSHA: + return 0; + break; + + case TGSI_OPCODE_POPA: + return 0; + break; + + case TGSI_OPCODE_CEIL: + return 0; + break; + + case TGSI_OPCODE_I2F: + return 0; + break; + + case TGSI_OPCODE_NOT: + return 0; + break; + + case TGSI_OPCODE_TRUNC: + return 0; + break; + + case TGSI_OPCODE_SHL: + return 0; + break; + + case TGSI_OPCODE_SHR: + return 0; + break; + + case TGSI_OPCODE_AND: + return 0; + break; + + case TGSI_OPCODE_OR: + return 0; + break; + + case TGSI_OPCODE_MOD: + return 0; + break; + + case TGSI_OPCODE_XOR: + return 0; + break; + + case TGSI_OPCODE_SAD: + return 0; + break; + + case TGSI_OPCODE_TXF: + return 0; + break; + + case TGSI_OPCODE_TXQ: + return 0; + break; + + case TGSI_OPCODE_CONT: + return 0; + break; + + case TGSI_OPCODE_EMIT: + return 0; + break; + + case TGSI_OPCODE_ENDPRIM: + return 0; + break; + + default: + return 0; + } + + return 1; +} + +static void +emit_declaration( + struct x86_function *func, + struct tgsi_full_declaration *decl ) +{ + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + unsigned i, j; + + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = 
decl->Declaration.UsageMask; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + switch( decl->Declaration.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + emit_coef_a0( func, 0, i, j ); + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_LINEAR: + emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_coef_a0( func, 4, i, j ); + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 4 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_tempf( func, 4, 0, TGSI_SWIZZLE_W ); + emit_coef_a0( func, 5, i, j ); + emit_rcp( func, 4, 4 ); /* 1.0 / w */ + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 5 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */ + emit_inputs( func, 0, i, j ); + break; + + default: + assert( 0 ); + break; + } + } + } + } + } +} + +static void aos_to_soa( struct x86_function *func, + uint arg_aos, + uint arg_soa, + uint arg_num, + uint arg_stride ) +{ + struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX ); + struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX ); + struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX ); + struct x86_reg stride = x86_make_reg( file_REG32, reg_DX ); + int inner_loop; + + + /* Save EBX */ + x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); + + x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) ); + x86_mov( func, soa_input, x86_fn_arg( func, arg_soa ) ); + 
x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) ); + x86_mov( func, stride, x86_fn_arg( func, arg_stride ) ); + + /* do */ + inner_loop = x86_get_label( func ); + { + x86_push( func, aos_input ); + sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, stride ); + sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, stride ); + sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, stride ); + sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_pop( func, aos_input ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 ); + sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd ); + sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 ); + sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd ); + + sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) ); + sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) ); + sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) ); + sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) ); + + /* Advance to next input */ + x86_lea( func, aos_input, x86_make_disp(aos_input, 16) ); + x86_lea( func, soa_input, x86_make_disp(soa_input, 64) ); + } + /* while --num_inputs */ + x86_dec( func, num_inputs ); + x86_jcc( func, cc_NE, inner_loop ); + + /* Restore EBX */ + x86_pop( func, aos_input ); +} + +static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) +{ + struct x86_reg soa_output; + struct x86_reg aos_output; + struct x86_reg 
num_outputs; + struct x86_reg temp; + int inner_loop; + + soa_output = x86_make_reg( file_REG32, reg_AX ); + aos_output = x86_make_reg( file_REG32, reg_BX ); + num_outputs = x86_make_reg( file_REG32, reg_CX ); + temp = x86_make_reg( file_REG32, reg_DX ); + + /* Save EBX */ + x86_push( func, aos_output ); + + x86_mov( func, soa_output, x86_fn_arg( func, soa ) ); + x86_mov( func, aos_output, x86_fn_arg( func, aos ) ); + x86_mov( func, num_outputs, x86_fn_arg( func, num ) ); + + /* do */ + inner_loop = x86_get_label( func ); + { + sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) ); + sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) ); + sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) ); + sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) ); + sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) ); + sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) ); + sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) ); + + x86_mov( func, temp, x86_fn_arg( func, stride ) ); + x86_push( func, aos_output ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_pop( func, aos_output ); + + /* Advance to next output */ + x86_lea( func, aos_output, 
x86_make_disp(aos_output, 16) ); + x86_lea( func, soa_output, x86_make_disp(soa_output, 64) ); + } + /* while --num_outputs */ + x86_dec( func, num_outputs ); + x86_jcc( func, cc_NE, inner_loop ); + + /* Restore EBX */ + x86_pop( func, aos_output ); +} + +/** + * Translate a TGSI vertex/fragment shader to SSE2 code. + * Slightly different things are done for vertex vs. fragment shaders. + * + * Note that fragment shaders are responsible for interpolating shader + * inputs. Because on x86 we have only 4 GP registers, and here we + * have 5 shader arguments (input, output, const, temp and coef), the + * code is split into two phases -- DECLARATION and INSTRUCTION phase. + * GP register holding the output argument is aliased with the coeff + * argument, as outputs are not needed in the DECLARATION phase. + * + * \param tokens the TGSI input shader + * \param func the output SSE code/function + * \param immediates buffer to place immediates, later passed to SSE func + * \param return 1 for success, 0 if translation failed + */ +unsigned +tgsi_emit_sse2( + const struct tgsi_token *tokens, + struct x86_function *func, + float (*immediates)[4], + boolean do_swizzles ) +{ + struct tgsi_parse_context parse; + boolean instruction_phase = FALSE; + unsigned ok = 1; + uint num_immediates = 0; + + util_init_math(); + + func->csr = func->store; + + tgsi_parse_init( &parse, tokens ); + + /* Can't just use EDI, EBX without save/restoring them: + */ + x86_push( + func, + get_immediate_base() ); + + x86_push( + func, + get_temp_base() ); + + + /* + * Different function args for vertex/fragment shaders: + */ + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + /* DECLARATION phase, do not load output argument. 
*/ + x86_mov( + func, + get_input_base(), + x86_fn_arg( func, 1 ) ); + /* skipping outputs argument here */ + x86_mov( + func, + get_const_base(), + x86_fn_arg( func, 3 ) ); + x86_mov( + func, + get_temp_base(), + x86_fn_arg( func, 4 ) ); + x86_mov( + func, + get_coef_base(), + x86_fn_arg( func, 5 ) ); + x86_mov( + func, + get_immediate_base(), + x86_fn_arg( func, 6 ) ); + } + else { + assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); + + if (do_swizzles) + aos_to_soa( func, + 6, /* aos_input */ + 1, /* machine->input */ + 7, /* num_inputs */ + 8 ); /* input_stride */ + + x86_mov( + func, + get_input_base(), + x86_fn_arg( func, 1 ) ); + x86_mov( + func, + get_output_base(), + x86_fn_arg( func, 2 ) ); + x86_mov( + func, + get_const_base(), + x86_fn_arg( func, 3 ) ); + x86_mov( + func, + get_temp_base(), + x86_fn_arg( func, 4 ) ); + x86_mov( + func, + get_immediate_base(), + x86_fn_arg( func, 5 ) ); + } + + while( !tgsi_parse_end_of_tokens( &parse ) && ok ) { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + emit_declaration( + func, + &parse.FullToken.FullDeclaration ); + } + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + if( !instruction_phase ) { + /* INSTRUCTION phase, overwrite coeff with output. */ + instruction_phase = TRUE; + x86_mov( + func, + get_output_base(), + x86_fn_arg( func, 2 ) ); + } + } + + ok = emit_instruction( + func, + &parse.FullToken.FullInstruction ); + + if (!ok) { + debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n", + parse.FullToken.FullInstruction.Instruction.Opcode, + parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? 
+ "vertex shader" : "fragment shader"); + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* simply copy the immediate values into the next immediates[] slot */ + { + const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + uint i; + assert(size <= 4); + assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); + for( i = 0; i < size; i++ ) { + immediates[num_immediates][i] = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + } +#if 0 + debug_printf("SSE FS immediate[%d] = %f %f %f %f\n", + num_immediates, + immediates[num_immediates][0], + immediates[num_immediates][1], + immediates[num_immediates][2], + immediates[num_immediates][3]); +#endif + num_immediates++; + } + break; + + default: + ok = 0; + assert( 0 ); + } + } + + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { + if (do_swizzles) + soa_to_aos( func, 9, 2, 10, 11 ); + } + + /* Can't just use EBX, EDI without save/restoring them: + */ + x86_pop( + func, + get_temp_base() ); + + x86_pop( + func, + get_immediate_base() ); + + emit_ret( func ); + + tgsi_parse_free( &parse ); + + return ok; +} + +#endif /* PIPE_ARCH_X86 */ + diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/tgsi_sse2.h new file mode 100644 index 0000000000..af838b2a25 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.h @@ -0,0 +1,49 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_SSE2_H +#define TGSI_SSE2_H + +#if defined __cplusplus +extern "C" { +#endif + +struct tgsi_token; +struct x86_function; + +unsigned +tgsi_emit_sse2( + const struct tgsi_token *tokens, + struct x86_function *function, + float (*immediates)[4], + boolean do_swizzles ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_SSE2_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c new file mode 100644 index 0000000000..9454563361 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -0,0 +1,1097 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "tgsi_text.h" +#include "tgsi_build.h" +#include "tgsi_info.h" +#include "tgsi_parse.h" +#include "tgsi_sanity.h" +#include "tgsi_util.h" + +static boolean is_alpha_underscore( const char *cur ) +{ + return + (*cur >= 'a' && *cur <= 'z') || + (*cur >= 'A' && *cur <= 'Z') || + *cur == '_'; +} + +static boolean is_digit( const char *cur ) +{ + return *cur >= '0' && *cur <= '9'; +} + +static boolean is_digit_alpha_underscore( const char *cur ) +{ + return is_digit( cur ) || is_alpha_underscore( cur ); +} + +static boolean uprcase( char c ) +{ + if (c >= 'a' && c <= 'z') + return c += 'A' - 'a'; + return c; +} + +static boolean str_match_no_case( const char **pcur, const char *str ) +{ + const char *cur = *pcur; + + while (*str != '\0' && *str == uprcase( *cur )) { + str++; + cur++; + } + if (*str == '\0') { + *pcur = cur; + return TRUE; + } + return FALSE; +} + +/* Eat zero or more whitespaces. + */ +static void eat_opt_white( const char **pcur ) +{ + while (**pcur == ' ' || **pcur == '\t' || **pcur == '\n') + (*pcur)++; +} + +/* Eat one or more whitespaces. + * Return TRUE if at least one whitespace eaten. + */ +static boolean eat_white( const char **pcur ) +{ + const char *cur = *pcur; + + eat_opt_white( pcur ); + return *pcur > cur; +} + +/* Parse unsigned integer. + * No checks for overflow. + */ +static boolean parse_uint( const char **pcur, uint *val ) +{ + const char *cur = *pcur; + + if (is_digit( cur )) { + *val = *cur++ - '0'; + while (is_digit( cur )) + *val = *val * 10 + *cur++ - '0'; + *pcur = cur; + return TRUE; + } + return FALSE; +} + +/* Parse floating point. 
+ */
/* Accepted syntax: [+-] digits [ '.' [digits] ] [ ('e'|'E') [+-] digits ],
 * where at least one digit must appear before the exponent.
 * On success the value is stored in *val, *pcur is advanced past the
 * literal and TRUE is returned.  On failure FALSE is returned and neither
 * *val nor *pcur is modified.
 *
 * NOTE(review): the value is converted with atof() starting at the
 * beginning of the literal.  atof() may accept more than the scanner below
 * (e.g. hexadecimal floats on C99 libraries), in which case the value would
 * not correspond to the characters consumed -- confirm whether such input
 * can reach this function.
 */
static boolean parse_float( const char **pcur, float *val )
{
   const char *start = *pcur;
   const char *cur = start;
   boolean integral_part = FALSE;
   boolean fractional_part = FALSE;

   /* optional sign */
   if (*cur == '-' || *cur == '+')
      cur++;

   /* integral digits */
   while (is_digit( cur )) {
      integral_part = TRUE;
      cur++;
   }

   /* optional '.' followed by optional fractional digits */
   if (*cur == '.') {
      cur++;
      while (is_digit( cur )) {
         fractional_part = TRUE;
         cur++;
      }
   }

   if (!integral_part && !fractional_part)
      return FALSE;

   /* optional exponent: needs at least one digit after the optional sign */
   if (uprcase( *cur ) == 'E') {
      cur++;
      if (*cur == '-' || *cur == '+')
         cur++;
      if (!is_digit( cur ))
         return FALSE;
      while (is_digit( cur ))
         cur++;
   }

   /* Convert only once the literal is known to be well formed, so the
    * output is not clobbered when parsing fails.
    */
   *val = (float) atof( start );
   *pcur = cur;
   return TRUE;
}

/* State shared by the text parsing routines.
 */
struct translate_ctx
{
   const char *text;                /* presumably the start of the shader text -- TODO confirm */
   const char *cur;                 /* current read position in the text */
   struct tgsi_token *tokens;       /* presumably the start of the output token buffer -- TODO confirm */
   struct tgsi_token *tokens_cur;   /* next output token slot to be written */
   struct tgsi_token *tokens_end;   /* end of the output buffer; writing stops when tokens_cur reaches it */
   struct tgsi_header *header;      /* points at the header token once it has been emitted */
};

/* Print a parse error message through debug_printf().
 * The context is currently not used.
 */
static void report_error( struct translate_ctx *ctx, const char *msg )
{
   (void) ctx;
   debug_printf( "\nError: %s", msg );
}

/* Parse shader header.
 * Return TRUE for one of the following headers.
+ * FRAG1.1 + * GEOM1.1 + * VERT1.1 + */ +static boolean parse_header( struct translate_ctx *ctx ) +{ + uint processor; + + if (str_match_no_case( &ctx->cur, "FRAG1.1" )) + processor = TGSI_PROCESSOR_FRAGMENT; + else if (str_match_no_case( &ctx->cur, "VERT1.1" )) + processor = TGSI_PROCESSOR_VERTEX; + else if (str_match_no_case( &ctx->cur, "GEOM1.1" )) + processor = TGSI_PROCESSOR_GEOMETRY; + else { + report_error( ctx, "Unknown header" ); + return FALSE; + } + + if (ctx->tokens_cur >= ctx->tokens_end) + return FALSE; + *(struct tgsi_version *) ctx->tokens_cur++ = tgsi_build_version(); + + if (ctx->tokens_cur >= ctx->tokens_end) + return FALSE; + ctx->header = (struct tgsi_header *) ctx->tokens_cur++; + *ctx->header = tgsi_build_header(); + + if (ctx->tokens_cur >= ctx->tokens_end) + return FALSE; + *(struct tgsi_processor *) ctx->tokens_cur++ = tgsi_build_processor( processor, ctx->header ); + + return TRUE; +} + +static boolean parse_label( struct translate_ctx *ctx, uint *val ) +{ + const char *cur = ctx->cur; + + if (parse_uint( &cur, val )) { + eat_opt_white( &cur ); + if (*cur == ':') { + cur++; + ctx->cur = cur; + return TRUE; + } + } + return FALSE; +} + +static const char *file_names[TGSI_FILE_COUNT] = +{ + "NULL", + "CONST", + "IN", + "OUT", + "TEMP", + "SAMP", + "ADDR", + "IMM" +}; + +static boolean +parse_file( const char **pcur, uint *file ) +{ + uint i; + + for (i = 0; i < TGSI_FILE_COUNT; i++) { + const char *cur = *pcur; + + if (str_match_no_case( &cur, file_names[i] )) { + if (!is_digit_alpha_underscore( cur )) { + *pcur = cur; + *file = i; + return TRUE; + } + } + } + return FALSE; +} + +static boolean +parse_opt_writemask( + struct translate_ctx *ctx, + uint *writemask ) +{ + const char *cur; + + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == '.') { + cur++; + *writemask = TGSI_WRITEMASK_NONE; + eat_opt_white( &cur ); + if (uprcase( *cur ) == 'X') { + cur++; + *writemask |= TGSI_WRITEMASK_X; + } + if (uprcase( *cur ) == 'Y') { + cur++; + 
*writemask |= TGSI_WRITEMASK_Y; + } + if (uprcase( *cur ) == 'Z') { + cur++; + *writemask |= TGSI_WRITEMASK_Z; + } + if (uprcase( *cur ) == 'W') { + cur++; + *writemask |= TGSI_WRITEMASK_W; + } + + if (*writemask == TGSI_WRITEMASK_NONE) { + report_error( ctx, "Writemask expected" ); + return FALSE; + } + + ctx->cur = cur; + } + else { + *writemask = TGSI_WRITEMASK_XYZW; + } + return TRUE; +} + +/* <register_file_bracket> ::= <file> `[' + */ +static boolean +parse_register_file_bracket( + struct translate_ctx *ctx, + uint *file ) +{ + if (!parse_file( &ctx->cur, file )) { + report_error( ctx, "Unknown register file" ); + return FALSE; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '[') { + report_error( ctx, "Expected `['" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +/* <register_file_bracket_index> ::= <register_file_bracket> <uint> + */ +static boolean +parse_register_file_bracket_index( + struct translate_ctx *ctx, + uint *file, + int *index ) +{ + uint uindex; + + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + *index = (int) uindex; + return TRUE; +} + +/* Parse destination register operand. + * <register_dst> ::= <register_file_bracket_index> `]' + */ +static boolean +parse_register_dst( + struct translate_ctx *ctx, + uint *file, + int *index ) +{ + if (!parse_register_file_bracket_index( ctx, file, index )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +/* Parse source register operand. 
+ * <register_src> ::= <register_file_bracket_index> `]' | + * <register_file_bracket> <register_dst> `]' | + * <register_file_bracket> <register_dst> `+' <uint> `]' | + * <register_file_bracket> <register_dst> `-' <uint> `]' + */ +static boolean +parse_register_src( + struct translate_ctx *ctx, + uint *file, + int *index, + uint *ind_file, + int *ind_index ) +{ + const char *cur; + uint uindex; + + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + eat_opt_white( &ctx->cur ); + cur = ctx->cur; + if (parse_file( &cur, ind_file )) { + if (!parse_register_dst( ctx, ind_file, ind_index )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (*ctx->cur == '+' || *ctx->cur == '-') { + boolean negate; + + negate = *ctx->cur == '-'; + ctx->cur++; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + if (negate) + *index = -(int) uindex; + else + *index = (int) uindex; + } + else { + *index = 0; + } + } + else { + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + *index = (int) uindex; + *ind_file = TGSI_FILE_NULL; + *ind_index = 0; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +/* Parse register declaration. + * <register_dcl> ::= <register_file_bracket_index> `]' | + * <register_file_bracket_index> `..' <index> `]' + */ +static boolean +parse_register_dcl( + struct translate_ctx *ctx, + uint *file, + int *first, + int *last ) +{ + if (!parse_register_file_bracket_index( ctx, file, first )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (ctx->cur[0] == '.' 
&& ctx->cur[1] == '.') { + uint uindex; + + ctx->cur += 2; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal integer" ); + return FALSE; + } + *last = (int) uindex; + eat_opt_white( &ctx->cur ); + } + else { + *last = *first; + } + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]' or `..'" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +static const char *modulate_names[TGSI_MODULATE_COUNT] = +{ + "_1X", + "_2X", + "_4X", + "_8X", + "_D2", + "_D4", + "_D8" +}; + +static boolean +parse_dst_operand( + struct translate_ctx *ctx, + struct tgsi_full_dst_register *dst ) +{ + uint file; + int index; + uint writemask; + const char *cur; + + if (!parse_register_dst( ctx, &file, &index )) + return FALSE; + + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == '_') { + uint i; + + for (i = 0; i < TGSI_MODULATE_COUNT; i++) { + if (str_match_no_case( &cur, modulate_names[i] )) { + if (!is_digit_alpha_underscore( cur )) { + dst->DstRegisterExtModulate.Modulate = i; + ctx->cur = cur; + break; + } + } + } + } + + if (!parse_opt_writemask( ctx, &writemask )) + return FALSE; + + dst->DstRegister.File = file; + dst->DstRegister.Index = index; + dst->DstRegister.WriteMask = writemask; + return TRUE; +} + +static boolean +parse_optional_swizzle( + struct translate_ctx *ctx, + uint swizzle[4], + boolean *parsed_swizzle, + boolean *parsed_extswizzle ) +{ + const char *cur = ctx->cur; + + *parsed_swizzle = FALSE; + *parsed_extswizzle = FALSE; + + eat_opt_white( &cur ); + if (*cur == '.') { + uint i; + + cur++; + eat_opt_white( &cur ); + for (i = 0; i < 4; i++) { + if (uprcase( *cur ) == 'X') + swizzle[i] = TGSI_SWIZZLE_X; + else if (uprcase( *cur ) == 'Y') + swizzle[i] = TGSI_SWIZZLE_Y; + else if (uprcase( *cur ) == 'Z') + swizzle[i] = TGSI_SWIZZLE_Z; + else if (uprcase( *cur ) == 'W') + swizzle[i] = TGSI_SWIZZLE_W; + else { + if (*cur == '0') + swizzle[i] = TGSI_EXTSWIZZLE_ZERO; + else if (*cur == '1') + 
swizzle[i] = TGSI_EXTSWIZZLE_ONE; + else { + report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" ); + return FALSE; + } + *parsed_extswizzle = TRUE; + } + cur++; + } + *parsed_swizzle = TRUE; + ctx->cur = cur; + } + return TRUE; +} + +static boolean +parse_src_operand( + struct translate_ctx *ctx, + struct tgsi_full_src_register *src ) +{ + const char *cur; + float value; + uint file; + int index; + uint ind_file; + int ind_index; + uint swizzle[4]; + boolean parsed_swizzle; + boolean parsed_extswizzle; + + if (*ctx->cur == '-') { + cur = ctx->cur; + cur++; + eat_opt_white( &cur ); + if (*cur == '(') { + cur++; + src->SrcRegisterExtMod.Negate = 1; + eat_opt_white( &cur ); + ctx->cur = cur; + } + } + + if (*ctx->cur == '|') { + ctx->cur++; + eat_opt_white( &ctx->cur ); + src->SrcRegisterExtMod.Absolute = 1; + } + + if (*ctx->cur == '-') { + ctx->cur++; + eat_opt_white( &ctx->cur ); + src->SrcRegister.Negate = 1; + } + + cur = ctx->cur; + if (parse_float( &cur, &value )) { + if (value == 2.0f) { + eat_opt_white( &cur ); + if (*cur != '*') { + report_error( ctx, "Expected `*'" ); + return FALSE; + } + cur++; + if (*cur != '(') { + report_error( ctx, "Expected `('" ); + return FALSE; + } + cur++; + src->SrcRegisterExtMod.Scale2X = 1; + eat_opt_white( &cur ); + ctx->cur = cur; + } + } + + if (*ctx->cur == '(') { + ctx->cur++; + eat_opt_white( &ctx->cur ); + src->SrcRegisterExtMod.Bias = 1; + } + + cur = ctx->cur; + if (parse_float( &cur, &value )) { + if (value == 1.0f) { + eat_opt_white( &cur ); + if (*cur != '-') { + report_error( ctx, "Expected `-'" ); + return FALSE; + } + cur++; + if (*cur != '(') { + report_error( ctx, "Expected `('" ); + return FALSE; + } + cur++; + src->SrcRegisterExtMod.Complement = 1; + eat_opt_white( &cur ); + ctx->cur = cur; + } + } + + if (!parse_register_src( ctx, &file, &index, &ind_file, &ind_index )) + return FALSE; + src->SrcRegister.File = file; + src->SrcRegister.Index = index; + if (ind_file != 
TGSI_FILE_NULL) { + src->SrcRegister.Indirect = 1; + src->SrcRegisterInd.File = ind_file; + src->SrcRegisterInd.Index = ind_index; + } + + /* Parse optional swizzle. + */ + if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle, &parsed_extswizzle )) { + if (parsed_extswizzle) { + assert( parsed_swizzle ); + + src->SrcRegisterExtSwz.ExtSwizzleX = swizzle[0]; + src->SrcRegisterExtSwz.ExtSwizzleY = swizzle[1]; + src->SrcRegisterExtSwz.ExtSwizzleZ = swizzle[2]; + src->SrcRegisterExtSwz.ExtSwizzleW = swizzle[3]; + } + else if (parsed_swizzle) { + src->SrcRegister.SwizzleX = swizzle[0]; + src->SrcRegister.SwizzleY = swizzle[1]; + src->SrcRegister.SwizzleZ = swizzle[2]; + src->SrcRegister.SwizzleW = swizzle[3]; + } + } + + if (src->SrcRegisterExtMod.Complement) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ')') { + report_error( ctx, "Expected `)'" ); + return FALSE; + } + ctx->cur++; + } + + if (src->SrcRegisterExtMod.Bias) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ')') { + report_error( ctx, "Expected `)'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '-') { + report_error( ctx, "Expected `-'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + if (!parse_float( &ctx->cur, &value )) { + report_error( ctx, "Expected literal floating point" ); + return FALSE; + } + if (value != 0.5f) { + report_error( ctx, "Expected 0.5" ); + return FALSE; + } + } + + if (src->SrcRegisterExtMod.Scale2X) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ')') { + report_error( ctx, "Expected `)'" ); + return FALSE; + } + ctx->cur++; + } + + if (src->SrcRegisterExtMod.Absolute) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '|') { + report_error( ctx, "Expected `|'" ); + return FALSE; + } + ctx->cur++; + } + + if (src->SrcRegisterExtMod.Negate) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ')') { + report_error( ctx, "Expected `)'" ); + return FALSE; + } + ctx->cur++; + } + + return TRUE; +} + +static const 
char *texture_names[TGSI_TEXTURE_COUNT] = +{ + "UNKNOWN", + "1D", + "2D", + "3D", + "CUBE", + "RECT", + "SHADOW1D", + "SHADOW2D", + "SHADOWRECT" +}; + +static boolean +parse_instruction( + struct translate_ctx *ctx, + boolean has_label ) +{ + uint i; + uint saturate = TGSI_SAT_NONE; + const struct tgsi_opcode_info *info; + struct tgsi_full_instruction inst; + uint advance; + + /* Parse instruction name. + */ + eat_opt_white( &ctx->cur ); + for (i = 0; i < TGSI_OPCODE_LAST; i++) { + const char *cur = ctx->cur; + + info = tgsi_get_opcode_info( i ); + if (str_match_no_case( &cur, info->mnemonic )) { + if (str_match_no_case( &cur, "_SATNV" )) + saturate = TGSI_SAT_MINUS_PLUS_ONE; + else if (str_match_no_case( &cur, "_SAT" )) + saturate = TGSI_SAT_ZERO_ONE; + + if (info->num_dst + info->num_src + info->is_tex == 0) { + if (!is_digit_alpha_underscore( cur )) { + ctx->cur = cur; + break; + } + } + else if (*cur == '\0' || eat_white( &cur )) { + ctx->cur = cur; + break; + } + } + } + if (i == TGSI_OPCODE_LAST) { + if (has_label) + report_error( ctx, "Unknown opcode" ); + else + report_error( ctx, "Expected `DCL', `IMM' or a label" ); + return FALSE; + } + + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = i; + inst.Instruction.Saturate = saturate; + inst.Instruction.NumDstRegs = info->num_dst; + inst.Instruction.NumSrcRegs = info->num_src; + + /* Parse instruction operands. 
+ */ + for (i = 0; i < info->num_dst + info->num_src + info->is_tex; i++) { + if (i > 0) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ',') { + report_error( ctx, "Expected `,'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + } + + if (i < info->num_dst) { + if (!parse_dst_operand( ctx, &inst.FullDstRegisters[i] )) + return FALSE; + } + else if (i < info->num_dst + info->num_src) { + if (!parse_src_operand( ctx, &inst.FullSrcRegisters[i - info->num_dst] )) + return FALSE; + } + else { + uint j; + + for (j = 0; j < TGSI_TEXTURE_COUNT; j++) { + if (str_match_no_case( &ctx->cur, texture_names[j] )) { + if (!is_digit_alpha_underscore( ctx->cur )) { + inst.InstructionExtTexture.Texture = j; + break; + } + } + } + if (j == TGSI_TEXTURE_COUNT) { + report_error( ctx, "Expected texture target" ); + return FALSE; + } + } + } + + if (info->is_branch) { + uint target; + + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ':') { + report_error( ctx, "Expected `:'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &target )) { + report_error( ctx, "Expected a label" ); + return FALSE; + } + inst.InstructionExtLabel.Label = target; + } + + advance = tgsi_build_full_instruction( + &inst, + ctx->tokens_cur, + ctx->header, + (uint) (ctx->tokens_end - ctx->tokens_cur) ); + if (advance == 0) + return FALSE; + ctx->tokens_cur += advance; + + return TRUE; +} + +static const char *semantic_names[TGSI_SEMANTIC_COUNT] = +{ + "POSITION", + "COLOR", + "BCOLOR", + "FOG", + "PSIZE", + "GENERIC", + "NORMAL" +}; + +static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] = +{ + "CONSTANT", + "LINEAR", + "PERSPECTIVE" +}; + +static boolean parse_declaration( struct translate_ctx *ctx ) +{ + struct tgsi_full_declaration decl; + uint file; + int first; + int last; + uint writemask; + const char *cur; + uint advance; + + if (!eat_white( &ctx->cur )) { + report_error( ctx, "Syntax error" ); + return FALSE; + } + if 
(!parse_register_dcl( ctx, &file, &first, &last )) + return FALSE; + if (!parse_opt_writemask( ctx, &writemask )) + return FALSE; + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = file; + decl.Declaration.UsageMask = writemask; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == ',') { + uint i; + + cur++; + eat_opt_white( &cur ); + for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) { + if (str_match_no_case( &cur, semantic_names[i] )) { + const char *cur2 = cur; + uint index; + + if (is_digit_alpha_underscore( cur )) + continue; + eat_opt_white( &cur2 ); + if (*cur2 == '[') { + cur2++; + eat_opt_white( &cur2 ); + if (!parse_uint( &cur2, &index )) { + report_error( ctx, "Expected literal integer" ); + return FALSE; + } + eat_opt_white( &cur2 ); + if (*cur2 != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + cur2++; + + decl.Semantic.SemanticIndex = index; + + cur = cur2; + } + + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = i; + + ctx->cur = cur; + break; + } + } + } + + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == ',') { + uint i; + + cur++; + eat_opt_white( &cur ); + for (i = 0; i < TGSI_INTERPOLATE_COUNT; i++) { + if (str_match_no_case( &cur, interpolate_names[i] )) { + if (is_digit_alpha_underscore( cur )) + continue; + decl.Declaration.Interpolate = i; + + ctx->cur = cur; + break; + } + } + if (i == TGSI_INTERPOLATE_COUNT) { + report_error( ctx, "Expected semantic or interpolate attribute" ); + return FALSE; + } + } + + advance = tgsi_build_full_declaration( + &decl, + ctx->tokens_cur, + ctx->header, + (uint) (ctx->tokens_end - ctx->tokens_cur) ); + if (advance == 0) + return FALSE; + ctx->tokens_cur += advance; + + return TRUE; +} + +static boolean parse_immediate( struct translate_ctx *ctx ) +{ + struct tgsi_full_immediate imm; + uint i; + float values[4]; + uint advance; + + if (!eat_white( &ctx->cur )) { + report_error( 
ctx, "Syntax error" ); + return FALSE; + } + if (!str_match_no_case( &ctx->cur, "FLT32" ) || is_digit_alpha_underscore( ctx->cur )) { + report_error( ctx, "Expected `FLT32'" ); + return FALSE; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '{') { + report_error( ctx, "Expected `{'" ); + return FALSE; + } + ctx->cur++; + for (i = 0; i < 4; i++) { + eat_opt_white( &ctx->cur ); + if (i > 0) { + if (*ctx->cur != ',') { + report_error( ctx, "Expected `,'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + } + if (!parse_float( &ctx->cur, &values[i] )) { + report_error( ctx, "Expected literal floating point" ); + return FALSE; + } + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '}') { + report_error( ctx, "Expected `}'" ); + return FALSE; + } + ctx->cur++; + + imm = tgsi_default_full_immediate(); + imm.Immediate.Size += 4; + imm.Immediate.DataType = TGSI_IMM_FLOAT32; + imm.u.Pointer = values; + + advance = tgsi_build_full_immediate( + &imm, + ctx->tokens_cur, + ctx->header, + (uint) (ctx->tokens_end - ctx->tokens_cur) ); + if (advance == 0) + return FALSE; + ctx->tokens_cur += advance; + + return TRUE; +} + +static boolean translate( struct translate_ctx *ctx ) +{ + eat_opt_white( &ctx->cur ); + if (!parse_header( ctx )) + return FALSE; + + while (*ctx->cur != '\0') { + uint label_val = 0; + + if (!eat_white( &ctx->cur )) { + report_error( ctx, "Syntax error" ); + return FALSE; + } + + if (*ctx->cur == '\0') + break; + + if (parse_label( ctx, &label_val )) { + if (!parse_instruction( ctx, TRUE )) + return FALSE; + } + else if (str_match_no_case( &ctx->cur, "DCL" )) { + if (!parse_declaration( ctx )) + return FALSE; + } + else if (str_match_no_case( &ctx->cur, "IMM" )) { + if (!parse_immediate( ctx )) + return FALSE; + } + else if (!parse_instruction( ctx, FALSE )) { + return FALSE; + } + } + + return TRUE; +} + +boolean +tgsi_text_translate( + const char *text, + struct tgsi_token *tokens, + uint num_tokens ) +{ + struct translate_ctx ctx; + 
+ ctx.text = text; + ctx.cur = text; + ctx.tokens = tokens; + ctx.tokens_cur = tokens; + ctx.tokens_end = tokens + num_tokens; + + if (!translate( &ctx )) + return FALSE; + + return tgsi_sanity_check( tokens ); +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.h b/src/gallium/auxiliary/tgsi/tgsi_text.h new file mode 100644 index 0000000000..8eeeeef140 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_text.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef TGSI_TEXT_H +#define TGSI_TEXT_H + +#include "pipe/p_shader_tokens.h" + +#if defined __cplusplus +extern "C" { +#endif + +boolean +tgsi_text_translate( + const char *text, + struct tgsi_token *tokens, + uint num_tokens ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_TEXT_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c new file mode 100644 index 0000000000..ea87da31e5 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c @@ -0,0 +1,200 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * TGSI program transformation utility. + * + * Authors: Brian Paul + */ + +#include "pipe/p_debug.h" + +#include "tgsi_transform.h" + + + +static void +emit_instruction(struct tgsi_transform_context *ctx, + const struct tgsi_full_instruction *inst) +{ + uint ti = ctx->ti; + + ti += tgsi_build_full_instruction(inst, + ctx->tokens_out + ti, + ctx->header, + ctx->max_tokens_out - ti); + ctx->ti = ti; +} + + +static void +emit_declaration(struct tgsi_transform_context *ctx, + const struct tgsi_full_declaration *decl) +{ + uint ti = ctx->ti; + + ti += tgsi_build_full_declaration(decl, + ctx->tokens_out + ti, + ctx->header, + ctx->max_tokens_out - ti); + ctx->ti = ti; +} + + +static void +emit_immediate(struct tgsi_transform_context *ctx, + const struct tgsi_full_immediate *imm) +{ + uint ti = ctx->ti; + + ti += tgsi_build_full_immediate(imm, + ctx->tokens_out + ti, + ctx->header, + ctx->max_tokens_out - ti); + ctx->ti = ti; +} + + + +/** + * Apply user-defined transformations to the input shader to produce + * the output shader. + * For example, a register search-and-replace operation could be applied + * by defining a transform_instruction() callback that examined and changed + * the instruction src/dest regs. 
+ * + * \return number of tokens emitted + */ +int +tgsi_transform_shader(const struct tgsi_token *tokens_in, + struct tgsi_token *tokens_out, + uint max_tokens_out, + struct tgsi_transform_context *ctx) +{ + uint procType; + + /* input shader */ + struct tgsi_parse_context parse; + + /* output shader */ + struct tgsi_processor *processor; + + + /** + ** callback context init + **/ + ctx->emit_instruction = emit_instruction; + ctx->emit_declaration = emit_declaration; + ctx->emit_immediate = emit_immediate; + ctx->tokens_out = tokens_out; + ctx->max_tokens_out = max_tokens_out; + + + /** + ** Setup to begin parsing input shader + **/ + if (tgsi_parse_init( &parse, tokens_in ) != TGSI_PARSE_OK) { + debug_printf("tgsi_parse_init() failed in tgsi_transform_shader()!\n"); + return -1; + } + procType = parse.FullHeader.Processor.Processor; + assert(procType == TGSI_PROCESSOR_FRAGMENT || + procType == TGSI_PROCESSOR_VERTEX || + procType == TGSI_PROCESSOR_GEOMETRY); + + + /** + ** Setup output shader + **/ + *(struct tgsi_version *) &tokens_out[0] = tgsi_build_version(); + + ctx->header = (struct tgsi_header *) (tokens_out + 1); + *ctx->header = tgsi_build_header(); + + processor = (struct tgsi_processor *) (tokens_out + 2); + *processor = tgsi_build_processor( procType, ctx->header ); + + ctx->ti = 3; + + + /** + ** Loop over incoming program tokens/instructions + */ + while( !tgsi_parse_end_of_tokens( &parse ) ) { + + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *fullinst + = &parse.FullToken.FullInstruction; + + if (ctx->transform_instruction) + ctx->transform_instruction(ctx, fullinst); + else + ctx->emit_instruction(ctx, fullinst); + } + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + { + struct tgsi_full_declaration *fulldecl + = &parse.FullToken.FullDeclaration; + + if (ctx->transform_declaration) + ctx->transform_declaration(ctx, fulldecl); + else + 
ctx->emit_declaration(ctx, fulldecl); + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + struct tgsi_full_immediate *fullimm + = &parse.FullToken.FullImmediate; + + if (ctx->transform_immediate) + ctx->transform_immediate(ctx, fullimm); + else + ctx->emit_immediate(ctx, fullimm); + } + break; + + default: + assert( 0 ); + } + } + + if (ctx->epilog) { + ctx->epilog(ctx); + } + + tgsi_parse_free (&parse); + + return ctx->ti; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h new file mode 100644 index 0000000000..a121adbaef --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h @@ -0,0 +1,92 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef TGSI_TRANSFORM_H +#define TGSI_TRANSFORM_H + + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_build.h" + + + +/** + * Subclass this to add caller-specific data + */ +struct tgsi_transform_context +{ +/**** PUBLIC ***/ + + /** + * User-defined callbacks invoked per instruction. + */ + void (*transform_instruction)(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst); + + void (*transform_declaration)(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl); + + void (*transform_immediate)(struct tgsi_transform_context *ctx, + struct tgsi_full_immediate *imm); + + /** + * Called at end of input program to allow caller to append extra + * instructions. Return number of tokens emitted. + */ + void (*epilog)(struct tgsi_transform_context *ctx); + + +/*** PRIVATE ***/ + + /** + * These are setup by tgsi_transform_shader() and cannot be overridden. + * Meant to be called from in the above user callback functions. 
+ */ + void (*emit_instruction)(struct tgsi_transform_context *ctx, + const struct tgsi_full_instruction *inst); + void (*emit_declaration)(struct tgsi_transform_context *ctx, + const struct tgsi_full_declaration *decl); + void (*emit_immediate)(struct tgsi_transform_context *ctx, + const struct tgsi_full_immediate *imm); + + struct tgsi_header *header; + uint max_tokens_out; + struct tgsi_token *tokens_out; + uint ti; +}; + + + +extern int +tgsi_transform_shader(const struct tgsi_token *tokens_in, + struct tgsi_token *tokens_out, + uint max_tokens_out, + struct tgsi_transform_context *ctx); + + +#endif /* TGSI_TRANSFORM_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c new file mode 100644 index 0000000000..50101a9bb0 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -0,0 +1,299 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "pipe/p_debug.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi_parse.h"
+#include "tgsi_build.h"
+#include "tgsi_util.h"
+
+/* Overlay used to do integer arithmetic on a pointer without a cast. */
+union pointer_hack
+{
+   void *pointer;
+   uint64_t uint64;
+};
+
+/**
+ * Round a pointer up to the next multiple of 16 bytes (128 bits).
+ * A pointer that is already 16-byte aligned is returned unchanged.
+ *
+ * NOTE(review): the union overlay assumes the pointer occupies the
+ * low-order bytes of the 64-bit integer -- confirm for 32-bit big-endian
+ * targets.
+ */
+void *
+tgsi_align_128bit(
+   void *unaligned )
+{
+   union pointer_hack ph;
+
+   ph.uint64 = 0;
+   ph.pointer = unaligned;
+   ph.uint64 = (ph.uint64 + 15) & ~15;
+   return ph.pointer;
+}
+
+/**
+ * Return the simple swizzle selector stored for one channel of a source
+ * register.  \p component is 0..3 (X, Y, Z, W); any other value asserts
+ * and yields 0.
+ */
+unsigned
+tgsi_util_get_src_register_swizzle(
+   const struct tgsi_src_register *reg,
+   unsigned component )
+{
+   switch( component ) {
+   case 0:
+      return reg->SwizzleX;
+   case 1:
+      return reg->SwizzleY;
+   case 2:
+      return reg->SwizzleZ;
+   case 3:
+      return reg->SwizzleW;
+   default:
+      assert( 0 );
+   }
+   return 0;
+}
+
+/**
+ * Return the extended swizzle selector stored for one channel.
+ * \p component is 0..3; any other value asserts and yields 0.
+ */
+unsigned
+tgsi_util_get_src_register_extswizzle(
+   const struct tgsi_src_register_ext_swz *reg,
+   unsigned component )
+{
+   switch( component ) {
+   case 0:
+      return reg->ExtSwizzleX;
+   case 1:
+      return reg->ExtSwizzleY;
+   case 2:
+      return reg->ExtSwizzleZ;
+   case 3:
+      return reg->ExtSwizzleW;
+   default:
+      assert( 0 );
+   }
+   return 0;
+}
+
+/**
+ * Combine the extended and the simple swizzle of a full source register
+ * for one channel.  The result is either a channel selector that went
+ * through both swizzles or one of the extended-swizzle constants, which
+ * the simple swizzle leaves untouched.
+ */
+unsigned
+tgsi_util_get_full_src_register_extswizzle(
+   const struct tgsi_full_src_register *reg,
+   unsigned component )
+{
+   unsigned swizzle;
+
+   /*
+    * First, calculate the extended swizzle for a given channel. This will give
+    * us either a channel index into the simple swizzle or a constant 1 or 0.
+    */
+   swizzle = tgsi_util_get_src_register_extswizzle(
+      &reg->SrcRegisterExtSwz,
+      component );
+
+   /* The code below relies on both selector encodings sharing X..W. */
+   assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X);
+   assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y);
+   assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z);
+   assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W);
+   assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W);
+   assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W);
+
+   /*
+    * Second, calculate the simple swizzle for the unswizzled channel index.
+    * Leave the constants intact, they are not affected by the simple swizzle.
+    */
+   if( swizzle <= TGSI_SWIZZLE_W ) {
+      swizzle = tgsi_util_get_src_register_swizzle(
+         &reg->SrcRegister,
+         swizzle );
+   }
+
+   return swizzle;
+}
+
+/**
+ * Store \p swizzle as the simple swizzle selector of channel
+ * \p component (0..3).  Any other component asserts and stores nothing.
+ */
+void
+tgsi_util_set_src_register_swizzle(
+   struct tgsi_src_register *reg,
+   unsigned swizzle,
+   unsigned component )
+{
+   switch( component ) {
+   case 0:
+      reg->SwizzleX = swizzle;
+      break;
+   case 1:
+      reg->SwizzleY = swizzle;
+      break;
+   case 2:
+      reg->SwizzleZ = swizzle;
+      break;
+   case 3:
+      reg->SwizzleW = swizzle;
+      break;
+   default:
+      assert( 0 );
+   }
+}
+
+/**
+ * Store \p swizzle as the extended swizzle selector of channel
+ * \p component (0..3).  Any other component asserts and stores nothing.
+ */
+void
+tgsi_util_set_src_register_extswizzle(
+   struct tgsi_src_register_ext_swz *reg,
+   unsigned swizzle,
+   unsigned component )
+{
+   switch( component ) {
+   case 0:
+      reg->ExtSwizzleX = swizzle;
+      break;
+   case 1:
+      reg->ExtSwizzleY = swizzle;
+      break;
+   case 2:
+      reg->ExtSwizzleZ = swizzle;
+      break;
+   case 3:
+      reg->ExtSwizzleW = swizzle;
+      break;
+   default:
+      assert( 0 );
+   }
+}
+
+/**
+ * Return the per-channel negate flag of the extended swizzle for
+ * \p component (0..3); any other value asserts and yields 0.
+ */
+unsigned
+tgsi_util_get_src_register_extnegate(
+   const struct tgsi_src_register_ext_swz *reg,
+   unsigned component )
+{
+   switch( component ) {
+   case 0:
+      return reg->NegateX;
+   case 1:
+      return reg->NegateY;
+   case 2:
+      return reg->NegateZ;
+   case 3:
+      return reg->NegateW;
+   default:
+      assert( 0 );
+   }
+   return 0;
+}
+
+/**
+ * Store \p negate as the per-channel negate flag of the extended swizzle
+ * for \p component (0..3).  Any other component asserts and stores nothing.
+ */
+void
+tgsi_util_set_src_register_extnegate(
+   struct tgsi_src_register_ext_swz *reg,
+   unsigned negate,
+   unsigned component )
+{
+   switch( component ) {
+   case 0:
+      reg->NegateX = negate;
+      break;
+   case 1:
+      reg->NegateY = negate;
+      break;
+   case 2:
+      reg->NegateZ = negate;
+      break;
+   case 3:
+      reg->NegateW = negate;
+      break;
+   default:
+      assert( 0 );
+   }
+}
+
+/**
+ * Fold the negate/absolute modifiers of a full source register into one
+ * TGSI_UTIL_SIGN_* value for the given channel.
+ *
+ * With Absolute set only the ExtMod negate matters: SIGN_SET when it is
+ * set, SIGN_CLEAR otherwise.  Without Absolute the register negate, the
+ * per-channel extended-swizzle negate and the ExtMod negate are combined;
+ * an odd number of negations gives SIGN_TOGGLE, an even number SIGN_KEEP.
+ */
+unsigned
+tgsi_util_get_full_src_register_sign_mode(
+   const struct tgsi_full_src_register *reg,
+   unsigned component )
+{
+   unsigned sign_mode;
+
+   if( reg->SrcRegisterExtMod.Absolute ) {
+      /* Consider only the post-abs negation. */
+
+      if( reg->SrcRegisterExtMod.Negate ) {
+         sign_mode = TGSI_UTIL_SIGN_SET;
+      }
+      else {
+         sign_mode = TGSI_UTIL_SIGN_CLEAR;
+      }
+   }
+   else {
+      /* Accumulate the three negations. */
+
+      unsigned negate;
+
+      negate = reg->SrcRegister.Negate;
+      if( tgsi_util_get_src_register_extnegate( &reg->SrcRegisterExtSwz, component ) ) {
+         negate = !negate;
+      }
+      if( reg->SrcRegisterExtMod.Negate ) {
+         negate = !negate;
+      }
+
+      if( negate ) {
+         sign_mode = TGSI_UTIL_SIGN_TOGGLE;
+      }
+      else {
+         sign_mode = TGSI_UTIL_SIGN_KEEP;
+      }
+   }
+
+   return sign_mode;
+}
+
+/**
+ * Program the modifiers of a full source register so that every channel
+ * has the requested TGSI_UTIL_SIGN_* mode.  All per-channel extended
+ * negate flags are cleared first; an unknown mode asserts and leaves the
+ * remaining modifier fields as they were.
+ */
+void
+tgsi_util_set_full_src_register_sign_mode(
+   struct tgsi_full_src_register *reg,
+   unsigned sign_mode )
+{
+   reg->SrcRegisterExtSwz.NegateX = 0;
+   reg->SrcRegisterExtSwz.NegateY = 0;
+   reg->SrcRegisterExtSwz.NegateZ = 0;
+   reg->SrcRegisterExtSwz.NegateW = 0;
+
+   switch (sign_mode)
+   {
+   case TGSI_UTIL_SIGN_CLEAR:
+      reg->SrcRegister.Negate = 0;
+      reg->SrcRegisterExtMod.Absolute = 1;
+      reg->SrcRegisterExtMod.Negate = 0;
+      break;
+
+   case TGSI_UTIL_SIGN_SET:
+      reg->SrcRegister.Negate = 0;
+      reg->SrcRegisterExtMod.Absolute = 1;
+      reg->SrcRegisterExtMod.Negate = 1;
+      break;
+
+   case TGSI_UTIL_SIGN_TOGGLE:
+      reg->SrcRegister.Negate = 1;
+      reg->SrcRegisterExtMod.Absolute = 0;
+      reg->SrcRegisterExtMod.Negate = 0;
+      break;
+
+   case TGSI_UTIL_SIGN_KEEP:
+      reg->SrcRegister.Negate = 0;
+      reg->SrcRegisterExtMod.Absolute = 0;
+      reg->SrcRegisterExtMod.Negate = 0;
+      break;
+
+   default:
+      assert( 0 );
+   }
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h
new file mode 100644
index 0000000000..7877f34558
--- /dev/null
+++
b/src/gallium/auxiliary/tgsi/tgsi_util.h @@ -0,0 +1,96 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ **************************************************************************/
+
+#ifndef TGSI_UTIL_H
+#define TGSI_UTIL_H
+
+#if defined __cplusplus
+extern "C" {
+#endif
+
+/* Round a pointer up to the next 16-byte (128-bit) boundary. */
+void *
+tgsi_align_128bit(
+   void *unaligned );
+
+/* Read the simple swizzle selector of channel `component` (0..3). */
+unsigned
+tgsi_util_get_src_register_swizzle(
+   const struct tgsi_src_register *reg,
+   unsigned component );
+
+/* Read the extended swizzle selector of channel `component` (0..3). */
+unsigned
+tgsi_util_get_src_register_extswizzle(
+   const struct tgsi_src_register_ext_swz *reg,
+   unsigned component);
+
+/* Read the channel selector after applying extended, then simple swizzle. */
+unsigned
+tgsi_util_get_full_src_register_extswizzle(
+   const struct tgsi_full_src_register *reg,
+   unsigned component );
+
+/* Write the simple swizzle selector of channel `component` (0..3). */
+void
+tgsi_util_set_src_register_swizzle(
+   struct tgsi_src_register *reg,
+   unsigned swizzle,
+   unsigned component );
+
+/* Write the extended swizzle selector of channel `component` (0..3). */
+void
+tgsi_util_set_src_register_extswizzle(
+   struct tgsi_src_register_ext_swz *reg,
+   unsigned swizzle,
+   unsigned component );
+
+/* Read the per-channel negate flag of the extended swizzle. */
+unsigned
+tgsi_util_get_src_register_extnegate(
+   const struct tgsi_src_register_ext_swz *reg,
+   unsigned component );
+
+/* Write the per-channel negate flag of the extended swizzle. */
+void
+tgsi_util_set_src_register_extnegate(
+   struct tgsi_src_register_ext_swz *reg,
+   unsigned negate,
+   unsigned component );
+
+/* Sign modes used by the two sign_mode functions below. */
+#define TGSI_UTIL_SIGN_CLEAR 0 /* Force positive */
+#define TGSI_UTIL_SIGN_SET 1 /* Force negative */
+#define TGSI_UTIL_SIGN_TOGGLE 2 /* Negate */
+#define TGSI_UTIL_SIGN_KEEP 3 /* No change */
+
+/* Return the TGSI_UTIL_SIGN_* mode in effect for one channel. */
+unsigned
+tgsi_util_get_full_src_register_sign_mode(
+   const struct tgsi_full_src_register *reg,
+   unsigned component );
+
+/* Set the register modifiers so all channels use `sign_mode`. */
+void
+tgsi_util_set_full_src_register_sign_mode(
+   struct tgsi_full_src_register *reg,
+   unsigned sign_mode );
+
+#if defined __cplusplus
+}
+#endif
+
+#endif /* TGSI_UTIL_H */
diff --git a/src/gallium/auxiliary/translate/Makefile b/src/gallium/auxiliary/translate/Makefile
new file mode 100644
index 0000000000..ad2a5b705e
--- /dev/null
+++ b/src/gallium/auxiliary/translate/Makefile
@@ -0,0 +1,15 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+# Library name; presumably consumed by Makefile.template -- TODO confirm.
+LIBNAME = translate
+
+# C sources that make up the translate module.
+C_SOURCES = \
+	translate_generic.c \
+	translate_sse.c \
+	translate.c \
+	translate_cache.c
+
+include ../../Makefile.template
+
+# Intentionally empty target: this directory has no symlinks to create.
+symlinks:
+
diff --git a/src/gallium/auxiliary/translate/SConscript b/src/gallium/auxiliary/translate/SConscript
new file mode 100644
index 0000000000..9553a67537
--- /dev/null
+++ b/src/gallium/auxiliary/translate/SConscript
@@ -0,0 +1,12 @@
+Import('*')
+
+# Build the translate module from the same sources as the Makefile.
+translate = env.ConvenienceLibrary(
+	target = 'translate',
+	source = [
+		'translate_generic.c',
+		'translate_sse.c',
+		'translate.c',
+		'translate_cache.c',
+	])
+
+# Put the library at the front of the shared `auxiliaries` list.
+auxiliaries.insert(0, translate)
diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c
new file mode 100644
index 0000000000..7678903f75
--- /dev/null
+++ b/src/gallium/auxiliary/translate/translate.c
@@ -0,0 +1,48 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "pipe/p_config.h"
+#include "pipe/p_state.h"
+#include "translate.h"
+
+/**
+ * Build a translate object for \p key.
+ *
+ * On x86 builds the SSE2 implementation is tried first; when it returns
+ * NULL, and on every other architecture, the generic implementation is
+ * used and its result is returned as-is.
+ */
+struct translate *translate_create( const struct translate_key *key )
+{
+#if defined(PIPE_ARCH_X86)
+   {
+      struct translate *sse2 = translate_sse2_create( key );
+
+      if (sse2 != NULL)
+         return sse2;
+   }
+#endif
+
+   return translate_generic_create( key );
+}
diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h
new file mode 100644
index 0000000000..650cd81fa6
--- /dev/null
+++ b/src/gallium/auxiliary/translate/translate.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2008 Tungsten Graphics, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * Vertex fetch/store/convert code. This functionality is used in two places:
+ * 1. Vertex fetch/convert - to grab vertex data from incoming vertex
+ *    arrays and convert to format needed by vertex shaders.
+ * 2. Vertex store/emit - to convert simple float[][4] vertex attributes
+ *    (which is the organization used throughout the draw/prim pipeline) to
+ *    hardware-specific formats and emit into hardware vertex buffers.
+ *
+ *
+ * Authors:
+ *    Keith Whitwell <keithw@tungstengraphics.com>
+ */
+
+#ifndef _TRANSLATE_H
+#define _TRANSLATE_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+#include "pipe/p_state.h"
+
+/* Describes the conversion of one vertex attribute. */
+struct translate_element
+{
+   enum pipe_format input_format;
+   enum pipe_format output_format;
+   unsigned input_buffer;       /* presumably the index passed to set_buffer() -- TODO confirm */
+   unsigned input_offset; /* can't really reduce the size of these */
+   unsigned output_offset;
+};
+
+
+/*
+ * Full description of a translation.  Only the first nr_elements entries
+ * of element[] are meaningful; see translate_key_sanitize().
+ */
+struct translate_key {
+   unsigned output_stride;
+   unsigned nr_elements;
+   struct translate_element element[PIPE_MAX_ATTRIBS + 1];
+};
+
+
+/*
+ * A translate object: the key it was built for plus its entry points.
+ * The key is the first member, so a struct translate pointer can be
+ * compared byte-wise against a struct translate_key.
+ */
+struct translate {
+   struct translate_key key;
+
+   /* Destroys the object. */
+   void (*release)( struct translate * );
+
+   /* Supplies base pointer and stride for input buffer i. */
+   void (*set_buffer)( struct translate *,
+                       unsigned i,
+                       const void *ptr,
+                       unsigned stride );
+
+   /* NOTE(review): presumably translates the `count` vertices listed in elts -- confirm. */
+   void (PIPE_CDECL *run_elts)( struct translate *,
+                                const unsigned *elts,
+                                unsigned count,
+                                void *output_buffer);
+
+   /* NOTE(review): presumably translates `count` consecutive vertices from `start` -- confirm. */
+   void (PIPE_CDECL *run)( struct translate *,
+                           unsigned start,
+                           unsigned count,
+                           void *output_buffer);
+};
+
+
+
+#if 0
+struct translate_context *translate_context_create( void );
+void translate_context_destroy( struct translate_context * );
+
+struct translate *translate_lookup_or_create( struct translate_context *tctx,
+                                              const struct translate_key *key );
+#endif
+
+
+/* Create a translate object for the given key. */
+struct translate *translate_create( const struct translate_key *key );
+
+/*
+ * Number of bytes of the key that are in use: two ints for the leading
+ * fields plus nr_elements elements.
+ * NOTE(review): assumes sizeof(int) == sizeof(unsigned) and no padding
+ * before element[] -- confirm.
+ */
+static INLINE int translate_keysize( const struct translate_key *key )
+{
+   return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element);
+}
+
+/*
+ * memcmp() of the two keys over the in-use size of `a`; returns 0 when
+ * those bytes are equal.
+ */
+static INLINE int translate_key_compare( const struct translate_key *a,
+                                         const struct translate_key *b )
+{
+   int keysize = translate_keysize(a);
+   return memcmp(a, b, keysize);
+}
+
+
+/*
+ * Zero every byte of the key that lies beyond its in-use size, so that
+ * whole-struct comparisons and hashes do not see stale data.
+ */
+static INLINE void translate_key_sanitize( struct translate_key *a )
+{
+   int keysize = translate_keysize(a);
+   char *ptr = (char *)a;
+   memset(ptr + keysize, 0, sizeof(*a) - keysize);
+}
+
+
+/*******************************************************************************
+ * Private:
+ */
+struct translate *translate_sse2_create( const struct translate_key *key );
+
+struct translate *translate_generic_create( const struct translate_key *key );
+
+
+#endif
diff --git a/src/gallium/auxiliary/translate/translate_cache.c b/src/gallium/auxiliary/translate/translate_cache.c
new file mode 100644
index 0000000000..d8069a149c
--- /dev/null
+++ b/src/gallium/auxiliary/translate/translate_cache.c
@@ -0,0 +1,102 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "util/u_memory.h"
+#include "pipe/p_state.h"
+#include "translate.h"
+#include "translate_cache.h"
+
+#include "cso_cache/cso_cache.h"
+#include "cso_cache/cso_hash.h"
+
+/* Cache of translate objects, indexed by a hash of their key. */
+struct translate_cache {
+   struct cso_hash *hash;
+};
+
+/**
+ * Allocate an empty cache.
+ *
+ * \return the new cache, or NULL when either the cache structure or its
+ *         hash table could not be allocated.
+ */
+struct translate_cache * translate_cache_create( void )
+{
+   struct translate_cache *cache = MALLOC_STRUCT(translate_cache);
+   if (!cache)
+      return NULL;
+
+   cache->hash = cso_hash_create();
+   if (!cache->hash) {
+      FREE(cache);
+      return NULL;
+   }
+   return cache;
+}
+
+
+/* Release every translate object stored in the cache's hash table. */
+static INLINE void delete_translates(struct translate_cache *cache)
+{
+   struct cso_hash *hash = cache->hash;
+   struct cso_hash_iter iter = cso_hash_first_node(hash);
+   while (!cso_hash_iter_is_null(iter)) {
+      struct translate *state = (struct translate*)cso_hash_iter_data(iter);
+      /* Advance before releasing the entry the iterator came from. */
+      iter = cso_hash_iter_next(iter);
+      if (state) {
+         state->release(state);
+      }
+   }
+}
+
+/**
+ * Release all cached translate objects, then the hash table and the
+ * cache itself.  A NULL cache is ignored.
+ */
+void translate_cache_destroy(struct translate_cache *cache)
+{
+   if (!cache)
+      return;
+
+   delete_translates(cache);
+   cso_hash_delete(cache->hash);
+   FREE(cache);
+}
+
+
+/*
+ * Number of key bytes fed to the hash function.
+ *
+ * Uses translate_keysize() so that exactly the in-use part of the key is
+ * hashed.  The previous formula covered nr_elements + 1 elements, which
+ * reads past the end of the key when nr_elements == PIPE_MAX_ATTRIBS + 1.
+ */
+static INLINE unsigned translate_hash_key_size(struct translate_key *key)
+{
+   return (unsigned) translate_keysize(key);
+}
+
+/* Compute the hash value under which a key is stored in the cache. */
+static INLINE unsigned create_key(struct translate_key *key)
+{
+   unsigned hash_key;
+   unsigned size = translate_hash_key_size(key);
+   /*debug_printf("key size = %d, (els = %d)\n",
+     size, key->nr_elements);*/
+   hash_key = cso_construct_key(key, size);
+   return hash_key;
+}
+
+/**
+ * Look up the translate object for \p key, creating and caching it when
+ * it is not present yet.
+ *
+ * The lookup compares sizeof(*key) bytes, so keys are expected to have
+ * been passed through translate_key_sanitize().
+ *
+ * \return the cached or newly created object, or NULL when creation
+ *         failed; a failed creation is not inserted into the cache.
+ */
+struct translate * translate_cache_find(struct translate_cache *cache,
+                                        struct translate_key *key)
+{
+   unsigned hash_key = create_key(key);
+   struct translate *translate = (struct translate*)
+      cso_hash_find_data_from_template(cache->hash,
+                                       hash_key,
+                                       key, sizeof(*key));
+
+   if (!translate) {
+      /* create/insert */
+      translate = translate_create(key);
+      if (translate) {
+         cso_hash_insert(cache->hash, hash_key, translate);
+      }
+   }
+
+   return translate;
+}
diff --git a/src/gallium/auxiliary/translate/translate_cache.h b/src/gallium/auxiliary/translate/translate_cache.h
new file mode 100644
index 0000000000..7dba871e57
--- /dev/null
+++ b/src/gallium/auxiliary/translate/translate_cache.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2008 Tungsten Graphics, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _TRANSLATE_CACHE_H
+#define _TRANSLATE_CACHE_H
+
+
+/*******************************************************************************
+ * Translate cache.
+ * Simply used to cache created translates. Avoids unnecessary creation of
+ * translates if one suitable for a given translate_key has already been
+ * created.
+ *
+ * Note: this functionality depends on and requires the CSO module.
+ */
+struct translate_cache;
+
+struct translate_key;
+struct translate;
+
+/* Create an empty cache / destroy a cache together with its translates. */
+struct translate_cache *translate_cache_create( void );
+void translate_cache_destroy(struct translate_cache *cache);
+
+/**
+ * Will try to find a translate structure matched by the given key.
+ * If such a structure doesn't exist in the cache the function
+ * will automatically create it, insert it in the cache and
+ * return the created version.
+ *
+ */
+struct translate *translate_cache_find(struct translate_cache *cache,
+                                       struct translate_key *key);
+
+#endif
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
new file mode 100644
index 0000000000..8d39b64c6c
--- /dev/null
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -0,0 +1,700 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ **************************************************************************/
+
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "util/u_memory.h"
+#include "pipe/p_state.h"
+#include "translate.h"
+
+
+#define DRAW_DBG 0
+
+/* Reads one attribute from memory into a float array. */
+typedef void (*fetch_func)(const void *ptr, float *attrib);
+/* Writes one attribute from a float array to memory. */
+typedef void (*emit_func)(const float *attrib, void *ptr);
+
+
+
+/*
+ * Generic (non-SSE) translate object.  The base struct translate is the
+ * first member so the two pointer types can be cast to each other.
+ *
+ * NOTE(review): attrib[] holds PIPE_MAX_ATTRIBS entries while
+ * translate_key.element[] holds PIPE_MAX_ATTRIBS + 1 -- confirm that
+ * nr_attrib can never exceed PIPE_MAX_ATTRIBS.
+ */
+struct translate_generic {
+   struct translate translate;
+
+   struct {
+      fetch_func fetch;
+      unsigned buffer;
+      unsigned input_offset;
+
+      emit_func emit;
+      unsigned output_offset;
+
+      char *input_ptr;
+      unsigned input_stride;
+
+   } attrib[PIPE_MAX_ATTRIBS];
+
+   unsigned nr_attrib;
+};
+
+
+/* Downcast from the base object to the generic implementation. */
+static struct translate_generic *translate_generic( struct translate *translate )
+{
+   return (struct translate_generic *)translate;
+}
+
+/**
+ * Fetch a float[4] vertex attribute from memory, doing format/type
+ * conversion as needed.
+ *
+ * This is probably needed/duplicated elsewhere, eg format
+ * conversion, texture sampling etc.
+ */
+/*
+ * ATTRIB(NAME, SZ, TYPE, FROM, TO) defines two functions:
+ *   fetch_NAME - reads SZ components with FROM(i) and fills the remaining
+ *                components up to four from the defaults (0, 0, 0, 1);
+ *   emit_NAME  - writes SZ components as TYPE, converted with TO().
+ */
+#define ATTRIB( NAME, SZ, TYPE, FROM, TO )              \
+static void                                             \
+fetch_##NAME(const void *ptr, float *attrib)            \
+{                                                       \
+   const float defaults[4] = { 0.0f,0.0f,0.0f,1.0f };  \
+   unsigned i;                                          \
+                                                        \
+   for (i = 0; i < SZ; i++) {                           \
+      attrib[i] = FROM(i);                              \
+   }                                                    \
+                                                        \
+   for (; i < 4; i++) {                                 \
+      attrib[i] = defaults[i];                          \
+   }                                                    \
+}                                                       \
+                                                        \
+static void                                             \
+emit_##NAME(const float *attrib, void *ptr)             \
+{                                                       \
+   unsigned i;                                          \
+   TYPE *out = (TYPE *)ptr;                             \
+                                                        \
+   for (i = 0; i < SZ; i++) {                           \
+      out[i] = TO(attrib[i]);                           \
+   }                                                    \
+}
+
+
+/*
+ * FROM_*(i): read component i through the local `ptr` and convert it to
+ * float.  Signed 8-bit data is read as `signed char`: plain `char` has
+ * implementation-defined signedness and would give wrong values for
+ * negative components on targets where it is unsigned.
+ */
+#define FROM_64_FLOAT(i) ((float) ((double *) ptr)[i])
+#define FROM_32_FLOAT(i) (((float *) ptr)[i])
+
+#define FROM_8_USCALED(i) ((float) ((unsigned char *) ptr)[i])
+#define FROM_16_USCALED(i) ((float) ((unsigned short *) ptr)[i])
+#define FROM_32_USCALED(i) ((float) ((unsigned int *) ptr)[i])
+
+#define FROM_8_SSCALED(i) ((float) ((signed char *) ptr)[i])
+#define FROM_16_SSCALED(i) ((float) ((short *) ptr)[i])
+#define FROM_32_SSCALED(i) ((float) ((int *) ptr)[i])
+
+#define FROM_8_UNORM(i) ((float) ((unsigned char *) ptr)[i] / 255.0f)
+#define FROM_16_UNORM(i) ((float) ((unsigned short *) ptr)[i] / 65535.0f)
+#define FROM_32_UNORM(i) ((float) ((unsigned int *) ptr)[i] / 4294967295.0f)
+
+#define FROM_8_SNORM(i) ((float) ((signed char *) ptr)[i] / 127.0f)
+#define FROM_16_SNORM(i) ((float) ((short *) ptr)[i] / 32767.0f)
+#define FROM_32_SNORM(i) ((float) ((int *) ptr)[i] / 2147483647.0f)
+
+#define FROM_32_FIXED(i) (((int *) ptr)[i] / 65536.0f)
+
+/*
+ * TO_*(x): convert the float x to the storage type.  The argument is
+ * parenthesized so that any expression can be passed safely.
+ *
+ * NOTE(review): the 32-bit normalized scale factors are not exactly
+ * representable as float, so x == 1.0f may produce a value that is out
+ * of range for the integer cast -- confirm and clamp if needed.
+ */
+#define TO_64_FLOAT(x) ((double) (x))
+#define TO_32_FLOAT(x) (x)
+
+#define TO_8_USCALED(x) ((unsigned char) (x))
+#define TO_16_USCALED(x) ((unsigned short) (x))
+#define TO_32_USCALED(x) ((unsigned int) (x))
+
+#define TO_8_SSCALED(x) ((signed char) (x))
+#define TO_16_SSCALED(x) ((short) (x))
+#define TO_32_SSCALED(x) ((int) (x))
+
+#define TO_8_UNORM(x) ((unsigned char) ((x) * 255.0f))
+#define TO_16_UNORM(x) ((unsigned short) ((x) * 65535.0f))
+#define TO_32_UNORM(x) ((unsigned int) ((x) * 4294967295.0f))
+
+#define TO_8_SNORM(x) ((signed char) ((x) * 127.0f))
+#define TO_16_SNORM(x) ((short) ((x) * 32767.0f))
+#define TO_32_SNORM(x) ((int) ((x) * 2147483647.0f))
+
+#define TO_32_FIXED(x) ((int) ((x) * 65536.0f))
+
+
+
+ATTRIB( R64G64B64A64_FLOAT, 4, double, FROM_64_FLOAT, TO_64_FLOAT )
+ATTRIB( R64G64B64_FLOAT, 3, double, FROM_64_FLOAT, TO_64_FLOAT )
+ATTRIB( R64G64_FLOAT, 2, double, FROM_64_FLOAT, TO_64_FLOAT )
+ATTRIB( R64_FLOAT, 1, double, FROM_64_FLOAT, TO_64_FLOAT )
+
+ATTRIB( R32G32B32A32_FLOAT, 4, float, FROM_32_FLOAT, TO_32_FLOAT )
+ATTRIB( R32G32B32_FLOAT, 3, float, FROM_32_FLOAT, TO_32_FLOAT )
+ATTRIB( R32G32_FLOAT, 2, float, FROM_32_FLOAT, TO_32_FLOAT )
+ATTRIB( R32_FLOAT, 1, float, FROM_32_FLOAT, TO_32_FLOAT )
+
+ATTRIB( R32G32B32A32_USCALED, 4, unsigned, FROM_32_USCALED, TO_32_USCALED )
+ATTRIB( R32G32B32_USCALED, 3, unsigned, FROM_32_USCALED, TO_32_USCALED )
+ATTRIB( R32G32_USCALED, 2, unsigned, FROM_32_USCALED, TO_32_USCALED )
+ATTRIB( R32_USCALED, 1, unsigned, FROM_32_USCALED, TO_32_USCALED )
+
+ATTRIB( R32G32B32A32_SSCALED, 4, int, FROM_32_SSCALED, TO_32_SSCALED )
+ATTRIB( R32G32B32_SSCALED, 3, int, FROM_32_SSCALED, TO_32_SSCALED )
+ATTRIB( R32G32_SSCALED, 2, int, FROM_32_SSCALED, TO_32_SSCALED )
+ATTRIB( R32_SSCALED, 1, int, FROM_32_SSCALED, TO_32_SSCALED )
+
+ATTRIB( R32G32B32A32_UNORM, 4, unsigned, FROM_32_UNORM, TO_32_UNORM )
+ATTRIB( R32G32B32_UNORM, 3, unsigned, FROM_32_UNORM, TO_32_UNORM )
+ATTRIB( R32G32_UNORM, 2, unsigned, FROM_32_UNORM, TO_32_UNORM )
+ATTRIB( R32_UNORM, 1, unsigned, FROM_32_UNORM, TO_32_UNORM )
+
+ATTRIB( R32G32B32A32_SNORM, 4, int, FROM_32_SNORM, TO_32_SNORM )
+ATTRIB( R32G32B32_SNORM, 3, int, FROM_32_SNORM, TO_32_SNORM )
+ATTRIB( R32G32_SNORM, 2, int, FROM_32_SNORM, TO_32_SNORM )
+ATTRIB( R32_SNORM, 1, int, FROM_32_SNORM, TO_32_SNORM )
+
+ATTRIB( R16G16B16A16_USCALED, 4, ushort, FROM_16_USCALED, TO_16_USCALED )
+ATTRIB( R16G16B16_USCALED, 3, ushort, FROM_16_USCALED, TO_16_USCALED )
+ATTRIB( R16G16_USCALED, 2, ushort, FROM_16_USCALED, TO_16_USCALED )
+ATTRIB( R16_USCALED, 1, ushort, FROM_16_USCALED, TO_16_USCALED )
+
+ATTRIB( R16G16B16A16_SSCALED, 4, short, FROM_16_SSCALED, TO_16_SSCALED )
+ATTRIB( R16G16B16_SSCALED, 3, short, FROM_16_SSCALED, TO_16_SSCALED )
+ATTRIB( R16G16_SSCALED, 2, short, FROM_16_SSCALED, TO_16_SSCALED )
+ATTRIB( R16_SSCALED, 1, short, FROM_16_SSCALED, TO_16_SSCALED )
+
+ATTRIB( R16G16B16A16_UNORM, 4, ushort, FROM_16_UNORM, TO_16_UNORM )
+ATTRIB( R16G16B16_UNORM, 3, ushort, FROM_16_UNORM, TO_16_UNORM )
+ATTRIB( R16G16_UNORM, 2, ushort, FROM_16_UNORM, TO_16_UNORM )
+ATTRIB( R16_UNORM, 1, ushort, FROM_16_UNORM, TO_16_UNORM )
+
+ATTRIB( R16G16B16A16_SNORM, 4, short, FROM_16_SNORM, TO_16_SNORM )
+ATTRIB( R16G16B16_SNORM, 3, short, FROM_16_SNORM, TO_16_SNORM )
+ATTRIB( R16G16_SNORM, 2, short, FROM_16_SNORM, TO_16_SNORM )
+ATTRIB( R16_SNORM, 1, short, FROM_16_SNORM, TO_16_SNORM )
+
+ATTRIB( R8G8B8A8_USCALED, 4, ubyte, FROM_8_USCALED, TO_8_USCALED )
+ATTRIB( R8G8B8_USCALED, 3, ubyte, FROM_8_USCALED, TO_8_USCALED )
+ATTRIB( R8G8_USCALED, 2, ubyte, FROM_8_USCALED, TO_8_USCALED )
+ATTRIB( R8_USCALED, 1, ubyte, FROM_8_USCALED, TO_8_USCALED )
+
+ATTRIB( R8G8B8A8_SSCALED, 4, signed char, FROM_8_SSCALED, TO_8_SSCALED )
+ATTRIB( R8G8B8_SSCALED, 3, signed char, FROM_8_SSCALED, TO_8_SSCALED )
+ATTRIB( R8G8_SSCALED, 2, signed char, FROM_8_SSCALED, TO_8_SSCALED )
+ATTRIB( R8_SSCALED, 1, signed char, FROM_8_SSCALED, TO_8_SSCALED )
+
+ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM )
+ATTRIB( R8G8B8_UNORM, 3, ubyte, FROM_8_UNORM, TO_8_UNORM )
+ATTRIB( R8G8_UNORM, 2, ubyte, FROM_8_UNORM, TO_8_UNORM )
+ATTRIB( R8_UNORM, 1, ubyte, FROM_8_UNORM, TO_8_UNORM )
+
+ATTRIB( R8G8B8A8_SNORM, 4, signed char, FROM_8_SNORM, TO_8_SNORM )
+ATTRIB( R8G8B8_SNORM, 3, signed char, FROM_8_SNORM, TO_8_SNORM )
+ATTRIB( R8G8_SNORM, 2, signed char, FROM_8_SNORM, TO_8_SNORM )
+ATTRIB( R8_SNORM, 1, signed char, FROM_8_SNORM, TO_8_SNORM )
+
+ATTRIB( A8R8G8B8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM )
+//ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM )
+
+ATTRIB( R32G32B32A32_FIXED, 4, int, FROM_32_FIXED, TO_32_FIXED )
+ATTRIB( R32G32B32_FIXED, 3, int, FROM_32_FIXED, TO_32_FIXED )
+ATTRIB( R32G32_FIXED, 2, int, FROM_32_FIXED, TO_32_FIXED )
+ATTRIB( R32_FIXED, 1, int, FROM_32_FIXED, TO_32_FIXED )
+
+
+
+/* Fetch four normalized bytes stored in B, G, R, A order as R, G, B, A. */
+static void
+fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib)
+{
+   attrib[2] = FROM_8_UNORM(0);
+   attrib[1] = FROM_8_UNORM(1);
+   attrib[0] = FROM_8_UNORM(2);
+   attrib[3] = FROM_8_UNORM(3);
+}
+
+/* Store an R, G, B, A float attribute as four bytes in B, G, R, A order. */
+static void
+emit_B8G8R8A8_UNORM( const float *attrib, void *ptr)
+{
+   ubyte *out = (ubyte *)ptr;
+   out[2] = TO_8_UNORM(attrib[0]);
+   out[1] = TO_8_UNORM(attrib[1]);
+   out[0] = TO_8_UNORM(attrib[2]);
+   out[3] = TO_8_UNORM(attrib[3]);
+}
+
+/* Fallback fetch: ignores the input and yields (0, 0, 0, 1). */
+static void
+fetch_NULL( const void *ptr, float *attrib )
+{
+   attrib[0] = 0;
+   attrib[1] = 0;
+   attrib[2] = 0;
+   attrib[3] = 1;
+}
+
+/* Fallback emit: writes nothing. */
+static void
+emit_NULL( const float *attrib, void *ptr )
+{
+   /* do nothing is the only sensible option */
+}
+
+/*
+ * Map a pipe_format to the matching fetch_* routine.  A format without
+ * a routine asserts and falls back to fetch_NULL.
+ */
+static fetch_func get_fetch_func( enum pipe_format format )
+{
+   switch (format) {
+   case PIPE_FORMAT_R64_FLOAT:
+      return &fetch_R64_FLOAT;
+   case PIPE_FORMAT_R64G64_FLOAT:
+      return &fetch_R64G64_FLOAT;
+   case PIPE_FORMAT_R64G64B64_FLOAT:
+      return &fetch_R64G64B64_FLOAT;
+   case PIPE_FORMAT_R64G64B64A64_FLOAT:
+      return &fetch_R64G64B64A64_FLOAT;
+
+   case PIPE_FORMAT_R32_FLOAT:
+      return &fetch_R32_FLOAT;
+   case PIPE_FORMAT_R32G32_FLOAT:
+      return &fetch_R32G32_FLOAT;
+   case PIPE_FORMAT_R32G32B32_FLOAT:
+      return &fetch_R32G32B32_FLOAT;
+   case PIPE_FORMAT_R32G32B32A32_FLOAT:
+      return &fetch_R32G32B32A32_FLOAT;
+
+   case PIPE_FORMAT_R32_UNORM:
+      return &fetch_R32_UNORM;
+   case PIPE_FORMAT_R32G32_UNORM:
+      return &fetch_R32G32_UNORM;
+   case PIPE_FORMAT_R32G32B32_UNORM:
+      return &fetch_R32G32B32_UNORM;
+   case PIPE_FORMAT_R32G32B32A32_UNORM:
+      return &fetch_R32G32B32A32_UNORM;
+
+   case PIPE_FORMAT_R32_USCALED:
+      return &fetch_R32_USCALED;
+   case PIPE_FORMAT_R32G32_USCALED:
+      return &fetch_R32G32_USCALED;
+   case PIPE_FORMAT_R32G32B32_USCALED:
+      return &fetch_R32G32B32_USCALED;
+   case
PIPE_FORMAT_R32G32B32A32_USCALED: + return &fetch_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return &fetch_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return &fetch_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return &fetch_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return &fetch_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return &fetch_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return &fetch_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return &fetch_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return &fetch_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return &fetch_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return &fetch_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return &fetch_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return &fetch_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return &fetch_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return &fetch_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return &fetch_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return &fetch_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return &fetch_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return &fetch_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return &fetch_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return &fetch_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return &fetch_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return &fetch_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return &fetch_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return &fetch_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return &fetch_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return &fetch_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return &fetch_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return &fetch_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + 
return &fetch_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return &fetch_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return &fetch_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return &fetch_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return &fetch_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return &fetch_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return &fetch_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return &fetch_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return &fetch_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return &fetch_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return &fetch_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return &fetch_R8G8B8A8_SSCALED; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return &fetch_A8R8G8B8_UNORM; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + return &fetch_B8G8R8A8_UNORM; + + case PIPE_FORMAT_R32_FIXED: + return &fetch_R32_FIXED; + case PIPE_FORMAT_R32G32_FIXED: + return &fetch_R32G32_FIXED; + case PIPE_FORMAT_R32G32B32_FIXED: + return &fetch_R32G32B32_FIXED; + case PIPE_FORMAT_R32G32B32A32_FIXED: + return &fetch_R32G32B32A32_FIXED; + + default: + assert(0); + return &fetch_NULL; + } +} + + + + +static emit_func get_emit_func( enum pipe_format format ) +{ + /* silence warnings */ + (void) emit_R32G32B32A32_FIXED; + (void) emit_R32G32B32_FIXED; + (void) emit_R32G32_FIXED; + (void) emit_R32_FIXED; + + switch (format) { + case PIPE_FORMAT_R64_FLOAT: + return &emit_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return &emit_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return &emit_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return &emit_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return &emit_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return &emit_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return &emit_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return &emit_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return 
&emit_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return &emit_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return &emit_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return &emit_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return &emit_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return &emit_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return &emit_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return &emit_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return &emit_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return &emit_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return &emit_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return &emit_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return &emit_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return &emit_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return &emit_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return &emit_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return &emit_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return &emit_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return &emit_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return &emit_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return &emit_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return &emit_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return &emit_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return &emit_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return &emit_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return &emit_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return &emit_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return &emit_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return &emit_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return &emit_R16G16_SSCALED; + case 
PIPE_FORMAT_R16G16B16_SSCALED: + return &emit_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return &emit_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return &emit_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return &emit_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return &emit_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return &emit_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return &emit_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return &emit_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return &emit_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return &emit_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return &emit_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return &emit_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return &emit_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return &emit_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return &emit_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return &emit_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return &emit_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return &emit_R8G8B8A8_SSCALED; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return &emit_A8R8G8B8_UNORM; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + return &emit_B8G8R8A8_UNORM; + + default: + assert(0); + return &emit_NULL; + } +} + + + +/** + * Fetch vertex attributes for 'count' vertices. 
+ */ +static void PIPE_CDECL generic_run_elts( struct translate *translate, + const unsigned *elts, + unsigned count, + void *output_buffer ) +{ + struct translate_generic *tg = translate_generic(translate); + char *vert = output_buffer; + unsigned nr_attrs = tg->nr_attrib; + unsigned attr; + unsigned i; + + /* loop over vertex attributes (vertex shader inputs) + */ + for (i = 0; i < count; i++) { + unsigned elt = *elts++; + + for (attr = 0; attr < nr_attrs; attr++) { + float data[4]; + + const char *src = (tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt); + + char *dst = (vert + + tg->attrib[attr].output_offset); + + tg->attrib[attr].fetch( src, data ); + + if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", + i, elt, attr, data[0], data[1], data[2], data[3]); + + tg->attrib[attr].emit( data, dst ); + } + + vert += tg->translate.key.output_stride; + } +} + + + +static void PIPE_CDECL generic_run( struct translate *translate, + unsigned start, + unsigned count, + void *output_buffer ) +{ + struct translate_generic *tg = translate_generic(translate); + char *vert = output_buffer; + unsigned nr_attrs = tg->nr_attrib; + unsigned attr; + unsigned i; + + /* loop over vertex attributes (vertex shader inputs) + */ + for (i = 0; i < count; i++) { + unsigned elt = start + i; + + for (attr = 0; attr < nr_attrs; attr++) { + float data[4]; + + const char *src = (tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt); + + char *dst = (vert + + tg->attrib[attr].output_offset); + + tg->attrib[attr].fetch( src, data ); + + if (0) debug_printf("vert %d attr %d: %f %f %f %f\n", + i, attr, data[0], data[1], data[2], data[3]); + + tg->attrib[attr].emit( data, dst ); + } + + vert += tg->translate.key.output_stride; + } +} + + + +static void generic_set_buffer( struct translate *translate, + unsigned buf, + const void *ptr, + unsigned stride ) +{ + struct translate_generic *tg = translate_generic(translate); + unsigned i; + + for (i = 0; i < 
tg->nr_attrib; i++) { + if (tg->attrib[i].buffer == buf) { + tg->attrib[i].input_ptr = ((char *)ptr + + tg->attrib[i].input_offset); + tg->attrib[i].input_stride = stride; + } + } +} + + +static void generic_release( struct translate *translate ) +{ + /* Refcount? + */ + FREE(translate); +} + +struct translate *translate_generic_create( const struct translate_key *key ) +{ + struct translate_generic *tg = CALLOC_STRUCT(translate_generic); + unsigned i; + + if (tg == NULL) + return NULL; + + tg->translate.key = *key; + tg->translate.release = generic_release; + tg->translate.set_buffer = generic_set_buffer; + tg->translate.run_elts = generic_run_elts; + tg->translate.run = generic_run; + + for (i = 0; i < key->nr_elements; i++) { + + tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format); + tg->attrib[i].buffer = key->element[i].input_buffer; + tg->attrib[i].input_offset = key->element[i].input_offset; + + tg->attrib[i].emit = get_emit_func(key->element[i].output_format); + tg->attrib[i].output_offset = key->element[i].output_offset; + + } + + tg->nr_attrib = key->nr_elements; + + + return &tg->translate; +} diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c new file mode 100644 index 0000000000..7955186e16 --- /dev/null +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -0,0 +1,614 @@ +/* + * Copyright 2003 Tungsten Graphics, inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Keith Whitwell <keithw@tungstengraphics.com> + */ + + +#include "pipe/p_config.h" +#include "pipe/p_compiler.h" +#include "util/u_memory.h" +#include "util/u_simple_list.h" + +#include "translate.h" + + +#if defined(PIPE_ARCH_X86) + +#include "rtasm/rtasm_cpu.h" +#include "rtasm/rtasm_x86sse.h" + + +#define X 0 +#define Y 1 +#define Z 2 +#define W 3 + + +typedef void (PIPE_CDECL *run_func)( struct translate *translate, + unsigned start, + unsigned count, + void *output_buffer ); + +typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate, + const unsigned *elts, + unsigned count, + void *output_buffer ); + + + +struct translate_sse { + struct translate translate; + + struct x86_function linear_func; + struct x86_function elt_func; + struct x86_function *func; + + boolean loaded_identity; + boolean loaded_255; + boolean loaded_inv_255; + + float identity[4]; + float float_255[4]; + float inv_255[4]; + + struct { + char *input_ptr; + unsigned input_stride; + } attrib[PIPE_MAX_ATTRIBS]; + + run_func gen_run; + run_elts_func gen_run_elts; + +}; + +static int get_offset( const void *a, const void *b ) +{ + return (const char *)b - (const char *)a; +} + + + +static struct x86_reg get_identity( struct translate_sse *p ) +{ + struct x86_reg reg = x86_make_reg(file_XMM, 6); + + if (!p->loaded_identity) { + /* Nasty: + */ + struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI); + + p->loaded_identity = TRUE; + p->identity[0] = 0; + p->identity[1] = 0; + p->identity[2] = 0; + p->identity[3] = 1; + + sse_movups(p->func, reg, + x86_make_disp(translateESI, + get_offset(p, &p->identity[0]))); + } + + return reg; +} +/* Cached {255,255,255,255} lives in XMM7: XMM6 holds get_identity()'s constant and XMM5 get_inv_255()'s, and each load is emitted only once, so they must not share a register. */ +static struct x86_reg get_255( struct translate_sse *p ) +{ + struct x86_reg reg = x86_make_reg(file_XMM, 7); + + if (!p->loaded_255) { + struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI); + + p->loaded_255 = TRUE; + p->float_255[0] = + p->float_255[1] = + p->float_255[2] = + p->float_255[3] = 255.0f; + + 
sse_movups(p->func, reg, + x86_make_disp(translateESI, + get_offset(p, &p->float_255[0]))); + } + + return reg; +} + +static struct x86_reg get_inv_255( struct translate_sse *p ) +{ + struct x86_reg reg = x86_make_reg(file_XMM, 5); + + if (!p->loaded_inv_255) { + struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI); + + p->loaded_inv_255 = TRUE; + p->inv_255[0] = + p->inv_255[1] = + p->inv_255[2] = + p->inv_255[3] = 1.0f / 255.0f; + + sse_movups(p->func, reg, + x86_make_disp(translateESI, + get_offset(p, &p->inv_255[0]))); + } + + return reg; +} + + +static void emit_load_R32G32B32A32( struct translate_sse *p, + struct x86_reg data, + struct x86_reg arg0 ) +{ + sse_movups(p->func, data, arg0); +} + +static void emit_load_R32G32B32( struct translate_sse *p, + struct x86_reg data, + struct x86_reg arg0 ) +{ + /* Have to jump through some hoops: + * + * c 0 0 0 + * c 0 0 1 + * 0 0 c 1 + * a b c 1 + */ + sse_movss(p->func, data, x86_make_disp(arg0, 8)); + sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) ); + sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) ); + sse_movlps(p->func, data, arg0); +} + +static void emit_load_R32G32( struct translate_sse *p, + struct x86_reg data, + struct x86_reg arg0 ) +{ + /* 0 0 0 1 + * a b 0 1 + */ + sse_movups(p->func, data, get_identity(p) ); + sse_movlps(p->func, data, arg0); +} + + +static void emit_load_R32( struct translate_sse *p, + struct x86_reg data, + struct x86_reg arg0 ) +{ + /* a 0 0 0 + * a 0 0 1 + */ + sse_movss(p->func, data, arg0); + sse_orps(p->func, data, get_identity(p) ); +} + + +static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p, + struct x86_reg data, + struct x86_reg src ) +{ + + /* Load and unpack twice: + */ + sse_movss(p->func, data, src); + sse2_punpcklbw(p->func, data, get_identity(p)); + sse2_punpcklbw(p->func, data, get_identity(p)); + + /* Convert to float: + */ + sse2_cvtdq2ps(p->func, data, data); + + + /* Scale by 1/255.0 + */ + 
sse_mulps(p->func, data, get_inv_255(p)); +} + + + + +static void emit_store_R32G32B32A32( struct translate_sse *p, + struct x86_reg dest, + struct x86_reg dataXMM ) +{ + sse_movups(p->func, dest, dataXMM); +} + +static void emit_store_R32G32B32( struct translate_sse *p, + struct x86_reg dest, + struct x86_reg dataXMM ) +{ + /* Emit two, shuffle, emit one. + */ + sse_movlps(p->func, dest, dataXMM); + sse_shufps(p->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */ + sse_movss(p->func, x86_make_disp(dest,8), dataXMM); +} + +static void emit_store_R32G32( struct translate_sse *p, + struct x86_reg dest, + struct x86_reg dataXMM ) +{ + sse_movlps(p->func, dest, dataXMM); +} + +static void emit_store_R32( struct translate_sse *p, + struct x86_reg dest, + struct x86_reg dataXMM ) +{ + sse_movss(p->func, dest, dataXMM); +} + + + +static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p, + struct x86_reg dest, + struct x86_reg dataXMM ) +{ + /* Scale by 255.0 + */ + sse_mulps(p->func, dataXMM, get_255(p)); + + /* Pack and emit: + */ + sse2_cvtps2dq(p->func, dataXMM, dataXMM); + sse2_packssdw(p->func, dataXMM, dataXMM); + sse2_packuswb(p->func, dataXMM, dataXMM); + sse_movss(p->func, dest, dataXMM); +} + + + + + +static void get_src_ptr( struct translate_sse *p, + struct x86_reg srcEAX, + struct x86_reg translateREG, + struct x86_reg eltREG, + unsigned a ) +{ + struct x86_reg input_ptr = + x86_make_disp(translateREG, + get_offset(p, &p->attrib[a].input_ptr)); + + struct x86_reg input_stride = + x86_make_disp(translateREG, + get_offset(p, &p->attrib[a].input_stride)); + + /* Calculate pointer to current attrib: + */ + x86_mov(p->func, srcEAX, input_stride); + x86_imul(p->func, srcEAX, eltREG); + x86_add(p->func, srcEAX, input_ptr); +} + + +/* Extended swizzles? Maybe later. 
+ */ +static void emit_swizzle( struct translate_sse *p, + struct x86_reg dest, + struct x86_reg src, + unsigned char shuffle ) +{ + sse_shufps(p->func, dest, src, shuffle); +} + + +static boolean translate_attr( struct translate_sse *p, + const struct translate_element *a, + struct x86_reg srcECX, + struct x86_reg dstEAX) +{ + struct x86_reg dataXMM = x86_make_reg(file_XMM, 0); + + switch (a->input_format) { + case PIPE_FORMAT_R32_FLOAT: + emit_load_R32(p, dataXMM, srcECX); + break; + case PIPE_FORMAT_R32G32_FLOAT: + emit_load_R32G32(p, dataXMM, srcECX); + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + emit_load_R32G32B32(p, dataXMM, srcECX); + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + emit_load_R32G32B32A32(p, dataXMM, srcECX); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX); + emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W)); + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX); + break; + default: + return FALSE; + } + + switch (a->output_format) { + case PIPE_FORMAT_R32_FLOAT: + emit_store_R32(p, dstEAX, dataXMM); + break; + case PIPE_FORMAT_R32G32_FLOAT: + emit_store_R32G32(p, dstEAX, dataXMM); + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + emit_store_R32G32B32(p, dstEAX, dataXMM); + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + emit_store_R32G32B32A32(p, dstEAX, dataXMM); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W)); + emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM); + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM); + break; + default: + return FALSE; + } + + return TRUE; +} + +/* Build run( struct translate *translate, + * unsigned start, + * unsigned count, + * void *output_buffer ) + * or + * run_elts( struct translate *translate, + * unsigned *elts, + * unsigned count, + * void *output_buffer ) + * + * Lots of hardcoding + * + * EAX -- pointer to current output vertex + * ECX -- 
pointer to current attribute + * + */ +static boolean build_vertex_emit( struct translate_sse *p, + struct x86_function *func, + boolean linear ) +{ + struct x86_reg vertexECX = x86_make_reg(file_REG32, reg_AX); + struct x86_reg idxEBX = x86_make_reg(file_REG32, reg_BX); + struct x86_reg srcEAX = x86_make_reg(file_REG32, reg_CX); + struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP); + struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI); + int fixup, label; + unsigned j; + + p->func = func; + p->loaded_inv_255 = FALSE; + p->loaded_255 = FALSE; + p->loaded_identity = FALSE; + + x86_init_func(p->func); + + /* Push a few regs? + */ + x86_push(p->func, countEBP); + x86_push(p->func, translateESI); + x86_push(p->func, idxEBX); + + /* Get vertex count, compare to zero + */ + x86_xor(p->func, idxEBX, idxEBX); + x86_mov(p->func, countEBP, x86_fn_arg(p->func, 3)); + x86_cmp(p->func, countEBP, idxEBX); + fixup = x86_jcc_forward(p->func, cc_E); + + /* If linear, idx is the current element, otherwise it is a pointer + * to the current element. + */ + x86_mov(p->func, idxEBX, x86_fn_arg(p->func, 2)); + + /* Initialize destination register. 
+ */ + x86_mov(p->func, vertexECX, x86_fn_arg(p->func, 4)); + + /* Move argument 1 (translate_sse pointer) into a reg: + */ + x86_mov(p->func, translateESI, x86_fn_arg(p->func, 1)); + + + /* always load, needed or not: + */ + + /* Note address for loop jump */ + label = x86_get_label(p->func); + + + for (j = 0; j < p->translate.key.nr_elements; j++) { + const struct translate_element *a = &p->translate.key.element[j]; + + struct x86_reg destEAX = x86_make_disp(vertexECX, + a->output_offset); + + /* Figure out source pointer address: + */ + if (linear) { + get_src_ptr(p, srcEAX, translateESI, idxEBX, j); + } + else { + get_src_ptr(p, srcEAX, translateESI, x86_deref(idxEBX), j); + } + + if (!translate_attr( p, a, x86_deref(srcEAX), destEAX )) + return FALSE; + } + + /* Next vertex: + */ + x86_lea(p->func, vertexECX, x86_make_disp(vertexECX, p->translate.key.output_stride)); + + /* Incr index + */ + if (linear) { + x86_inc(p->func, idxEBX); + } + else { + x86_lea(p->func, idxEBX, x86_make_disp(idxEBX, 4)); + } + + /* decr count, loop if not zero + */ + x86_dec(p->func, countEBP); + x86_test(p->func, countEBP, countEBP); + x86_jcc(p->func, cc_NZ, label); + + /* Exit mmx state? 
+ */ + if (p->func->need_emms) + mmx_emms(p->func); + + /* Land forward jump here: + */ + x86_fixup_fwd_jump(p->func, fixup); + + /* Pop regs and return + */ + + x86_pop(p->func, idxEBX); + x86_pop(p->func, translateESI); + x86_pop(p->func, countEBP); + x86_ret(p->func); + + return TRUE; +} + + + + + + + +static void translate_sse_set_buffer( struct translate *translate, + unsigned buf, + const void *ptr, + unsigned stride ) +{ + struct translate_sse *p = (struct translate_sse *)translate; + unsigned i; + + for (i = 0; i < p->translate.key.nr_elements; i++) { + if (p->translate.key.element[i].input_buffer == buf) { + p->attrib[i].input_ptr = ((char *)ptr + + p->translate.key.element[i].input_offset); + p->attrib[i].input_stride = stride; + } + } +} + + +static void translate_sse_release( struct translate *translate ) +{ + struct translate_sse *p = (struct translate_sse *)translate; + + x86_release_func( &p->linear_func ); + x86_release_func( &p->elt_func ); + + FREE(p); +} + +static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, + const unsigned *elts, + unsigned count, + void *output_buffer ) +{ + struct translate_sse *p = (struct translate_sse *)translate; + + p->gen_run_elts( translate, + elts, + count, + output_buffer ); +} + +static void PIPE_CDECL translate_sse_run( struct translate *translate, + unsigned start, + unsigned count, + void *output_buffer ) +{ + struct translate_sse *p = (struct translate_sse *)translate; + + p->gen_run( translate, + start, + count, + output_buffer ); +} + + +struct translate *translate_sse2_create( const struct translate_key *key ) +{ + struct translate_sse *p = NULL; + + if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2()) + goto fail; + + p = CALLOC_STRUCT( translate_sse ); + if (p == NULL) + goto fail; + + p->translate.key = *key; + p->translate.release = translate_sse_release; + p->translate.set_buffer = translate_sse_set_buffer; + p->translate.run_elts = translate_sse_run_elts; + p->translate.run = 
translate_sse_run; + + if (!build_vertex_emit(p, &p->linear_func, TRUE)) + goto fail; + + if (!build_vertex_emit(p, &p->elt_func, FALSE)) + goto fail; + + p->gen_run = (run_func)x86_get_func(&p->linear_func); + if (p->gen_run == NULL) + goto fail; + + p->gen_run_elts = (run_elts_func)x86_get_func(&p->elt_func); + if (p->gen_run_elts == NULL) + goto fail; + + return &p->translate; + + fail: + if (p) + translate_sse_release( &p->translate ); + + return NULL; +} + + + +#else + +struct translate *translate_sse2_create( const struct translate_key *key ) +{ + return NULL; +} + +#endif diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile new file mode 100644 index 0000000000..b3d1045a8f --- /dev/null +++ b/src/gallium/auxiliary/util/Makefile @@ -0,0 +1,28 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = util + +C_SOURCES = \ + p_debug.c \ + u_blit.c \ + u_draw_quad.c \ + u_gen_mipmap.c \ + u_handle_table.c \ + u_hash_table.c \ + u_keymap.c \ + u_math.c \ + u_mm.c \ + u_rect.c \ + u_simple_shaders.c \ + u_snprintf.c \ + u_stream_stdc.c \ + u_stream_wd.c \ + u_tile.c \ + u_time.c \ + u_timed_winsys.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript new file mode 100644 index 0000000000..8a04955a16 --- /dev/null +++ b/src/gallium/auxiliary/util/SConscript @@ -0,0 +1,26 @@ +Import('*') + +util = env.ConvenienceLibrary( + target = 'util', + source = [ + 'p_debug.c', + 'p_debug_mem.c', + 'p_debug_prof.c', + 'u_blit.c', + 'u_draw_quad.c', + 'u_gen_mipmap.c', + 'u_handle_table.c', + 'u_hash_table.c', + 'u_keymap.c', + 'u_math.c', + 'u_mm.c', + 'u_rect.c', + 'u_simple_shaders.c', + 'u_snprintf.c', + 'u_stream_stdc.c', + 'u_stream_wd.c', + 'u_tile.c', + 'u_time.c', + ]) + +auxiliaries.insert(0, util) diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c new file mode 100644 index 0000000000..3ed8bdfdf3 --- 
/dev/null +++ b/src/gallium/auxiliary/util/p_debug.c @@ -0,0 +1,708 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "pipe/p_config.h" + +#include <stdarg.h> + + +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY + +#include <windows.h> +#include <winddi.h> + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers +#endif +#include <windows.h> + +#else + +#include <stdio.h> +#include <stdlib.h> + +#endif + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_format.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "util/u_string.h" +#include "util/u_stream.h" +#include "util/u_math.h" +#include "util/u_tile.h" + + +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY +static INLINE void +_EngDebugPrint(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + EngDebugPrint("", (PCHAR)format, ap); + va_end(ap); +} +#endif + + +void _debug_vprintf(const char *format, va_list ap) +{ +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + /* EngDebugPrint does not handle float point arguments, so we need to use + * our own vsnprintf implementation. It is also very slow, so buffer until + * we find a newline. */ + static char buf[512] = {'\0'}; + size_t len = strlen(buf); + int ret = util_vsnprintf(buf + len, sizeof(buf) - len, format, ap); + if(ret > (int)(sizeof(buf) - len - 1) || util_strchr(buf + len, '\n')) { + _EngDebugPrint("%s", buf); + buf[0] = '\0'; + } +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) + /* EngDebugPrint does not handle float point arguments, so we need to use + * our own vsnprintf implementation. It is also very slow, so buffer until + * we find a newline. 
*/ + static char buf[512 + 1] = {'\0'}; + size_t len = strlen(buf); + int ret = util_vsnprintf(buf + len, sizeof(buf) - len, format, ap); + if(ret > (int)(sizeof(buf) - len - 1) || util_strchr(buf + len, '\n')) { + OutputDebugStringA(buf); + buf[0] = '\0'; + } +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) || defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + /* TODO */ +#else /* !PIPE_SUBSYSTEM_WINDOWS */ + vfprintf(stderr, format, ap); +#endif +} + + +#ifdef DEBUG +void debug_print_blob( const char *name, + const void *blob, + unsigned size ) +{ + const unsigned *ublob = (const unsigned *)blob; + unsigned i; + + debug_printf("%s (%d dwords%s)\n", name, size/4, + size%4 ? "... plus a few bytes" : ""); + + for (i = 0; i < size/4; i++) { + debug_printf("%d:\t%08x\n", i, ublob[i]); + } +} +#endif + + +void _debug_break(void) +{ +#if defined(PIPE_ARCH_X86) && defined(PIPE_CC_GCC) + __asm("int3"); +#elif defined(PIPE_ARCH_X86) && defined(PIPE_CC_MSVC) + _asm {int 3}; +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + EngDebugBreak(); +#else + abort(); +#endif +} + + +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY +static const char * +find(const char *start, const char *end, char c) +{ + const char *p; + for(p = start; !end || p != end; ++p) { + if(*p == c) + return p; + if(*p < 32) + break; + } + return NULL; +} + +static int +compare(const char *start, const char *end, const char *s) +{ + const char *p, *q; + for(p = start, q = s; p != end && *q != '\0'; ++p, ++q) { + if(*p != *q) + return 0; + } + return p == end && *q == '\0'; +} + +static void +copy(char *dst, const char *start, const char *end, size_t n) +{ + const char *p; + char *q; + for(p = start, q = dst, n = n - 1; p != end && n; ++p, ++q, --n) + *q = *p; + *q = '\0'; +} +#endif + + +static INLINE const char * +_debug_get_option(const char *name) +{ +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + /* EngMapFile creates the file if it does not exists, so it must either be + * disabled on release versions (or put in a less conspicuous 
place). */ +#ifdef DEBUG + const char *result = NULL; + ULONG_PTR iFile = 0; + const void *pMap = NULL; + const char *sol, *eol, *sep; + static char output[1024]; + + pMap = EngMapFile(L"\\??\\c:\\gallium.cfg", 0, &iFile); + if(pMap) { + sol = (const char *)pMap; + while(1) { + /* TODO: handle LF line endings */ + eol = find(sol, NULL, '\r'); + if(!eol || eol == sol) + break; + sep = find(sol, eol, '='); + if(!sep) + break; + if(compare(sol, sep, name)) { + copy(output, sep + 1, eol, sizeof(output)); + result = output; + break; + } + sol = eol + 2; + } + EngUnmapFile(iFile); + } + return result; +#else + return NULL; +#endif +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) || defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + /* TODO: implement */ + return NULL; +#else + return getenv(name); +#endif +} + +const char * +debug_get_option(const char *name, const char *dfault) +{ + const char *result; + + result = _debug_get_option(name); + if(!result) + result = dfault; + + debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? result : "(null)"); + + return result; +} + +boolean +debug_get_bool_option(const char *name, boolean dfault) +{ + const char *str = _debug_get_option(name); + boolean result; + + if(str == NULL) + result = dfault; + else if(!util_strcmp(str, "n")) + result = FALSE; + else if(!util_strcmp(str, "no")) + result = FALSE; + else if(!util_strcmp(str, "0")) + result = FALSE; + else if(!util_strcmp(str, "f")) + result = FALSE; + else if(!util_strcmp(str, "false")) + result = FALSE; + else + result = TRUE; + + debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? 
"TRUE" : "FALSE"); + + return result; +} + + +long +debug_get_num_option(const char *name, long dfault) +{ + long result; + const char *str; + + str = _debug_get_option(name); + if(!str) + result = dfault; + else { + long sign; + char c; + c = *str++; + if(c == '-') { + sign = -1; + c = *str++; + } + else { + sign = 1; + } + result = 0; + while('0' <= c && c <= '9') { + result = result*10 + (c - '0'); + c = *str++; + } + result *= sign; + } + + debug_printf("%s: %s = %li\n", __FUNCTION__, name, result); + + return result; +} + + +unsigned long +debug_get_flags_option(const char *name, + const struct debug_named_value *flags, + unsigned long dfault) +{ + unsigned long result; + const char *str; + + str = _debug_get_option(name); + if(!str) + result = dfault; + else if (!util_strcmp(str, "help")) { + result = dfault; + while (flags->name) { + debug_printf("%s: help for %s: %s [0x%lx]\n", __FUNCTION__, name, flags->name, flags->value); + flags++; + } + } + else { + result = 0; + while( flags->name ) { + if (!util_strcmp(str, "all") || util_strstr(str, flags->name )) + result |= flags->value; + ++flags; + } + } + + if (str) { + debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str); + } + else { + debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result); + } + + return result; +} + + +void _debug_assert_fail(const char *expr, + const char *file, + unsigned line, + const char *function) +{ + _debug_printf("%s:%u:%s: Assertion `%s' failed.\n", file, line, function, expr); +#if defined(PIPE_OS_WINDOWS) + if (debug_get_bool_option("GALLIUM_ABORT_ON_ASSERT", FALSE)) +#else + if (debug_get_bool_option("GALLIUM_ABORT_ON_ASSERT", TRUE)) +#endif + debug_break(); + else + _debug_printf("continuing...\n"); +} + + +const char * +debug_dump_enum(const struct debug_named_value *names, + unsigned long value) +{ + static char rest[64]; + + while(names->name) { + if(names->value == value) + return names->name; + ++names; + } + + util_snprintf(rest, sizeof(rest), 
"0x%08lx", value); + return rest; +} + +/* Format 'value' as a '|'-separated list of the names whose bits are all set, plus any leftover bits in hex; returns a static buffer (or "0"). util_strncat's count is the space left in 'output', not the buffer size. */ +const char * +debug_dump_flags(const struct debug_named_value *names, + unsigned long value) +{ + static char output[4096]; + static char rest[256]; + int first = 1; + + output[0] = '\0'; + + while(names->name) { + if((names->value & value) == names->value) { + if (!first) + util_strncat(output, "|", sizeof(output) - strlen(output) - 1); + else + first = 0; + util_strncat(output, names->name, sizeof(output) - strlen(output) - 1); + value &= ~names->value; + } + ++names; + } + + if (value) { + if (!first) + util_strncat(output, "|", sizeof(output) - strlen(output) - 1); + else + first = 0; + + util_snprintf(rest, sizeof(rest), "0x%08lx", value); + util_strncat(output, rest, sizeof(output) - strlen(output) - 1); + } + + if(first) + return "0"; + + return output; +} + + +static const struct debug_named_value pipe_format_names[] = { +#ifdef DEBUG + DEBUG_NAMED_VALUE(PIPE_FORMAT_NONE), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A8R8G8B8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_X8R8G8B8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_B8G8R8A8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_B8G8R8X8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A1R5G5B5_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A4R4G4B4_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R5G6B5_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A2B10G10R10_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_L8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_I8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A8L8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_L16_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_YCBCR), + DEBUG_NAMED_VALUE(PIPE_FORMAT_YCBCR_REV), + DEBUG_NAMED_VALUE(PIPE_FORMAT_Z16_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_Z32_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_Z32_FLOAT), + DEBUG_NAMED_VALUE(PIPE_FORMAT_S8Z24_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_Z24S8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_X8Z24_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_Z24X8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_S8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R64_FLOAT), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R64G64_FLOAT), + 
DEBUG_NAMED_VALUE(PIPE_FORMAT_R64G64B64_FLOAT), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R64G64B64A64_FLOAT), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32_FLOAT), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32_FLOAT), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32_FLOAT), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32A32_FLOAT), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32A32_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32A32_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32A32_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32A32_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16B16_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16B16A16_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16B16_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16B16A16_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16B16_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16B16A16_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16B16_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16B16A16_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8_UNORM), + 
DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_B6G5R5_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A8B8G8R8_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_X8B8G8R8_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_L8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A8_L8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_X8UB8UG8SR8S_NORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_B6UG5SR5S_NORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGBA), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT3_RGBA), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT5_RGBA), +#endif + DEBUG_NAMED_VALUE_END +}; + +#ifdef DEBUG +void debug_print_format(const char *msg, unsigned fmt ) +{ + debug_printf("%s: %s\n", msg, debug_dump_enum(pipe_format_names, fmt)); +} +#endif + +const char *pf_name( enum pipe_format format ) +{ + return debug_dump_enum(pipe_format_names, format); +} + + +#ifdef DEBUG +void debug_dump_image(const char *prefix, + unsigned format, unsigned cpp, + unsigned width, unsigned height, + unsigned stride, + const void *data) +{ +#ifdef 
PIPE_SUBSYSTEM_WINDOWS_DISPLAY + static unsigned no = 0; + char filename[256]; + WCHAR wfilename[sizeof(filename)]; + ULONG_PTR iFile = 0; + struct { + unsigned format; + unsigned cpp; + unsigned width; + unsigned height; + } header; + unsigned char *pMap = NULL; + unsigned i; + + util_snprintf(filename, sizeof(filename), "\\??\\c:\\%03u%s.raw", ++no, prefix); + for(i = 0; i < sizeof(filename); ++i) + wfilename[i] = (WCHAR)filename[i]; + + pMap = (unsigned char *)EngMapFile(wfilename, sizeof(header) + height*width*cpp, &iFile); + if(!pMap) + return; + + header.format = format; + header.cpp = cpp; + header.width = width; + header.height = height; + memcpy(pMap, &header, sizeof(header)); + pMap += sizeof(header); + + for(i = 0; i < height; ++i) { + memcpy(pMap, (unsigned char *)data + stride*i, cpp*width); + pMap += cpp*width; + } + + EngUnmapFile(iFile); +#endif +} + +void debug_dump_surface(const char *prefix, + struct pipe_surface *surface) +{ + unsigned surface_usage; + void *data; + + if (!surface) + goto error1; + + /* XXX: force mappable surface */ + surface_usage = surface->usage; + surface->usage |= PIPE_BUFFER_USAGE_CPU_READ; + + data = pipe_surface_map(surface, + PIPE_BUFFER_USAGE_CPU_READ); + if(!data) + goto error2; + + debug_dump_image(prefix, + surface->format, + surface->block.size, + surface->nblocksx, + surface->nblocksy, + surface->stride, + data); + + pipe_surface_unmap(surface); +error2: + surface->usage = surface_usage; +error1: + ; +} + + +#pragma pack(push,2) +struct bmp_file_header { + uint16_t bfType; + uint32_t bfSize; + uint16_t bfReserved1; + uint16_t bfReserved2; + uint32_t bfOffBits; +}; +#pragma pack(pop) + +struct bmp_info_header { + uint32_t biSize; + int32_t biWidth; + int32_t biHeight; + uint16_t biPlanes; + uint16_t biBitCount; + uint32_t biCompression; + uint32_t biSizeImage; + int32_t biXPelsPerMeter; + int32_t biYPelsPerMeter; + uint32_t biClrUsed; + uint32_t biClrImportant; +}; + +struct bmp_rgb_quad { + uint8_t rgbBlue; + 
uint8_t rgbGreen; + uint8_t rgbRed; + uint8_t rgbAlpha; +}; + +void +debug_dump_surface_bmp(const char *filename, + struct pipe_surface *surface) +{ + struct util_stream *stream; + unsigned surface_usage; + struct bmp_file_header bmfh; + struct bmp_info_header bmih; + float *rgba; + unsigned x, y; + + if (!surface) + goto error1; + + rgba = MALLOC(surface->width*4*sizeof(float)); + if(!rgba) + goto error1; + + bmfh.bfType = 0x4d42; + bmfh.bfSize = 14 + 40 + surface->height*surface->width*4; + bmfh.bfReserved1 = 0; + bmfh.bfReserved2 = 0; + bmfh.bfOffBits = 14 + 40; + + bmih.biSize = 40; + bmih.biWidth = surface->width; + bmih.biHeight = surface->height; + bmih.biPlanes = 1; + bmih.biBitCount = 32; + bmih.biCompression = 0; + bmih.biSizeImage = surface->height*surface->width*4; + bmih.biXPelsPerMeter = 0; + bmih.biYPelsPerMeter = 0; + bmih.biClrUsed = 0; + bmih.biClrImportant = 0; + + stream = util_stream_create(filename, bmfh.bfSize); + if(!stream) + goto error2; + + util_stream_write(stream, &bmfh, 14); + util_stream_write(stream, &bmih, 40); + + /* XXX: force mappable surface */ + surface_usage = surface->usage; + surface->usage |= PIPE_BUFFER_USAGE_CPU_READ; + + y = surface->height; + while(y--) { + pipe_get_tile_rgba(surface, + 0, y, surface->width, 1, + rgba); + for(x = 0; x < surface->width; ++x) + { + struct bmp_rgb_quad pixel; + pixel.rgbRed = float_to_ubyte(rgba[x*4 + 0]); + pixel.rgbGreen = float_to_ubyte(rgba[x*4 + 1]); + pixel.rgbBlue = float_to_ubyte(rgba[x*4 + 2]); + pixel.rgbAlpha = float_to_ubyte(rgba[x*4 + 3]); + util_stream_write(stream, &pixel, 4); + } + } + + surface->usage = surface_usage; + + util_stream_close(stream); +error2: + FREE(rgba); +error1: + ; +} + +#endif diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/p_debug_mem.c new file mode 100644 index 0000000000..9511479cbb --- /dev/null +++ b/src/gallium/auxiliary/util/p_debug_mem.c @@ -0,0 +1,305 @@ 
+/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Memory debugging. 
+ * + * @author José Fonseca <jrfonseca@tungstengraphics.com> + */ + +#include "pipe/p_config.h" + +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) +#include <windows.h> +#include <winddi.h> +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) +#include <wdm.h> +#else +#include <stdio.h> +#include <stdlib.h> +#endif + +#include "pipe/p_debug.h" +#include "util/u_double_list.h" + + +#define DEBUG_MEMORY_MAGIC 0x6e34090aU + + +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && !defined(WINCE) +#define real_malloc(_size) EngAllocMem(0, _size, 'D3AG') +#define real_free(_ptr) EngFreeMem(_ptr) +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) +#define real_malloc(_size) ExAllocatePool(0, _size) +#define real_free(_ptr) ExFreePool(_ptr) +#else +#define real_malloc(_size) malloc(_size) +#define real_free(_ptr) free(_ptr) +#endif + + +struct debug_memory_header +{ + struct list_head head; + + unsigned long no; + const char *file; + unsigned line; + const char *function; + size_t size; + unsigned magic; +}; + +struct debug_memory_footer +{ + unsigned magic; +}; + + +static struct list_head list = { &list, &list }; + +static unsigned long last_no = 0; + + +static INLINE struct debug_memory_header * +header_from_data(void *data) +{ + if(data) + return (struct debug_memory_header *)((char *)data - sizeof(struct debug_memory_header)); + else + return NULL; +} + +static INLINE void * +data_from_header(struct debug_memory_header *hdr) +{ + if(hdr) + return (void *)((char *)hdr + sizeof(struct debug_memory_header)); + else + return NULL; +} + +static INLINE struct debug_memory_footer * +footer_from_header(struct debug_memory_header *hdr) +{ + if(hdr) + return (struct debug_memory_footer *)((char *)hdr + sizeof(struct debug_memory_header) + hdr->size); + else + return NULL; +} + + +void * +debug_malloc(const char *file, unsigned line, const char *function, + size_t size) +{ + struct debug_memory_header *hdr; + struct debug_memory_footer *ftr; + + hdr = real_malloc(sizeof(*hdr) + size + 
/**
 * Allocate a zero-filled array of count elements of size bytes each through
 * debug_malloc().
 *
 * @param file, line, function  call site, forwarded to debug_malloc()
 * @param count                 number of elements
 * @param size                  size of one element in bytes
 * @return pointer to the zeroed memory, or NULL if the allocation failed or
 *         count * size does not fit in a size_t.
 */
void *
debug_calloc(const char *file, unsigned line, const char *function,
             size_t count, size_t size )
{
   size_t total;
   void *ptr;

   /*
    * Refuse requests whose byte count would wrap around: otherwise a
    * too-small block would be allocated and returned as if it had the
    * requested size.
    */
   if (size != 0 && count > (size_t)-1 / size) {
      debug_printf("%s:%u:%s: size overflow when trying to allocate %lu * %lu bytes\n",
                   file, line, function,
                   (long unsigned)count, (long unsigned)size);
      return NULL;
   }

   total = count * size;
   ptr = debug_malloc( file, line, function, total );
   if( ptr )
      memset( ptr, 0, total );
   return ptr;
}
return NULL; + } + + old_ftr = footer_from_header(old_hdr); + if(old_ftr->magic != DEBUG_MEMORY_MAGIC) { + debug_printf("%s:%u:%s: buffer overflow %p\n", + old_hdr->file, old_hdr->line, old_hdr->function, + old_ptr); + debug_assert(0); + } + + /* alloc new */ + new_hdr = real_malloc(sizeof(*new_hdr) + new_size + sizeof(*new_ftr)); + if(!new_hdr) { + debug_printf("%s:%u:%s: out of memory when trying to allocate %lu bytes\n", + file, line, function, + (long unsigned)new_size); + return NULL; + } + new_hdr->no = old_hdr->no; + new_hdr->file = old_hdr->file; + new_hdr->line = old_hdr->line; + new_hdr->function = old_hdr->function; + new_hdr->size = new_size; + new_hdr->magic = DEBUG_MEMORY_MAGIC; + + new_ftr = footer_from_header(new_hdr); + new_ftr->magic = DEBUG_MEMORY_MAGIC; + + LIST_REPLACE(&old_hdr->head, &new_hdr->head); + + /* copy data */ + new_ptr = data_from_header(new_hdr); + memcpy( new_ptr, old_ptr, old_size < new_size ? old_size : new_size ); + + /* free old */ + old_hdr->magic = 0; + old_ftr->magic = 0; + real_free(old_hdr); + + return new_ptr; +} + +unsigned long +debug_memory_begin(void) +{ + return last_no; +} + +void +debug_memory_end(unsigned long start_no) +{ + size_t total_size = 0; + struct list_head *entry; + + entry = list.prev; + for (; entry != &list; entry = entry->prev) { + struct debug_memory_header *hdr; + void *ptr; + struct debug_memory_footer *ftr; + + hdr = LIST_ENTRY(struct debug_memory_header, entry, head); + ptr = data_from_header(hdr); + ftr = footer_from_header(hdr); + + if(hdr->magic != DEBUG_MEMORY_MAGIC) { + debug_printf("%s:%u:%s: bad or corrupted memory %p\n", + hdr->file, hdr->line, hdr->function, + ptr); + debug_assert(0); + } + + if((start_no <= hdr->no && hdr->no < last_no) || + (last_no < start_no && (hdr->no < last_no || start_no <= hdr->no))) { + debug_printf("%s:%u:%s: %u bytes at %p not freed\n", + hdr->file, hdr->line, hdr->function, + hdr->size, ptr); + total_size += hdr->size; + } + + if(ftr->magic != 
DEBUG_MEMORY_MAGIC) { + debug_printf("%s:%u:%s: buffer overflow %p\n", + hdr->file, hdr->line, hdr->function, + ptr); + debug_assert(0); + } + } + + if(total_size) { + debug_printf("Total of %u KB of system memory apparently leaked\n", + (total_size + 1023)/1024); + } +} diff --git a/src/gallium/auxiliary/util/p_debug_prof.c b/src/gallium/auxiliary/util/p_debug_prof.c new file mode 100644 index 0000000000..5f9772ef91 --- /dev/null +++ b/src/gallium/auxiliary/util/p_debug_prof.c @@ -0,0 +1,320 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Poor-man profiling. 
+ * + * @author José Fonseca <jrfonseca@tungstengraphics.com> + * + * @sa http://blogs.msdn.com/joshpoley/archive/2008/03/12/poor-man-s-profiler.aspx + * @sa http://www.johnpanzer.com/aci_cuj/index.html + */ + +#include "pipe/p_config.h" + +#if defined(PROFILE) && defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + +#include <windows.h> +#include <winddi.h> + +#include "pipe/p_debug.h" +#include "util/u_string.h" + + +#define PROFILE_TABLE_SIZE (1024*1024) +#define FILE_NAME_SIZE 256 + +struct debug_profile_entry +{ + uintptr_t caller; + uintptr_t callee; + uint64_t samples; +}; + +static unsigned long enabled = 0; + +static WCHAR wFileName[FILE_NAME_SIZE] = L"\\??\\c:\\00000000.prof"; +static ULONG_PTR iFile = 0; + +static struct debug_profile_entry *table = NULL; +static unsigned long free_table_entries = 0; +static unsigned long max_table_entries = 0; + +uint64_t start_stamp = 0; +uint64_t end_stamp = 0; + + +static void +debug_profile_entry(uintptr_t caller, uintptr_t callee, uint64_t samples) +{ + unsigned hash = ( caller + callee ) & PROFILE_TABLE_SIZE - 1; + + while(1) { + if(table[hash].caller == 0 && table[hash].callee == 0) { + table[hash].caller = caller; + table[hash].callee = callee; + table[hash].samples = samples; + --free_table_entries; + break; + } + else if(table[hash].caller == caller && table[hash].callee == callee) { + table[hash].samples += samples; + break; + } + else { + ++hash; + } + } +} + + +static uintptr_t caller_stack[1024]; +static unsigned last_caller = 0; + + +static int64_t delta(void) { + int64_t result = end_stamp - start_stamp; + if(result > UINT64_C(0xffffffff)) + result = 0; + return result; +} + + +static void __cdecl +debug_profile_enter(uintptr_t callee) +{ + uintptr_t caller = last_caller ? 
/**
 * Called at the end of every method or function.
 *
 * Naked routine: it has no compiler-generated prologue/epilogue, so it
 * saves and restores every register it touches itself.
 *
 * Sequence:
 *  - save eax; if `enabled` is zero, restore eax and return immediately;
 *  - save edx, read the time stamp counter and store it in end_stamp;
 *  - clear `enabled` for the duration of the C call (presumably so that
 *    instrumented code reached from debug_profile_exit() does not re-enter
 *    the profiler -- TODO confirm);
 *  - load the dword at [esp+8]: two dwords (eax, edx) have been pushed
 *    since entry, so this is the value that was on top of the stack when
 *    _pexit was entered (presumably the return address into the
 *    instrumented function, per the hook convention linked below);
 *  - save ebx/ecx/ebp/edi/esi, call debug_profile_exit() with that value
 *    as its single cdecl argument, pop the argument and restore the
 *    registers;
 *  - set `enabled` back to 1, re-read the time stamp counter into
 *    start_stamp, then restore edx and eax and return.
 *
 * @sa http://msdn.microsoft.com/en-us/library/xc11y76y.aspx
 */
void __declspec(naked) __cdecl
_pexit(void) {
   _asm {
      push eax
      mov eax, [enabled]
      test eax, eax
      jz skip

      push edx

      rdtsc
      mov dword ptr [end_stamp], eax
      mov dword ptr [end_stamp+4], edx

      xor eax, eax
      mov [enabled], eax

      mov eax, [esp+8]

      push ebx
      push ecx
      push ebp
      push edi
      push esi

      push eax
      call debug_profile_exit
      add esp, 4

      pop esi
      pop edi
      pop ebp
      pop ecx
      pop ebx

      mov eax, 1
      mov [enabled], eax

      rdtsc
      mov dword ptr [start_stamp], eax
      mov dword ptr [start_stamp+4], edx

      pop edx
skip:
      pop eax
      ret
   }
}
/**
 * Start a profiling run.
 *
 * Picks the next numbered output file name, maps it with EngMapFile() as
 * the backing store of the profile table, clears the table, takes the
 * initial time stamp and enables the _penter/_pexit hooks. If the file
 * cannot be mapped, profiling stays disabled and nothing else happens.
 */
void
debug_profile_start(void)
{
   WCHAR *p;

   /*
    * Increment the decimal number in the file name, starting from the
    * least significant digit. wFileName[14] is the last of the eight
    * digits in "\??\c:\00000000.prof"; a '9' rolls over to '0' and carries
    * into the digit to its left.
    * NOTE(review): there is no lower bound on p. If all eight digits are
    * '9' the carry reaches the '\' before them and increments that
    * character instead.
    */
   p = &wFileName[14];
   while(1) {
      if(*p == '9') {
         *p-- = '0';
      }
      else {
         *p += 1;
         break;
      }
   }

   table = EngMapFile(wFileName,
                      PROFILE_TABLE_SIZE*sizeof(struct debug_profile_entry),
                      &iFile);
   if(table) {
      unsigned i;

      free_table_entries = max_table_entries = PROFILE_TABLE_SIZE;
      memset(table, 0, PROFILE_TABLE_SIZE*sizeof(struct debug_profile_entry));

      /* slot 0 is pre-claimed for the calibration reference function */
      table[0].caller = (uintptr_t)&__debug_profile_reference;
      table[0].callee = 0;
      table[0].samples = 0;
      --free_table_entries;

      /*
       * Record the initial time stamp.
       * NOTE(review): the pops are in the same order as the pushes
       * (edx, eax), so the saved eax/edx values come back swapped --
       * confirm whether that is intended or harmless here.
       */
      _asm {
         push edx
         push eax

         rdtsc
         mov dword ptr [start_stamp], eax
         mov dword ptr [start_stamp+4], edx

         pop edx
         pop eax
      }

      last_caller = 0;

      enabled = 1;

      /* run the reference function (one _penter/_pexit pair) eight times
       * with profiling enabled */
      for(i = 0; i < 8; ++i) {
         _asm {
            call __debug_profile_reference
         }
      }
   }
}
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/**
 * State kept by the blit helper between calls.
 *
 * Filled in once by util_create_blit() and released by util_destroy_blit().
 */
struct blit_state
{
   struct pipe_context *pipe;   /**< context used for shaders, buffers and copies */
   struct cso_context *cso;     /**< CSO context used to save/set/restore state */

   /* fixed-function state templates, initialized in util_create_blit() */
   struct pipe_blend_state blend;
   struct pipe_depth_stencil_alpha_state depthstencil;
   struct pipe_rasterizer_state rasterizer;
   struct pipe_sampler_state sampler;     /**< min/mag filters are set per blit */
   struct pipe_viewport_state viewport;

   /* shader descriptions; their token arrays are freed in util_destroy_blit() */
   struct pipe_shader_state vert_shader;
   struct pipe_shader_state frag_shader;
   void *vs;   /**< handle returned by util_make_vertex_passthrough_shader() */
   void *fs;   /**< handle returned by util_make_fragment_tex_shader() */

   struct pipe_buffer *vbuf;  /**< quad vertices */
   float vertices[4][2][4];   /**< vertex/texcoords for quad */
};
+ */ +struct blit_state * +util_create_blit(struct pipe_context *pipe, struct cso_context *cso) +{ + struct blit_state *ctx; + uint i; + + ctx = CALLOC_STRUCT(blit_state); + if (!ctx) + return NULL; + + ctx->pipe = pipe; + ctx->cso = cso; + + /* disabled blending/masking */ + memset(&ctx->blend, 0, sizeof(ctx->blend)); + ctx->blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + ctx->blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + ctx->blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + ctx->blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + ctx->blend.colormask = PIPE_MASK_RGBA; + + /* no-op depth/stencil/alpha */ + memset(&ctx->depthstencil, 0, sizeof(ctx->depthstencil)); + + /* rasterizer */ + memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer)); + ctx->rasterizer.front_winding = PIPE_WINDING_CW; + ctx->rasterizer.cull_mode = PIPE_WINDING_NONE; + ctx->rasterizer.bypass_clipping = 1; + /*ctx->rasterizer.bypass_vs = 1;*/ + ctx->rasterizer.gl_rasterization_rules = 1; + + /* samplers */ + memset(&ctx->sampler, 0, sizeof(ctx->sampler)); + ctx->sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + ctx->sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + ctx->sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + ctx->sampler.min_img_filter = 0; /* set later */ + ctx->sampler.mag_img_filter = 0; /* set later */ + ctx->sampler.normalized_coords = 1; + + /* viewport (identity, we setup vertices in wincoords) */ + ctx->viewport.scale[0] = 1.0; + ctx->viewport.scale[1] = 1.0; + ctx->viewport.scale[2] = 1.0; + ctx->viewport.scale[3] = 1.0; + ctx->viewport.translate[0] = 0.0; + ctx->viewport.translate[1] = 0.0; + ctx->viewport.translate[2] = 0.0; + ctx->viewport.translate[3] = 0.0; + + /* vertex shader */ + { + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC }; + const uint semantic_indexes[] = { 0, 0 }; + ctx->vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names, + semantic_indexes, + 
&ctx->vert_shader); + } + + /* fragment shader */ + ctx->fs = util_make_fragment_tex_shader(pipe, &ctx->frag_shader); + + ctx->vbuf = pipe_buffer_create(pipe->screen, + 32, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(ctx->vertices)); + if (!ctx->vbuf) { + FREE(ctx); + ctx->pipe->delete_fs_state(ctx->pipe, ctx->fs); + ctx->pipe->delete_vs_state(ctx->pipe, ctx->vs); + return NULL; + } + + /* init vertex data that doesn't change */ + for (i = 0; i < 4; i++) { + ctx->vertices[i][0][3] = 1.0f; /* w */ + ctx->vertices[i][1][2] = 0.0f; /* r */ + ctx->vertices[i][1][3] = 1.0f; /* q */ + } + + return ctx; +} + + +/** + * Destroy a blit context + */ +void +util_destroy_blit(struct blit_state *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + + pipe->delete_vs_state(pipe, ctx->vs); + pipe->delete_fs_state(pipe, ctx->fs); + + FREE((void*) ctx->vert_shader.tokens); + FREE((void*) ctx->frag_shader.tokens); + + pipe_buffer_reference(pipe->screen, &ctx->vbuf, NULL); + + FREE(ctx); +} + + +/** + * Setup vertex data for the textured quad we'll draw. 
+ * Note: y=0=top + */ +static void +setup_vertex_data(struct blit_state *ctx, + float x0, float y0, float x1, float y1, float z) +{ + void *buf; + + ctx->vertices[0][0][0] = x0; + ctx->vertices[0][0][1] = y0; + ctx->vertices[0][0][2] = z; + ctx->vertices[0][1][0] = 0.0f; /*s*/ + ctx->vertices[0][1][1] = 0.0f; /*t*/ + + ctx->vertices[1][0][0] = x1; + ctx->vertices[1][0][1] = y0; + ctx->vertices[1][0][2] = z; + ctx->vertices[1][1][0] = 1.0f; /*s*/ + ctx->vertices[1][1][1] = 0.0f; /*t*/ + + ctx->vertices[2][0][0] = x1; + ctx->vertices[2][0][1] = y1; + ctx->vertices[2][0][2] = z; + ctx->vertices[2][1][0] = 1.0f; + ctx->vertices[2][1][1] = 1.0f; + + ctx->vertices[3][0][0] = x0; + ctx->vertices[3][0][1] = y1; + ctx->vertices[3][0][2] = z; + ctx->vertices[3][1][0] = 0.0f; + ctx->vertices[3][1][1] = 1.0f; + + buf = pipe_buffer_map(ctx->pipe->screen, ctx->vbuf, + PIPE_BUFFER_USAGE_CPU_WRITE); + + memcpy(buf, ctx->vertices, sizeof(ctx->vertices)); + + pipe_buffer_unmap(ctx->pipe->screen, ctx->vbuf); +} + + +/** + * Setup vertex data for the textured quad we'll draw. 
+ * Note: y=0=top + */ +static void +setup_vertex_data_tex(struct blit_state *ctx, + float x0, float y0, float x1, float y1, + float s0, float t0, float s1, float t1, + float z) +{ + void *buf; + + ctx->vertices[0][0][0] = x0; + ctx->vertices[0][0][1] = y0; + ctx->vertices[0][0][2] = z; + ctx->vertices[0][1][0] = s0; /*s*/ + ctx->vertices[0][1][1] = t0; /*t*/ + + ctx->vertices[1][0][0] = x1; + ctx->vertices[1][0][1] = y0; + ctx->vertices[1][0][2] = z; + ctx->vertices[1][1][0] = s1; /*s*/ + ctx->vertices[1][1][1] = t0; /*t*/ + + ctx->vertices[2][0][0] = x1; + ctx->vertices[2][0][1] = y1; + ctx->vertices[2][0][2] = z; + ctx->vertices[2][1][0] = s1; + ctx->vertices[2][1][1] = t1; + + ctx->vertices[3][0][0] = x0; + ctx->vertices[3][0][1] = y1; + ctx->vertices[3][0][2] = z; + ctx->vertices[3][1][0] = s0; + ctx->vertices[3][1][1] = t1; + + buf = pipe_buffer_map(ctx->pipe->screen, ctx->vbuf, + PIPE_BUFFER_USAGE_CPU_WRITE); + + memcpy(buf, ctx->vertices, sizeof(ctx->vertices)); + + pipe_buffer_unmap(ctx->pipe->screen, ctx->vbuf); +} +/** + * Copy pixel block from src surface to dst surface. + * Overlapping regions are acceptable. + * XXX need some control over blitting Z and/or stencil. 
+ */ +void +util_blit_pixels(struct blit_state *ctx, + struct pipe_surface *src, + int srcX0, int srcY0, + int srcX1, int srcY1, + struct pipe_surface *dst, + int dstX0, int dstY0, + int dstX1, int dstY1, + float z, uint filter) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_texture texTemp, *tex; + struct pipe_surface *texSurf; + struct pipe_framebuffer_state fb; + const int srcW = abs(srcX1 - srcX0); + const int srcH = abs(srcY1 - srcY0); + const int srcLeft = MIN2(srcX0, srcX1); + const int srcTop = MIN2(srcY0, srcY1); + + assert(filter == PIPE_TEX_MIPFILTER_NEAREST || + filter == PIPE_TEX_MIPFILTER_LINEAR); + + if (srcLeft != srcX0) { + /* left-right flip */ + int tmp = dstX0; + dstX0 = dstX1; + dstX1 = tmp; + } + + if (srcTop != srcY0) { + /* up-down flip */ + int tmp = dstY0; + dstY0 = dstY1; + dstY1 = tmp; + } + + assert(screen->is_format_supported(screen, src->format, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)); + assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)); + + if(dst->format == src->format && (dstX1 - dstX0) == srcW && (dstY1 - dstY0) == srcH) { + /* FIXME: this will most surely fail for overlapping rectangles */ + pipe->surface_copy(pipe, FALSE, + dst, dstX0, dstY0, /* dest */ + src, srcX0, srcY0, /* src */ + srcW, srcH); /* size */ + return; + } + + assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)); + + /* + * XXX for now we're always creating a temporary texture. + * Strictly speaking that's not always needed. 
+ */ + + /* create temp texture */ + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = src->format; + texTemp.last_level = 0; + texTemp.width[0] = srcW; + texTemp.height[0] = srcH; + texTemp.depth[0] = 1; + texTemp.compressed = 0; + pf_get_block(src->format, &texTemp.block); + + tex = screen->texture_create(screen, &texTemp); + if (!tex) + return; + + texSurf = screen->get_tex_surface(screen, tex, 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_WRITE); + + /* load temp texture */ + pipe->surface_copy(pipe, FALSE, + texSurf, 0, 0, /* dest */ + src, srcLeft, srcTop, /* src */ + srcW, srcH); /* size */ + + /* free the surface, update the texture if necessary. + */ + screen->tex_surface_release(screen, &texSurf); + + /* save state (restored below) */ + cso_save_blend(ctx->cso); + cso_save_depth_stencil_alpha(ctx->cso); + cso_save_rasterizer(ctx->cso); + cso_save_samplers(ctx->cso); + cso_save_sampler_textures(ctx->cso); + cso_save_framebuffer(ctx->cso); + cso_save_fragment_shader(ctx->cso); + cso_save_vertex_shader(ctx->cso); + cso_save_viewport(ctx->cso); + + /* set misc state we care about */ + cso_set_blend(ctx->cso, &ctx->blend); + cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); + cso_set_rasterizer(ctx->cso, &ctx->rasterizer); + cso_set_viewport(ctx->cso, &ctx->viewport); + + /* sampler */ + ctx->sampler.min_img_filter = filter; + ctx->sampler.mag_img_filter = filter; + cso_single_sampler(ctx->cso, 0, &ctx->sampler); + cso_single_sampler_done(ctx->cso); + + /* texture */ + cso_set_sampler_textures(ctx->cso, 1, &tex); + + /* shaders */ + cso_set_fragment_shader_handle(ctx->cso, ctx->fs); + cso_set_vertex_shader_handle(ctx->cso, ctx->vs); + + /* drawing dest */ + memset(&fb, 0, sizeof(fb)); + fb.width = dst->width; + fb.height = dst->height; + fb.num_cbufs = 1; + fb.cbufs[0] = dst; + cso_set_framebuffer(ctx->cso, &fb); + + /* draw quad */ + setup_vertex_data(ctx, + (float) dstX0, (float) dstY0, + (float) dstX1, (float) dstY1, 
z); + + util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ + + /* restore state we changed */ + cso_restore_blend(ctx->cso); + cso_restore_depth_stencil_alpha(ctx->cso); + cso_restore_rasterizer(ctx->cso); + cso_restore_samplers(ctx->cso); + cso_restore_sampler_textures(ctx->cso); + cso_restore_framebuffer(ctx->cso); + cso_restore_fragment_shader(ctx->cso); + cso_restore_vertex_shader(ctx->cso); + cso_restore_viewport(ctx->cso); + + screen->texture_release(screen, &tex); +} + +/** + * Copy pixel block from src texture to dst surface. + * Overlapping regions are acceptable. + * + * XXX Should support selection of level. + * XXX need some control over blitting Z and/or stencil. + */ +void +util_blit_pixels_tex(struct blit_state *ctx, + struct pipe_texture *tex, + int srcX0, int srcY0, + int srcX1, int srcY1, + struct pipe_surface *dst, + int dstX0, int dstY0, + int dstX1, int dstY1, + float z, uint filter) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_framebuffer_state fb; + float s0, t0, s1, t1; + + assert(filter == PIPE_TEX_MIPFILTER_NEAREST || + filter == PIPE_TEX_MIPFILTER_LINEAR); + + assert(tex->width[0] != 0); + assert(tex->height[0] != 0); + + s0 = srcX0 / (float)tex->width[0]; + s1 = srcX1 / (float)tex->width[0]; + t0 = srcY0 / (float)tex->height[0]; + t1 = srcY1 / (float)tex->height[0]; + + assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)); + + /* save state (restored below) */ + cso_save_blend(ctx->cso); + cso_save_depth_stencil_alpha(ctx->cso); + cso_save_rasterizer(ctx->cso); + cso_save_samplers(ctx->cso); + cso_save_sampler_textures(ctx->cso); + cso_save_framebuffer(ctx->cso); + cso_save_fragment_shader(ctx->cso); + cso_save_vertex_shader(ctx->cso); + cso_save_viewport(ctx->cso); + + /* set misc state we care about */ + cso_set_blend(ctx->cso, &ctx->blend); + 
cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); + cso_set_rasterizer(ctx->cso, &ctx->rasterizer); + cso_set_viewport(ctx->cso, &ctx->viewport); + + /* sampler */ + ctx->sampler.min_img_filter = filter; + ctx->sampler.mag_img_filter = filter; + cso_single_sampler(ctx->cso, 0, &ctx->sampler); + cso_single_sampler_done(ctx->cso); + + /* texture */ + cso_set_sampler_textures(ctx->cso, 1, &tex); + + /* shaders */ + cso_set_fragment_shader_handle(ctx->cso, ctx->fs); + cso_set_vertex_shader_handle(ctx->cso, ctx->vs); + + /* drawing dest */ + memset(&fb, 0, sizeof(fb)); + fb.width = dst->width; + fb.height = dst->height; + fb.num_cbufs = 1; + fb.cbufs[0] = dst; + cso_set_framebuffer(ctx->cso, &fb); + + /* draw quad */ + setup_vertex_data_tex(ctx, + (float) dstX0, (float) dstY0, + (float) dstX1, (float) dstY1, + s0, t0, s1, t1, + z); + + util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ + + /* restore state we changed */ + cso_restore_blend(ctx->cso); + cso_restore_depth_stencil_alpha(ctx->cso); + cso_restore_rasterizer(ctx->cso); + cso_restore_samplers(ctx->cso); + cso_restore_sampler_textures(ctx->cso); + cso_restore_framebuffer(ctx->cso); + cso_restore_fragment_shader(ctx->cso); + cso_restore_vertex_shader(ctx->cso); + cso_restore_viewport(ctx->cso); +} diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h new file mode 100644 index 0000000000..308075698f --- /dev/null +++ b/src/gallium/auxiliary/util/u_blit.h @@ -0,0 +1,78 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
 *
 **************************************************************************/


#ifndef U_BLIT_H
#define U_BLIT_H


#ifdef __cplusplus
extern "C" {
#endif


/* Only pointers to these are used below, so forward declarations suffice. */
struct pipe_context;
struct pipe_surface;
struct pipe_texture;
struct cso_context;


/* Opaque blit helper context; the definition is not part of this header. */
struct blit_state;


/*
 * Create a blit helper context for the given pipe and CSO contexts.
 * NOTE(review): presumably returns NULL on failure and the result is
 * released with util_destroy_blit() -- confirm against u_blit.c.
 */
extern struct blit_state *
util_create_blit(struct pipe_context *pipe, struct cso_context *cso);

/* Destroy a context obtained from util_create_blit(). */
extern void
util_destroy_blit(struct blit_state *ctx);

/*
 * Copy a pixel block from the src surface to the dst surface.
 * z is the depth used for the drawn quad; filter must be
 * PIPE_TEX_MIPFILTER_NEAREST or PIPE_TEX_MIPFILTER_LINEAR.
 */
extern void
util_blit_pixels(struct blit_state *ctx,
                 struct pipe_surface *src,
                 int srcX0, int srcY0,
                 int srcX1, int srcY1,
                 struct pipe_surface *dst,
                 int dstX0, int dstY0,
                 int dstX1, int dstY1,
                 float z, uint filter);

/*
 * Copy a pixel block from texture tex to the dst surface.
 * The source rectangle is given in pixels; z and filter are as for
 * util_blit_pixels().
 */
extern void
util_blit_pixels_tex(struct blit_state *ctx,
                     struct pipe_texture *tex,
                     int srcX0, int srcY0,
                     int srcX1, int srcY1,
                     struct pipe_surface *dst,
                     int dstX0, int dstY0,
                     int dstX1, int dstY1,
                     float z, uint filter);


#ifdef __cplusplus
}
#endif

#endif
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c new file mode 100644 index 0000000000..d9f2f8fc28 --- /dev/null +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -0,0 +1,506 @@
/**************************************************************************
 *
 * Copyright 2008 Dennis Smit
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Based on the work of Eric Anholt <anholt@FreeBSD.org> + */ + +/* FIXME: clean this entire file up */ + +#include "u_cpu_detect.h" + +#ifdef __linux__ +#define OS_LINUX +#endif +#ifdef WIN32 +#define OS_WIN32 +#endif + +#if defined(ARCH_POWERPC) +#if defined(OS_DARWIN) +#include <sys/sysctl.h> +#else +#include <signal.h> +#include <setjmp.h> +#endif +#endif + +#if defined(OS_NETBSD) || defined(OS_OPENBSD) +#include <sys/param.h> +#include <sys/sysctl.h> +#include <machine/cpu.h> +#endif + +#if defined(OS_FREEBSD) +#include <sys/types.h> +#include <sys/sysctl.h> +#endif + +#if defined(OS_LINUX) +#include <signal.h> +#endif + +#if defined(OS_WIN32) +#include <windows.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> + + +static struct cpu_detect_caps __cpu_detect_caps; +static int __cpu_detect_initialized = 0; + +static int has_cpuid(void); +static int cpuid(unsigned int ax, unsigned int *p); + +/* The sigill handlers */ +#if defined(ARCH_X86) /* x86 (linux katmai handler check thing) */ +#if defined(OS_LINUX) && defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC) +static void sigill_handler_sse(int signal, struct sigcontext sc) +{ + /* Both the "xorps %%xmm0,%%xmm0" and "divps %xmm0,%%xmm1" + * instructions are 3 bytes long. 
We must increment the instruction + * pointer manually to avoid repeated execution of the offending + * instruction. + * + * If the SIGILL is caused by a divide-by-zero when unmasked + * exceptions aren't supported, the SIMD FPU status and control + * word will be restored at the end of the test, so we don't need + * to worry about doing it here. Besides, we may not be able to... + */ + sc.eip += 3; + + __cpu_detect_caps.hasSSE=0; +} + +static void sigfpe_handler_sse(int signal, struct sigcontext sc) +{ + if (sc.fpstate->magic != 0xffff) { + /* Our signal context has the extended FPU state, so reset the + * divide-by-zero exception mask and clear the divide-by-zero + * exception bit. + */ + sc.fpstate->mxcsr |= 0x00000200; + sc.fpstate->mxcsr &= 0xfffffffb; + } else { + /* If we ever get here, we're completely hosed. + */ + } +} +#endif +#endif /* OS_LINUX && _POSIX_SOURCE && X86_FXSR_MAGIC */ + +#if defined(OS_WIN32) +LONG CALLBACK win32_sig_handler_sse(EXCEPTION_POINTERS* ep) +{ + if(ep->ExceptionRecord->ExceptionCode==EXCEPTION_ILLEGAL_INSTRUCTION){ + ep->ContextRecord->Eip +=3; + __cpu_detect_caps.hasSSE=0; + return EXCEPTION_CONTINUE_EXECUTION; + } + return EXCEPTION_CONTINUE_SEARCH; +} +#endif /* OS_WIN32 */ + + +#if defined(ARCH_POWERPC) && !defined(OS_DARWIN) +static sigjmp_buf __lv_powerpc_jmpbuf; +static volatile sig_atomic_t __lv_powerpc_canjump = 0; + +static void sigill_handler (int sig); + +static void sigill_handler (int sig) +{ + if (!__lv_powerpc_canjump) { + signal (sig, SIG_DFL); + raise (sig); + } + + __lv_powerpc_canjump = 0; + siglongjmp(__lv_powerpc_jmpbuf, 1); +} + +static void check_os_altivec_support(void) +{ +#if defined(OS_DARWIN) + int sels[2] = {CTL_HW, HW_VECTORUNIT}; + int has_vu = 0; + int len = sizeof (has_vu); + int err; + + err = sysctl(sels, 2, &has_vu, &len, NULL, 0); + + if (err == 0) { + if (has_vu != 0) { + __cpu_detect_caps.hasAltiVec = 1; + } + } +#else /* !OS_DARWIN */ + /* no Darwin, do it the brute-force way */ + /* 
this is borrowed from the libmpeg2 library */ + signal(SIGILL, sigill_handler); + if (sigsetjmp(__lv_powerpc_jmpbuf, 1)) { + signal(SIGILL, SIG_DFL); + } else { + __lv_powerpc_canjump = 1; + + __asm __volatile + ("mtspr 256, %0\n\t" + "vand %%v0, %%v0, %%v0" + : + : "r" (-1)); + + signal(SIGILL, SIG_DFL); + __cpu_detect_caps.hasAltiVec = 1; + } +#endif +} +#endif + +/* If we're running on a processor that can do SSE, let's see if we + * are allowed to or not. This will catch 2.4.0 or later kernels that + * haven't been configured for a Pentium III but are running on one, + * and RedHat patched 2.2 kernels that have broken exception handling + * support for user space apps that do SSE. + */ +static void check_os_katmai_support(void) +{ +#if defined(ARCH_X86) +#if defined(OS_FREEBSD) + int has_sse=0, ret; + int len = sizeof (has_sse); + + ret = sysctlbyname("hw.instruction_sse", &has_sse, &len, NULL, 0); + if (ret || !has_sse) + __cpu_detect_caps.hasSSE=0; + +#elif defined(OS_NETBSD) || defined(OS_OPENBSD) + int has_sse, has_sse2, ret, mib[2]; + int varlen; + + mib[0] = CTL_MACHDEP; + mib[1] = CPU_SSE; + varlen = sizeof (has_sse); + + ret = sysctl(mib, 2, &has_sse, &varlen, NULL, 0); + if (ret < 0 || !has_sse) { + __cpu_detect_caps.hasSSE = 0; + } else { + __cpu_detect_caps.hasSSE = 1; + } + + mib[1] = CPU_SSE2; + varlen = sizeof (has_sse2); + ret = sysctl(mib, 2, &has_sse2, &varlen, NULL, 0); + if (ret < 0 || !has_sse2) { + __cpu_detect_caps.hasSSE2 = 0; + } else { + __cpu_detect_caps.hasSSE2 = 1; + } + __cpu_detect_caps.hasSSE = 0; /* FIXME ?!?!? */ + +#elif defined(OS_WIN32) + LPTOP_LEVEL_EXCEPTION_FILTER exc_fil; + if (__cpu_detect_caps.hasSSE) { + exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse); + __asm __volatile ("xorps %xmm0, %xmm0"); + SetUnhandledExceptionFilter(exc_fil); + } +#elif defined(OS_LINUX) + struct sigaction saved_sigill; + struct sigaction saved_sigfpe; + + /* Save the original signal handlers. 
+ */ + sigaction(SIGILL, NULL, &saved_sigill); + sigaction(SIGFPE, NULL, &saved_sigfpe); + + signal(SIGILL, (void (*)(int))sigill_handler_sse); + signal(SIGFPE, (void (*)(int))sigfpe_handler_sse); + + /* Emulate test for OSFXSR in CR4. The OS will set this bit if it + * supports the extended FPU save and restore required for SSE. If + * we execute an SSE instruction on a PIII and get a SIGILL, the OS + * doesn't support Streaming SIMD Exceptions, even if the processor + * does. + */ + if (__cpu_detect_caps.hasSSE) { + __asm __volatile ("xorps %xmm1, %xmm0"); + } + + /* Emulate test for OSXMMEXCPT in CR4. The OS will set this bit if + * it supports unmasked SIMD FPU exceptions. If we unmask the + * exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS + * doesn't support unmasked SIMD FPU exceptions. If we get a SIGFPE + * as expected, we're okay but we need to clean up after it. + * + * Are we being too stringent in our requirement that the OS support + * unmasked exceptions? Certain RedHat 2.2 kernels enable SSE by + * setting CR4.OSFXSR but don't support unmasked exceptions. Win98 + * doesn't even support them. We at least know the user-space SSE + * support is good in kernels that do support unmasked exceptions, + * and therefore to be safe I'm going to leave this test in here. + */ + if (__cpu_detect_caps.hasSSE) { + // test_os_katmai_exception_support(); + } + + /* Restore the original signal handlers. + */ + sigaction(SIGILL, &saved_sigill, NULL); + sigaction(SIGFPE, &saved_sigfpe, NULL); + +#else + /* We can't use POSIX signal handling to test the availability of + * SSE, so we disable it by default. 
+ */ + __cpu_detect_caps.hasSSE = 0; +#endif /* __linux__ */ +#endif +} + + +static int has_cpuid(void) +{ +#if defined(ARCH_X86) + int a, c; + + __asm __volatile + ("pushf\n" + "popl %0\n" + "movl %0, %1\n" + "xorl $0x200000, %0\n" + "push %0\n" + "popf\n" + "pushf\n" + "popl %0\n" + : "=a" (a), "=c" (c) + : + : "cc"); + + return a != c; +#else + return 0; +#endif +} + +static int cpuid(unsigned int ax, unsigned int *p) +{ +#if defined(ARCH_X86) + unsigned int flags; + + __asm __volatile + ("movl %%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl %%ebx, %%esi" + : "=a" (p[0]), "=S" (p[1]), + "=c" (p[2]), "=d" (p[3]) + : "0" (ax)); + + return 0; +#else + return -1; +#endif +} + +void cpu_detect_initialize() +{ + unsigned int regs[4]; + unsigned int regs2[4]; + + int mib[2], ncpu; + int len; + + memset(&__cpu_detect_caps, 0, sizeof (struct cpu_detect_caps)); + + /* Check for arch type */ +#if defined(ARCH_MIPS) + __cpu_detect_caps.type = CPU_DETECT_TYPE_MIPS; +#elif defined(ARCH_ALPHA) + __cpu_detect_caps.type = CPU_DETECT_TYPE_ALPHA; +#elif defined(ARCH_SPARC) + __cpu_detect_caps.type = CPU_DETECT_TYPE_SPARC; +#elif defined(ARCH_X86) + __cpu_detect_caps.type = CPU_DETECT_TYPE_X86; +#elif defined(ARCH_POWERPC) + __cpu_detect_caps.type = CPU_DETECT_TYPE_POWERPC; +#else + __cpu_detect_caps.type = CPU_DETECT_TYPE_OTHER; +#endif + + /* Count the number of CPUs in system */ +#if !defined(OS_WIN32) && !defined(OS_UNKNOWN) && defined(_SC_NPROCESSORS_ONLN) + __cpu_detect_caps.nrcpu = sysconf(_SC_NPROCESSORS_ONLN); + if (__cpu_detect_caps.nrcpu == -1) + __cpu_detect_caps.nrcpu = 1; + +#elif defined(OS_NETBSD) || defined(OS_FREEBSD) || defined(OS_OPENBSD) + + mib[0] = CTL_HW; + mib[1] = HW_NCPU; + + len = sizeof (ncpu); + sysctl(mib, 2, &ncpu, &len, NULL, 0); + __cpu_detect_caps.nrcpu = ncpu; + +#else + __cpu_detect_caps.nrcpu = 1; +#endif + +#if defined(ARCH_X86) + /* No cpuid, old 486 or lower */ + if (has_cpuid() == 0) + return; + + __cpu_detect_caps.cacheline = 32; + + /* Get max 
cpuid level */ + cpuid(0x00000000, regs); + + if (regs[0] >= 0x00000001) { + unsigned int cacheline; + + cpuid (0x00000001, regs2); + + __cpu_detect_caps.x86cpuType = (regs2[0] >> 8) & 0xf; + if (__cpu_detect_caps.x86cpuType == 0xf) + __cpu_detect_caps.x86cpuType = 8 + ((regs2[0] >> 20) & 255); /* use extended family (P4, IA64) */ + + /* general feature flags */ + __cpu_detect_caps.hasTSC = (regs2[3] & (1 << 8 )) >> 8; /* 0x0000010 */ + __cpu_detect_caps.hasMMX = (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */ + __cpu_detect_caps.hasSSE = (regs2[3] & (1 << 25 )) >> 25; /* 0x2000000 */ + __cpu_detect_caps.hasSSE2 = (regs2[3] & (1 << 26 )) >> 26; /* 0x4000000 */ + __cpu_detect_caps.hasSSE3 = (regs2[2] & (1)); /* 0x0000001 */ + __cpu_detect_caps.hasSSSE3 = (regs2[2] & (1 << 9 )) >> 9; /* 0x0000020 */ + __cpu_detect_caps.hasMMX2 = __cpu_detect_caps.hasSSE; /* SSE cpus supports mmxext too */ + + cacheline = ((regs2[1] >> 8) & 0xFF) * 8; + if (cacheline > 0) + __cpu_detect_caps.cacheline = cacheline; + } + + cpuid(0x80000000, regs); + + if (regs[0] >= 0x80000001) { + + cpuid(0x80000001, regs2); + + __cpu_detect_caps.hasMMX |= (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */ + __cpu_detect_caps.hasMMX2 |= (regs2[3] & (1 << 22 )) >> 22; /* 0x400000 */ + __cpu_detect_caps.has3DNow = (regs2[3] & (1 << 31 )) >> 31; /* 0x80000000 */ + __cpu_detect_caps.has3DNowExt = (regs2[3] & (1 << 30 )) >> 30; + } + + if (regs[0] >= 0x80000006) { + cpuid(0x80000006, regs2); + __cpu_detect_caps.cacheline = regs2[2] & 0xFF; + } + + +#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_CYGWIN) || defined(OS_OPENBSD) + if (__cpu_detect_caps.hasSSE) + check_os_katmai_support(); + + if (!__cpu_detect_caps.hasSSE) { + __cpu_detect_caps.hasSSE2 = 0; + __cpu_detect_caps.hasSSE3 = 0; + __cpu_detect_caps.hasSSSE3 = 0; + } +#else + __cpu_detect_caps.hasSSE = 0; + __cpu_detect_caps.hasSSE2 = 0; + __cpu_detect_caps.hasSSE3 = 0; + __cpu_detect_caps.hasSSSE3 = 0; +#endif +#endif /* 
ARCH_X86 */ + +#if defined(ARCH_POWERPC) + check_os_altivec_support(); +#endif /* ARCH_POWERPC */ + + __cpu_detect_initialized = 1; +} + +struct cpu_detect_caps *cpu_detect_get_caps() +{ + return &__cpu_detect_caps; +} + +/* The getters and setters for feature flags */ +int cpu_detect_get_tsc() +{ + return __cpu_detect_caps.hasTSC; +} + +int cpu_detect_get_mmx() +{ + return __cpu_detect_caps.hasMMX; +} + +int cpu_detect_get_mmx2() +{ + return __cpu_detect_caps.hasMMX2; +} + +int cpu_detect_get_sse() +{ + return __cpu_detect_caps.hasSSE; +} + +int cpu_detect_get_sse2() +{ + return __cpu_detect_caps.hasSSE2; +} + +int cpu_detect_get_sse3() +{ + return __cpu_detect_caps.hasSSE3; +} + +int cpu_detect_get_ssse3() +{ + return __cpu_detect_caps.hasSSSE3; +} + +int cpu_detect_get_3dnow() +{ + return __cpu_detect_caps.has3DNow; +} + +int cpu_detect_get_3dnow2() +{ + return __cpu_detect_caps.has3DNowExt; +} + +int cpu_detect_get_altivec() +{ + return __cpu_detect_caps.hasAltiVec; +} + diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h new file mode 100644 index 0000000000..1612d49286 --- /dev/null +++ b/src/gallium/auxiliary/util/u_cpu_detect.h @@ -0,0 +1,78 @@ +/************************************************************************** + * + * Copyright 2008 Dennis Smit + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 ***************************************************************************/

/*
 * Based on the work of Eric Anholt <anholt@FreeBSD.org>
 */

#ifndef _CPU_DETECT_H
#define _CPU_DETECT_H

/* CPU architecture family, chosen at build time from the ARCH_* macros. */
typedef enum {
   CPU_DETECT_TYPE_MIPS,
   CPU_DETECT_TYPE_ALPHA,
   CPU_DETECT_TYPE_SPARC,
   CPU_DETECT_TYPE_X86,
   CPU_DETECT_TYPE_POWERPC,
   CPU_DETECT_TYPE_OTHER
} cpu_detect_type;

/* Detection results.  The has* members are 0/1 flags. */
struct cpu_detect_caps {
   cpu_detect_type type;
   int nrcpu;          /* number of CPUs */

   /* Feature flags */
   int x86cpuType;     /* x86 family number taken from CPUID */
   int cacheline;      /* cache line size in bytes, from CPUID */

   int hasTSC;
   int hasMMX;
   int hasMMX2;
   int hasSSE;
   int hasSSE2;
   int hasSSE3;
   int hasSSSE3;
   int has3DNow;
   int has3DNowExt;
   int hasAltiVec;
};

/* prototypes */

/* Run detection; call before using any of the getters below. */
void cpu_detect_initialize(void);
/* Pointer to the capability record filled in by cpu_detect_initialize(). */
struct cpu_detect_caps *cpu_detect_get_caps(void);

/* Each getter returns the matching has* flag of the capability record. */
int cpu_detect_get_tsc(void);
int cpu_detect_get_mmx(void);
int cpu_detect_get_mmx2(void);
int cpu_detect_get_sse(void);
int cpu_detect_get_sse2(void);
int cpu_detect_get_sse3(void);
int cpu_detect_get_ssse3(void);
int cpu_detect_get_3dnow(void);
int cpu_detect_get_3dnow2(void);   /* returns has3DNowExt */
int cpu_detect_get_altivec(void);

#endif /* _CPU_DETECT_H */
diff --git a/src/gallium/auxiliary/util/u_double_list.h b/src/gallium/auxiliary/util/u_double_list.h new file mode 100644 index 0000000000..d108d92e52 --- /dev/null +++ b/src/gallium/auxiliary/util/u_double_list.h @@ -0,0 +1,99 @@
/**************************************************************************
 *
 * Copyright 2006 Tungsten Graphics,
Inc., Bismarck, ND. USA. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + +/** + * \file + * List macros heavily inspired by the Linux kernel + * list handling. No list looping yet. + * + * Is not threadsafe, so common operations need to + * be protected using an external mutex. 
 */

#ifndef _U_DOUBLE_LIST_H_
#define _U_DOUBLE_LIST_H_


#include <stddef.h>


/*
 * Link node of a circular doubly-linked list.  Embed it in the structure
 * that should be listed; a stand-alone node serves as the list head.
 */
struct list_head
{
    struct list_head *prev;
    struct list_head *next;
};


/*
 * All macros take pointers to struct list_head and evaluate their
 * arguments more than once, so pass side-effect-free expressions.
 */

/* Make __item an empty list: both links point back at __item itself. */
#define LIST_INITHEAD(__item)			\
  do {						\
    (__item)->prev = (__item);			\
    (__item)->next = (__item);			\
  } while (0)

/* Insert __item directly after __list (at the front when __list is the head). */
#define LIST_ADD(__item, __list)		\
  do {						\
    (__item)->prev = (__list);			\
    (__item)->next = (__list)->next;		\
    (__list)->next->prev = (__item);		\
    (__list)->next = (__item);			\
  } while (0)

/* Insert __item directly before __list (at the back when __list is the head). */
#define LIST_ADDTAIL(__item, __list)		\
  do {						\
    (__item)->next = (__list);			\
    (__item)->prev = (__list)->prev;		\
    (__list)->prev->next = (__item);		\
    (__list)->prev = (__item);			\
  } while(0)

/* Put __to in __from's place.  __from's own links are left unchanged. */
#define LIST_REPLACE(__from, __to)		\
  do {						\
    (__to)->prev = (__from)->prev;		\
    (__to)->next = (__from)->next;		\
    (__from)->next->prev = (__to);		\
    (__from)->prev->next = (__to);		\
  } while (0)

/* Unlink __item.  Its own links still point at its former neighbours. */
#define LIST_DEL(__item)			\
  do {						\
    (__item)->prev->next = (__item)->next;	\
    (__item)->next->prev = (__item)->prev;	\
  } while(0)

/* Unlink __item and reset it to an empty, self-linked list. */
#define LIST_DELINIT(__item)			\
  do {						\
    (__item)->prev->next = (__item)->next;	\
    (__item)->next->prev = (__item)->prev;	\
    (__item)->next = (__item);			\
    (__item)->prev = (__item);			\
  } while(0)

/*
 * Given __item, a pointer to the list_head member named __field inside a
 * structure of type __type, yield a pointer to that enclosing structure.
 */
#define LIST_ENTRY(__type, __item, __field)   \
    ((__type *)(((char *)(__item)) - offsetof(__type, __field)))


#endif /*_U_DOUBLE_LIST_H_*/
diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c new file mode 100644 index 0000000000..8ecae71b64 --- /dev/null +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -0,0 +1,132 @@
/**************************************************************************
 *
 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "util/u_draw_quad.h" + + +/** + * Draw a simple vertex buffer / primitive. + * Limited to float[4] vertex attribs, tightly packed. 
+ */ +void +util_draw_vertex_buffer(struct pipe_context *pipe, + struct pipe_buffer *vbuf, + uint prim_type, + uint num_verts, + uint num_attribs) +{ + struct pipe_vertex_buffer vbuffer; + struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; + uint i; + + assert(num_attribs <= PIPE_MAX_ATTRIBS); + + /* tell pipe about the vertex buffer */ + vbuffer.buffer = vbuf; + vbuffer.pitch = num_attribs * 4 * sizeof(float); /* vertex size */ + vbuffer.buffer_offset = 0; + pipe->set_vertex_buffers(pipe, 1, &vbuffer); + + /* tell pipe about the vertex attributes */ + for (i = 0; i < num_attribs; i++) { + velements[i].src_offset = i * 4 * sizeof(float); + velements[i].vertex_buffer_index = 0; + velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + velements[i].nr_components = 4; + } + pipe->set_vertex_elements(pipe, num_attribs, velements); + + /* draw */ + pipe->draw_arrays(pipe, prim_type, 0, num_verts); +} + + + +/** + * Draw screen-aligned textured quad. + * Note: this function allocs/destroys a vertex buffer and isn't especially + * efficient. 
+ */ +void +util_draw_texquad(struct pipe_context *pipe, + float x0, float y0, float x1, float y1, float z) +{ + struct pipe_buffer *vbuf; + uint numAttribs = 2, vertexBytes, i, j; + + vertexBytes = 4 * (4 * numAttribs * sizeof(float)); + + /* XXX create one-time */ + vbuf = pipe_buffer_create(pipe->screen, 32, + PIPE_BUFFER_USAGE_VERTEX, vertexBytes); + if (vbuf) { + float *v = (float *) pipe_buffer_map(pipe->screen, vbuf, + PIPE_BUFFER_USAGE_CPU_WRITE); + if (v) { + /* + * Load vertex buffer + */ + for (i = j = 0; i < 4; i++) { + v[j + 2] = z; /* z */ + v[j + 3] = 1.0; /* w */ + v[j + 6] = 0.0; /* r */ + v[j + 7] = 1.0; /* q */ + j += 8; + } + + v[0] = x0; + v[1] = y0; + v[4] = 0.0; /*s*/ + v[5] = 0.0; /*t*/ + + v[8] = x1; + v[9] = y0; + v[12] = 1.0; + v[13] = 0.0; + + v[16] = x1; + v[17] = y1; + v[20] = 1.0; + v[21] = 1.0; + + v[24] = x0; + v[25] = y1; + v[28] = 0.0; + v[29] = 1.0; + + pipe_buffer_unmap(pipe->screen, vbuf); + util_draw_vertex_buffer(pipe, vbuf, PIPE_PRIM_TRIANGLE_FAN, 4, 2); + } + + pipe_buffer_reference(pipe->screen, &vbuf, NULL); + } +} diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h new file mode 100644 index 0000000000..ec4862ead3 --- /dev/null +++ b/src/gallium/auxiliary/util/u_draw_quad.h @@ -0,0 +1,54 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ **************************************************************************/
+
+#ifndef U_DRAWQUAD_H
+#define U_DRAWQUAD_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct pipe_buffer;
+struct pipe_context;
+
+/* Parameter order matches the definition: prim_type, num_verts, num_attribs. */
+extern void
+util_draw_vertex_buffer(struct pipe_context *pipe,
+                        struct pipe_buffer *vbuf,
+                        uint prim_type, uint num_verts, uint num_attribs);
+
+
+extern void
+util_draw_texquad(struct pipe_context *pipe,
+                  float x0, float y0, float x1, float y1, float z);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
new file mode 100644
index 0000000000..9d305ad763
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -0,0 +1,952 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Mipmap generation utility
+ *
+ * @author Brian Paul
+ */
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_debug.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "util/u_memory.h"
+#include "util/u_draw_quad.h"
+#include "util/u_gen_mipmap.h"
+#include "util/u_simple_shaders.h"
+
+#include "tgsi/tgsi_build.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "cso_cache/cso_context.h"
+
+
+/**
+ * Everything needed to generate mipmap levels: the pipe/cso contexts to
+ * draw with, the fixed state that gets bound while drawing, the
+ * pass-through shaders and the quad's vertex buffer.
+ */
+struct gen_mipmap_state
+{
+   struct pipe_context *pipe;
+   struct cso_context *cso;
+
+   /* state bound through the cso context while levels are rendered */
+   struct pipe_blend_state blend;
+   struct pipe_depth_stencil_alpha_state depthstencil;
+   struct pipe_rasterizer_state rasterizer;
+   struct pipe_sampler_state sampler;
+   struct pipe_viewport_state viewport;
+
+   /* shader descriptions (their token arrays are freed on destroy) ... */
+   struct pipe_shader_state vert_shader;
+   struct pipe_shader_state frag_shader;
+   /* ... and the corresponding shader handles */
+   void *vs;
+   void *fs;
+
+   struct pipe_buffer *vbuf;  /**< quad vertices */
+   float vertices[4][2][4];   /**< vertex/texcoords for quad */
+};
+
+
+
+/**
+ * Texel storage layouts understood by the software row-averaging code.
+ */
+enum dtype
+{
+   UBYTE,
+   UBYTE_3_3_2,
+   USHORT,
+   USHORT_4_4_4_4,
+   USHORT_5_6_5,
+   USHORT_1_5_5_5_REV,
+   UINT,
+   FLOAT,
+   HALF_FLOAT
+};
+
+
+typedef ushort half_float;
+
+
+/* half<->float conversion helpers: declarations are currently compiled out */
+#if 0
+extern half_float
+float_to_half(float f);
+
+extern float
+half_to_float(half_float h);
+#endif
+
+
+/**
+ * Average together two rows of a source image to produce a single new
+ * row in the dest image.  It's legal for the two source rows to point
+ * to the same data.  The source width must be equal to either the
+ * dest width or two times the dest width.
+ * \param datatype  texel layout of the source rows and of the dest row
+ *                  (a value of enum dtype: UBYTE, USHORT, FLOAT, ...)
+ * \param comps  number of components per pixel (1..4)
+ * \param srcWidth  width of the source rows, in texels
+ * \param srcRowA  first source row
+ * \param srcRowB  second source row
+ * \param dstWidth  number of texels to produce
+ * \param dstRow  receives dstWidth averaged texels
+ *
+ * Unsupported datatype/comps combinations only emit a debug message and
+ * leave dstRow untouched.  HALF_FLOAT is one of them.
+ */
+static void
+do_row(enum dtype datatype, uint comps, int srcWidth,
+       const void *srcRowA, const void *srcRowB,
+       int dstWidth, void *dstRow)
+{
+   /* Bit-field layout of one packed texel, least significant field first. */
+   struct packed_layout {
+      enum dtype datatype;
+      uint comps;          /* the only component count the layout accepts */
+      uint numFields;
+      uint shift[4];
+      uint mask[4];
+   };
+   static const struct packed_layout packedLayouts[] = {
+      { USHORT_5_6_5,       3, 3, { 0, 5, 11,  0 }, { 0x1f, 0x3f, 0x1f, 0x0 } },
+      { USHORT_4_4_4_4,     4, 4, { 0, 4,  8, 12 }, { 0xf,  0xf,  0xf,  0xf } },
+      /* all three 5-bit fields use 0x1f; one red sample used to be cut to 0xf */
+      { USHORT_1_5_5_5_REV, 4, 4, { 0, 5, 10, 15 }, { 0x1f, 0x1f, 0x1f, 0x1 } },
+      { UBYTE_3_3_2,        3, 3, { 0, 2,  5,  0 }, { 0x3,  0x7,  0x7,  0x0 } }
+   };
+   const uint numLayouts = sizeof(packedLayouts) / sizeof(packedLayouts[0]);
+
+   /* j and k index the two source texels averaged horizontally; when the
+    * widths are equal they are the same texel and no horizontal reduction
+    * takes place.
+    */
+   const uint k0 = (srcWidth == dstWidth) ? 0 : 1;
+   const uint colStride = (srcWidth == dstWidth) ? 1 : 2;
+   const uint width = (uint) dstWidth;
+   const int arrayComps = (comps >= 1 && comps <= 4);
+   uint i, j, k, c;
+
+   assert(comps >= 1);
+   assert(comps <= 4);
+
+   /* This assertion is no longer valid with non-power-of-2 textures
+   assert(srcWidth == dstWidth || srcWidth == 2 * dstWidth);
+   */
+
+   if (datatype == UBYTE && arrayComps) {
+      const ubyte *rowA = (const ubyte *) srcRowA;
+      const ubyte *rowB = (const ubyte *) srcRowB;
+      ubyte *dst = (ubyte *) dstRow;
+      for (i = j = 0, k = k0; i < width;
+           i++, j += colStride, k += colStride) {
+         for (c = 0; c < comps; c++) {
+            dst[i * comps + c] = (rowA[j * comps + c] + rowA[k * comps + c] +
+                                  rowB[j * comps + c] + rowB[k * comps + c]) / 4;
+         }
+      }
+   }
+   else if (datatype == USHORT && arrayComps) {
+      const ushort *rowA = (const ushort *) srcRowA;
+      const ushort *rowB = (const ushort *) srcRowB;
+      ushort *dst = (ushort *) dstRow;
+      for (i = j = 0, k = k0; i < width;
+           i++, j += colStride, k += colStride) {
+         for (c = 0; c < comps; c++) {
+            dst[i * comps + c] = (rowA[j * comps + c] + rowA[k * comps + c] +
+                                  rowB[j * comps + c] + rowB[k * comps + c]) / 4;
+         }
+      }
+   }
+   else if (datatype == FLOAT && arrayComps) {
+      const float *rowA = (const float *) srcRowA;
+      const float *rowB = (const float *) srcRowB;
+      float *dst = (float *) dstRow;
+      for (i = j = 0, k = k0; i < width;
+           i++, j += colStride, k += colStride) {
+         for (c = 0; c < comps; c++) {
+            dst[i * comps + c] = (rowA[j * comps + c] + rowA[k * comps + c] +
+                                  rowB[j * comps + c] + rowB[k * comps + c])
+                                 * 0.25F;
+         }
+      }
+   }
+   else if (datatype == UINT && comps == 1) {
+      const uint *rowA = (const uint *) srcRowA;
+      const uint *rowB = (const uint *) srcRowB;
+      uint *dst = (uint *) dstRow;
+      for (i = j = 0, k = k0; i < width;
+           i++, j += colStride, k += colStride) {
+         /* divide first so the sum cannot overflow 32 bits */
+         dst[i] = rowA[j] / 4 + rowA[k] / 4 + rowB[j] / 4 + rowB[k] / 4;
+      }
+   }
+   else {
+      /* packed formats: average every bit-field of the texel separately */
+      const struct packed_layout *layout = NULL;
+      const int byteTexels = (datatype == UBYTE_3_3_2);
+      uint n;
+
+      for (n = 0; n < numLayouts; n++) {
+         if (packedLayouts[n].datatype == datatype &&
+             packedLayouts[n].comps == comps) {
+            layout = &packedLayouts[n];
+            break;
+         }
+      }
+
+      if (!layout) {
+         debug_printf("bad format in do_row()");
+         return;
+      }
+
+      for (i = j = 0, k = k0; i < width;
+           i++, j += colStride, k += colStride) {
+         uint a0, a1, b0, b1;
+         uint texel = 0;
+
+         if (byteTexels) {
+            a0 = ((const ubyte *) srcRowA)[j];
+            a1 = ((const ubyte *) srcRowA)[k];
+            b0 = ((const ubyte *) srcRowB)[j];
+            b1 = ((const ubyte *) srcRowB)[k];
+         }
+         else {
+            a0 = ((const ushort *) srcRowA)[j];
+            a1 = ((const ushort *) srcRowA)[k];
+            b0 = ((const ushort *) srcRowB)[j];
+            b1 = ((const ushort *) srcRowB)[k];
+         }
+
+         for (c = 0; c < layout->numFields; c++) {
+            const uint shift = layout->shift[c];
+            const uint mask = layout->mask[c];
+            const uint sum = ((a0 >> shift) & mask) + ((a1 >> shift) & mask) +
+                             ((b0 >> shift) & mask) + ((b1 >> shift) & mask);
+            texel |= (sum >> 2) << shift;
+         }
+
+         if (byteTexels)
+            ((ubyte *) dstRow)[i] = (ubyte) texel;
+         else
+            ((ushort *) dstRow)[i] = (ushort) texel;
+      }
+   }
+}
+
+
+/**
+ * Translate a pipe format into the datatype/component-count pair used by
+ * do_row().  Unknown formats assert; with assertions disabled they yield
+ * UBYTE with zero components, which do_row() reports as a bad format.
+ */
+static void
+format_to_type_comps(enum pipe_format pformat,
+                     enum dtype *datatype, uint *comps)
+{
+   switch (pformat) {
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_X8R8G8B8_UNORM:
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_B8G8R8X8_UNORM:
+      *datatype = UBYTE;
+      *comps = 4;
+      return;
+   case PIPE_FORMAT_A1R5G5B5_UNORM:
+      *datatype = USHORT_1_5_5_5_REV;
+      *comps = 4;
+      return;
+   case PIPE_FORMAT_A4R4G4B4_UNORM:
+      *datatype = USHORT_4_4_4_4;
+      *comps = 4;
+      return;
+   case PIPE_FORMAT_R5G6B5_UNORM:
+      *datatype = USHORT_5_6_5;
+      *comps = 3;
+      return;
+   case PIPE_FORMAT_L8_UNORM:
+   case PIPE_FORMAT_A8_UNORM:
+   case PIPE_FORMAT_I8_UNORM:
+      *datatype = UBYTE;
+      *comps = 1;
+      return;
+   case PIPE_FORMAT_A8L8_UNORM:
+      *datatype = UBYTE;
+      *comps = 2;
+      return;
+   default:
+      assert(0);
+      *datatype = UBYTE;
+      *comps = 0;
+      break;
+   }
+}
+
+
+/**
+ * Reduce one row of texels to the next smaller mipmap level.
+ */
+static void
+reduce_1d(enum pipe_format pformat,
+          int srcWidth, const ubyte *srcPtr,
+          int dstWidth, ubyte *dstPtr)
+{
+   enum dtype datatype;
+   uint comps;
+
+   format_to_type_comps(pformat, &datatype, &comps);
+
+   /* we just duplicate the input row, kind of hack, saves code */
+   do_row(datatype, comps,
+          srcWidth, srcPtr, srcPtr,
+          dstWidth, dstPtr);
+}
+
+
+/**
+ * Strides are in bytes.  If zero, it'll be computed as width * bpp.
+ */
+static void
+reduce_2d(enum pipe_format pformat,
+          int srcWidth, int srcHeight,
+          int srcRowStride, const ubyte *srcPtr,
+          int dstWidth, int dstHeight,
+          int dstRowStride, ubyte *dstPtr)
+{
+   enum dtype datatype;
+   uint comps;
+   const int bpt = pf_get_size(pformat);
+   const ubyte *srcA, *srcB;
+   ubyte *dst;
+   int row;
+
+   format_to_type_comps(pformat, &datatype, &comps);
+
+   if (!srcRowStride)
+      srcRowStride = bpt * srcWidth;
+
+   if (!dstRowStride)
+      dstRowStride = bpt * dstWidth;
+
+   /* Compute src and dst pointers; a one-row source is averaged with itself */
+   srcA = srcPtr;
+   if (srcHeight > 1)
+      srcB = srcA + srcRowStride;
+   else
+      srcB = srcA;
+   dst = dstPtr;
+
+   /* each dest row is built from the next pair of source rows */
+   for (row = 0; row < dstHeight; row++) {
+      do_row(datatype, comps,
+             srcWidth, srcA, srcB,
+             dstWidth, dst);
+      srcA += 2 * srcRowStride;
+      srcB += 2 * srcRowStride;
+      dst += dstRowStride;
+   }
+}
+
+
+/**
+ * Map the buffer behind a texture surface for CPU access.
+ * \return pointer to the surface's first byte, or NULL if there is no
+ *         surface or its buffer could not be mapped
+ */
+static ubyte *
+map_tex_surface(struct pipe_screen *screen, struct pipe_surface *surf,
+                uint usage)
+{
+   ubyte *base;
+
+   if (!surf)
+      return NULL;
+
+   base = (ubyte *) pipe_buffer_map(screen, surf->buffer, usage);
+   return base ? base + surf->offset : NULL;
+}
+
+
+/**
+ * Undo map_tex_surface() (if it succeeded) and drop the surface reference.
+ */
+static void
+release_tex_surface(struct pipe_screen *screen, struct pipe_surface **surf,
+                    const ubyte *map)
+{
+   if (!*surf)
+      return;
+
+   if (map)
+      pipe_buffer_unmap(screen, (*surf)->buffer);
+   pipe_surface_reference(surf, NULL);
+}
+
+
+/**
+ * Software path: build levels baseLevel+1..lastLevel of a 1D texture,
+ * each from the level before it.  Stops at the first level that cannot
+ * be mapped.
+ */
+static void
+make_1d_mipmap(struct gen_mipmap_state *ctx,
+               struct pipe_texture *pt,
+               uint face, uint baseLevel, uint lastLevel)
+{
+   struct pipe_context *pipe = ctx->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   const uint zslice = 0;
+   uint dstLevel;
+
+   for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
+      const uint srcLevel = dstLevel - 1;
+      struct pipe_surface *srcSurf, *dstSurf;
+      ubyte *srcMap, *dstMap;
+      int mapped;
+
+      srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice,
+                                        PIPE_BUFFER_USAGE_CPU_READ);
+
+      dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice,
+                                        PIPE_BUFFER_USAGE_CPU_WRITE);
+
+      srcMap = map_tex_surface(screen, srcSurf, PIPE_BUFFER_USAGE_CPU_READ);
+      dstMap = map_tex_surface(screen, dstSurf, PIPE_BUFFER_USAGE_CPU_WRITE);
+      mapped = (srcMap && dstMap);
+
+      if (mapped) {
+         reduce_1d(pt->format,
+                   srcSurf->width, srcMap,
+                   dstSurf->width, dstMap);
+      }
+
+      release_tex_surface(screen, &srcSurf, srcMap);
+      release_tex_surface(screen, &dstSurf, dstMap);
+
+      /* every further level would be derived from the one we could not fill */
+      if (!mapped)
+         break;
+   }
+}
+
+
+/**
+ * Software path: build levels baseLevel+1..lastLevel of one 2D image
+ * (or cube face), each from the level before it.  Stops at the first
+ * level that cannot be mapped.
+ */
+static void
+make_2d_mipmap(struct gen_mipmap_state *ctx,
+               struct pipe_texture *pt,
+               uint face, uint baseLevel, uint lastLevel)
+{
+   struct pipe_context *pipe = ctx->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   const uint zslice = 0;
+   uint dstLevel;
+
+   assert(pt->block.width == 1);
+   assert(pt->block.height == 1);
+
+   for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
+      const uint srcLevel = dstLevel - 1;
+      struct pipe_surface *srcSurf, *dstSurf;
+      ubyte *srcMap, *dstMap;
+      int mapped;
+
+      srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice,
+                                        PIPE_BUFFER_USAGE_CPU_READ);
+      dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice,
+                                        PIPE_BUFFER_USAGE_CPU_WRITE);
+
+      srcMap = map_tex_surface(screen, srcSurf, PIPE_BUFFER_USAGE_CPU_READ);
+      dstMap = map_tex_surface(screen, dstSurf, PIPE_BUFFER_USAGE_CPU_WRITE);
+      mapped = (srcMap && dstMap);
+
+      if (mapped) {
+         reduce_2d(pt->format,
+                   srcSurf->width, srcSurf->height,
+                   srcSurf->stride, srcMap,
+                   dstSurf->width, dstSurf->height,
+                   dstSurf->stride, dstMap);
+      }
+
+      release_tex_surface(screen, &srcSurf, srcMap);
+      release_tex_surface(screen, &dstSurf, dstMap);
+
+      /* every further level would be derived from the one we could not fill */
+      if (!mapped)
+         break;
+   }
+}
+
+
+/**
+ * Not implemented: 3D textures are left untouched.
+ */
+static void
+make_3d_mipmap(struct gen_mipmap_state *ctx,
+               struct pipe_texture *pt,
+               uint face, uint baseLevel, uint lastLevel)
+{
+}
+
+
+/**
+ * Generate mipmap levels on the CPU, dispatching on the texture target.
+ */
+static void
+fallback_gen_mipmap(struct gen_mipmap_state *ctx,
+                    struct pipe_texture *pt,
+                    uint face, uint baseLevel, uint lastLevel)
+{
+   switch (pt->target) {
+   case PIPE_TEXTURE_1D:
+      make_1d_mipmap(ctx, pt, face, baseLevel, lastLevel);
+      break;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_CUBE:
+      make_2d_mipmap(ctx, pt, face, baseLevel, lastLevel);
+      break;
+   case PIPE_TEXTURE_3D:
+      make_3d_mipmap(ctx, pt, face, baseLevel, lastLevel);
+      break;
+   default:
+      assert(0);
+   }
+}
+
+
+/**
+ * Create a mipmap generation context.
+ * The idea is to create one of these and re-use it each time we need to
+ * generate a mipmap.
+ *
+ * \return the new context, or NULL if it or its vertex buffer could not
+ *         be allocated
+ */
+struct gen_mipmap_state *
+util_create_gen_mipmap(struct pipe_context *pipe,
+                       struct cso_context *cso)
+{
+   struct gen_mipmap_state *ctx;
+   uint i;
+
+   ctx = CALLOC_STRUCT(gen_mipmap_state);
+   if (!ctx)
+      return NULL;
+
+   ctx->pipe = pipe;
+   ctx->cso = cso;
+
+   /* disabled blending/masking */
+   memset(&ctx->blend, 0, sizeof(ctx->blend));
+   ctx->blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+   ctx->blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+   ctx->blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
+   ctx->blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
+   ctx->blend.colormask = PIPE_MASK_RGBA;
+
+   /* no-op depth/stencil/alpha */
+   memset(&ctx->depthstencil, 0, sizeof(ctx->depthstencil));
+
+   /* rasterizer */
+   memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer));
+   ctx->rasterizer.front_winding = PIPE_WINDING_CW;
+   ctx->rasterizer.cull_mode = PIPE_WINDING_NONE;
+   ctx->rasterizer.bypass_clipping = 1;
+   /*ctx->rasterizer.bypass_vs = 1;*/
+   ctx->rasterizer.gl_rasterization_rules = 1;
+
+   /* sampler state */
+   memset(&ctx->sampler, 0, sizeof(ctx->sampler));
+   ctx->sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   ctx->sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   ctx->sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
+   ctx->sampler.normalized_coords = 1;
+
+   /* viewport state (identity, verts are in wincoords) */
+   ctx->viewport.scale[0] = 1.0;
+   ctx->viewport.scale[1] = 1.0;
+   ctx->viewport.scale[2] = 1.0;
+   ctx->viewport.scale[3] = 1.0;
+   ctx->viewport.translate[0] = 0.0;
+   ctx->viewport.translate[1] = 0.0;
+   ctx->viewport.translate[2] = 0.0;
+   ctx->viewport.translate[3] = 0.0;
+
+   /* vertex shader */
+   {
+      const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
+                                      TGSI_SEMANTIC_GENERIC };
+      const uint semantic_indexes[] = { 0, 0 };
+      ctx->vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names,
+                                                    semantic_indexes,
+                                                    &ctx->vert_shader);
+   }
+
+   /* fragment shader */
+   ctx->fs = util_make_fragment_tex_shader(pipe, &ctx->frag_shader);
+
+   ctx->vbuf = pipe_buffer_create(pipe->screen,
+                                  32,
+                                  PIPE_BUFFER_USAGE_VERTEX,
+                                  sizeof(ctx->vertices));
+   if (!ctx->vbuf) {
+      /* release the shaders created above instead of leaking them; this
+       * is the same cleanup util_destroy_gen_mipmap() performs
+       */
+      if (ctx->vs)
+         pipe->delete_vs_state(pipe, ctx->vs);
+      if (ctx->fs)
+         pipe->delete_fs_state(pipe, ctx->fs);
+      FREE((void*) ctx->vert_shader.tokens);
+      FREE((void*) ctx->frag_shader.tokens);
+      FREE(ctx);
+      return NULL;
+   }
+
+   /* vertex data that doesn't change */
+   for (i = 0; i < 4; i++) {
+      ctx->vertices[i][0][2] = 0.0f; /* z */
+      ctx->vertices[i][0][3] = 1.0f; /* w */
+      ctx->vertices[i][1][2] = 0.0f; /* r */
+      ctx->vertices[i][1][3] = 1.0f; /* q */
+   }
+
+   return ctx;
+}
+
+
+/**
+ * Fill in the quad's corner positions/texcoords for a width x height
+ * destination and upload them to the vertex buffer.  If the buffer cannot
+ * be mapped the upload is skipped and the buffer keeps its old contents.
+ */
+static void
+set_vertex_data(struct gen_mipmap_state *ctx, float width, float height)
+{
+   void *buf;
+
+   ctx->vertices[0][0][0] = 0.0f; /*x*/
+   ctx->vertices[0][0][1] = 0.0f; /*y*/
+   ctx->vertices[0][1][0] = 0.0f; /*s*/
+   ctx->vertices[0][1][1] = 0.0f; /*t*/
+
+   ctx->vertices[1][0][0] = width;
+   ctx->vertices[1][0][1] = 0.0f;
+   ctx->vertices[1][1][0] = 1.0f;
+   ctx->vertices[1][1][1] = 0.0f;
+
+   ctx->vertices[2][0][0] = width;
+   ctx->vertices[2][0][1] = height;
+   ctx->vertices[2][1][0] = 1.0f;
+   ctx->vertices[2][1][1] = 1.0f;
+
+   ctx->vertices[3][0][0] = 0.0f;
+   ctx->vertices[3][0][1] = height;
+   ctx->vertices[3][1][0] = 0.0f;
+   ctx->vertices[3][1][1] = 1.0f;
+
+   buf = pipe_buffer_map(ctx->pipe->screen, ctx->vbuf,
+                         PIPE_BUFFER_USAGE_CPU_WRITE);
+   if (!buf)
+      return;
+
+   memcpy(buf, ctx->vertices, sizeof(ctx->vertices));
+
+   pipe_buffer_unmap(ctx->pipe->screen, ctx->vbuf);
+}
+
+
+
+/**
+ * Destroy a mipmap generation context
+ */
+void
+util_destroy_gen_mipmap(struct gen_mipmap_state *ctx)
+{
+   struct pipe_context *pipe = ctx->pipe;
+
+   pipe->delete_vs_state(pipe, ctx->vs);
+   pipe->delete_fs_state(pipe, ctx->fs);
+
+   FREE((void*) ctx->vert_shader.tokens);
+   FREE((void*) ctx->frag_shader.tokens);
+
+   pipe_buffer_reference(pipe->screen, &ctx->vbuf, NULL);
+
+   FREE(ctx);
+}
+
+
+/**
+ * Generate mipmap images.  It's assumed all needed texture memory is
+ * already allocated.
+ *
+ * If the texture's format cannot be rendered to, the levels are generated
+ * on the CPU instead.  Otherwise each level is drawn as a textured quad
+ * sampling the previous level; the cso state changed along the way is
+ * saved first and restored before returning.
+ *
+ * \param ctx  the mipmap generation context
+ * \param pt  the texture to generate mipmap levels for
+ * \param face  which cube face to generate mipmaps for (0 for non-cube maps)
+ * \param baseLevel  the first mipmap level to use as a src
+ * \param lastLevel  the last mipmap level to generate
+ * \param filter  the minification filter used to generate mipmap levels
+ *                with; one of PIPE_TEX_FILTER_LINEAR, PIPE_TEX_FILTER_NEAREST
+ */
+void
+util_gen_mipmap(struct gen_mipmap_state *ctx,
+                struct pipe_texture *pt,
+                uint face, uint baseLevel, uint lastLevel, uint filter)
+{
+   struct pipe_context *pipe = ctx->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   struct pipe_framebuffer_state fb;
+   uint dstLevel;
+   uint zslice = 0;
+
+   /* check if we can render in the texture's format */
+   if (!screen->is_format_supported(screen, pt->format, PIPE_TEXTURE_2D,
+                                    PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) {
+      fallback_gen_mipmap(ctx, pt, face, baseLevel, lastLevel);
+      return;
+   }
+
+   /* save state (restored below) */
+   cso_save_blend(ctx->cso);
+   cso_save_depth_stencil_alpha(ctx->cso);
+   cso_save_rasterizer(ctx->cso);
+   cso_save_samplers(ctx->cso);
+   cso_save_sampler_textures(ctx->cso);
+   cso_save_framebuffer(ctx->cso);
+   cso_save_fragment_shader(ctx->cso);
+   cso_save_vertex_shader(ctx->cso);
+   cso_save_viewport(ctx->cso);
+
+   /* bind our state */
+   cso_set_blend(ctx->cso, &ctx->blend);
+   cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil);
+   cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
+   cso_set_viewport(ctx->cso, &ctx->viewport);
+
+   cso_set_fragment_shader_handle(ctx->cso, ctx->fs);
+   cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
+
+   /* init framebuffer state */
+   memset(&fb, 0, sizeof(fb));
+   fb.num_cbufs = 1;
+
+   /* set min/mag to same filter for faster sw speed */
+   ctx->sampler.mag_img_filter = filter;
+   ctx->sampler.min_img_filter = filter;
+
+   /*
+    * XXX for small mipmap levels, it may be faster to use the software
+    * fallback path...
+    */
+   for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
+      const uint srcLevel = dstLevel - 1;
+
+      struct pipe_surface *surf =
+         screen->get_tex_surface(screen, pt, face, dstLevel, zslice,
+                                 PIPE_BUFFER_USAGE_GPU_WRITE);
+
+      /* nothing to render into: stop, but still restore the saved state */
+      if (!surf)
+         break;
+
+      /*
+       * Setup framebuffer / dest surface
+       */
+      fb.cbufs[0] = surf;
+      fb.width = pt->width[dstLevel];
+      fb.height = pt->height[dstLevel];
+      cso_set_framebuffer(ctx->cso, &fb);
+
+      /*
+       * Setup sampler state
+       * Note: we should only have to set the min/max LOD clamps to ensure
+       * we grab texels from the right mipmap level.  But some hardware
+       * has trouble with min clamping so we also set the lod_bias to
+       * try to work around that.
+       */
+      ctx->sampler.min_lod = ctx->sampler.max_lod = (float) srcLevel;
+      ctx->sampler.lod_bias = (float) srcLevel;
+      cso_single_sampler(ctx->cso, 0, &ctx->sampler);
+      cso_single_sampler_done(ctx->cso);
+
+      cso_set_sampler_textures(ctx->cso, 1, &pt);
+
+      /* quad coords in window coords (bypassing clipping, viewport mapping) */
+      set_vertex_data(ctx,
+                      (float) pt->width[dstLevel],
+                      (float) pt->height[dstLevel]);
+      util_draw_vertex_buffer(ctx->pipe, ctx->vbuf,
+                              PIPE_PRIM_TRIANGLE_FAN,
+                              4,  /* verts */
+                              2); /* attribs/vert */
+
+      pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+
+      /* need to signal that the texture has changed _after_ rendering to it */
+      pipe_surface_reference( &surf, NULL );
+   }
+
+   /* restore state we changed */
+   cso_restore_blend(ctx->cso);
+   cso_restore_depth_stencil_alpha(ctx->cso);
+   cso_restore_rasterizer(ctx->cso);
+   cso_restore_samplers(ctx->cso);
+   cso_restore_sampler_textures(ctx->cso);
+   cso_restore_framebuffer(ctx->cso);
+   cso_restore_fragment_shader(ctx->cso);
+   cso_restore_vertex_shader(ctx->cso);
+   cso_restore_viewport(ctx->cso);
+}
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.h b/src/gallium/auxiliary/util/u_gen_mipmap.h
new file mode 100644
index 0000000000..3277024f07
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.h
@@ -0,0 +1,65 @@
+/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef U_GENMIPMAP_H +#define U_GENMIPMAP_H + +#include "pipe/p_state.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct pipe_context; +struct pipe_texture; +struct cso_context; + +struct gen_mipmap_state; + + +extern struct gen_mipmap_state * +util_create_gen_mipmap(struct pipe_context *pipe, struct cso_context *cso); + + +extern void +util_destroy_gen_mipmap(struct gen_mipmap_state *ctx); + + + +extern void +util_gen_mipmap(struct gen_mipmap_state *ctx, + struct pipe_texture *pt, + uint face, uint baseLevel, uint lastLevel, uint filter); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c new file mode 100644 index 0000000000..2c40011923 --- /dev/null +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -0,0 +1,275 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Generic handle table implementation. + * + * @author José Fonseca <jrfonseca@tungstengraphics.com> + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" + +#include "util/u_memory.h" +#include "util/u_handle_table.h" + + +#define HANDLE_TABLE_INITIAL_SIZE 16 + + +struct handle_table +{ + /** Object array. Empty handles have a null object */ + void **objects; + + /** Number of objects the handle can currently hold */ + unsigned size; + /** Number of consecutive objects allocated at the start of the table */ + unsigned filled; + + /** Optional object destructor */ + void (*destroy)(void *object); +}; + + +struct handle_table * +handle_table_create(void) +{ + struct handle_table *ht; + + ht = MALLOC_STRUCT(handle_table); + if(!ht) + return NULL; + + ht->objects = (void **)CALLOC(HANDLE_TABLE_INITIAL_SIZE, sizeof(void *)); + if(!ht->objects) { + FREE(ht); + return NULL; + } + + ht->size = HANDLE_TABLE_INITIAL_SIZE; + ht->filled = 0; + + ht->destroy = NULL; + + return ht; +} + + +void +handle_table_set_destroy(struct handle_table *ht, + void (*destroy)(void *object)) +{ + assert(ht); + ht->destroy = destroy; +} + + +/** + * Resize the table if necessary + */ +static INLINE int +handle_table_resize(struct handle_table *ht, + unsigned minimum_size) +{ + unsigned new_size; + void **new_objects; + + if(ht->size > minimum_size) + return ht->size; + + new_size = ht->size; + while(!(new_size > minimum_size)) + new_size *= 2; + assert(new_size); + + new_objects = (void **)REALLOC((void *)ht->objects, + ht->size*sizeof(void *), + new_size*sizeof(void *)); + if(!new_objects) + return 0; + + 
memset(new_objects + ht->size, 0, (new_size - ht->size)*sizeof(void *)); + + ht->size = new_size; + ht->objects = new_objects; + + return ht->size; +} + + +unsigned +handle_table_add(struct handle_table *ht, + void *object) +{ + unsigned index; + unsigned handle; + + assert(ht); + assert(object); + if(!object) + return 0; + + /* linear search for an empty handle */ + while(ht->filled < ht->size) { + if(!ht->objects[ht->filled]) + break; + ++ht->filled; + } + + index = ht->filled; + handle = index + 1; + + /* check integer overflow */ + if(!handle) + return 0; + + /* grow the table if necessary */ + if(!handle_table_resize(ht, index)) + return 0; + + assert(!ht->objects[index]); + ht->objects[index] = object; + ++ht->filled; + + return handle; +} + + +unsigned +handle_table_set(struct handle_table *ht, + unsigned handle, + void *object) +{ + unsigned index; + + assert(ht); + assert(handle); + if(!handle) + return 0; + + assert(object); + if(!object) + return 0; + + index = handle - 1; + + /* grow the table if necessary */ + if(!handle_table_resize(ht, index)) + return 0; + + if(ht->objects[index] && ht->destroy) + ht->destroy(ht->objects[index]); + + ht->objects[index] = object; + + return handle; +} + + +void * +handle_table_get(struct handle_table *ht, + unsigned handle) +{ + void *object; + + assert(ht); + assert(handle); + if(!handle || handle > ht->size) + return NULL; + + object = ht->objects[handle - 1]; + + return object; +} + + +void +handle_table_remove(struct handle_table *ht, + unsigned handle) +{ + void *object; + unsigned index; + + assert(ht); + assert(handle); + if(!handle || handle > ht->size) + return; + + index = handle - 1; + object = ht->objects[index]; + if(!object) + return; + + if(ht->destroy) + ht->destroy(object); + + ht->objects[index] = NULL; + if(index < ht->filled) + ht->filled = index; +} + + +unsigned +handle_table_get_next_handle(struct handle_table *ht, + unsigned handle) +{ + unsigned index; + + for(index = handle; index < 
ht->size; ++index) { + if(ht->objects[index]) + return index + 1; + } + + return 0; +} + + +unsigned +handle_table_get_first_handle(struct handle_table *ht) +{ + return handle_table_get_next_handle(ht, 0); +} + + +void +handle_table_destroy(struct handle_table *ht) +{ + unsigned index; + assert(ht); + + if(ht->destroy) + for(index = 0; index < ht->size; ++index) + if(ht->objects[index]) + ht->destroy(ht->objects[index]); + + FREE(ht->objects); + FREE(ht); +} + diff --git a/src/gallium/auxiliary/util/u_handle_table.h b/src/gallium/auxiliary/util/u_handle_table.h new file mode 100644 index 0000000000..d080135c9f --- /dev/null +++ b/src/gallium/auxiliary/util/u_handle_table.h @@ -0,0 +1,116 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Generic handle table. + * + * @author José Fonseca <jrfonseca@tungstengraphics.com> + */ + +#ifndef U_HANDLE_TABLE_H_ +#define U_HANDLE_TABLE_H_ + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Abstract data type to map integer handles to objects. + * + * Also referred as "pointer array". + */ +struct handle_table; + + +struct handle_table * +handle_table_create(void); + + +/** + * Set an optional destructor callback. + * + * If set, it will be called during handle_table_remove and + * handle_table_destroy calls. + */ +void +handle_table_set_destroy(struct handle_table *ht, + void (*destroy)(void *object)); + + +/** + * Add a new object. + * + * Returns a zero handle on failure (out of memory). + */ +unsigned +handle_table_add(struct handle_table *ht, + void *object); + +/** + * Returns zero on failure (out of memory). + */ +unsigned +handle_table_set(struct handle_table *ht, + unsigned handle, + void *object); + +/** + * Fetch an existing object. + * + * Returns NULL for an invalid handle. 
+ */ +void * +handle_table_get(struct handle_table *ht, + unsigned handle); + + +void +handle_table_remove(struct handle_table *ht, + unsigned handle); + + +void +handle_table_destroy(struct handle_table *ht); + + +unsigned +handle_table_get_first_handle(struct handle_table *ht); + + +unsigned +handle_table_get_next_handle(struct handle_table *ht, + unsigned handle); + + +#ifdef __cplusplus +} +#endif + +#endif /* U_HANDLE_TABLE_H_ */ diff --git a/src/gallium/auxiliary/util/u_hash_table.c b/src/gallium/auxiliary/util/u_hash_table.c new file mode 100644 index 0000000000..0bc8de9632 --- /dev/null +++ b/src/gallium/auxiliary/util/u_hash_table.c @@ -0,0 +1,280 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * @file + * General purpose hash table implementation. + * + * Just uses the cso_hash for now, but it might be better switch to a linear + * probing hash table implementation at some point -- as it is said they have + * better lookup and cache performance and it appears to be possible to write + * a lock-free implementation of such hash tables . + * + * @author José Fonseca <jrfonseca@tungstengraphics.com> + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" + +#include "cso_cache/cso_hash.h" + +#include "util/u_memory.h" +#include "util/u_hash_table.h" + + +struct hash_table +{ + struct cso_hash *cso; + + /** Hash function */ + unsigned (*hash)(void *key); + + /** Compare two keys */ + int (*compare)(void *key1, void *key2); + + /* TODO: key, value destructors? */ +}; + + +struct hash_table_item +{ + void *key; + void *value; +}; + + +static INLINE struct hash_table_item * +hash_table_item(struct cso_hash_iter iter) +{ + return (struct hash_table_item *)cso_hash_iter_data(iter); +} + + +struct hash_table * +hash_table_create(unsigned (*hash)(void *key), + int (*compare)(void *key1, void *key2)) +{ + struct hash_table *ht; + + ht = MALLOC_STRUCT(hash_table); + if(!ht) + return NULL; + + ht->cso = cso_hash_create(); + if(!ht->cso) { + FREE(ht); + return NULL; + } + + ht->hash = hash; + ht->compare = compare; + + return ht; +} + + +static INLINE struct cso_hash_iter +hash_table_find_iter(struct hash_table *ht, + void *key, + unsigned key_hash) +{ + struct cso_hash_iter iter; + struct hash_table_item *item; + + iter = cso_hash_find(ht->cso, key_hash); + while (!cso_hash_iter_is_null(iter)) { + item = (struct hash_table_item *)cso_hash_iter_data(iter); + if (!ht->compare(item->key, key)) + break; + iter = cso_hash_iter_next(iter); + } + + return iter; +} + + +static INLINE struct hash_table_item * +hash_table_find_item(struct hash_table *ht, + void *key, + unsigned 
key_hash) +{ + struct cso_hash_iter iter; + struct hash_table_item *item; + + iter = cso_hash_find(ht->cso, key_hash); + while (!cso_hash_iter_is_null(iter)) { + item = (struct hash_table_item *)cso_hash_iter_data(iter); + if (!ht->compare(item->key, key)) + return item; + iter = cso_hash_iter_next(iter); + } + + return NULL; +} + + +enum pipe_error +hash_table_set(struct hash_table *ht, + void *key, + void *value) +{ + unsigned key_hash; + struct hash_table_item *item; + struct cso_hash_iter iter; + + assert(ht); + + key_hash = ht->hash(key); + + item = hash_table_find_item(ht, key, key_hash); + if(item) { + /* TODO: key/value destruction? */ + item->value = value; + return PIPE_OK; + } + + item = MALLOC_STRUCT(hash_table_item); + if(!item) + return PIPE_ERROR_OUT_OF_MEMORY; + + item->key = key; + item->value = value; + + iter = cso_hash_insert(ht->cso, key_hash, item); + if(cso_hash_iter_is_null(iter)) { + FREE(item); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + return PIPE_OK; +} + + +void * +hash_table_get(struct hash_table *ht, + void *key) +{ + unsigned key_hash; + struct hash_table_item *item; + + assert(ht); + + key_hash = ht->hash(key); + + item = hash_table_find_item(ht, key, key_hash); + if(!item) + return NULL; + + return item->value; +} + + +void +hash_table_remove(struct hash_table *ht, + void *key) +{ + unsigned key_hash; + struct cso_hash_iter iter; + struct hash_table_item *item; + + assert(ht); + + key_hash = ht->hash(key); + + iter = hash_table_find_iter(ht, key, key_hash); + if(cso_hash_iter_is_null(iter)) + return; + + item = hash_table_item(iter); + assert(item); + FREE(item); + + cso_hash_erase(ht->cso, iter); +} + + +void +hash_table_clear(struct hash_table *ht) +{ + struct cso_hash_iter iter; + struct hash_table_item *item; + + assert(ht); + + iter = cso_hash_first_node(ht->cso); + while (!cso_hash_iter_is_null(iter)) { + item = (struct hash_table_item *)cso_hash_take(ht->cso, cso_hash_iter_key(iter)); + FREE(item); + iter = 
cso_hash_first_node(ht->cso); + } +} + + +enum pipe_error +hash_table_foreach(struct hash_table *ht, + enum pipe_error (*callback)(void *key, void *value, void *data), + void *data) +{ + struct cso_hash_iter iter; + struct hash_table_item *item; + enum pipe_error result; + + assert(ht); + + iter = cso_hash_first_node(ht->cso); + while (!cso_hash_iter_is_null(iter)) { + item = (struct hash_table_item *)cso_hash_iter_data(iter); + result = callback(item->key, item->value, data); + if(result != PIPE_OK) + return result; + iter = cso_hash_iter_next(iter); + } + + return PIPE_OK; +} + + +void +hash_table_destroy(struct hash_table *ht) +{ + struct cso_hash_iter iter; + struct hash_table_item *item; + + assert(ht); + + iter = cso_hash_first_node(ht->cso); + while (!cso_hash_iter_is_null(iter)) { + item = (struct hash_table_item *)cso_hash_iter_data(iter); + FREE(item); + iter = cso_hash_iter_next(iter); + } + + cso_hash_delete(ht->cso); + + FREE(ht); +} diff --git a/src/gallium/auxiliary/util/u_hash_table.h b/src/gallium/auxiliary/util/u_hash_table.h new file mode 100644 index 0000000000..feee881582 --- /dev/null +++ b/src/gallium/auxiliary/util/u_hash_table.h @@ -0,0 +1,95 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * General purpose hash table. + * + * @author José Fonseca <jrfonseca@tungstengraphics.com> + */ + +#ifndef U_HASH_TABLE_H_ +#define U_HASH_TABLE_H_ + + +#include "pipe/p_error.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Generic purpose hash table. + */ +struct hash_table; + + +/** + * Create an hash table. + * + * @param hash hash function + * @param compare should return 0 for two equal keys. 
+ */ +struct hash_table * +hash_table_create(unsigned (*hash)(void *key), + int (*compare)(void *key1, void *key2)); + + +enum pipe_error +hash_table_set(struct hash_table *ht, + void *key, + void *value); + +void * +hash_table_get(struct hash_table *ht, + void *key); + + +void +hash_table_remove(struct hash_table *ht, + void *key); + + +void +hash_table_clear(struct hash_table *ht); + + +enum pipe_error +hash_table_foreach(struct hash_table *ht, + enum pipe_error (*callback)(void *key, void *value, void *data), + void *data); + +void +hash_table_destroy(struct hash_table *ht); + + +#ifdef __cplusplus +} +#endif + +#endif /* U_HASH_TABLE_H_ */ diff --git a/src/gallium/auxiliary/util/u_keymap.c b/src/gallium/auxiliary/util/u_keymap.c new file mode 100644 index 0000000000..01b17ddb1b --- /dev/null +++ b/src/gallium/auxiliary/util/u_keymap.c @@ -0,0 +1,309 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Key lookup/associative container. + * + * Like Jose's u_hash_table, based on CSO cache code for now. + * + * Author: Brian Paul + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_error.h" + +#include "cso_cache/cso_hash.h" + +#include "util/u_memory.h" +#include "util/u_keymap.h" + + +struct keymap +{ + struct cso_hash *cso; + unsigned key_size; + unsigned max_entries; /* XXX not obeyed net */ + unsigned num_entries; + keymap_delete_func delete_func; +}; + + +struct keymap_item +{ + void *key, *value; +}; + + +/** + * This the default key-delete function used when the client doesn't + * provide one. + */ +static void +default_delete_func(const struct keymap *map, + const void *key, void *data, void *user) +{ + FREE((void*) data); +} + + +static INLINE struct keymap_item * +hash_table_item(struct cso_hash_iter iter) +{ + return (struct keymap_item *) cso_hash_iter_data(iter); +} + + +/** + * Return 4-byte hash key for a block of bytes. + */ +static unsigned +hash(const void *key, unsigned keySize) +{ + unsigned i, hash; + + keySize /= 4; /* convert from bytes to uints */ + + hash = 0; + for (i = 0; i < keySize; i++) { + hash ^= (i + 1) * ((const unsigned *) key)[i]; + } + + /*hash = hash ^ (hash >> 11) ^ (hash >> 22);*/ + + return hash; +} + + +/** + * Create a new map. 
+ * \param keySize size of the keys in bytes + * \param maxEntries max number of entries to allow (~0 = infinity) + * \param deleteFunc optional callback to call when entries + * are deleted/replaced + */ +struct keymap * +util_new_keymap(unsigned keySize, unsigned maxEntries, + keymap_delete_func deleteFunc) +{ + struct keymap *map = MALLOC_STRUCT(keymap); + if (!map) + return NULL; + + map->cso = cso_hash_create(); + if (!map->cso) { + FREE(map); + return NULL; + } + + map->max_entries = maxEntries; + map->num_entries = 0; + map->key_size = keySize; + map->delete_func = deleteFunc ? deleteFunc : default_delete_func; + + return map; +} + + +/** + * Delete/free a keymap and all entries. The deleteFunc that was given at + * create time will be called for each entry. + * \param user user-provided pointer passed through to the delete callback + */ +void +util_delete_keymap(struct keymap *map, void *user) +{ + util_keymap_remove_all(map, user); + cso_hash_delete(map->cso); + FREE(map); +} + + +static INLINE struct cso_hash_iter +hash_table_find_iter(const struct keymap *map, const void *key, + unsigned key_hash) +{ + struct cso_hash_iter iter; + struct keymap_item *item; + + iter = cso_hash_find(map->cso, key_hash); + while (!cso_hash_iter_is_null(iter)) { + item = (struct keymap_item *) cso_hash_iter_data(iter); + if (!memcmp(item->key, key, map->key_size)) + break; + iter = cso_hash_iter_next(iter); + } + + return iter; +} + + +static INLINE struct keymap_item * +hash_table_find_item(const struct keymap *map, const void *key, + unsigned key_hash) +{ + struct cso_hash_iter iter = hash_table_find_iter(map, key, key_hash); + if (cso_hash_iter_is_null(iter)) { + return NULL; + } + else { + return hash_table_item(iter); + } +} + + +/** + * Insert a new key + data pointer into the table. + * Note: we create a copy of the key, but not the data! + * If the key is already present in the table, replace the existing + * entry (calling the delete callback on the previous entry). 
+ * If the maximum capacity of the map is reached an old entry + * will be deleted (the delete callback will be called). + */ +boolean +util_keymap_insert(struct keymap *map, const void *key, + const void *data, void *user) +{ + unsigned key_hash; + struct keymap_item *item; + struct cso_hash_iter iter; + + assert(map); + + key_hash = hash(key, map->key_size); + + item = hash_table_find_item(map, key, key_hash); + if (item) { + /* call delete callback for old entry/item */ + map->delete_func(map, item->key, item->value, user); + item->value = (void *) data; + return TRUE; + } + + item = MALLOC_STRUCT(keymap_item); + if (!item) + return FALSE; + + item->key = mem_dup(key, map->key_size); + item->value = (void *) data; + + iter = cso_hash_insert(map->cso, key_hash, item); + if (cso_hash_iter_is_null(iter)) { + FREE(item); + return FALSE; + } + + map->num_entries++; + + return TRUE; +} + + +/** + * Look up a key in the map and return the associated data pointer. + */ +const void * +util_keymap_lookup(const struct keymap *map, const void *key) +{ + unsigned key_hash; + struct keymap_item *item; + + assert(map); + + key_hash = hash(key, map->key_size); + + item = hash_table_find_item(map, key, key_hash); + if (!item) + return NULL; + + return item->value; +} + + +/** + * Remove an entry from the map. + * The delete callback will be called if the given key/entry is found. + * \param user passed to the delete callback as the last param. 
+ */ +void +util_keymap_remove(struct keymap *map, const void *key, void *user) +{ + unsigned key_hash; + struct cso_hash_iter iter; + struct keymap_item *item; + + assert(map); + + key_hash = hash(key, map->key_size); + + iter = hash_table_find_iter(map, key, key_hash); + if (cso_hash_iter_is_null(iter)) + return; + + item = hash_table_item(iter); + assert(item); + map->delete_func(map, item->key, item->value, user); + FREE(item->key); + FREE(item); + + map->num_entries--; + + cso_hash_erase(map->cso, iter); +} + + +/** + * Remove all entries from the map, calling the delete callback for each. + * \param user passed to the delete callback as the last param. + */ +void +util_keymap_remove_all(struct keymap *map, void *user) +{ + struct cso_hash_iter iter; + struct keymap_item *item; + + assert(map); + + iter = cso_hash_first_node(map->cso); + while (!cso_hash_iter_is_null(iter)) { + item = (struct keymap_item *) + cso_hash_take(map->cso, cso_hash_iter_key(iter)); + map->delete_func(map, item->key, item->value, user); + FREE(item->key); + FREE(item); + iter = cso_hash_first_node(map->cso); + } +} + + +extern void +util_keymap_info(const struct keymap *map) +{ + debug_printf("Keymap %p: %u of max %u entries\n", + (void *) map, map->num_entries, map->max_entries); +} diff --git a/src/gallium/auxiliary/util/u_keymap.h b/src/gallium/auxiliary/util/u_keymap.h new file mode 100644 index 0000000000..8d60a76fc3 --- /dev/null +++ b/src/gallium/auxiliary/util/u_keymap.h @@ -0,0 +1,68 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef U_KEYMAP_H +#define U_KEYMAP_H + +#include "pipe/p_compiler.h" + + +/** opaque keymap type */ +struct keymap; + + +/** Delete/callback function type */ +typedef void (*keymap_delete_func)(const struct keymap *map, + const void *key, void *data, + void *user); + + +extern struct keymap * +util_new_keymap(unsigned keySize, unsigned maxEntries, + keymap_delete_func deleteFunc); + +extern void +util_delete_keymap(struct keymap *map, void *user); + +extern boolean +util_keymap_insert(struct keymap *map, const void *key, + const void *data, void *user); + +extern const void * +util_keymap_lookup(const struct keymap *map, const void *key); + +extern void +util_keymap_remove(struct keymap *map, const void *key, void *user); + +extern void +util_keymap_remove_all(struct keymap *map, void *user); + +extern void +util_keymap_info(const struct keymap *map); + + +#endif /* U_KEYMAP_H */ diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c new file mode 100644 index 0000000000..5b3cab4642 --- /dev/null +++ b/src/gallium/auxiliary/util/u_math.c @@ -0,0 +1,73 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +#include "util/u_math.h" + + +/** 2^x, for x in [-1.0, 1.0[ */ +float pow2_table[POW2_TABLE_SIZE]; + + +static void +init_pow2_table(void) +{ + int i; + for (i = 0; i < POW2_TABLE_SIZE; i++) + pow2_table[i] = (float) pow(2.0, (i - POW2_TABLE_OFFSET) / POW2_TABLE_SCALE); +} + + +/** log2(x), for x in [1.0, 2.0[ */ +float log2_table[LOG2_TABLE_SIZE]; + + +static void +init_log2_table(void) +{ + unsigned i; + for (i = 0; i < LOG2_TABLE_SIZE; i++) + log2_table[i] = (float) log2(1.0 + i * (1.0 / LOG2_TABLE_SIZE)); +} + + +/** + * One time init for math utilities. + */ +void +util_init_math(void) +{ + static boolean initialized = FALSE; + if (!initialized) { + init_pow2_table(); + init_log2_table(); + initialized = TRUE; + } +} + + diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h new file mode 100644 index 0000000000..be7303e550 --- /dev/null +++ b/src/gallium/auxiliary/util/u_math.h @@ -0,0 +1,423 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Math utilities and approximations for common math functions. + * Reduced precision is usually acceptable in shaders... + * + * "fast" is used in the names of functions which are low-precision, + * or at least lower-precision than the normal C lib functions. 
+ */ + + +#ifndef U_MATH_H +#define U_MATH_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +#if defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) +__inline double ceil(double val) +{ + double ceil_val; + + if((val - (long) val) == 0) { + ceil_val = val; + } + else { + if(val > 0) { + ceil_val = (long) val + 1; + } + else { + ceil_val = (long) val; + } + } + + return ceil_val; +} + +#ifndef PIPE_SUBSYSTEM_WINDOWS_CE +__inline double floor(double val) +{ + double floor_val; + + if((val - (long) val) == 0) { + floor_val = val; + } + else { + if(val > 0) { + floor_val = (long) val; + } + else { + floor_val = (long) val - 1; + } + } + + return floor_val; +} +#endif + +#pragma function(pow) +__inline double __cdecl pow(double val, double exponent) +{ + /* XXX */ + assert(0); + return 0; +} + +#pragma function(log) +__inline double __cdecl log(double val) +{ + /* XXX */ + assert(0); + return 0; +} + +#pragma function(atan2) +__inline double __cdecl atan2(double val) +{ + /* XXX */ + assert(0); + return 0; +} +#else +#include <math.h> +#include <stdarg.h> +#endif + + +#if defined(_MSC_VER) +#if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) + +static INLINE float cosf( float f ) +{ + return (float) cos( (double) f ); +} + +static INLINE float sinf( float f ) +{ + return (float) sin( (double) f ); +} + +static INLINE float ceilf( float f ) +{ + return (float) ceil( (double) f ); +} + +static INLINE float floorf( float f ) +{ + return (float) floor( (double) f ); +} + +static INLINE float powf( float f, float g ) +{ + return (float) pow( (double) f, (double) g ); +} + +static INLINE float sqrtf( float f ) +{ + return (float) sqrt( (double) f ); +} + +static INLINE float fabsf( float f ) +{ + return (float) fabs( (double) f ); +} + +static INLINE float logf( float f ) +{ + return (float) log( (double) f ); +} + +#else +/* Work-around an extra semi-colon in VS 2005 logf definition */ 
+#ifdef logf +#undef logf +#define logf(x) ((float)log((double)(x))) +#endif /* logf */ +#endif +#endif /* _MSC_VER */ + + + + + +#define POW2_TABLE_SIZE_LOG2 9 +#define POW2_TABLE_SIZE (1 << POW2_TABLE_SIZE_LOG2) +#define POW2_TABLE_OFFSET (POW2_TABLE_SIZE/2) +#define POW2_TABLE_SCALE ((float)(POW2_TABLE_SIZE/2)) +extern float pow2_table[POW2_TABLE_SIZE]; + + + +extern void +util_init_math(void); + + +union fi { + float f; + int32_t i; + uint32_t ui; +}; + + +/** + * Fast version of 2^x + * Identity: exp2(a + b) = exp2(a) * exp2(b) + * Let ipart = int(x) + * Let fpart = x - ipart; + * So, exp2(x) = exp2(ipart) * exp2(fpart) + * Compute exp2(ipart) with 1 << ipart + * Compute exp2(fpart) with lookup table. + */ +static INLINE float +util_fast_exp2(float x) +{ + int32_t ipart; + float fpart, mpart; + union fi epart; + + if(x > 129.00000f) + return 3.402823466e+38f; + + if(x < -126.99999f) + return 0.0f; + + ipart = (int32_t) x; + fpart = x - (float) ipart; + + /* same as + * epart.f = (float) (1 << ipart) + * but faster and without integer overflow for ipart > 31 */ + epart.i = (ipart + 127 ) << 23; + + mpart = pow2_table[POW2_TABLE_OFFSET + (int)(fpart * POW2_TABLE_SCALE)]; + + return epart.f * mpart; +} + + +/** + * Fast approximation to exp(x). + */ +static INLINE float +util_fast_exp(float x) +{ + const float k = 1.44269f; /* = log2(e) */ + return util_fast_exp2(k * x); +} + + +#define LOG2_TABLE_SIZE_LOG2 8 +#define LOG2_TABLE_SIZE (1 << LOG2_TABLE_SIZE_LOG2) +extern float log2_table[LOG2_TABLE_SIZE]; + + +static INLINE float +util_fast_log2(float x) +{ + union fi num; + float epart, mpart; + num.f = x; + epart = (float)(((num.i & 0x7f800000) >> 23) - 127); + mpart = log2_table[(num.i & 0x007fffff) >> (23 - LOG2_TABLE_SIZE_LOG2)]; + return epart + mpart; +} + + +static INLINE float +util_fast_pow(float x, float y) +{ + return util_fast_exp2(util_fast_log2(x) * y); +} + + + +/** + * Floor(x), returned as int. 
+ */ +static INLINE int +util_ifloor(float f) +{ + int ai, bi; + double af, bf; + union fi u; + af = (3 << 22) + 0.5 + (double)f; + bf = (3 << 22) + 0.5 - (double)f; + u.f = (float) af; ai = u.i; + u.f = (float) bf; bi = u.i; + return (ai - bi) >> 1; +} + + +/** + * Round float to nearest int. + */ +static INLINE int +util_iround(float f) +{ +#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) + int r; + __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st"); + return r; +#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) + int r; + _asm { + fld f + fistp r + } + return r; +#else + if (f >= 0.0f) + return (int) (f + 0.5f); + else + return (int) (f - 0.5f); +#endif +} + + + +#if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) +/** + * Find first bit set in word. Least significant bit is 1. + * Return 0 if no bits set. + */ +static INLINE +unsigned ffs( unsigned u ) +{ + unsigned i; + + if( u == 0 ) { + return 0; + } + + __asm bsf eax, [u] + __asm inc eax + __asm mov [i], eax + + return i; +} +#endif + + +/** + * Return float bits. + */ +static INLINE unsigned +fui( float f ) +{ + union fi fi; + fi.f = f; + return fi.ui; +} + + + +static INLINE float +ubyte_to_float(ubyte ub) +{ + return (float) ub * (1.0f / 255.0f); +} + + +/** + * Convert float in [0,1] to ubyte in [0,255] with clamping. + */ +static INLINE ubyte +float_to_ubyte(float f) +{ + const int ieee_0996 = 0x3f7f0000; /* 0.996 or so */ + union fi tmp; + + tmp.f = f; + if (tmp.i < 0) { + return (ubyte) 0; + } + else if (tmp.i >= ieee_0996) { + return (ubyte) 255; + } + else { + tmp.f = tmp.f * (255.0f/256.0f) + 32768.0f; + return (ubyte) tmp.i; + } +} + + + +#define CLAMP( X, MIN, MAX ) ( (X)<(MIN) ? (MIN) : ((X)>(MAX) ? (MAX) : (X)) ) + +#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) +#define MAX2( A, B ) ( (A)>(B) ? 
(A) : (B) ) + + +static INLINE int +align(int value, int alignment) +{ + return (value + alignment - 1) & ~(alignment - 1); +} + + +#ifndef COPY_4V +#define COPY_4V( DST, SRC ) \ +do { \ + (DST)[0] = (SRC)[0]; \ + (DST)[1] = (SRC)[1]; \ + (DST)[2] = (SRC)[2]; \ + (DST)[3] = (SRC)[3]; \ +} while (0) +#endif + + +#ifndef COPY_4FV +#define COPY_4FV( DST, SRC ) COPY_4V(DST, SRC) +#endif + + +#ifndef ASSIGN_4V +#define ASSIGN_4V( DST, V0, V1, V2, V3 ) \ +do { \ + (DST)[0] = (V0); \ + (DST)[1] = (V1); \ + (DST)[2] = (V2); \ + (DST)[3] = (V3); \ +} while (0) +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* U_MATH_H */ diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h new file mode 100644 index 0000000000..857102719d --- /dev/null +++ b/src/gallium/auxiliary/util/u_memory.h @@ -0,0 +1,232 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Memory functions + */ + + +#ifndef U_MEMORY_H +#define U_MEMORY_H + + +#include "util/u_pointer.h" +#include "pipe/p_debug.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Define ENOMEM for WINCE */ +#if (_WIN32_WCE < 600) +#ifndef ENOMEM +#define ENOMEM 12 +#endif +#endif + + +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && defined(DEBUG) + +/* memory debugging */ + +#include "pipe/p_debug.h" + +#define MALLOC( _size ) \ + debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size ) +#define CALLOC( _count, _size ) \ + debug_calloc(__FILE__, __LINE__, __FUNCTION__, _count, _size ) +#define FREE( _ptr ) \ + debug_free( __FILE__, __LINE__, __FUNCTION__, _ptr ) +#define REALLOC( _ptr, _old_size, _size ) \ + debug_realloc( __FILE__, __LINE__, __FUNCTION__, _ptr, _old_size, _size ) + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + +void * __stdcall +EngAllocMem( + unsigned long Flags, + unsigned long MemSize, + unsigned long Tag ); + +void __stdcall +EngFreeMem( + void *Mem ); + +#define MALLOC( _size ) EngAllocMem( 0, _size, 'D3AG' ) +#define _FREE( _ptr ) EngFreeMem( _ptr ) + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + +void * +ExAllocatePool( + unsigned long PoolType, + size_t NumberOfBytes); + +void +ExFreePool(void *P); + +#define MALLOC(_size) ExAllocatePool(0, _size) +#define _FREE(_ptr) ExFreePool(_ptr) + +#else + +#define MALLOC( SIZE ) malloc( SIZE ) +#define CALLOC( COUNT, SIZE ) calloc( COUNT, SIZE ) +#define FREE( PTR ) free( PTR ) +#define REALLOC( OLDPTR, OLDSIZE, NEWSIZE ) realloc( OLDPTR, NEWSIZE ) + +#endif + + +#ifndef CALLOC +static INLINE void * +CALLOC( unsigned count, 
unsigned size ) +{ + void *ptr = MALLOC( count * size ); + if( ptr ) { + memset( ptr, 0, count * size ); + } + return ptr; +} +#endif /* !CALLOC */ + +#ifndef FREE +static INLINE void +FREE( void *ptr ) +{ + if( ptr ) { + _FREE( ptr ); + } +} +#endif /* !FREE */ + +#ifndef REALLOC +static INLINE void * +REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) +{ + void *new_ptr = NULL; + + if (new_size != 0) { + unsigned copy_size = old_size < new_size ? old_size : new_size; + new_ptr = MALLOC( new_size ); + if (new_ptr && old_ptr && copy_size) { + memcpy( new_ptr, old_ptr, copy_size ); + } + } + + FREE( old_ptr ); + return new_ptr; +} +#endif /* !REALLOC */ + + +#define MALLOC_STRUCT(T) (struct T *) MALLOC(sizeof(struct T)) + +#define CALLOC_STRUCT(T) (struct T *) CALLOC(1, sizeof(struct T)) + + +/** + * Return memory on given byte alignment + */ +static INLINE void * +align_malloc(size_t bytes, uint alignment) +{ +#if defined(HAVE_POSIX_MEMALIGN) + void *mem; + alignment = (alignment + (uint)sizeof(void*) - 1) & ~((uint)sizeof(void*) - 1); + if(posix_memalign(& mem, alignment, bytes) != 0) + return NULL; + return mem; +#else + char *ptr, *buf; + + assert( alignment > 0 ); + + ptr = (char *) MALLOC(bytes + alignment + sizeof(void *)); + if (!ptr) + return NULL; + + buf = (char *) align_pointer( ptr + sizeof(void *), alignment ); + *(char **)(buf - sizeof(void *)) = ptr; + + return buf; +#endif /* defined(HAVE_POSIX_MEMALIGN) */ +} + +/** + * Free memory returned by align_malloc(). + */ +static INLINE void +align_free(void *ptr) +{ +#if defined(HAVE_POSIX_MEMALIGN) + FREE(ptr); +#else + void **cubbyHole = (void **) ((char *) ptr - sizeof(void *)); + void *realAddr = *cubbyHole; + FREE(realAddr); +#endif /* defined(HAVE_POSIX_MEMALIGN) */ +} + + +/** + * Duplicate a block of memory. 
+ */ +static INLINE void * +mem_dup(const void *src, uint size) +{ + void *dup = MALLOC(size); + if (dup) + memcpy(dup, src, size); + return dup; +} + + +/** + * Number of elements in an array. + */ +#ifndef Elements +#define Elements(x) (sizeof(x)/sizeof((x)[0])) +#endif + + +/** + * Offset of a field in a struct, in bytes. + */ +#define Offset(TYPE, MEMBER) ((unsigned)&(((TYPE *)NULL)->MEMBER)) + + + +#ifdef __cplusplus +} +#endif + + +#endif /* U_MEMORY_H */ diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c new file mode 100644 index 0000000000..0f51dd5977 --- /dev/null +++ b/src/gallium/auxiliary/util/u_mm.c @@ -0,0 +1,283 @@ +/************************************************************************** + * + * Copyright (C) 1999 Wittawat Yamwong + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" + +#include "util/u_memory.h" +#include "util/u_mm.h" + + +void +mmDumpMemInfo(const struct mem_block *heap) +{ + debug_printf("Memory heap %p:\n", (void *)heap); + if (heap == 0) { + debug_printf(" heap == 0\n"); + } else { + const struct mem_block *p; + + for(p = heap->next; p != heap; p = p->next) { + debug_printf(" Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, + p->free ? 'F':'.', + p->reserved ? 'R':'.'); + } + + debug_printf("\nFree list:\n"); + + for(p = heap->next_free; p != heap; p = p->next_free) { + debug_printf(" FREE Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, + p->free ? 'F':'.', + p->reserved ? 'R':'.'); + } + + } + debug_printf("End of memory blocks\n"); +} + +struct mem_block * +mmInit(int ofs, int size) +{ + struct mem_block *heap, *block; + + if (size <= 0) + return NULL; + + heap = CALLOC_STRUCT(mem_block); + if (!heap) + return NULL; + + block = CALLOC_STRUCT(mem_block); + if (!block) { + FREE(heap); + return NULL; + } + + heap->next = block; + heap->prev = block; + heap->next_free = block; + heap->prev_free = block; + + block->heap = heap; + block->next = heap; + block->prev = heap; + block->next_free = heap; + block->prev_free = heap; + + block->ofs = ofs; + block->size = size; + block->free = 1; + + return heap; +} + + +static struct mem_block * +SliceBlock(struct mem_block *p, + int startofs, int size, + int reserved, int alignment) +{ + struct mem_block *newblock; + + /* break left [p, newblock, p->next], then p = newblock */ + if (startofs > p->ofs) { + newblock = CALLOC_STRUCT(mem_block); + if (!newblock) + return NULL; + newblock->ofs = startofs; + newblock->size = p->size - (startofs - p->ofs); + newblock->free = 1; + newblock->heap = p->heap; + + newblock->next = p->next; + newblock->prev = p; + p->next->prev = newblock; + p->next = newblock; + + newblock->next_free = p->next_free; + 
newblock->prev_free = p; + p->next_free->prev_free = newblock; + p->next_free = newblock; + + p->size -= newblock->size; + p = newblock; + } + + /* break right, also [p, newblock, p->next] */ + if (size < p->size) { + newblock = CALLOC_STRUCT(mem_block); + if (!newblock) + return NULL; + newblock->ofs = startofs + size; + newblock->size = p->size - size; + newblock->free = 1; + newblock->heap = p->heap; + + newblock->next = p->next; + newblock->prev = p; + p->next->prev = newblock; + p->next = newblock; + + newblock->next_free = p->next_free; + newblock->prev_free = p; + p->next_free->prev_free = newblock; + p->next_free = newblock; + + p->size = size; + } + + /* p = middle block */ + p->free = 0; + + /* Remove p from the free list: + */ + p->next_free->prev_free = p->prev_free; + p->prev_free->next_free = p->next_free; + + p->next_free = 0; + p->prev_free = 0; + + p->reserved = reserved; + return p; +} + + +struct mem_block * +mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) +{ + struct mem_block *p; + const int mask = (1 << align2)-1; + int startofs = 0; + int endofs; + + if (!heap || align2 < 0 || size <= 0) + return NULL; + + for (p = heap->next_free; p != heap; p = p->next_free) { + assert(p->free); + + startofs = (p->ofs + mask) & ~mask; + if ( startofs < startSearch ) { + startofs = startSearch; + } + endofs = startofs+size; + if (endofs <= (p->ofs+p->size)) + break; + } + + if (p == heap) + return NULL; + + assert(p->free); + p = SliceBlock(p,startofs,size,0,mask+1); + + return p; +} + + +struct mem_block * +mmFindBlock(struct mem_block *heap, int start) +{ + struct mem_block *p; + + for (p = heap->next; p != heap; p = p->next) { + if (p->ofs == start) + return p; + } + + return NULL; +} + + +static INLINE int +Join2Blocks(struct mem_block *p) +{ + /* XXX there should be some assertions here */ + + /* NOTE: heap->free == 0 */ + + if (p->free && p->next->free) { + struct mem_block *q = p->next; + + assert(p->ofs + p->size == q->ofs); 
+ p->size += q->size; + + p->next = q->next; + q->next->prev = p; + + q->next_free->prev_free = q->prev_free; + q->prev_free->next_free = q->next_free; + + FREE(q); + return 1; + } + return 0; +} + +int +mmFreeMem(struct mem_block *b) +{ + if (!b) + return 0; + + if (b->free) { + debug_printf("block already free\n"); + return -1; + } + if (b->reserved) { + debug_printf("block is reserved\n"); + return -1; + } + + b->free = 1; + b->next_free = b->heap->next_free; + b->prev_free = b->heap; + b->next_free->prev_free = b; + b->prev_free->next_free = b; + + Join2Blocks(b); + if (b->prev != b->heap) + Join2Blocks(b->prev); + + return 0; +} + + +void +mmDestroy(struct mem_block *heap) +{ + struct mem_block *p; + + if (!heap) + return; + + for (p = heap->next; p != heap; ) { + struct mem_block *next = p->next; + FREE(p); + p = next; + } + + FREE(heap); +} diff --git a/src/gallium/auxiliary/util/u_mm.h b/src/gallium/auxiliary/util/u_mm.h new file mode 100644 index 0000000000..b226b101cb --- /dev/null +++ b/src/gallium/auxiliary/util/u_mm.h @@ -0,0 +1,91 @@ +/************************************************************************** + * + * Copyright (C) 1999 Wittawat Yamwong + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Memory manager code. Primarily used by device drivers to manage texture + * heaps, etc. + */ + + +#ifndef _U_MM_H_ +#define _U_MM_H_ + + +struct mem_block { + struct mem_block *next, *prev; + struct mem_block *next_free, *prev_free; + struct mem_block *heap; + int ofs,size; + unsigned int free:1; + unsigned int reserved:1; +}; + + + +/** + * input: ofs = start offset of the heap, size = total size in bytes + * return: a heap pointer if OK, NULL if error + */ +extern struct mem_block *mmInit(int ofs, int size); + +/** + * Allocate 'size' bytes with 2^align2 bytes alignment, + * restrict the search to free memory after 'startSearch' + * depth and back buffers should be in different 4mb banks + * to get better page hits if possible + * input: size = size of block + * align2 = 2^align2 bytes alignment + * startSearch = linear offset from start of heap to begin search + * return: pointer to the allocated block, 0 if error + */ +extern struct mem_block *mmAllocMem(struct mem_block *heap, int size, int align2, + int startSearch); + +/** + * Free a previously allocated block + * input: pointer to a block + * return: 0 if OK, -1 if error + */ +extern int mmFreeMem(struct mem_block *b); + +/** + * Find the block that starts at the given offset + * input: pointer to a heap, start offset + * return: pointer to the block, NULL if not found + */ +extern struct mem_block *mmFindBlock(struct mem_block *heap, int start); + +/** + * destroy MM + */ +extern void 
mmDestroy(struct mem_block *mmInit); + +/** + * For debugging purposes. + */ +extern void mmDumpMemInfo(const struct mem_block *mmInit); + +#endif diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h new file mode 100644 index 0000000000..e0e8aa8e9f --- /dev/null +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -0,0 +1,472 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Functions to produce packed colors/Z from floats. 
+ */ + + +#ifndef U_PACK_COLOR_H +#define U_PACK_COLOR_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" +#include "util/u_math.h" + + +/** + * Pack ubyte R,G,B,A into dest pixel. + */ +static INLINE void +util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, + enum pipe_format format, void *dest) +{ + switch (format) { + case PIPE_FORMAT_R8G8B8A8_UNORM: + { + uint *d = (uint *) dest; + *d = (r << 24) | (g << 16) | (b << 8) | a; + } + return; + case PIPE_FORMAT_R8G8B8X8_UNORM: + { + uint *d = (uint *) dest; + *d = (r << 24) | (g << 16) | (b << 8) | 0xff; + } + return; + case PIPE_FORMAT_A8R8G8B8_UNORM: + { + uint *d = (uint *) dest; + *d = (a << 24) | (r << 16) | (g << 8) | b; + } + return; + case PIPE_FORMAT_X8R8G8B8_UNORM: + { + uint *d = (uint *) dest; + *d = (0xff << 24) | (r << 16) | (g << 8) | b; + } + return; + case PIPE_FORMAT_B8G8R8A8_UNORM: + { + uint *d = (uint *) dest; + *d = (b << 24) | (g << 16) | (r << 8) | a; + } + return; + case PIPE_FORMAT_B8G8R8X8_UNORM: + { + uint *d = (uint *) dest; + *d = (b << 24) | (g << 16) | (r << 8) | 0xff; + } + return; + case PIPE_FORMAT_R5G6B5_UNORM: + { + ushort *d = (ushort *) dest; + *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); + } + return; + case PIPE_FORMAT_A1R5G5B5_UNORM: + { + ushort *d = (ushort *) dest; + *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); + } + return; + case PIPE_FORMAT_A4R4G4B4_UNORM: + { + ushort *d = (ushort *) dest; + *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); + } + return; + case PIPE_FORMAT_A8_UNORM: + { + ubyte *d = (ubyte *) dest; + *d = a; + } + return; + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: + { + ubyte *d = (ubyte *) dest; + *d = r; + } + return; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + { + float *d = (float *) dest; + d[0] = (float)r / 255.0f; + d[1] = (float)g / 255.0f; + d[2] = (float)b / 255.0f; + d[3] = (float)a / 255.0f; + } + return; + case PIPE_FORMAT_R32G32B32_FLOAT: + { + 
float *d = (float *) dest; + d[0] = (float)r / 255.0f; + d[1] = (float)g / 255.0f; + d[2] = (float)b / 255.0f; + } + return; + + /* XXX lots more cases to add */ + default: + debug_print_format("gallium: unhandled format in util_pack_color_ub()", format); + assert(0); + } +} + + +/** + * Unpack RGBA from a packed pixel, returning values as ubytes in [0,255]. + */ +static INLINE void +util_unpack_color_ub(enum pipe_format format, const void *src, + ubyte *r, ubyte *g, ubyte *b, ubyte *a) +{ + switch (format) { + case PIPE_FORMAT_R8G8B8A8_UNORM: + { + uint p = ((const uint *) src)[0]; + *r = (ubyte) ((p >> 24) & 0xff); + *g = (ubyte) ((p >> 16) & 0xff); + *b = (ubyte) ((p >> 8) & 0xff); + *a = (ubyte) ((p >> 0) & 0xff); + } + return; + case PIPE_FORMAT_R8G8B8X8_UNORM: + { + uint p = ((const uint *) src)[0]; + *r = (ubyte) ((p >> 24) & 0xff); + *g = (ubyte) ((p >> 16) & 0xff); + *b = (ubyte) ((p >> 8) & 0xff); + *a = (ubyte) 0xff; + } + return; + case PIPE_FORMAT_A8R8G8B8_UNORM: + { + uint p = ((const uint *) src)[0]; + *r = (ubyte) ((p >> 16) & 0xff); + *g = (ubyte) ((p >> 8) & 0xff); + *b = (ubyte) ((p >> 0) & 0xff); + *a = (ubyte) ((p >> 24) & 0xff); + } + return; + case PIPE_FORMAT_X8R8G8B8_UNORM: + { + uint p = ((const uint *) src)[0]; + *r = (ubyte) ((p >> 16) & 0xff); + *g = (ubyte) ((p >> 8) & 0xff); + *b = (ubyte) ((p >> 0) & 0xff); + *a = (ubyte) 0xff; + } + return; + case PIPE_FORMAT_B8G8R8A8_UNORM: + { + uint p = ((const uint *) src)[0]; + *r = (ubyte) ((p >> 8) & 0xff); + *g = (ubyte) ((p >> 16) & 0xff); + *b = (ubyte) ((p >> 24) & 0xff); + *a = (ubyte) ((p >> 0) & 0xff); + } + return; + case PIPE_FORMAT_B8G8R8X8_UNORM: + { + uint p = ((const uint *) src)[0]; + *r = (ubyte) ((p >> 8) & 0xff); + *g = (ubyte) ((p >> 16) & 0xff); + *b = (ubyte) ((p >> 24) & 0xff); + *a = (ubyte) 0xff; + } + return; + case PIPE_FORMAT_R5G6B5_UNORM: + { + ushort p = ((const ushort *) src)[0]; + *r = (ubyte) (((p >> 8) & 0xf8) | ((p >> 13) & 0x7)); + *g = (ubyte) (((p >> 3) & 
0xfc) | ((p >> 9) & 0x3)); + *b = (ubyte) (((p << 3) & 0xf8) | ((p >> 2) & 0x7)); + *a = (ubyte) 0xff; + } + return; + case PIPE_FORMAT_A1R5G5B5_UNORM: + { + ushort p = ((const ushort *) src)[0]; + *r = (ubyte) (((p >> 7) & 0xf8) | ((p >> 12) & 0x7)); + *g = (ubyte) (((p >> 2) & 0xf8) | ((p >> 7) & 0x7)); + *b = (ubyte) (((p << 3) & 0xf8) | ((p >> 2) & 0x7)); + *a = (ubyte) (0xff * (p >> 15)); + } + return; + case PIPE_FORMAT_A4R4G4B4_UNORM: + { + ushort p = ((const ushort *) src)[0]; + *r = (ubyte) (((p >> 4) & 0xf0) | ((p >> 8) & 0xf)); + *g = (ubyte) (((p >> 0) & 0xf0) | ((p >> 4) & 0xf)); + *b = (ubyte) (((p << 4) & 0xf0) | ((p >> 0) & 0xf)); + *a = (ubyte) (((p >> 8) & 0xf0) | ((p >> 12) & 0xf)); + } + return; + case PIPE_FORMAT_A8_UNORM: + { + ubyte p = ((const ubyte *) src)[0]; + *r = *g = *b = (ubyte) 0xff; + *a = p; + } + return; + case PIPE_FORMAT_L8_UNORM: + { + ubyte p = ((const ubyte *) src)[0]; + *r = *g = *b = p; + *a = (ubyte) 0xff; + } + return; + case PIPE_FORMAT_I8_UNORM: + { + ubyte p = ((const ubyte *) src)[0]; + *r = *g = *b = *a = p; + } + return; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + { + const float *p = (const float *) src; + *r = float_to_ubyte(p[0]); + *g = float_to_ubyte(p[1]); + *b = float_to_ubyte(p[2]); + *a = float_to_ubyte(p[3]); + } + return; + case PIPE_FORMAT_R32G32B32_FLOAT: + { + const float *p = (const float *) src; + *r = float_to_ubyte(p[0]); + *g = float_to_ubyte(p[1]); + *b = float_to_ubyte(p[2]); + *a = (ubyte) 0xff; + } + return; + + case PIPE_FORMAT_R32G32_FLOAT: + { + const float *p = (const float *) src; + *r = float_to_ubyte(p[0]); + *g = float_to_ubyte(p[1]); + *b = *a = (ubyte) 0xff; + } + return; + + case PIPE_FORMAT_R32_FLOAT: + { + const float *p = (const float *) src; + *r = float_to_ubyte(p[0]); + *g = *b = *a = (ubyte) 0xff; + } + return; + + /* XXX lots more cases to add */ + default: + debug_print_format("gallium: unhandled format in util_unpack_color_ub()", + format); + assert(0); + } +} + + + +/** + * 
Note rgba outside [0,1] will be clamped for int pixel formats. + */ +static INLINE void +util_pack_color(const float rgba[4], enum pipe_format format, void *dest) +{ + ubyte r, g, b, a; + + if (pf_size_x(format) <= 8) { + /* format uses 8-bit components or less */ + r = float_to_ubyte(rgba[0]); + g = float_to_ubyte(rgba[1]); + b = float_to_ubyte(rgba[2]); + a = float_to_ubyte(rgba[3]); + } + + switch (format) { + case PIPE_FORMAT_R8G8B8A8_UNORM: + { + uint *d = (uint *) dest; + *d = (r << 24) | (g << 16) | (b << 8) | a; + } + return; + case PIPE_FORMAT_R8G8B8X8_UNORM: + { + uint *d = (uint *) dest; + *d = (r << 24) | (g << 16) | (b << 8) | 0xff; + } + return; + case PIPE_FORMAT_A8R8G8B8_UNORM: + { + uint *d = (uint *) dest; + *d = (a << 24) | (r << 16) | (g << 8) | b; + } + return; + case PIPE_FORMAT_X8R8G8B8_UNORM: + { + uint *d = (uint *) dest; + *d = (0xff << 24) | (r << 16) | (g << 8) | b; + } + return; + case PIPE_FORMAT_B8G8R8A8_UNORM: + { + uint *d = (uint *) dest; + *d = (b << 24) | (g << 16) | (r << 8) | a; + } + return; + case PIPE_FORMAT_B8G8R8X8_UNORM: + { + uint *d = (uint *) dest; + *d = (b << 24) | (g << 16) | (r << 8) | 0xff; + } + return; + case PIPE_FORMAT_R5G6B5_UNORM: + { + ushort *d = (ushort *) dest; + *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); + } + return; + case PIPE_FORMAT_A1R5G5B5_UNORM: + { + ushort *d = (ushort *) dest; + *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); + } + return; + case PIPE_FORMAT_A4R4G4B4_UNORM: + { + ushort *d = (ushort *) dest; + *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); + } + return; + case PIPE_FORMAT_A8_UNORM: + { + ubyte *d = (ubyte *) dest; + *d = a; + } + return; + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: + { + ubyte *d = (ubyte *) dest; + *d = r; + } + return; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + { + float *d = (float *) dest; + d[0] = rgba[0]; + d[1] = rgba[1]; + d[2] = rgba[2]; + d[3] = rgba[3]; + } + return; + 
case PIPE_FORMAT_R32G32B32_FLOAT: + { + float *d = (float *) dest; + d[0] = rgba[0]; + d[1] = rgba[1]; + d[2] = rgba[2]; + } + return; + /* XXX lots more cases to add */ + default: + debug_print_format("gallium: unhandled format in util_pack_color()", format); + assert(0); + } +} + + +/** + * Note: it's assumed that z is in [0,1] + */ +static INLINE uint +util_pack_z(enum pipe_format format, double z) +{ + if (z == 0.0) + return 0; + + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + if (z == 1.0) + return 0xffff; + return (uint) (z * 0xffff); + case PIPE_FORMAT_Z32_UNORM: + /* special-case to avoid overflow */ + if (z == 1.0) + return 0xffffffff; + return (uint) (z * 0xffffffff); + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + if (z == 1.0) + return 0xffffff; + return (uint) (z * 0xffffff); + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + if (z == 1.0) + return 0xffffff00; + return ((uint) (z * 0xffffff)) << 8; + default: + debug_print_format("gallium: unhandled format in util_pack_z()", format); + assert(0); + return 0; + } +} + + +/** + * Pack 4 ubytes into a 4-byte word + */ +static INLINE unsigned +pack_ub4(ubyte b0, ubyte b1, ubyte b2, ubyte b3) +{ + return ((((unsigned int)b0) << 0) | + (((unsigned int)b1) << 8) | + (((unsigned int)b2) << 16) | + (((unsigned int)b3) << 24)); +} + + +/** + * Pack/convert 4 floats into one 4-byte word. + */ +static INLINE unsigned +pack_ui32_float4(float a, float b, float c, float d) +{ + return pack_ub4( float_to_ubyte(a), + float_to_ubyte(b), + float_to_ubyte(c), + float_to_ubyte(d) ); +} + + + +#endif /* U_PACK_COLOR_H */ diff --git a/src/gallium/auxiliary/util/u_pointer.h b/src/gallium/auxiliary/util/u_pointer.h new file mode 100644 index 0000000000..e1af9f11cb --- /dev/null +++ b/src/gallium/auxiliary/util/u_pointer.h @@ -0,0 +1,107 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 
+ * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_POINTER_H +#define U_POINTER_H + +#include "pipe/p_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static INLINE intptr_t +pointer_to_intptr( const void *p ) +{ + union { + const void *p; + intptr_t i; + } pi; + pi.p = p; + return pi.i; +} + +static INLINE void * +intptr_to_pointer( intptr_t i ) +{ + union { + void *p; + intptr_t i; + } pi; + pi.i = i; + return pi.p; +} + +static INLINE uintptr_t +pointer_to_uintptr( const void *ptr ) +{ + union { + const void *p; + uintptr_t u; + } pu; + pu.p = ptr; + return pu.u; +} + +static INLINE void * +uintptr_to_pointer( uintptr_t u ) +{ + union { + void *p; + uintptr_t u; + } pu; + pu.u = u; + return pu.p; +} + +/** + * Return a pointer aligned to next multiple of N bytes. 
+ */ +static INLINE void * +align_pointer( const void *unaligned, uintptr_t alignment ) +{ + uintptr_t aligned = (pointer_to_uintptr( unaligned ) + alignment - 1) & ~(alignment - 1); + return uintptr_to_pointer( aligned ); +} + + +/** + * Return a pointer aligned to next multiple of 16 bytes. + */ +static INLINE void * +align16( void *unaligned ) +{ + return align_pointer( unaligned, 16 ); +} + + + +#ifdef __cplusplus +} +#endif + +#endif /* U_POINTER_H */ diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c new file mode 100644 index 0000000000..f5619ef791 --- /dev/null +++ b/src/gallium/auxiliary/util/u_rect.c @@ -0,0 +1,328 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * Rectangle-related helper functions. + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_format.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "util/u_rect.h" + + +/** + * Copy 2D rect from one place to another. + * Position and sizes are in pixels. + * src_pitch may be negative to do vertical flip of pixels from source. + */ +void +pipe_copy_rect(ubyte * dst, + const struct pipe_format_block *block, + unsigned dst_stride, + unsigned dst_x, + unsigned dst_y, + unsigned width, + unsigned height, + const ubyte * src, + int src_stride, + unsigned src_x, + int src_y) +{ + unsigned i; + int src_stride_pos = src_stride < 0 ? -src_stride : src_stride; + + assert(block->size > 0); + assert(block->width > 0); + assert(block->height > 0); + assert(src_x >= 0); + assert(src_y >= 0); + assert(dst_x >= 0); + assert(dst_y >= 0); + + dst_x /= block->width; + dst_y /= block->height; + width = (width + block->width - 1)/block->width; + height = (height + block->height - 1)/block->height; + src_x /= block->width; + src_y /= block->height; + + dst += dst_x * block->size; + src += src_x * block->size; + dst += dst_y * dst_stride; + src += src_y * src_stride_pos; + width *= block->size; + + if (width == dst_stride && width == src_stride) + memcpy(dst, src, height * width); + else { + for (i = 0; i < height; i++) { + memcpy(dst, src, width); + dst += dst_stride; + src += src_stride; + } + } +} + +void +pipe_fill_rect(ubyte * dst, + const struct pipe_format_block *block, + unsigned dst_stride, + unsigned dst_x, + unsigned dst_y, + unsigned width, + unsigned height, + uint32_t value) +{ + unsigned i, j; + unsigned width_size; + + assert(block->size > 0); + assert(block->width > 0); + assert(block->height > 0); + assert(dst_x >= 0); + assert(dst_y >= 0); + + dst_x /= block->width; + dst_y /= block->height; + width = (width + block->width - 1)/block->width; + height = 
(height + block->height - 1)/block->height; + + dst += dst_x * block->size; + dst += dst_y * dst_stride; + width_size = width * block->size; + + switch (block->size) { + case 1: + if(dst_stride == width_size) + memset(dst, (ubyte) value, height * width_size); + else { + for (i = 0; i < height; i++) { + memset(dst, (ubyte) value, width_size); + dst += dst_stride; + } + } + break; + case 2: + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) + *row++ = (uint16_t) value; + dst += dst_stride; + } + break; + case 4: + for (i = 0; i < height; i++) { + uint32_t *row = (uint32_t *)dst; + for (j = 0; j < width; j++) + *row++ = value; + dst += dst_stride; + } + break; + default: + assert(0); + break; + } +} + + + +/** + * Fallback function for pipe->surface_copy(). + * Note: (X,Y)=(0,0) is always the upper-left corner. + * if do_flip, flip the image vertically on its way from src rect to dst rect. + * XXX should probably put this in new u_surface.c file... 
+ */ +void +util_surface_copy(struct pipe_context *pipe, + boolean do_flip, + struct pipe_surface *dst, + unsigned dst_x, unsigned dst_y, + struct pipe_surface *src, + unsigned src_x, unsigned src_y, + unsigned w, unsigned h) +{ + struct pipe_screen *screen = pipe->screen; + struct pipe_surface *new_src = NULL, *new_dst = NULL; + void *dst_map; + const void *src_map; + + assert(dst->block.size == src->block.size); + assert(dst->block.width == src->block.width); + assert(dst->block.height == src->block.height); + + if ((src->usage & PIPE_BUFFER_USAGE_CPU_READ) == 0) { + /* Need to create new src surface which is CPU readable */ + assert(src->texture); + if (!src->texture) + return; + new_src = screen->get_tex_surface(screen, + src->texture, + src->face, + src->level, + src->zslice, + PIPE_BUFFER_USAGE_CPU_READ); + src = new_src; + } + + if ((dst->usage & PIPE_BUFFER_USAGE_CPU_WRITE) == 0) { + /* Need to create new dst surface which is CPU writable */ + assert(dst->texture); + if (!dst->texture) + return; + new_dst = screen->get_tex_surface(screen, + dst->texture, + dst->face, + dst->level, + dst->zslice, + PIPE_BUFFER_USAGE_CPU_WRITE); + dst = new_dst; + } + + src_map = pipe->screen->surface_map(screen, + src, PIPE_BUFFER_USAGE_CPU_READ); + dst_map = pipe->screen->surface_map(screen, + dst, PIPE_BUFFER_USAGE_CPU_WRITE); + + assert(src_map); + assert(dst_map); + + if (src_map && dst_map) { + /* If do_flip, invert src_y position and pass negative src stride */ + pipe_copy_rect(dst_map, + &dst->block, + dst->stride, + dst_x, dst_y, + w, h, + src_map, + do_flip ? 
-(int) src->stride : src->stride, + src_x, src_y); + } + + pipe->screen->surface_unmap(pipe->screen, src); + pipe->screen->surface_unmap(pipe->screen, dst); + + if (new_src) + screen->tex_surface_release(screen, &new_src); + if (new_dst) + screen->tex_surface_release(screen, &new_dst); +} + + + +static void * +get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) +{ + return (char *)dst_map + + y / dst->block.height * dst->stride + + x / dst->block.width * dst->block.size; +} + + +#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8)) + + +/** + * Fallback for pipe->surface_fill() function. + * XXX should probably put this in new u_surface.c file... + */ +void +util_surface_fill(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value) +{ + struct pipe_screen *screen = pipe->screen; + struct pipe_surface *new_dst = NULL; + void *dst_map; + + if ((dst->usage & PIPE_BUFFER_USAGE_CPU_WRITE) == 0) { + /* Need to create new dst surface which is CPU writable */ + assert(dst->texture); + if (!dst->texture) + return; + new_dst = screen->get_tex_surface(screen, + dst->texture, + dst->face, + dst->level, + dst->zslice, + PIPE_BUFFER_USAGE_CPU_WRITE); + dst = new_dst; + } + + dst_map = pipe->screen->surface_map(screen, + dst, PIPE_BUFFER_USAGE_CPU_WRITE); + + assert(dst_map); + + if (dst_map) { + assert(dst->stride > 0); + + switch (dst->block.size) { + case 1: + case 2: + case 4: + pipe_fill_rect(dst_map, &dst->block, dst->stride, + dstx, dsty, width, height, value); + break; + case 8: + { + /* expand the 4-byte clear value to an 8-byte value */ + ushort *row = (ushort *) get_pointer(dst, dst_map, dstx, dsty); + ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff); + ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff); + ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff); + ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff); + unsigned i, j; + val0 = (val0 << 8) | val0; + val1 = 
(val1 << 8) | val1; + val2 = (val2 << 8) | val2; + val3 = (val3 << 8) | val3; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + row[j*4+0] = val0; + row[j*4+1] = val1; + row[j*4+2] = val2; + row[j*4+3] = val3; + } + row += dst->stride/2; + } + } + break; + default: + assert(0); + break; + } + } + + pipe->screen->surface_unmap(pipe->screen, dst); + + if (new_dst) + screen->tex_surface_release(screen, &new_dst); +} diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h new file mode 100644 index 0000000000..59e842e16d --- /dev/null +++ b/src/gallium/auxiliary/util/u_rect.h @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +/** + * Pipe copy/fill rect helpers. + */ + + +#ifndef U_RECT_H +#define U_RECT_H + + +#include "pipe/p_format.h" + +struct pipe_context; +struct pipe_surface; + + +extern void +pipe_copy_rect(ubyte * dst, const struct pipe_format_block *block, + unsigned dst_stride, unsigned dst_x, unsigned dst_y, + unsigned width, unsigned height, const ubyte * src, + int src_stride, unsigned src_x, int src_y); + +extern void +pipe_fill_rect(ubyte * dst, const struct pipe_format_block *block, + unsigned dst_stride, unsigned dst_x, unsigned dst_y, + unsigned width, unsigned height, uint32_t value); + + +extern void +util_surface_copy(struct pipe_context *pipe, + boolean do_flip, + struct pipe_surface *dst, + unsigned dst_x, unsigned dst_y, + struct pipe_surface *src, + unsigned src_x, unsigned src_y, + unsigned w, unsigned h); + +extern void +util_surface_fill(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value); + + +#endif /* U_RECT_H */ diff --git a/src/gallium/auxiliary/util/u_simple_list.h b/src/gallium/auxiliary/util/u_simple_list.h new file mode 100644 index 0000000000..f5f43b0faa --- /dev/null +++ b/src/gallium/auxiliary/util/u_simple_list.h @@ -0,0 +1,197 @@ +/** + * \file simple_list.h + * Simple macros for type-safe, intrusive lists. + * + * Intended to work with a list sentinal which is created as an empty + * list. Insert & delete are O(1). + * + * \author + * (C) 1997, Keith Whitwell + */ + +/* + * Mesa 3-D graphics library + * Version: 3.5 + * + * Copyright (C) 1999-2001 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#ifndef _U_SIMPLE_LIST_H_ +#define _U_SIMPLE_LIST_H_ + +/** + * Remove an element from list. + * + * \param elem element to remove. + */ +#define remove_from_list(elem) \ +do { \ + (elem)->next->prev = (elem)->prev; \ + (elem)->prev->next = (elem)->next; \ +} while (0) + +/** + * Insert an element to the list head. + * + * \param list list. + * \param elem element to insert. + */ +#define insert_at_head(list, elem) \ +do { \ + (elem)->prev = list; \ + (elem)->next = (list)->next; \ + (list)->next->prev = elem; \ + (list)->next = elem; \ +} while(0) + +/** + * Insert an element to the list tail. + * + * \param list list. + * \param elem element to insert. + */ +#define insert_at_tail(list, elem) \ +do { \ + (elem)->next = list; \ + (elem)->prev = (list)->prev; \ + (list)->prev->next = elem; \ + (list)->prev = elem; \ +} while(0) + +/** + * Move an element to the list head. + * + * \param list list. 
+ * \param elem element to move. + */ +#define move_to_head(list, elem) \ +do { \ + remove_from_list(elem); \ + insert_at_head(list, elem); \ +} while (0) + +/** + * Move an element to the list tail. + * + * \param list list. + * \param elem element to move. + */ +#define move_to_tail(list, elem) \ +do { \ + remove_from_list(elem); \ + insert_at_tail(list, elem); \ +} while (0) + +/** + * Make a empty list empty. + * + * \param sentinal list (sentinal element). + */ +#define make_empty_list(sentinal) \ +do { \ + (sentinal)->next = sentinal; \ + (sentinal)->prev = sentinal; \ +} while (0) + +/** + * Get list first element. + * + * \param list list. + * + * \return pointer to first element. + */ +#define first_elem(list) ((list)->next) + +/** + * Get list last element. + * + * \param list list. + * + * \return pointer to last element. + */ +#define last_elem(list) ((list)->prev) + +/** + * Get next element. + * + * \param elem element. + * + * \return pointer to next element. + */ +#define next_elem(elem) ((elem)->next) + +/** + * Get previous element. + * + * \param elem element. + * + * \return pointer to previous element. + */ +#define prev_elem(elem) ((elem)->prev) + +/** + * Test whether element is at end of the list. + * + * \param list list. + * \param elem element. + * + * \return non-zero if element is at end of list, or zero otherwise. + */ +#define at_end(list, elem) ((elem) == (list)) + +/** + * Test if a list is empty. + * + * \param list list. + * + * \return non-zero if list empty, or zero otherwise. + */ +#define is_empty_list(list) ((list)->next == (list)) + +/** + * Walk through the elements of a list. + * + * \param ptr pointer to the current element. + * \param list list. + * + * \note It should be followed by a { } block or a single statement, as in a \c + * for loop. + */ +#define foreach(ptr, list) \ + for( ptr=(list)->next ; ptr!=list ; ptr=(ptr)->next ) + +/** + * Walk through the elements of a list. 
+ * + * Same as #foreach but lets you unlink the current value during a list + * traversal. Useful for freeing a list, element by element. + * + * \param ptr pointer to the current element. + * \param t temporary pointer. + * \param list list. + * + * \note It should be followed by a { } block or a single statement, as in a \c + * for loop. + */ +#define foreach_s(ptr, t, list) \ + for(ptr=(list)->next,t=(ptr)->next; list != ptr; ptr=t, t=(t)->next) + +#endif /* _U_SIMPLE_LIST_H_ */ diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c new file mode 100644 index 0000000000..f06d13c2c4 --- /dev/null +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -0,0 +1,361 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Simple vertex/fragment shader generators. + * + * @author Brian Paul + */ + + +#include "pipe/p_context.h" +#include "pipe/p_debug.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_shader_tokens.h" + +#include "util/u_memory.h" +#include "util/u_simple_shaders.h" + +#include "tgsi/tgsi_build.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" + + + +/** + * Make simple vertex pass-through shader. + */ +void * +util_make_vertex_passthrough_shader(struct pipe_context *pipe, + uint num_attribs, + const uint *semantic_names, + const uint *semantic_indexes, + struct pipe_shader_state *shader) + +{ + uint maxTokens = 100; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + const uint procType = TGSI_PROCESSOR_VERTEX; + uint ti, i; + + tokens = (struct tgsi_token *) MALLOC(maxTokens * sizeof(tokens[0])); + + /* shader header + */ + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + + processor = (struct tgsi_processor *) &tokens[2]; + *processor = tgsi_build_processor( procType, header ); + + ti = 3; + + /* declare inputs */ + for (i = 0; i < num_attribs; i++) { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = semantic_names[i]; + decl.Semantic.SemanticIndex = semantic_indexes[i]; + + decl.DeclarationRange.First = + 
decl.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + } + + /* declare outputs */ + for (i = 0; i < num_attribs; i++) { + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = semantic_names[i]; + decl.Semantic.SemanticIndex = semantic_indexes[i]; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = i; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + } + + /* emit MOV instructions */ + for (i = 0; i < num_attribs; i++) { + /* MOVE out[i], in[i]; */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = i; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + } + + /* END instruction */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + +#if 0 /*debug*/ + tgsi_dump(tokens, 0); +#endif + + shader->tokens = tokens; + /*shader->num_tokens = ti;*/ + + return pipe->create_vs_state(pipe, shader); +} + + + + +/** + * Make simple fragment texture shader: + * TEX OUT[0], IN[0], SAMP[0], 2D; + * END; + */ +void * +util_make_fragment_tex_shader(struct pipe_context *pipe, + struct pipe_shader_state *shader) +{ + uint maxTokens = 100; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + const uint procType = 
TGSI_PROCESSOR_FRAGMENT; + uint ti; + + tokens = (struct tgsi_token *) MALLOC(maxTokens * sizeof(tokens[0])); + + /* shader header + */ + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + + processor = (struct tgsi_processor *) &tokens[2]; + *processor = tgsi_build_processor( procType, header ); + + ti = 3; + + /* declare TEX[0] input */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + /* XXX this could be linear... */ + decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + /* declare color[0] output */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; + decl.Semantic.SemanticIndex = 0; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + /* declare sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + /* TEX instruction */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + 
inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + + /* END instruction */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + +#if 0 /*debug*/ + tgsi_dump(tokens, 0); +#endif + + shader->tokens = tokens; + /*shader->num_tokens = ti;*/ + + return pipe->create_fs_state(pipe, shader); +} + + + + + +/** + * Make simple fragment color pass-through shader. + */ +void * +util_make_fragment_passthrough_shader(struct pipe_context *pipe, + struct pipe_shader_state *shader) +{ + uint maxTokens = 40; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + const uint procType = TGSI_PROCESSOR_FRAGMENT; + uint ti; + + tokens = (struct tgsi_token *) MALLOC(maxTokens * sizeof(tokens[0])); + + /* shader header + */ + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + + processor = (struct tgsi_processor *) &tokens[2]; + *processor = tgsi_build_processor( procType, header ); + + ti = 3; + + /* declare input */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; + decl.Semantic.SemanticIndex = 0; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + /* declare output */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + 
decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; + decl.Semantic.SemanticIndex = 0; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + + /* MOVE out[0], in[0]; */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + + /* END instruction */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + + assert(ti < maxTokens); + +#if 0 /*debug*/ + tgsi_dump(tokens, 0); +#endif + + shader->tokens = tokens; + /*shader->num_tokens = ti;*/ + + return pipe->create_fs_state(pipe, shader); +} + diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h new file mode 100644 index 0000000000..8ca4977d71 --- /dev/null +++ b/src/gallium/auxiliary/util/u_simple_shaders.h @@ -0,0 +1,68 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef U_SIMPLE_SHADERS_H +#define U_SIMPLE_SHADERS_H + + +#include "pipe/p_compiler.h" + + +struct pipe_context; +struct pipe_shader_state; + + +#ifdef __cplusplus +extern "C" { +#endif + + +extern void * +util_make_vertex_passthrough_shader(struct pipe_context *pipe, + uint num_attribs, + const uint *semantic_names, + const uint *semantic_indexes, + struct pipe_shader_state *shader); + + +extern void * +util_make_fragment_tex_shader(struct pipe_context *pipe, + struct pipe_shader_state *shader); + + +extern void * +util_make_fragment_passthrough_shader(struct pipe_context *pipe, + struct pipe_shader_state *shader); + + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c new file mode 100644 index 0000000000..0d54299b28 --- /dev/null +++ b/src/gallium/auxiliary/util/u_snprintf.c @@ -0,0 +1,1497 @@ +/* + * Copyright (c) 1995 Patrick Powell. + * + * This code is based on code written by Patrick Powell <papowell@astart.com>. + * It may be used for any purpose as long as this notice remains intact on all + * source code distributions. + */ + +/* + * Copyright (c) 2008 Holger Weiss. + * + * This version of the code is maintained by Holger Weiss <holger@jhweiss.de>. + * My changes to the code may freely be used, modified and/or redistributed for + * any purpose. It would be nice if additions and fixes to this file (including + * trivial code cleanups) would be sent back in order to let me include them in + * the version available at <http://www.jhweiss.de/software/snprintf.html>. + * However, this is not a requirement for using or redistributing (possibly + * modified) versions of this file, nor is leaving this notice intact mandatory. 
+ */ + +/* + * History + * + * 2008-01-20 Holger Weiss <holger@jhweiss.de> for C99-snprintf 1.1: + * + * Fixed the detection of infinite floating point values on IRIX (and + * possibly other systems) and applied another few minor cleanups. + * + * 2008-01-06 Holger Weiss <holger@jhweiss.de> for C99-snprintf 1.0: + * + * Added a lot of new features, fixed many bugs, and incorporated various + * improvements done by Andrew Tridgell <tridge@samba.org>, Russ Allbery + * <rra@stanford.edu>, Hrvoje Niksic <hniksic@xemacs.org>, Damien Miller + * <djm@mindrot.org>, and others for the Samba, INN, Wget, and OpenSSH + * projects. The additions include: support the "e", "E", "g", "G", and + * "F" conversion specifiers (and use conversion style "f" or "F" for the + * still unsupported "a" and "A" specifiers); support the "hh", "ll", "j", + * "t", and "z" length modifiers; support the "#" flag and the (non-C99) + * "'" flag; use localeconv(3) (if available) to get both the current + * locale's decimal point character and the separator between groups of + * digits; fix the handling of various corner cases of field width and + * precision specifications; fix various floating point conversion bugs; + * handle infinite and NaN floating point values; don't attempt to write to + * the output buffer (which may be NULL) if a size of zero was specified; + * check for integer overflow of the field width, precision, and return + * values and during the floating point conversion; use the OUTCHAR() macro + * instead of a function for better performance; provide asprintf(3) and + * vasprintf(3) functions; add new test cases. The replacement functions + * have been renamed to use an "rpl_" prefix, the function calls in the + * main project (and in this file) must be redefined accordingly for each + * replacement function which is needed (by using Autoconf or other means). + * Various other minor improvements have been applied and the coding style + * was cleaned up for consistency. 
+ * + * 2007-07-23 Holger Weiss <holger@jhweiss.de> for Mutt 1.5.13: + * + * C99 compliant snprintf(3) and vsnprintf(3) functions return the number + * of characters that would have been written to a sufficiently sized + * buffer (excluding the '\0'). The original code simply returned the + * length of the resulting output string, so that's been fixed. + * + * 1998-03-05 Michael Elkins <me@mutt.org> for Mutt 0.90.8: + * + * The original code assumed that both snprintf(3) and vsnprintf(3) were + * missing. Some systems only have snprintf(3) but not vsnprintf(3), so + * the code is now broken down under HAVE_SNPRINTF and HAVE_VSNPRINTF. + * + * 1998-01-27 Thomas Roessler <roessler@does-not-exist.org> for Mutt 0.89i: + * + * The PGP code was using unsigned hexadecimal formats. Unfortunately, + * unsigned formats simply didn't work. + * + * 1997-10-22 Brandon Long <blong@fiction.net> for Mutt 0.87.1: + * + * Ok, added some minimal floating point support, which means this probably + * requires libm on most operating systems. Don't yet support the exponent + * (e,E) and sigfig (g,G). Also, fmtint() was pretty badly broken, it just + * wasn't being exercised in ways which showed it, so that's been fixed. + * Also, formatted the code to Mutt conventions, and removed dead code left + * over from the original. Also, there is now a builtin-test, run with: + * gcc -DTEST_SNPRINTF -o snprintf snprintf.c -lm && ./snprintf + * + * 1996-09-15 Brandon Long <blong@fiction.net> for Mutt 0.43: + * + * This was ugly. It is still ugly. I opted out of floating point + * numbers, but the formatter understands just about everything from the + * normal C string format, at least as far as I can tell from the Solaris + * 2.5 printf(3S) man page. + */ + +/* + * ToDo + * + * - Add wide character support. + * - Add support for "%a" and "%A" conversions. + * - Create test routines which predefine the expected results.
Our test cases + * usually expose bugs in system implementations rather than in ours :-) + */ + +/* + * Usage + * + * 1) The following preprocessor macros should be defined to 1 if the feature or + * file in question is available on the target system (by using Autoconf or + * other means), though basic functionality should be available as long as + * HAVE_STDARG_H and HAVE_STDLIB_H are defined correctly: + * + * HAVE_VSNPRINTF + * HAVE_SNPRINTF + * HAVE_VASPRINTF + * HAVE_ASPRINTF + * HAVE_STDARG_H + * HAVE_STDDEF_H + * HAVE_STDINT_H + * HAVE_STDLIB_H + * HAVE_INTTYPES_H + * HAVE_LOCALE_H + * HAVE_LOCALECONV + * HAVE_LCONV_DECIMAL_POINT + * HAVE_LCONV_THOUSANDS_SEP + * HAVE_LONG_DOUBLE + * HAVE_LONG_LONG_INT + * HAVE_UNSIGNED_LONG_LONG_INT + * HAVE_INTMAX_T + * HAVE_UINTMAX_T + * HAVE_UINTPTR_T + * HAVE_PTRDIFF_T + * HAVE_VA_COPY + * HAVE___VA_COPY + * + * 2) The calls to the functions which should be replaced must be redefined + * throughout the project files (by using Autoconf or other means): + * + * #define vsnprintf rpl_vsnprintf + * #define snprintf rpl_snprintf + * #define vasprintf rpl_vasprintf + * #define asprintf rpl_asprintf + * + * 3) The required replacement functions should be declared in some header file + * included throughout the project files: + * + * #if HAVE_CONFIG_H + * #include <config.h> + * #endif + * #if HAVE_STDARG_H + * #include <stdarg.h> + * #if !HAVE_VSNPRINTF + * int rpl_vsnprintf(char *, size_t, const char *, va_list); + * #endif + * #if !HAVE_SNPRINTF + * int rpl_snprintf(char *, size_t, const char *, ...); + * #endif + * #if !HAVE_VASPRINTF + * int rpl_vasprintf(char **, const char *, va_list); + * #endif + * #if !HAVE_ASPRINTF + * int rpl_asprintf(char **, const char *, ...); + * #endif + * #endif + * + * Autoconf macros for handling step 1 and step 2 are available at + * <http://www.jhweiss.de/software/snprintf.html>. 
+ */ + +#include "pipe/p_config.h" + +#if HAVE_CONFIG_H +#include <config.h> +#else +#ifdef WIN32 +#define vsnprintf util_vsnprintf +#define snprintf util_snprintf +#define HAVE_VSNPRINTF 0 +#define HAVE_SNPRINTF 0 +#define HAVE_VASPRINTF 1 /* not needed */ +#define HAVE_ASPRINTF 1 /* not needed */ +#define HAVE_STDARG_H 1 +#define HAVE_STDDEF_H 1 +#define HAVE_STDINT_H 0 +#define HAVE_STDLIB_H 1 +#define HAVE_INTTYPES_H 0 +#define HAVE_LOCALE_H 0 +#define HAVE_LOCALECONV 0 +#define HAVE_LCONV_DECIMAL_POINT 0 +#define HAVE_LCONV_THOUSANDS_SEP 0 +#define HAVE_LONG_DOUBLE 0 +#define HAVE_LONG_LONG_INT 1 +#define HAVE_UNSIGNED_LONG_LONG_INT 1 +#define HAVE_INTMAX_T 0 +#define HAVE_UINTMAX_T 0 +#define HAVE_UINTPTR_T 1 +#define HAVE_PTRDIFF_T 1 +#define HAVE_VA_COPY 0 +#define HAVE___VA_COPY 0 +#else +#define HAVE_VSNPRINTF 1 +#define HAVE_SNPRINTF 1 +#define HAVE_VASPRINTF 1 +#define HAVE_ASPRINTF 1 +#endif +#endif /* HAVE_CONFIG_H */ + +#if !HAVE_SNPRINTF || !HAVE_VSNPRINTF || !HAVE_ASPRINTF || !HAVE_VASPRINTF +#include <stdio.h> /* For NULL, size_t, vsnprintf(3), and vasprintf(3). */ +#ifdef VA_START +#undef VA_START +#endif /* defined(VA_START) */ +#ifdef VA_SHIFT +#undef VA_SHIFT +#endif /* defined(VA_SHIFT) */ +#if HAVE_STDARG_H +#include <stdarg.h> +#define VA_START(ap, last) va_start(ap, last) +#define VA_SHIFT(ap, value, type) /* No-op for ANSI C. */ +#else /* Assume <varargs.h> is available. */ +#include <varargs.h> +#define VA_START(ap, last) va_start(ap) /* "last" is ignored. */ +#define VA_SHIFT(ap, value, type) value = va_arg(ap, type) +#endif /* HAVE_STDARG_H */ + +#if !HAVE_VASPRINTF +#if HAVE_STDLIB_H +#include <stdlib.h> /* For malloc(3). 
*/ +#endif /* HAVE_STDLIB_H */ +#ifdef VA_COPY +#undef VA_COPY +#endif /* defined(VA_COPY) */ +#ifdef VA_END_COPY +#undef VA_END_COPY +#endif /* defined(VA_END_COPY) */ +#if HAVE_VA_COPY +#define VA_COPY(dest, src) va_copy(dest, src) +#define VA_END_COPY(ap) va_end(ap) +#elif HAVE___VA_COPY +#define VA_COPY(dest, src) __va_copy(dest, src) +#define VA_END_COPY(ap) va_end(ap) +#else +#define VA_COPY(dest, src) (void)mymemcpy(&dest, &src, sizeof(va_list)) +#define VA_END_COPY(ap) /* No-op. */ +#define NEED_MYMEMCPY 1 +static void *mymemcpy(void *, void *, size_t); +#endif /* HAVE_VA_COPY */ +#endif /* !HAVE_VASPRINTF */ + +#if !HAVE_VSNPRINTF +#include <limits.h> /* For *_MAX. */ +#if HAVE_INTTYPES_H +#include <inttypes.h> /* For intmax_t (if not defined in <stdint.h>). */ +#endif /* HAVE_INTTYPES_H */ +#if HAVE_LOCALE_H +#include <locale.h> /* For localeconv(3). */ +#endif /* HAVE_LOCALE_H */ +#if HAVE_STDDEF_H +#include <stddef.h> /* For ptrdiff_t. */ +#endif /* HAVE_STDDEF_H */ +#if HAVE_STDINT_H +#include <stdint.h> /* For intmax_t. */ +#endif /* HAVE_STDINT_H */ + +/* Support for unsigned long long int. We may also need ULLONG_MAX. */ +#ifndef ULONG_MAX /* We may need ULONG_MAX as a fallback. */ +#ifdef UINT_MAX +#define ULONG_MAX UINT_MAX +#else +#define ULONG_MAX INT_MAX +#endif /* defined(UINT_MAX) */ +#endif /* !defined(ULONG_MAX) */ +#ifdef ULLONG +#undef ULLONG +#endif /* defined(ULLONG) */ +#if HAVE_UNSIGNED_LONG_LONG_INT +#define ULLONG unsigned long long int +#ifndef ULLONG_MAX +#define ULLONG_MAX ULONG_MAX +#endif /* !defined(ULLONG_MAX) */ +#else +#define ULLONG unsigned long int +#ifdef ULLONG_MAX +#undef ULLONG_MAX +#endif /* defined(ULLONG_MAX) */ +#define ULLONG_MAX ULONG_MAX +#endif /* HAVE_LONG_LONG_INT */ + +/* Support for uintmax_t. We also need UINTMAX_MAX. 
*/ +#ifdef UINTMAX_T +#undef UINTMAX_T +#endif /* defined(UINTMAX_T) */ +#if HAVE_UINTMAX_T || defined(uintmax_t) +#define UINTMAX_T uintmax_t +#ifndef UINTMAX_MAX +#define UINTMAX_MAX ULLONG_MAX +#endif /* !defined(UINTMAX_MAX) */ +#else +#define UINTMAX_T ULLONG +#ifdef UINTMAX_MAX +#undef UINTMAX_MAX +#endif /* defined(UINTMAX_MAX) */ +#define UINTMAX_MAX ULLONG_MAX +#endif /* HAVE_UINTMAX_T || defined(uintmax_t) */ + +/* Support for long double. */ +#ifndef LDOUBLE +#if HAVE_LONG_DOUBLE +#define LDOUBLE long double +#else +#define LDOUBLE double +#endif /* HAVE_LONG_DOUBLE */ +#endif /* !defined(LDOUBLE) */ + +/* Support for long long int. */ +#ifndef LLONG +#if HAVE_LONG_LONG_INT +#define LLONG long long int +#else +#define LLONG long int +#endif /* HAVE_LONG_LONG_INT */ +#endif /* !defined(LLONG) */ + +/* Support for intmax_t. */ +#ifndef INTMAX_T +#if HAVE_INTMAX_T || defined(intmax_t) +#define INTMAX_T intmax_t +#else +#define INTMAX_T LLONG +#endif /* HAVE_INTMAX_T || defined(intmax_t) */ +#endif /* !defined(INTMAX_T) */ + +/* Support for uintptr_t. */ +#ifndef UINTPTR_T +#if HAVE_UINTPTR_T || defined(uintptr_t) +#define UINTPTR_T uintptr_t +#else +#define UINTPTR_T unsigned long int +#endif /* HAVE_UINTPTR_T || defined(uintptr_t) */ +#endif /* !defined(UINTPTR_T) */ + +/* WinCE5.0 does not have uintptr_t defined */ +#if (_WIN32_WCE < 600) +#ifdef UINTPTR_T +#undef UINTPTR_T +#endif +#define UINTPTR_T unsigned long int +#endif + + +/* Support for ptrdiff_t. */ +#ifndef PTRDIFF_T +#if HAVE_PTRDIFF_T || defined(ptrdiff_t) +#define PTRDIFF_T ptrdiff_t +#else +#define PTRDIFF_T long int +#endif /* HAVE_PTRDIFF_T || defined(ptrdiff_t) */ +#endif /* !defined(PTRDIFF_T) */ + +/* + * We need an unsigned integer type corresponding to ptrdiff_t (cf. C99: + * 7.19.6.1, 7). However, we'll simply use PTRDIFF_T and convert it to an + * unsigned type if necessary. This should work just fine in practice. 
+ */ +#ifndef UPTRDIFF_T +#define UPTRDIFF_T PTRDIFF_T +#endif /* !defined(UPTRDIFF_T) */ + +/* + * We need a signed integer type corresponding to size_t (cf. C99: 7.19.6.1, 7). + * However, we'll simply use size_t and convert it to a signed type if + * necessary. This should work just fine in practice. + */ +#ifndef SSIZE_T +#define SSIZE_T size_t +#endif /* !defined(SSIZE_T) */ + +/* Either ERANGE or E2BIG should be available everywhere. */ +#ifndef ERANGE +#define ERANGE E2BIG +#endif /* !defined(ERANGE) */ +#ifndef EOVERFLOW +#define EOVERFLOW ERANGE +#endif /* !defined(EOVERFLOW) */ + +/* + * Buffer size to hold the octal string representation of UINT128_MAX without + * nul-termination ("3777777777777777777777777777777777777777777"). + */ +#ifdef MAX_CONVERT_LENGTH +#undef MAX_CONVERT_LENGTH +#endif /* defined(MAX_CONVERT_LENGTH) */ +#define MAX_CONVERT_LENGTH 43 + +/* Format read states. */ +#define PRINT_S_DEFAULT 0 +#define PRINT_S_FLAGS 1 +#define PRINT_S_WIDTH 2 +#define PRINT_S_DOT 3 +#define PRINT_S_PRECISION 4 +#define PRINT_S_MOD 5 +#define PRINT_S_CONV 6 + +/* Format flags. */ +#define PRINT_F_MINUS (1 << 0) +#define PRINT_F_PLUS (1 << 1) +#define PRINT_F_SPACE (1 << 2) +#define PRINT_F_NUM (1 << 3) +#define PRINT_F_ZERO (1 << 4) +#define PRINT_F_QUOTE (1 << 5) +#define PRINT_F_UP (1 << 6) +#define PRINT_F_UNSIGNED (1 << 7) +#define PRINT_F_TYPE_G (1 << 8) +#define PRINT_F_TYPE_E (1 << 9) + +/* Conversion flags. */ +#define PRINT_C_CHAR 1 +#define PRINT_C_SHORT 2 +#define PRINT_C_LONG 3 +#define PRINT_C_LLONG 4 +#define PRINT_C_LDOUBLE 5 +#define PRINT_C_SIZE 6 +#define PRINT_C_PTRDIFF 7 +#define PRINT_C_INTMAX 8 + +#ifndef MAX +#define MAX(x, y) ((x >= y) ? 
x : y) +#endif /* !defined(MAX) */ +#ifndef CHARTOINT +#define CHARTOINT(ch) (ch - '0') +#endif /* !defined(CHARTOINT) */ +#ifndef ISDIGIT +#define ISDIGIT(ch) ('0' <= (unsigned char)ch && (unsigned char)ch <= '9') +#endif /* !defined(ISDIGIT) */ +#ifndef ISNAN +#define ISNAN(x) (x != x) +#endif /* !defined(ISNAN) */ +#ifndef ISINF +#define ISINF(x) (x != 0.0 && x + x == x) +#endif /* !defined(ISINF) */ + +#ifdef OUTCHAR +#undef OUTCHAR +#endif /* defined(OUTCHAR) */ +#define OUTCHAR(str, len, size, ch) \ +do { \ + if (len + 1 < size) \ + str[len] = ch; \ + (len)++; \ +} while (/* CONSTCOND */ 0) + +static void fmtstr(char *, size_t *, size_t, const char *, int, int, int); +static void fmtint(char *, size_t *, size_t, INTMAX_T, int, int, int, int); +static void fmtflt(char *, size_t *, size_t, LDOUBLE, int, int, int, int *); +static void printsep(char *, size_t *, size_t); +static int getnumsep(int); +static int getexponent(LDOUBLE); +static int convert(UINTMAX_T, char *, size_t, int, int); +static UINTMAX_T cast(LDOUBLE); +static UINTMAX_T myround(LDOUBLE); +static LDOUBLE mypow10(int); + +int +util_vsnprintf(char *str, size_t size, const char *format, va_list args) +{ + LDOUBLE fvalue; + INTMAX_T value; + unsigned char cvalue; + const char *strvalue; + INTMAX_T *intmaxptr; + PTRDIFF_T *ptrdiffptr; + SSIZE_T *sizeptr; + LLONG *llongptr; + long int *longptr; + int *intptr; + short int *shortptr; + signed char *charptr; + size_t len = 0; + int overflow = 0; + int base = 0; + int cflags = 0; + int flags = 0; + int width = 0; + int precision = -1; + int state = PRINT_S_DEFAULT; + char ch = *format++; + + /* + * C99 says: "If `n' is zero, nothing is written, and `s' may be a null + * pointer." (7.19.6.5, 2) We're forgiving and allow a NULL pointer + * even if a size larger than zero was specified. At least NetBSD's + * snprintf(3) does the same, as well as other versions of this file. 
+ * (Though some of these versions will write to a non-NULL buffer even + * if a size of zero was specified, which violates the standard.) + */ + if (str == NULL && size != 0) + size = 0; + + while (ch != '\0') + switch (state) { + case PRINT_S_DEFAULT: + if (ch == '%') + state = PRINT_S_FLAGS; + else + OUTCHAR(str, len, size, ch); + ch = *format++; + break; + case PRINT_S_FLAGS: + switch (ch) { + case '-': + flags |= PRINT_F_MINUS; + ch = *format++; + break; + case '+': + flags |= PRINT_F_PLUS; + ch = *format++; + break; + case ' ': + flags |= PRINT_F_SPACE; + ch = *format++; + break; + case '#': + flags |= PRINT_F_NUM; + ch = *format++; + break; + case '0': + flags |= PRINT_F_ZERO; + ch = *format++; + break; + case '\'': /* SUSv2 flag (not in C99). */ + flags |= PRINT_F_QUOTE; + ch = *format++; + break; + default: + state = PRINT_S_WIDTH; + break; + } + break; + case PRINT_S_WIDTH: + if (ISDIGIT(ch)) { + ch = CHARTOINT(ch); + if (width > (INT_MAX - ch) / 10) { + overflow = 1; + goto out; + } + width = 10 * width + ch; + ch = *format++; + } else if (ch == '*') { + /* + * C99 says: "A negative field width argument is + * taken as a `-' flag followed by a positive + * field width." (7.19.6.1, 5) + */ + if ((width = va_arg(args, int)) < 0) { + flags |= PRINT_F_MINUS; + width = -width; + } + ch = *format++; + state = PRINT_S_DOT; + } else + state = PRINT_S_DOT; + break; + case PRINT_S_DOT: + if (ch == '.') { + state = PRINT_S_PRECISION; + ch = *format++; + } else + state = PRINT_S_MOD; + break; + case PRINT_S_PRECISION: + if (precision == -1) + precision = 0; + if (ISDIGIT(ch)) { + ch = CHARTOINT(ch); + if (precision > (INT_MAX - ch) / 10) { + overflow = 1; + goto out; + } + precision = 10 * precision + ch; + ch = *format++; + } else if (ch == '*') { + /* + * C99 says: "A negative precision argument is + * taken as if the precision were omitted." 
+ * (7.19.6.1, 5) + */ + if ((precision = va_arg(args, int)) < 0) + precision = -1; + ch = *format++; + state = PRINT_S_MOD; + } else + state = PRINT_S_MOD; + break; + case PRINT_S_MOD: + switch (ch) { + case 'h': + ch = *format++; + if (ch == 'h') { /* It's a char. */ + ch = *format++; + cflags = PRINT_C_CHAR; + } else + cflags = PRINT_C_SHORT; + break; + case 'l': + ch = *format++; + if (ch == 'l') { /* It's a long long. */ + ch = *format++; + cflags = PRINT_C_LLONG; + } else + cflags = PRINT_C_LONG; + break; + case 'L': + cflags = PRINT_C_LDOUBLE; + ch = *format++; + break; + case 'j': + cflags = PRINT_C_INTMAX; + ch = *format++; + break; + case 't': + cflags = PRINT_C_PTRDIFF; + ch = *format++; + break; + case 'z': + cflags = PRINT_C_SIZE; + ch = *format++; + break; + } + state = PRINT_S_CONV; + break; + case PRINT_S_CONV: + switch (ch) { + case 'd': + /* FALLTHROUGH */ + case 'i': + switch (cflags) { + case PRINT_C_CHAR: + value = (signed char)va_arg(args, int); + break; + case PRINT_C_SHORT: + value = (short int)va_arg(args, int); + break; + case PRINT_C_LONG: + value = va_arg(args, long int); + break; + case PRINT_C_LLONG: + value = va_arg(args, LLONG); + break; + case PRINT_C_SIZE: + value = va_arg(args, SSIZE_T); + break; + case PRINT_C_INTMAX: + value = va_arg(args, INTMAX_T); + break; + case PRINT_C_PTRDIFF: + value = va_arg(args, PTRDIFF_T); + break; + default: + value = va_arg(args, int); + break; + } + fmtint(str, &len, size, value, 10, width, + precision, flags); + break; + case 'X': + flags |= PRINT_F_UP; + /* FALLTHROUGH */ + case 'x': + base = 16; + /* FALLTHROUGH */ + case 'o': + if (base == 0) + base = 8; + /* FALLTHROUGH */ + case 'u': + if (base == 0) + base = 10; + flags |= PRINT_F_UNSIGNED; + switch (cflags) { + case PRINT_C_CHAR: + value = (unsigned char)va_arg(args, + unsigned int); + break; + case PRINT_C_SHORT: + value = (unsigned short int)va_arg(args, + unsigned int); + break; + case PRINT_C_LONG: + value = va_arg(args, unsigned long 
int); + break; + case PRINT_C_LLONG: + value = va_arg(args, ULLONG); + break; + case PRINT_C_SIZE: + value = va_arg(args, size_t); + break; + case PRINT_C_INTMAX: + value = va_arg(args, UINTMAX_T); + break; + case PRINT_C_PTRDIFF: + value = va_arg(args, UPTRDIFF_T); + break; + default: + value = va_arg(args, unsigned int); + break; + } + fmtint(str, &len, size, value, base, width, + precision, flags); + break; + case 'A': + /* Not yet supported, we'll use "%F". */ + /* FALLTHROUGH */ + case 'F': + flags |= PRINT_F_UP; + case 'a': + /* Not yet supported, we'll use "%f". */ + /* FALLTHROUGH */ + case 'f': + if (cflags == PRINT_C_LDOUBLE) + fvalue = va_arg(args, LDOUBLE); + else + fvalue = va_arg(args, double); + fmtflt(str, &len, size, fvalue, width, + precision, flags, &overflow); + if (overflow) + goto out; + break; + case 'E': + flags |= PRINT_F_UP; + /* FALLTHROUGH */ + case 'e': + flags |= PRINT_F_TYPE_E; + if (cflags == PRINT_C_LDOUBLE) + fvalue = va_arg(args, LDOUBLE); + else + fvalue = va_arg(args, double); + fmtflt(str, &len, size, fvalue, width, + precision, flags, &overflow); + if (overflow) + goto out; + break; + case 'G': + flags |= PRINT_F_UP; + /* FALLTHROUGH */ + case 'g': + flags |= PRINT_F_TYPE_G; + if (cflags == PRINT_C_LDOUBLE) + fvalue = va_arg(args, LDOUBLE); + else + fvalue = va_arg(args, double); + /* + * If the precision is zero, it is treated as + * one (cf. C99: 7.19.6.1, 8). + */ + if (precision == 0) + precision = 1; + fmtflt(str, &len, size, fvalue, width, + precision, flags, &overflow); + if (overflow) + goto out; + break; + case 'c': + cvalue = (unsigned char)va_arg(args, int); + OUTCHAR(str, len, size, cvalue); + break; + case 's': + strvalue = va_arg(args, char *); + fmtstr(str, &len, size, strvalue, width, + precision, flags); + break; + case 'p': + /* + * C99 says: "The value of the pointer is + * converted to a sequence of printing + * characters, in an implementation-defined + * manner." 
(C99: 7.19.6.1, 8) + */ + if ((strvalue = va_arg(args, void *)) == NULL) + /* + * We use the glibc format. BSD prints + * "0x0", SysV "0". + */ + fmtstr(str, &len, size, "(nil)", width, + -1, flags); + else { + /* + * We use the BSD/glibc format. SysV + * omits the "0x" prefix (which we emit + * using the PRINT_F_NUM flag). + */ + flags |= PRINT_F_NUM; + flags |= PRINT_F_UNSIGNED; + fmtint(str, &len, size, + (UINTPTR_T)strvalue, 16, width, + precision, flags); + } + break; + case 'n': + switch (cflags) { + case PRINT_C_CHAR: + charptr = va_arg(args, signed char *); + *charptr = (signed char)len; + break; + case PRINT_C_SHORT: + shortptr = va_arg(args, short int *); + *shortptr = (short int)len; + break; + case PRINT_C_LONG: + longptr = va_arg(args, long int *); + *longptr = (long int)len; + break; + case PRINT_C_LLONG: + llongptr = va_arg(args, LLONG *); + *llongptr = (LLONG)len; + break; + case PRINT_C_SIZE: + /* + * C99 says that with the "z" length + * modifier, "a following `n' conversion + * specifier applies to a pointer to a + * signed integer type corresponding to + * size_t argument." (7.19.6.1, 7) + */ + sizeptr = va_arg(args, SSIZE_T *); + *sizeptr = len; + break; + case PRINT_C_INTMAX: + intmaxptr = va_arg(args, INTMAX_T *); + *intmaxptr = len; + break; + case PRINT_C_PTRDIFF: + ptrdiffptr = va_arg(args, PTRDIFF_T *); + *ptrdiffptr = len; + break; + default: + intptr = va_arg(args, int *); + *intptr = len; + break; + } + break; + case '%': /* Print a "%" character verbatim. */ + OUTCHAR(str, len, size, ch); + break; + default: /* Skip other characters. 
*/ + break; + } + ch = *format++; + state = PRINT_S_DEFAULT; + base = cflags = flags = width = 0; + precision = -1; + break; + } +out: + if (len < size) + str[len] = '\0'; + else if (size > 0) + str[size - 1] = '\0'; + + if (overflow || len >= INT_MAX) { + return -1; + } + return (int)len; +} + +static void +fmtstr(char *str, size_t *len, size_t size, const char *value, int width, + int precision, int flags) +{ + int padlen, strln; /* Amount to pad. */ + int noprecision = (precision == -1); + + if (value == NULL) /* We're forgiving. */ + value = "(null)"; + + /* If a precision was specified, don't read the string past it. */ + for (strln = 0; value[strln] != '\0' && + (noprecision || strln < precision); strln++) + continue; + + if ((padlen = width - strln) < 0) + padlen = 0; + if (flags & PRINT_F_MINUS) /* Left justify. */ + padlen = -padlen; + + while (padlen > 0) { /* Leading spaces. */ + OUTCHAR(str, *len, size, ' '); + padlen--; + } + while (*value != '\0' && (noprecision || precision-- > 0)) { + OUTCHAR(str, *len, size, *value); + value++; + } + while (padlen < 0) { /* Trailing spaces. */ + OUTCHAR(str, *len, size, ' '); + padlen++; + } +} + +static void +fmtint(char *str, size_t *len, size_t size, INTMAX_T value, int base, int width, + int precision, int flags) +{ + UINTMAX_T uvalue; + char iconvert[MAX_CONVERT_LENGTH]; + char sign = 0; + char hexprefix = 0; + int spadlen = 0; /* Amount to space pad. */ + int zpadlen = 0; /* Amount to zero pad. */ + int pos; + int separators = (flags & PRINT_F_QUOTE); + int noprecision = (precision == -1); + + if (flags & PRINT_F_UNSIGNED) + uvalue = value; + else { + uvalue = (value >= 0) ? value : -value; + if (value < 0) + sign = '-'; + else if (flags & PRINT_F_PLUS) /* Do a sign. 
*/ + sign = '+'; + else if (flags & PRINT_F_SPACE) + sign = ' '; + } + + pos = convert(uvalue, iconvert, sizeof(iconvert), base, + flags & PRINT_F_UP); + + if (flags & PRINT_F_NUM && uvalue != 0) { + /* + * C99 says: "The result is converted to an `alternative form'. + * For `o' conversion, it increases the precision, if and only + * if necessary, to force the first digit of the result to be a + * zero (if the value and precision are both 0, a single 0 is + * printed). For `x' (or `X') conversion, a nonzero result has + * `0x' (or `0X') prefixed to it." (7.19.6.1, 6) + */ + switch (base) { + case 8: + if (precision <= pos) + precision = pos + 1; + break; + case 16: + hexprefix = (flags & PRINT_F_UP) ? 'X' : 'x'; + break; + } + } + + if (separators) /* Get the number of group separators we'll print. */ + separators = getnumsep(pos); + + zpadlen = precision - pos - separators; + spadlen = width /* Minimum field width. */ + - separators /* Number of separators. */ + - MAX(precision, pos) /* Number of integer digits. */ + - ((sign != 0) ? 1 : 0) /* Will we print a sign? */ + - ((hexprefix != 0) ? 2 : 0); /* Will we print a prefix? */ + + if (zpadlen < 0) + zpadlen = 0; + if (spadlen < 0) + spadlen = 0; + + /* + * C99 says: "If the `0' and `-' flags both appear, the `0' flag is + * ignored. For `d', `i', `o', `u', `x', and `X' conversions, if a + * precision is specified, the `0' flag is ignored." (7.19.6.1, 6) + */ + if (flags & PRINT_F_MINUS) /* Left justify. */ + spadlen = -spadlen; + else if (flags & PRINT_F_ZERO && noprecision) { + zpadlen += spadlen; + spadlen = 0; + } + while (spadlen > 0) { /* Leading spaces. */ + OUTCHAR(str, *len, size, ' '); + spadlen--; + } + if (sign != 0) /* Sign. */ + OUTCHAR(str, *len, size, sign); + if (hexprefix != 0) { /* A "0x" or "0X" prefix. */ + OUTCHAR(str, *len, size, '0'); + OUTCHAR(str, *len, size, hexprefix); + } + while (zpadlen > 0) { /* Leading zeros. 
*/ + OUTCHAR(str, *len, size, '0'); + zpadlen--; + } + while (pos > 0) { /* The actual digits. */ + pos--; + OUTCHAR(str, *len, size, iconvert[pos]); + if (separators > 0 && pos > 0 && pos % 3 == 0) + printsep(str, len, size); + } + while (spadlen < 0) { /* Trailing spaces. */ + OUTCHAR(str, *len, size, ' '); + spadlen++; + } +} + +static void +fmtflt(char *str, size_t *len, size_t size, LDOUBLE fvalue, int width, + int precision, int flags, int *overflow) +{ + LDOUBLE ufvalue; + UINTMAX_T intpart; + UINTMAX_T fracpart; + UINTMAX_T mask; + const char *infnan = NULL; + char iconvert[MAX_CONVERT_LENGTH]; + char fconvert[MAX_CONVERT_LENGTH]; + char econvert[4]; /* "e-12" (without nul-termination). */ + char esign = 0; + char sign = 0; + int leadfraczeros = 0; + int exponent = 0; + int emitpoint = 0; + int omitzeros = 0; + int omitcount = 0; + int padlen = 0; + int epos = 0; + int fpos = 0; + int ipos = 0; + int separators = (flags & PRINT_F_QUOTE); + int estyle = (flags & PRINT_F_TYPE_E); +#if HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT + struct lconv *lc = localeconv(); +#endif /* HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT */ + + /* + * AIX' man page says the default is 0, but C99 and at least Solaris' + * and NetBSD's man pages say the default is 6, and sprintf(3) on AIX + * defaults to 6. + */ + if (precision == -1) + precision = 6; + + if (fvalue < 0.0) + sign = '-'; + else if (flags & PRINT_F_PLUS) /* Do a sign. */ + sign = '+'; + else if (flags & PRINT_F_SPACE) + sign = ' '; + + if (ISNAN(fvalue)) + infnan = (flags & PRINT_F_UP) ? "NAN" : "nan"; + else if (ISINF(fvalue)) + infnan = (flags & PRINT_F_UP) ? "INF" : "inf"; + + if (infnan != NULL) { + if (sign != 0) + iconvert[ipos++] = sign; + while (*infnan != '\0') + iconvert[ipos++] = *infnan++; + fmtstr(str, len, size, iconvert, width, ipos, flags); + return; + } + + /* "%e" (or "%E") or "%g" (or "%G") conversion. 
*/ + if (flags & PRINT_F_TYPE_E || flags & PRINT_F_TYPE_G) { + if (flags & PRINT_F_TYPE_G) { + /* + * For "%g" (and "%G") conversions, the precision + * specifies the number of significant digits, which + * includes the digits in the integer part. The + * conversion will or will not be using "e-style" (like + * "%e" or "%E" conversions) depending on the precision + * and on the exponent. However, the exponent can be + * affected by rounding the converted value, so we'll + * leave this decision for later. Until then, we'll + * assume that we're going to do an "e-style" conversion + * (in order to get the exponent calculated). For + * "e-style", the precision must be decremented by one. + */ + precision--; + /* + * For "%g" (and "%G") conversions, trailing zeros are + * removed from the fractional portion of the result + * unless the "#" flag was specified. + */ + if (!(flags & PRINT_F_NUM)) + omitzeros = 1; + } + exponent = getexponent(fvalue); + estyle = 1; + } + +again: + /* + * Sorry, we only support 9, 19, or 38 digits (that is, the number of + * digits of the 32-bit, the 64-bit, or the 128-bit UINTMAX_MAX value + * minus one) past the decimal point due to our conversion method. + */ + switch (sizeof(UINTMAX_T)) { + case 16: + if (precision > 38) + precision = 38; + break; + case 8: + if (precision > 19) + precision = 19; + break; + default: + if (precision > 9) + precision = 9; + break; + } + + ufvalue = (fvalue >= 0.0) ? fvalue : -fvalue; + if (estyle) /* We want exactly one integer digit. */ + ufvalue /= mypow10(exponent); + + if ((intpart = cast(ufvalue)) == UINTMAX_MAX) { + *overflow = 1; + return; + } + + /* + * Factor of ten with the number of digits needed for the fractional + * part. For example, if the precision is 3, the mask will be 1000. 
+ */ +#if defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + mask = (unsigned long)mypow10(precision); +#else + mask = (UINTMAX_T)mypow10(precision); +#endif + /* + * We "cheat" by converting the fractional part to integer by + * multiplying by a factor of ten. + */ + if ((fracpart = myround(mask * (ufvalue - intpart))) >= mask) { + /* + * For example, ufvalue = 2.99962, intpart = 2, and mask = 1000 + * (because precision = 3). Now, myround(1000 * 0.99962) will + * return 1000. So, the integer part must be incremented by one + * and the fractional part must be set to zero. + */ + intpart++; + fracpart = 0; + if (estyle && intpart == 10) { + /* + * The value was rounded up to ten, but we only want one + * integer digit if using "e-style". So, the integer + * part must be set to one and the exponent must be + * incremented by one. + */ + intpart = 1; + exponent++; + } + } + + /* + * Now that we know the real exponent, we can check whether or not to + * use "e-style" for "%g" (and "%G") conversions. If we don't need + * "e-style", the precision must be adjusted and the integer and + * fractional parts must be recalculated from the original value. + * + * C99 says: "Let P equal the precision if nonzero, 6 if the precision + * is omitted, or 1 if the precision is zero. Then, if a conversion + * with style `E' would have an exponent of X: + * + * - if P > X >= -4, the conversion is with style `f' (or `F') and + * precision P - (X + 1). + * + * - otherwise, the conversion is with style `e' (or `E') and precision + * P - 1." (7.19.6.1, 8) + * + * Note that we had decremented the precision by one. + */ + if (flags & PRINT_F_TYPE_G && estyle && + precision + 1 > exponent && exponent >= -4) { + precision -= exponent; + estyle = 0; + goto again; + } + + if (estyle) { + if (exponent < 0) { + exponent = -exponent; + esign = '-'; + } else + esign = '+'; + + /* + * Convert the exponent. The sizeof(econvert) is 4. So, the + * econvert buffer can hold e.g. "e+99" and "e-99". 
We don't + * support an exponent which contains more than two digits. + * Therefore, the following stores are safe. + */ + epos = convert(exponent, econvert, 2, 10, 0); + /* + * C99 says: "The exponent always contains at least two digits, + * and only as many more digits as necessary to represent the + * exponent." (7.19.6.1, 8) + */ + if (epos == 1) + econvert[epos++] = '0'; + econvert[epos++] = esign; + econvert[epos++] = (flags & PRINT_F_UP) ? 'E' : 'e'; + } + + /* Convert the integer part and the fractional part. */ + ipos = convert(intpart, iconvert, sizeof(iconvert), 10, 0); + if (fracpart != 0) /* convert() would return 1 if fracpart == 0. */ + fpos = convert(fracpart, fconvert, sizeof(fconvert), 10, 0); + + leadfraczeros = precision - fpos; + + if (omitzeros) { + if (fpos > 0) /* Omit trailing fractional part zeros. */ + while (omitcount < fpos && fconvert[omitcount] == '0') + omitcount++; + else { /* The fractional part is zero, omit it completely. */ + omitcount = precision; + leadfraczeros = 0; + } + precision -= omitcount; + } + + /* + * Print a decimal point if either the fractional part is non-zero + * and/or the "#" flag was specified. + */ + if (precision > 0 || flags & PRINT_F_NUM) + emitpoint = 1; + if (separators) /* Get the number of group separators we'll print. */ + separators = getnumsep(ipos); + + padlen = width /* Minimum field width. */ + - ipos /* Number of integer digits. */ + - epos /* Number of exponent characters. */ + - precision /* Number of fractional digits. */ + - separators /* Number of group separators. */ + - (emitpoint ? 1 : 0) /* Will we print a decimal point? */ + - ((sign != 0) ? 1 : 0); /* Will we print a sign character? */ + + if (padlen < 0) + padlen = 0; + + /* + * C99 says: "If the `0' and `-' flags both appear, the `0' flag is + * ignored." (7.19.6.1, 6) + */ + if (flags & PRINT_F_MINUS) /* Left justifty. */ + padlen = -padlen; + else if (flags & PRINT_F_ZERO && padlen > 0) { + if (sign != 0) { /* Sign. 
*/ + OUTCHAR(str, *len, size, sign); + sign = 0; + } + while (padlen > 0) { /* Leading zeros. */ + OUTCHAR(str, *len, size, '0'); + padlen--; + } + } + while (padlen > 0) { /* Leading spaces. */ + OUTCHAR(str, *len, size, ' '); + padlen--; + } + if (sign != 0) /* Sign. */ + OUTCHAR(str, *len, size, sign); + while (ipos > 0) { /* Integer part. */ + ipos--; + OUTCHAR(str, *len, size, iconvert[ipos]); + if (separators > 0 && ipos > 0 && ipos % 3 == 0) + printsep(str, len, size); + } + if (emitpoint) { /* Decimal point. */ +#if HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT + if (lc->decimal_point != NULL && *lc->decimal_point != '\0') + OUTCHAR(str, *len, size, *lc->decimal_point); + else /* We'll always print some decimal point character. */ +#endif /* HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT */ + OUTCHAR(str, *len, size, '.'); + } + while (leadfraczeros > 0) { /* Leading fractional part zeros. */ + OUTCHAR(str, *len, size, '0'); + leadfraczeros--; + } + while (fpos > omitcount) { /* The remaining fractional part. */ + fpos--; + OUTCHAR(str, *len, size, fconvert[fpos]); + } + while (epos > 0) { /* Exponent. */ + epos--; + OUTCHAR(str, *len, size, econvert[epos]); + } + while (padlen < 0) { /* Trailing spaces. */ + OUTCHAR(str, *len, size, ' '); + padlen++; + } +} + +static void +printsep(char *str, size_t *len, size_t size) +{ +#if HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP + struct lconv *lc = localeconv(); + int i; + + if (lc->thousands_sep != NULL) + for (i = 0; lc->thousands_sep[i] != '\0'; i++) + OUTCHAR(str, *len, size, lc->thousands_sep[i]); + else +#endif /* HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP */ + OUTCHAR(str, *len, size, ','); +} + +static int +getnumsep(int digits) +{ + int separators = (digits - ((digits % 3 == 0) ? 1 : 0)) / 3; +#if HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP + int strln; + struct lconv *lc = localeconv(); + + /* We support an arbitrary separator length (including zero). 
*/ + if (lc->thousands_sep != NULL) { + for (strln = 0; lc->thousands_sep[strln] != '\0'; strln++) + continue; + separators *= strln; + } +#endif /* HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP */ + return separators; +} + +static int +getexponent(LDOUBLE value) +{ + LDOUBLE tmp = (value >= 0.0) ? value : -value; + int exponent = 0; + + /* + * We check for 99 > exponent > -99 in order to work around possible + * endless loops which could happen (at least) in the second loop (at + * least) if we're called with an infinite value. However, we checked + * for infinity before calling this function using our ISINF() macro, so + * this might be somewhat paranoid. + */ + while (tmp < 1.0 && tmp > 0.0 && --exponent > -99) + tmp *= 10; + while (tmp >= 10.0 && ++exponent < 99) + tmp /= 10; + + return exponent; +} + +static int +convert(UINTMAX_T value, char *buf, size_t size, int base, int caps) +{ + const char *digits = caps ? "0123456789ABCDEF" : "0123456789abcdef"; + size_t pos = 0; + + /* We return an unterminated buffer with the digits in reverse order. */ + do { + buf[pos++] = digits[value % base]; + value /= base; + } while (value != 0 && pos < size); + + return (int)pos; +} + +static UINTMAX_T +cast(LDOUBLE value) +{ + UINTMAX_T result; + + /* + * We check for ">=" and not for ">" because if UINTMAX_MAX cannot be + * represented exactly as an LDOUBLE value (but is less than LDBL_MAX), + * it may be increased to the nearest higher representable value for the + * comparison (cf. C99: 6.3.1.4, 2). It might then equal the LDOUBLE + * value although converting the latter to UINTMAX_T would overflow. + */ + if (value >= UINTMAX_MAX) + return UINTMAX_MAX; + +#if defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + result = (unsigned long)value; +#else + result = (UINTMAX_T)value; +#endif + /* + * At least on NetBSD/sparc64 3.0.2 and 4.99.30, casting long double to + * an integer type converts e.g. 1.9 to 2 instead of 1 (which violates + * the standard). Sigh. 
+ */ + return (result <= value) ? result : result - 1; +} + +static UINTMAX_T +myround(LDOUBLE value) +{ + UINTMAX_T intpart = cast(value); + + return ((value -= intpart) < 0.5) ? intpart : intpart + 1; +} + +static LDOUBLE +mypow10(int exponent) +{ + LDOUBLE result = 1; + + while (exponent > 0) { + result *= 10; + exponent--; + } + while (exponent < 0) { + result /= 10; + exponent++; + } + return result; +} +#endif /* !HAVE_VSNPRINTF */ + +#if !HAVE_VASPRINTF +#if NEED_MYMEMCPY +void * +mymemcpy(void *dst, void *src, size_t len) +{ + const char *from = src; + char *to = dst; + + /* No need for optimization, we use this only to replace va_copy(3). */ + while (len-- > 0) + *to++ = *from++; + return dst; +} +#endif /* NEED_MYMEMCPY */ + +int +util_vasprintf(char **ret, const char *format, va_list ap) +{ + size_t size; + int len; + va_list aq; + + VA_COPY(aq, ap); + len = vsnprintf(NULL, 0, format, aq); + VA_END_COPY(aq); + if (len < 0 || (*ret = malloc(size = len + 1)) == NULL) + return -1; + return vsnprintf(*ret, size, format, ap); +} +#endif /* !HAVE_VASPRINTF */ + +#if !HAVE_SNPRINTF +#if HAVE_STDARG_H +int +util_snprintf(char *str, size_t size, const char *format, ...) +#else +int +util_snprintf(va_alist) va_dcl +#endif /* HAVE_STDARG_H */ +{ +#if !HAVE_STDARG_H + char *str; + size_t size; + char *format; +#endif /* HAVE_STDARG_H */ + va_list ap; + int len; + + VA_START(ap, format); + VA_SHIFT(ap, str, char *); + VA_SHIFT(ap, size, size_t); + VA_SHIFT(ap, format, const char *); + len = vsnprintf(str, size, format, ap); + va_end(ap); + return len; +} +#endif /* !HAVE_SNPRINTF */ + +#if !HAVE_ASPRINTF +#if HAVE_STDARG_H +int +util_asprintf(char **ret, const char *format, ...) 
+#else +int +util_asprintf(va_alist) va_dcl +#endif /* HAVE_STDARG_H */ +{ +#if !HAVE_STDARG_H + char **ret; + char *format; +#endif /* HAVE_STDARG_H */ + va_list ap; + int len; + + VA_START(ap, format); + VA_SHIFT(ap, ret, char **); + VA_SHIFT(ap, format, const char *); + len = vasprintf(ret, format, ap); + va_end(ap); + return len; +} +#endif /* !HAVE_ASPRINTF */ +#else /* Dummy declaration to avoid empty translation unit warnings. */ +int main(void); +#endif /* !HAVE_SNPRINTF || !HAVE_VSNPRINTF || !HAVE_ASPRINTF || [...] */ + + +/* vim: set joinspaces textwidth=80: */ diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h new file mode 100644 index 0000000000..e2a8491e62 --- /dev/null +++ b/src/gallium/auxiliary/util/u_sse.h @@ -0,0 +1,77 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * SSE intrinsics portability header. + * + * Although the SSE intrinsics are supported by all modern x86 and x86-64 + * compilers, there are some intrinsics missing in some implementations + * (especially older MSVC versions). This header abstracts that away. + */ + +#ifndef U_SSE_H_ +#define U_SSE_H_ + +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_SSE) + +#include <xmmintrin.h> +#include <emmintrin.h> + + +/* MSVC before VC8 does not support the _mm_castxxx_yyy */ +#if defined(_MSC_VER) && _MSC_VER < 1500 + +union __declspec(align(16)) m128_types { + __m128 m128; + __m128i m128i; + __m128d m128d; +}; + +static __inline __m128 +_mm_castsi128_ps(__m128i a) +{ + union m128_types u; + u.m128i = a; + return u.m128; +} + +static __inline __m128i +_mm_castps_si128(__m128 a) +{ + union m128_types u; + u.m128 = a; + return u.m128i; +} + +#endif /* defined(_MSC_VER) && _MSC_VER < 1500 */ + +#endif /* PIPE_ARCH_SSE */ + +#endif /* U_SSE_H_ */ diff --git a/src/gallium/auxiliary/util/u_stream.h b/src/gallium/auxiliary/util/u_stream.h new file mode 100644 index 0000000000..a9d0f0121a --- /dev/null +++ b/src/gallium/auxiliary/util/u_stream.h @@ -0,0 +1,61 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Cross-platform sequential access stream abstraction. + */ + +#ifndef U_STREAM_H +#define U_STREAM_H + + +#include "pipe/p_compiler.h" + + +struct util_stream; + + +/** + * Create a stream + * @param filename relative or absolute path (necessary for windows) + * @param max_size optional maximum file size (0 for a growable size). 
+ */ +struct util_stream * +util_stream_create(const char *filename, size_t max_size); + +boolean +util_stream_write(struct util_stream *stream, const void *data, size_t size); + +void +util_stream_flush(struct util_stream *stream); + +void +util_stream_close(struct util_stream *stream); + + +#endif /* U_STREAM_H */ diff --git a/src/gallium/auxiliary/util/u_stream_stdc.c b/src/gallium/auxiliary/util/u_stream_stdc.c new file mode 100644 index 0000000000..ca80bef0f3 --- /dev/null +++ b/src/gallium/auxiliary/util/u_stream_stdc.c @@ -0,0 +1,106 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * @file + * Stream implementation based on the Standard C Library. + */ + +#include "pipe/p_config.h" + +#if defined(PIPE_OS_LINUX) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) + +#include <stdio.h> + +#include "util/u_memory.h" + +#include "u_stream.h" + + +struct util_stream +{ + FILE *file; +}; + + +struct util_stream * +util_stream_create(const char *filename, size_t max_size) +{ + struct util_stream *stream; + + (void)max_size; + + stream = CALLOC_STRUCT(util_stream); + if(!stream) + goto error1; + + stream->file = fopen(filename, "w"); + if(!stream->file) + goto error2; + + return stream; + +error2: + FREE(stream); +error1: + return NULL; +} + + +/** + * Write size bytes from data to the stream's file. + * + * @return TRUE if fwrite() accepted all size bytes, FALSE if stream is NULL + * or the write was short. + */ +boolean +util_stream_write(struct util_stream *stream, const void *data, size_t size) +{ + if(!stream) + return FALSE; + + /* + * fwrite() returns the number of complete items written, so write size + * one-byte items: the result then equals size exactly when all bytes + * were written (and 0 == 0 still reports success for an empty write). + */ + return fwrite(data, 1, size, stream->file) == size ? TRUE : FALSE; +} + + +void +util_stream_flush(struct util_stream *stream) +{ + if(!stream) + return; + + fflush(stream->file); +} + + +void +util_stream_close(struct util_stream *stream) +{ + if(!stream) + return; + + fclose(stream->file); + + FREE(stream); +} + + +#endif diff --git a/src/gallium/auxiliary/util/u_stream_wd.c b/src/gallium/auxiliary/util/u_stream_wd.c new file mode 100644 index 0000000000..864489e775 --- /dev/null +++ b/src/gallium/auxiliary/util/u_stream_wd.c @@ -0,0 +1,224 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Stream implementation for the Windows Display driver. 
+ */ + +#include "pipe/p_config.h" + +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + +#include <windows.h> +#include <winddi.h> + +#include "util/u_memory.h" +#include "util/u_string.h" + +#include "u_stream.h" + + +#define MAP_FILE_SIZE (4*1024*1024) + + +struct util_stream +{ + char filename[MAX_PATH + 1]; + WCHAR wFileName[MAX_PATH + 1]; + boolean growable; + size_t map_size; + ULONG_PTR iFile; + char *pMap; + size_t written; + unsigned suffix; +}; + + +static INLINE boolean +util_stream_map(struct util_stream *stream) +{ + ULONG BytesInUnicodeString; + static char filename[MAX_PATH + 1]; + unsigned filename_len; + + if(stream->growable) + filename_len = util_snprintf(filename, + sizeof(filename), + "%s.%04x", + stream->filename, + stream->suffix++); + else + filename_len = util_snprintf(filename, + sizeof(filename), + "%s", + stream->filename); + + EngMultiByteToUnicodeN( + stream->wFileName, + sizeof(stream->wFileName), + &BytesInUnicodeString, + filename, + filename_len); + + stream->pMap = EngMapFile(stream->wFileName, stream->map_size, &stream->iFile); + if(!stream->pMap) + return FALSE; + + memset(stream->pMap, 0, stream->map_size); + stream->written = 0; + + return TRUE; +} + + +static INLINE void +util_stream_unmap(struct util_stream *stream) +{ + EngUnmapFile(stream->iFile); + if(stream->written < stream->map_size) { + /* Truncate file size */ + stream->pMap = EngMapFile(stream->wFileName, stream->written, &stream->iFile); + if(stream->pMap) + EngUnmapFile(stream->iFile); + } + + stream->pMap = NULL; +} + + +static INLINE void +util_stream_full_qualified_filename(char *dst, size_t size, const char *src) +{ + boolean need_drive, need_root; + + if((('A' <= src[0] && src[0] <= 'Z') || ('a' <= src[0] && src[0] <= 'z')) && src[1] == ':') { + need_drive = FALSE; + need_root = src[2] == '\\' ? FALSE : TRUE; + } + else { + need_drive = TRUE; + need_root = src[0] == '\\' ? FALSE : TRUE; + } + + util_snprintf(dst, size, + "\\??\\%s%s%s", + need_drive ? 
"C:" : "", + need_root ? "\\" : "", + src); +} + + +struct util_stream * +util_stream_create(const char *filename, size_t max_size) +{ + struct util_stream *stream; + + stream = CALLOC_STRUCT(util_stream); + if(!stream) + goto error1; + + util_stream_full_qualified_filename(stream->filename, + sizeof(stream->filename), + filename); + + if(max_size) { + stream->growable = FALSE; + stream->map_size = max_size; + } + else { + stream->growable = TRUE; + stream->map_size = MAP_FILE_SIZE; + } + + if(!util_stream_map(stream)) + goto error2; + + return stream; + +error2: + FREE(stream); +error1: + return NULL; +} + + +static INLINE void +util_stream_copy(struct util_stream *stream, const char *data, size_t size) +{ + assert(stream->written + size <= stream->map_size); + memcpy(stream->pMap + stream->written, data, size); + stream->written += size; +} + + +boolean +util_stream_write(struct util_stream *stream, const void *data, size_t size) +{ + if(!stream) + return FALSE; + + if(!stream->pMap) + return FALSE; + + while(stream->written + size > stream->map_size) { + size_t step = stream->map_size - stream->written; + util_stream_copy(stream, data, step); + data = (const char *)data + step; + size -= step; + + util_stream_unmap(stream); + if(!stream->growable || !util_stream_map(stream)) + return FALSE; + } + + util_stream_copy(stream, data, size); + + return TRUE; +} + + +void +util_stream_flush(struct util_stream *stream) +{ + (void)stream; +} + + +void +util_stream_close(struct util_stream *stream) +{ + if(!stream) + return; + + util_stream_unmap(stream); + + FREE(stream); +} + + +#endif diff --git a/src/gallium/auxiliary/util/u_string.h b/src/gallium/auxiliary/util/u_string.h new file mode 100644 index 0000000000..08c89bbf77 --- /dev/null +++ b/src/gallium/auxiliary/util/u_string.h @@ -0,0 +1,220 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Platform independent functions for string manipulation. + * + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + +#ifndef U_STRING_H_ +#define U_STRING_H_ + +#if !defined(WIN32) && !defined(XF86_LIBC_H) +#include <stdio.h> +#endif +#include <stddef.h> +#include <stdarg.h> + +#include "pipe/p_compiler.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifdef WIN32 + +int util_vsnprintf(char *, size_t, const char *, va_list); +int util_snprintf(char *str, size_t size, const char *format, ...); + +static INLINE void +util_vsprintf(char *str, const char *format, va_list ap) +{ + util_vsnprintf(str, (size_t)-1, format, ap); +} + +static INLINE void +util_sprintf(char *str, const char *format, ...) 
+{ + va_list ap; + va_start(ap, format); + util_vsnprintf(str, (size_t)-1, format, ap); + va_end(ap); +} + +static INLINE char * +util_strchr(const char *s, char c) +{ + while(*s) { + if(*s == c) + return (char *)s; + ++s; + } + return NULL; +} + +static INLINE char* +util_strncat(char *dst, const char *src, size_t n) +{ + char *p = dst + strlen(dst); + const char *q = src; + size_t i; + + for (i = 0; i < n && *q != '\0'; ++i) + *p++ = *q++; + *p = '\0'; + + return dst; +} + +static INLINE int +util_strcmp(const char *s1, const char *s2) +{ + unsigned char u1, u2; + + while (1) { + u1 = (unsigned char) *s1++; + u2 = (unsigned char) *s2++; + if (u1 != u2) + return u1 - u2; + if (u1 == '\0') + return 0; + } + return 0; +} + +static INLINE int +util_strncmp(const char *s1, const char *s2, size_t n) +{ + unsigned char u1, u2; + + while (n-- > 0) { + u1 = (unsigned char) *s1++; + u2 = (unsigned char) *s2++; + if (u1 != u2) + return u1 - u2; + if (u1 == '\0') + return 0; + } + return 0; +} + +static INLINE char * +util_strstr(const char *haystack, const char *needle) +{ + const char *p = haystack; + int len = strlen(needle); + + for (; (p = util_strchr(p, *needle)) != 0; p++) { + if (util_strncmp(p, needle, len) == 0) { + return (char *)p; + } + } + return NULL; +} + +static INLINE void * +util_memmove(void *dest, const void *src, size_t n) +{ + char *p = (char *)dest; + const char *q = (const char *)src; + if (dest < src) { + while (n--) + *p++ = *q++; + } + else + { + p += n; + q += n; + while (n--) + *--p = *--q; + } + return dest; +} + + +#else + +#define util_vsnprintf vsnprintf +#define util_snprintf snprintf +#define util_vsprintf vsprintf +#define util_sprintf sprintf +#define util_strchr strchr +#define util_strcmp strcmp +#define util_strncmp strncmp +#define util_strncat strncat +#define util_strstr strstr +#define util_memmove memmove + +#endif + + +/** + * Printable string buffer + */ +struct util_strbuf +{ + char *str; + char *ptr; + size_t left; +}; + + 
+static INLINE void +util_strbuf_init(struct util_strbuf *sbuf, char *str, size_t size) +{ + sbuf->str = str; + sbuf->str[0] = 0; + sbuf->ptr = sbuf->str; + sbuf->left = size; +} + + +static INLINE void +util_strbuf_printf(struct util_strbuf *sbuf, const char *format, ...) +{ + if(sbuf->left > 1) { + size_t written; + va_list ap; + va_start(ap, format); + written = util_vsnprintf(sbuf->ptr, sbuf->left, format, ap); + va_end(ap); + sbuf->ptr += written; + sbuf->left -= written; + } +} + + + +#ifdef __cplusplus +} +#endif + +#endif /* U_STRING_H_ */ diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c new file mode 100644 index 0000000000..853c503f4f --- /dev/null +++ b/src/gallium/auxiliary/util/u_tile.c @@ -0,0 +1,1169 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * RGBA/float tile get/put functions. + * Usable both by drivers and state trackers. + * Surfaces should already be in a mapped state. + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_rect.h" +#include "util/u_tile.h" + + +/** + * Move raw block of pixels from surface to user memory. + * This should be usable by any hw driver that has mappable surfaces. + */ +void +pipe_get_tile_raw(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + void *dst, int dst_stride) +{ + const void *src; + + if (dst_stride == 0) + dst_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + src = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ); + assert(src); + if(!src) + return; + + pipe_copy_rect(dst, &ps->block, dst_stride, 0, 0, w, h, src, ps->stride, x, y); + + pipe_surface_unmap(ps); +} + + +/** + * Move raw block of pixels from user memory to surface. + * This should be usable by any hw driver that has mappable surfaces. 
+ */ +void +pipe_put_tile_raw(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const void *src, int src_stride) +{ + void *dst; + + if (src_stride == 0) + src_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + dst = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(dst); + if(!dst) + return; + + pipe_copy_rect(dst, &ps->block, ps->stride, x, y, w, h, src, src_stride, 0, 0); + + pipe_surface_unmap(ps); +} + + + + +/** Convert short in [-32768,32767] to GLfloat in [-1.0,1.0] */ +#define SHORT_TO_FLOAT(S) ((2.0F * (S) + 1.0F) * (1.0F/65535.0F)) + +#define UNCLAMPED_FLOAT_TO_SHORT(us, f) \ + us = ( (short) ( CLAMP((f), -1.0, 1.0) * 32767.0F) ) + + + +/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ + +static void +a8r8g8b8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const unsigned pixel = *src++; + pRow[0] = ubyte_to_float((pixel >> 16) & 0xff); + pRow[1] = ubyte_to_float((pixel >> 8) & 0xff); + pRow[2] = ubyte_to_float((pixel >> 0) & 0xff); + pRow[3] = ubyte_to_float((pixel >> 24) & 0xff); + } + p += dst_stride; + } +} + + +static void +a8r8g8b8_put_tile_rgba(unsigned *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + a = float_to_ubyte(pRow[3]); + *dst++ = (a << 24) | (r << 16) | (g << 8) | b; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_X8R8G8B8_UNORM ***/ + +static void +x8r8g8b8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) 
{ + const unsigned pixel = *src++; + pRow[0] = ubyte_to_float((pixel >> 16) & 0xff); + pRow[1] = ubyte_to_float((pixel >> 8) & 0xff); + pRow[2] = ubyte_to_float((pixel >> 0) & 0xff); + pRow[3] = ubyte_to_float(0xff); + } + p += dst_stride; + } +} + + +static void +x8r8g8b8_put_tile_rgba(unsigned *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + *dst++ = (0xff << 24) | (r << 16) | (g << 8) | b; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/ + +static void +b8g8r8a8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const unsigned pixel = *src++; + pRow[0] = ubyte_to_float((pixel >> 8) & 0xff); + pRow[1] = ubyte_to_float((pixel >> 16) & 0xff); + pRow[2] = ubyte_to_float((pixel >> 24) & 0xff); + pRow[3] = ubyte_to_float((pixel >> 0) & 0xff); + } + p += dst_stride; + } +} + + +static void +b8g8r8a8_put_tile_rgba(unsigned *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + a = float_to_ubyte(pRow[3]); + *dst++ = (b << 24) | (g << 16) | (r << 8) | a; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_A1R5G5B5_UNORM ***/ + +static void +a1r5g5b5_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const ushort pixel = *src++; + pRow[0] = ((pixel >> 
10) & 0x1f) * (1.0f / 31.0f); + pRow[1] = ((pixel >> 5) & 0x1f) * (1.0f / 31.0f); + pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f); + pRow[3] = ((pixel >> 15) ) * 1.0f; + } + p += dst_stride; + } +} + + +static void +a1r5g5b5_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + a = float_to_ubyte(pRow[3]); + r = r >> 3; /* 5 bits */ + g = g >> 3; /* 5 bits */ + b = b >> 3; /* 5 bits */ + a = a >> 7; /* 1 bit */ + *dst++ = (a << 15) | (r << 10) | (g << 5) | b; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_A4R4G4B4_UNORM ***/ + +static void +a4r4g4b4_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const ushort pixel = *src++; + pRow[0] = ((pixel >> 8) & 0xf) * (1.0f / 15.0f); + pRow[1] = ((pixel >> 4) & 0xf) * (1.0f / 15.0f); + pRow[2] = ((pixel ) & 0xf) * (1.0f / 15.0f); + pRow[3] = ((pixel >> 12) ) * (1.0f / 15.0f); + } + p += dst_stride; + } +} + + +/** + * Pack RGBA floats into 16-bit A4R4G4B4 pixels: alpha in bits 12..15, + * red in bits 8..11, green in bits 4..7, blue in bits 0..3 (the layout + * a4r4g4b4_get_tile_rgba reads back). 
+ */ +static void +a4r4g4b4_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + a = float_to_ubyte(pRow[3]); + r >>= 4; + g >>= 4; + b >>= 4; + a >>= 4; + /* red belongs at bit 8; a shift of 16 would fall outside the ushort */ + *dst++ = (a << 12) | (r << 8) | (g << 4) | b; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_R5G6B5_UNORM ***/ + +static void +r5g6b5_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; 
i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const ushort pixel = *src++; + pRow[0] = ((pixel >> 11) & 0x1f) * (1.0f / 31.0f); + pRow[1] = ((pixel >> 5) & 0x3f) * (1.0f / 63.0f); + pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f); + pRow[3] = 1.0f; + } + p += dst_stride; + } +} + + +static void +r5g6b5_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + uint r = (uint) (CLAMP(pRow[0], 0.0, 1.0) * 31.0); + uint g = (uint) (CLAMP(pRow[1], 0.0, 1.0) * 63.0); + uint b = (uint) (CLAMP(pRow[2], 0.0, 1.0) * 31.0); + *dst++ = (r << 11) | (g << 5) | (b); + } + p += src_stride; + } +} + + + +/*** PIPE_FORMAT_Z16_UNORM ***/ + +/** + * Return each Z value as four floats in [0,1]. + */ +static void +z16_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const float scale = 1.0f / 65535.0f; + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = *src++ * scale; + } + p += dst_stride; + } +} + + + + +/*** PIPE_FORMAT_L8_UNORM ***/ + +static void +l8_get_tile_rgba(const ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = ubyte_to_float(*src); + pRow[3] = 1.0; + } + p += dst_stride; + } +} + + +static void +l8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r; + r = float_to_ubyte(pRow[0]); + *dst++ = r; + } + p += src_stride; + } +} + + + +/*** PIPE_FORMAT_A8_UNORM ***/ + +static void +a8_get_tile_rgba(const ubyte *src, + unsigned 
w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = 0.0; + pRow[3] = ubyte_to_float(*src); + } + p += dst_stride; + } +} + + +static void +a8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned a; + a = float_to_ubyte(pRow[3]); + *dst++ = a; + } + p += src_stride; + } +} + + + +/*** PIPE_FORMAT_R16_SNORM ***/ + +static void +r16_get_tile_rgba(const short *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = SHORT_TO_FLOAT(src[0]); + pRow[1] = + pRow[2] = 0.0; + pRow[3] = 1.0; + } + p += dst_stride; + } +} + + +static void +r16_put_tile_rgba(short *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, dst++, pRow += 4) { + UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]); + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_R16G16B16A16_SNORM ***/ + +static void +r16g16b16a16_get_tile_rgba(const short *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src += 4, pRow += 4) { + pRow[0] = SHORT_TO_FLOAT(src[0]); + pRow[1] = SHORT_TO_FLOAT(src[1]); + pRow[2] = SHORT_TO_FLOAT(src[2]); + pRow[3] = SHORT_TO_FLOAT(src[3]); + } + p += dst_stride; + } +} + + +static void +r16g16b16a16_put_tile_rgba(short *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, dst += 4, pRow += 4) { + 
UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]); + UNCLAMPED_FLOAT_TO_SHORT(dst[1], pRow[1]); + UNCLAMPED_FLOAT_TO_SHORT(dst[2], pRow[2]); + UNCLAMPED_FLOAT_TO_SHORT(dst[3], pRow[3]); + } + p += src_stride; + } +} + + + +/*** PIPE_FORMAT_I8_UNORM ***/ + +static void +i8_get_tile_rgba(const ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = ubyte_to_float(*src); + } + p += dst_stride; + } +} + + +static void +i8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r; + r = float_to_ubyte(pRow[0]); + *dst++ = r; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_A8L8_UNORM ***/ + +static void +a8l8_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + ushort p = *src++; + pRow[0] = + pRow[1] = + pRow[2] = ubyte_to_float(p & 0xff); + pRow[3] = ubyte_to_float(p >> 8); + } + p += dst_stride; + } +} + + +static void +a8l8_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, a; + r = float_to_ubyte(pRow[0]); + a = float_to_ubyte(pRow[3]); + *dst++ = (a << 8) | r; + } + p += src_stride; + } +} + + + + +/*** PIPE_FORMAT_Z32_UNORM ***/ + +/** + * Return each Z value as four floats in [0,1]. 
+ */ +static void +z32_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const double scale = 1.0 / (double) 0xffffffff; + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float) (*src++ * scale); + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_S8Z24_UNORM ***/ + +/** + * Return Z component as four float in [0,1]. Stencil part ignored. + */ +static void +s8z24_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const double scale = 1.0 / ((1 << 24) - 1); + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float) (scale * (*src++ & 0xffffff)); + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_Z24S8_UNORM ***/ + +/** + * Return Z component as four float in [0,1]. Stencil part ignored. + */ +static void +z24s8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const double scale = 1.0 / ((1 << 24) - 1); + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float) (scale * (*src++ >> 8)); + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_YCBCR / PIPE_FORMAT_YCBCR_REV ***/ + +/** + * Convert YCbCr (or YCrCb) to RGBA. 
+ */ +static void +ycbcr_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride, + boolean rev) +{ + const float scale = 1.0f / 255.0f; + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + /* do two texels at a time */ + for (j = 0; j < (w & ~1); j += 2, src += 2) { + const ushort t0 = src[0]; + const ushort t1 = src[1]; + const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ + const ubyte y1 = (t1 >> 8) & 0xff; /* luminance */ + ubyte cb, cr; + float r, g, b; + + if (rev) { + cb = t1 & 0xff; /* chroma U */ + cr = t0 & 0xff; /* chroma V */ + } + else { + cb = t0 & 0xff; /* chroma U */ + cr = t1 & 0xff; /* chroma V */ + } + + /* even pixel: y0,cr,cb */ + r = 1.164f * (y0-16) + 1.596f * (cr-128); + g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128); + b = 1.164f * (y0-16) + 2.018f * (cb-128); + pRow[0] = r * scale; + pRow[1] = g * scale; + pRow[2] = b * scale; + pRow[3] = 1.0f; + pRow += 4; + + /* odd pixel: use y1,cr,cb */ + r = 1.164f * (y1-16) + 1.596f * (cr-128); + g = 1.164f * (y1-16) - 0.813f * (cr-128) - 0.391f * (cb-128); + b = 1.164f * (y1-16) + 2.018f * (cb-128); + pRow[0] = r * scale; + pRow[1] = g * scale; + pRow[2] = b * scale; + pRow[3] = 1.0f; + pRow += 4; + + } + /* do the last texel */ + if (w & 1) { + const ushort t0 = src[0]; + const ushort t1 = src[1]; + const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ + ubyte cb, cr; + float r, g, b; + + if (rev) { + cb = t1 & 0xff; /* chroma U */ + cr = t0 & 0xff; /* chroma V */ + } + else { + cb = t0 & 0xff; /* chroma U */ + cr = t1 & 0xff; /* chroma V */ + } + + /* even pixel: y0,cr,cb */ + r = 1.164f * (y0-16) + 1.596f * (cr-128); + g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128); + b = 1.164f * (y0-16) + 2.018f * (cb-128); + pRow[0] = r * scale; + pRow[1] = g * scale; + pRow[2] = b * scale; + pRow[3] = 1.0f; + pRow += 4; + } + p += dst_stride; + } +} + + +void +pipe_tile_raw_to_rgba(enum pipe_format format, + void *src, + uint w, uint 
h, + float *dst, unsigned dst_stride) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + a8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_X8R8G8B8_UNORM: + x8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + b8g8r8a8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_A1R5G5B5_UNORM: + a1r5g5b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_A4R4G4B4_UNORM: + a4r4g4b4_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_R5G6B5_UNORM: + r5g6b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_L8_UNORM: + l8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_A8_UNORM: + a8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_I8_UNORM: + i8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_A8L8_UNORM: + a8l8_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_R16_SNORM: + r16_get_tile_rgba((short *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_R16G16B16A16_SNORM: + r16g16b16a16_get_tile_rgba((short *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_Z16_UNORM: + z16_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_Z32_UNORM: + z32_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_Z24S8_UNORM: + z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_YCBCR: + ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, FALSE); + break; + case PIPE_FORMAT_YCBCR_REV: + ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, TRUE); + break; + default: + assert(0); + 
} +} + + +void +pipe_get_tile_rgba(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + float *p) +{ + unsigned dst_stride = w * 4; + void *packed; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size); + + if (!packed) + return; + + if(ps->format == PIPE_FORMAT_YCBCR || ps->format == PIPE_FORMAT_YCBCR_REV) + assert((x & 1) == 0); + + pipe_get_tile_raw(ps, x, y, w, h, packed, 0); + + pipe_tile_raw_to_rgba(ps->format, packed, w, h, p, dst_stride); + + FREE(packed); +} + + +void +pipe_put_tile_rgba(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const float *p) +{ + unsigned src_stride = w * 4; + void *packed; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size); + + if (!packed) + return; + + switch (ps->format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_X8R8G8B8_UNORM: + x8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_A1R5G5B5_UNORM: + a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_R5G6B5_UNORM: + r5g6b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + assert(0); + break; + case PIPE_FORMAT_A4R4G4B4_UNORM: + a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_L8_UNORM: + l8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_A8_UNORM: + a8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_I8_UNORM: + i8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_A8L8_UNORM: + a8l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride); + break; + 
case PIPE_FORMAT_R16_SNORM: + r16_put_tile_rgba((short *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_R16G16B16A16_SNORM: + r16g16b16a16_put_tile_rgba((short *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_Z16_UNORM: + /*z16_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_Z32_UNORM: + /*z32_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + /*s8z24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_Z24S8_UNORM: + /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; + default: + assert(0); + } + + pipe_put_tile_raw(ps, x, y, w, h, packed, 0); + + FREE(packed); +} + + +/** + * Get a block of Z values, converted to 32-bit range. + */ +void +pipe_get_tile_z(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + uint *z) +{ + const uint dstStride = w; + ubyte *map; + uint *pDest = z; + uint i, j; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ); + if (!map) { + assert(0); + return; + } + + switch (ps->format) { + case PIPE_FORMAT_Z32_UNORM: + { + const uint *pSrc + = (const uint *)(map + y * ps->stride + x*4); + for (i = 0; i < h; i++) { + memcpy(pDest, pSrc, 4 * w); + pDest += dstStride; + pSrc += ps->stride/4; + } + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + { + const uint *pSrc + = (const uint *)(map + y * ps->stride + x*4); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 24-bit Z to 32-bit Z */ + pDest[j] = (pSrc[j] << 8) | (pSrc[j] & 0xff); + } + pDest += dstStride; + pSrc += ps->stride/4; + } + } + break; + case PIPE_FORMAT_Z16_UNORM: + { + const ushort *pSrc + = (const ushort *)(map + y * ps->stride + x*2); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 16-bit Z to 32-bit Z */ + pDest[j] = (pSrc[j] << 16) | 
pSrc[j]; + } + pDest += dstStride; + pSrc += ps->stride/2; + } + } + break; + default: + assert(0); + } + + pipe_surface_unmap(ps); +} + + +void +pipe_put_tile_z(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const uint *zSrc) +{ + const uint srcStride = w; + const uint *pSrc = zSrc; + ubyte *map; + uint i, j; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE); + if (!map) { + assert(0); + return; + } + + switch (ps->format) { + case PIPE_FORMAT_Z32_UNORM: + { + uint *pDest = (uint *) (map + y * ps->stride + x*4); + for (i = 0; i < h; i++) { + memcpy(pDest, pSrc, 4 * w); + pDest += ps->stride/4; + pSrc += srcStride; + } + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + { + uint *pDest = (uint *) (map + y * ps->stride + x*4); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 32-bit Z to 24-bit Z (0 stencil) */ + pDest[j] = pSrc[j] >> 8; + } + pDest += ps->stride/4; + pSrc += srcStride; + } + } + break; + case PIPE_FORMAT_Z16_UNORM: + { + ushort *pDest = (ushort *) (map + y * ps->stride + x*2); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 32-bit Z to 16-bit Z */ + pDest[j] = pSrc[j] >> 16; + } + pDest += ps->stride/2; + pSrc += srcStride; + } + } + break; + default: + assert(0); + } + + pipe_surface_unmap(ps); +} + + diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h new file mode 100644 index 0000000000..a8ac805308 --- /dev/null +++ b/src/gallium/auxiliary/util/u_tile.h @@ -0,0 +1,101 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef P_TILE_H +#define P_TILE_H + +#include "pipe/p_compiler.h" + +struct pipe_surface; + + +/** + * Clip tile against surface dims. 
+ * \return TRUE if tile is totally clipped, FALSE otherwise + */ +static INLINE boolean +pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_surface *ps) +{ + if (x >= ps->width) + return TRUE; + if (y >= ps->height) + return TRUE; + if (x + *w > ps->width) + *w = ps->width - x; + if (y + *h > ps->height) + *h = ps->height - y; + return FALSE; +} + +#ifdef __cplusplus +extern "C" { +#endif + +void +pipe_get_tile_raw(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + void *p, int dst_stride); + +void +pipe_put_tile_raw(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const void *p, int src_stride); + + +void +pipe_get_tile_rgba(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + float *p); + +void +pipe_put_tile_rgba(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const float *p); + + +void +pipe_get_tile_z(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + uint *z); + +void +pipe_put_tile_z(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const uint *z); + +void +pipe_tile_raw_to_rgba(enum pipe_format format, + void *src, + uint w, uint h, + float *dst, unsigned dst_stride); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c new file mode 100644 index 0000000000..bf7d1d1c8d --- /dev/null +++ b/src/gallium/auxiliary/util/u_time.c @@ -0,0 +1,220 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * OS independent time-manipulation functions. 
+ * + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + + +#include "pipe/p_config.h" + +#if defined(PIPE_OS_LINUX) +#include <sys/time.h> +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) +#include <windows.h> +#include <winddi.h> +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) +#include <windows.h> +extern VOID KeQuerySystemTime(PLARGE_INTEGER); +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) +#include <windows.h> +#else +#error Unsupported OS +#endif + +#include "util/u_time.h" + + +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) + +static int64_t frequency = 0; + +static INLINE void +util_time_get_frequency(void) +{ + if(!frequency) { +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + LONGLONG temp; + EngQueryPerformanceFrequency(&temp); + frequency = temp; +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) + LARGE_INTEGER temp; + QueryPerformanceFrequency(&temp); + frequency = temp.QuadPart; +#endif + } +} +#endif + + +void +util_time_get(struct util_time *t) +{ +#if defined(PIPE_OS_LINUX) + gettimeofday(&t->tv, NULL); +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + LONGLONG temp; + EngQueryPerformanceCounter(&temp); + t->counter = temp; +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + /* Updated every 10 miliseconds, measured in units of 100 nanoseconds. 
+ * http://msdn.microsoft.com/en-us/library/ms801642.aspx */ + LARGE_INTEGER temp; + KeQuerySystemTime(&temp); + t->counter = temp.QuadPart; +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) + LARGE_INTEGER temp; + QueryPerformanceCounter(&temp); + t->counter = temp.QuadPart; +#endif +} + + +void +util_time_add(const struct util_time *t1, + int64_t usecs, + struct util_time *t2) +{ +#if defined(PIPE_OS_LINUX) + t2->tv.tv_sec = t1->tv.tv_sec + usecs / 1000000; + t2->tv.tv_usec = t1->tv.tv_usec + usecs % 1000000; +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) + util_time_get_frequency(); + t2->counter = t1->counter + (usecs * frequency + INT64_C(999999))/INT64_C(1000000); +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + /* 1 tick = 100 nano seconds. */ + t2->counter = t1->counter + usecs * 10; +#elif + LARGE_INTEGER temp; + LONGLONG freq; + freq = temp.QuadPart; + t2->counter = t1->counter + (usecs * freq)/1000000L; +#endif +} + + +int64_t +util_time_diff(const struct util_time *t1, + const struct util_time *t2) +{ +#if defined(PIPE_OS_LINUX) + return (t2->tv.tv_usec - t1->tv.tv_usec) + + (t2->tv.tv_sec - t1->tv.tv_sec)*1000000; +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) + util_time_get_frequency(); + return (t2->counter - t1->counter)*INT64_C(1000000)/frequency; +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + return (t2->counter - t1->counter)/10; +#endif +} + + + +uint64_t +util_time_micros( void ) +{ + struct util_time t1; + + util_time_get(&t1); + +#if defined(PIPE_OS_LINUX) + return t1.tv.tv_usec + t1.tv.tv_sec*1000000LL; +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) + util_time_get_frequency(); + return t1.counter*INT64_C(1000000)/frequency; +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + 
return t1.counter/10; +#endif +} + + + +/** + * Compare two time values. + * + * Not publicly available because it does not take in account wrap-arounds. + * Use util_time_timeout instead. + */ +static INLINE int +util_time_compare(const struct util_time *t1, + const struct util_time *t2) +{ +#if defined(PIPE_OS_LINUX) + if (t1->tv.tv_sec < t2->tv.tv_sec) + return -1; + else if(t1->tv.tv_sec > t2->tv.tv_sec) + return 1; + else if (t1->tv.tv_usec < t2->tv.tv_usec) + return -1; + else if(t1->tv.tv_usec > t2->tv.tv_usec) + return 1; + else + return 0; +#elif defined(PIPE_OS_WINDOWS) + if (t1->counter < t2->counter) + return -1; + else if(t1->counter > t2->counter) + return 1; + else + return 0; +#endif +} + + +boolean +util_time_timeout(const struct util_time *start, + const struct util_time *end, + const struct util_time *curr) +{ + if(util_time_compare(start, end) <= 0) + return !(util_time_compare(start, curr) <= 0 && util_time_compare(curr, end) < 0); + else + return !(util_time_compare(start, curr) <= 0 || util_time_compare(curr, end) < 0); +} + + +#if defined(PIPE_SUBSYSYEM_WINDOWS_DISPLAY) +void util_time_sleep(unsigned usecs) +{ + LONGLONG start, curr, end; + + EngQueryPerformanceCounter(&start); + + if(!frequency) + EngQueryPerformanceFrequency(&frequency); + + end = start + (usecs * frequency + 999999LL)/1000000LL; + + do { + EngQueryPerformanceCounter(&curr); + } while(start <= curr && curr < end || + end < start && (curr < end || start <= curr)); +} +#endif diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h new file mode 100644 index 0000000000..35d97d16c7 --- /dev/null +++ b/src/gallium/auxiliary/util/u_time.h @@ -0,0 +1,104 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * OS independent time-manipulation functions. + * + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + +#ifndef U_TIME_H_ +#define U_TIME_H_ + + +#include "pipe/p_config.h" + +#if defined(PIPE_OS_LINUX) +#include <time.h> /* timeval */ +#include <unistd.h> /* usleep */ +#endif + +#include "pipe/p_compiler.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Time abstraction. + * + * Do not access this structure directly. Use the provided function instead. 
+ */ +struct util_time +{ +#if defined(PIPE_OS_LINUX) + struct timeval tv; +#else + int64_t counter; +#endif +}; + + +void +util_time_get(struct util_time *t); + +void +util_time_add(const struct util_time *t1, + int64_t usecs, + struct util_time *t2); + +uint64_t +util_time_micros( void ); + +int64_t +util_time_diff(const struct util_time *t1, + const struct util_time *t2); + +/** + * Returns non-zero when the timeout expires. + */ +boolean +util_time_timeout(const struct util_time *start, + const struct util_time *end, + const struct util_time *curr); + +#if defined(PIPE_OS_LINUX) +#define util_time_sleep usleep +#else +void +util_time_sleep(unsigned usecs); +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* U_TIME_H_ */ diff --git a/src/gallium/auxiliary/util/u_timed_winsys.c b/src/gallium/auxiliary/util/u_timed_winsys.c new file mode 100644 index 0000000000..8beb3b4c88 --- /dev/null +++ b/src/gallium/auxiliary/util/u_timed_winsys.c @@ -0,0 +1,346 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+/*
+ * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com>
+ */
+
+#include "pipe/p_winsys.h"
+#include "u_timed_winsys.h"
+#include "util/u_memory.h"
+#include "util/u_time.h"
+
+
+/**
+ * A pipe_winsys that forwards every call to \c backend and accumulates
+ * per-entry-point timing statistics.
+ */
+struct timed_winsys {
+   struct pipe_winsys base;      /**< must stay first: timed_winsys() casts from it */
+   struct pipe_winsys *backend;  /**< the winsys that does the real work */
+   uint64_t last_dump;           /**< util_time_micros() value at the last report */
+   struct {
+      const char *name_key;      /**< wrapper name; NULL until the slot is first used */
+      double total;              /**< milliseconds accumulated since the last report */
+      unsigned calls;            /**< calls counted since the last report */
+   } funcs[13];                  /**< one slot per wrapped entry point (idx 0..12) */
+};
+
+
+/** Downcast from the public pipe_winsys to the wrapper that contains it. */
+static struct timed_winsys *timed_winsys( struct pipe_winsys *winsys )
+{
+   return (struct timed_winsys *)winsys;
+}
+
+
+/** Timestamp (microseconds) taken just before calling into the backend. */
+static uint64_t time_start( void )
+{
+   return util_time_micros();
+}
+
+
+/**
+ * Print the statistics gathered since the previous report and reset them.
+ * Slots that have never been used (name_key == NULL) are skipped.
+ */
+static void time_display( struct pipe_winsys *winsys )
+{
+   struct timed_winsys *tws = timed_winsys(winsys);
+   unsigned i;
+   double overall = 0;
+
+   for (i = 0; i < Elements(tws->funcs); i++) {
+      if (tws->funcs[i].name_key) {
+         const unsigned calls = tws->funcs[i].calls;
+         const double total = tws->funcs[i].total;
+         /* A known slot may have been idle for the whole interval (its
+          * counters were reset by the previous report); don't divide by 0.
+          */
+         const double avg = calls ? total / calls : 0.0;
+
+         debug_printf("*** %-25s %5.3fms (%u calls, avg %.3fms)\n",
+                      tws->funcs[i].name_key,
+                      total,
+                      calls,
+                      avg);
+         overall += total;
+         tws->funcs[i].calls = 0;
+         tws->funcs[i].total = 0;
+      }
+   }
+
+   debug_printf("*** %-25s %5.3fms\n",
+                "OVERALL WINSYS",
+                overall);
+}
+
+/**
+ * Record the time elapsed since \p startval against slot \p idx.
+ *
+ * \param startval  value returned by time_start() before the backend call
+ * \param idx       slot in timed_winsys::funcs, fixed per wrapper
+ * \param name      wrapper name; compared by pointer, so each wrapper must
+ *                  always pass the same string (they pass __FUNCTION__)
+ *
+ * Calls taking longer than 1ms are logged individually, and a full report
+ * is printed once more than 10 seconds have passed since the last one.
+ */
+static void time_finish( struct pipe_winsys *winsys,
+                         uint64_t startval,
+                         unsigned idx,
+                         const char *name )
+{
+   struct timed_winsys *tws = timed_winsys(winsys);
+   uint64_t endval = util_time_micros();
+   double elapsed = (endval - startval)/1000.0;
+
+   if (endval - startval > 1000LL)
+      debug_printf("*** %s %.3f\n", name, elapsed );
+
+   assert( idx < Elements(tws->funcs) );
+   assert( tws->funcs[idx].name_key == name ||
+           tws->funcs[idx].name_key == NULL);
+
+   tws->funcs[idx].name_key = name;
+   tws->funcs[idx].total += elapsed;
+   tws->funcs[idx].calls++;
+
+   if (endval - tws->last_dump > 10LL * 1000LL * 1000LL) {
+      time_display( winsys );
+      tws->last_dump = endval;
+   }
+}
+
+
+/*
+ * The timed_* functions below all follow the same pattern: take a
+ * timestamp, forward the call unchanged to the backend winsys, then charge
+ * the elapsed time to a fixed slot via time_finish().
+ */
+
+/* Pipe has no concept of pools, but the psb driver passes a flag that
+ * can be mapped onto pools in the backend.
+ */
+static struct pipe_buffer *
+timed_buffer_create(struct pipe_winsys *winsys,
+                    unsigned alignment,
+                    unsigned usage,
+                    unsigned size )
+{
+   struct pipe_winsys *backend = timed_winsys(winsys)->backend;
+   uint64_t start = time_start();
+
+   struct pipe_buffer *buf = backend->buffer_create( backend, alignment, usage, size );
+
+   time_finish(winsys, start, 0, __FUNCTION__);
+
+   return buf;
+}
+
+
+static struct pipe_buffer *
+timed_user_buffer_create(struct pipe_winsys *winsys,
+                         void *data,
+                         unsigned bytes)
+{
+   struct pipe_winsys *backend = timed_winsys(winsys)->backend;
+   uint64_t start = time_start();
+
+   struct pipe_buffer *buf = backend->user_buffer_create( backend, data, bytes );
+
+   time_finish(winsys, start, 1, __FUNCTION__);
+
+   return buf;
+}
+
+
+static void *
+timed_buffer_map(struct pipe_winsys *winsys,
+                 struct pipe_buffer *buf,
+                 unsigned flags)
+{
+   struct pipe_winsys *backend = timed_winsys(winsys)->backend;
+   uint64_t start = time_start();
+
+   void *map = backend->buffer_map( backend, buf, flags );
+
+   time_finish(winsys, start, 2, __FUNCTION__);
+
+   return map;
+}
+
+
+static void
+timed_buffer_unmap(struct pipe_winsys *winsys,
+                   struct pipe_buffer *buf)
+{
+   struct pipe_winsys *backend = timed_winsys(winsys)->backend;
+   uint64_t start = time_start();
+
+   backend->buffer_unmap( backend, buf );
+
+   time_finish(winsys, start, 3, __FUNCTION__);
+}
+
+
+static void
+timed_buffer_destroy(struct pipe_winsys *winsys,
+                     struct pipe_buffer *buf)
+{
+   struct pipe_winsys *backend = timed_winsys(winsys)->backend;
+   uint64_t start = time_start();
+
+   backend->buffer_destroy( backend, buf );
+
+   time_finish(winsys, start, 4, __FUNCTION__);
+}
+
+
+static void
+timed_flush_frontbuffer( struct pipe_winsys *winsys,
+                         struct pipe_surface *surf,
+                         void *context_private)
+{
+   struct pipe_winsys *backend = timed_winsys(winsys)->backend;
+   uint64_t start = time_start();
+
+   backend->flush_frontbuffer( backend, surf, context_private );
+
+   time_finish(winsys, start, 5, __FUNCTION__);
+}
+
+
+static struct pipe_surface *
+timed_surface_alloc(struct pipe_winsys *winsys)
+{
+   struct pipe_winsys *backend = timed_winsys(winsys)->backend;
+   uint64_t start = time_start();
+
+   struct pipe_surface *surf = backend->surface_alloc( backend );
+
+   time_finish(winsys, start, 6, __FUNCTION__);
+
+   return surf;
+}
+
+
+static int
+timed_surface_alloc_storage(struct pipe_winsys *winsys,
+                            struct pipe_surface *surf,
+                            unsigned width, unsigned height,
+                            enum pipe_format format,
+                            unsigned flags,
+                            unsigned tex_usage)
+{
+   struct pipe_winsys *backend = timed_winsys(winsys)->backend;
+   uint64_t start = time_start();
+
+   int ret = backend->surface_alloc_storage( backend, surf, width, height,
+                                             format, flags, tex_usage );
+
+   time_finish(winsys, start, 7, __FUNCTION__);
+
+   return ret;
+}
+
+
+static void
+timed_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s)
+{
+   struct pipe_winsys *backend = timed_winsys(winsys)->backend;
+   uint64_t start = time_start();
+
+   backend->surface_release( backend, s );
+
+   time_finish(winsys, start, 8, __FUNCTION__);
+}
+
+
+static const char *
+timed_get_name( struct pipe_winsys *winsys )
+{
+   struct pipe_winsys *backend = timed_winsys(winsys)->backend;
+   uint64_t start = time_start();
+
+   const char *ret = backend->get_name( backend );
+
+   time_finish(winsys, start, 9, __FUNCTION__);
+
+   return ret;
+}
+
+static void
+timed_fence_reference(struct pipe_winsys *winsys,
+                      struct pipe_fence_handle **ptr,
+                      struct pipe_fence_handle *fence)
+{
+   struct pipe_winsys *backend = timed_winsys(winsys)->backend;
+   uint64_t start = time_start();
+
+   backend->fence_reference( backend, ptr, fence );
+
+   time_finish(winsys, start, 10, __FUNCTION__);
+}
+
+
+static int
+timed_fence_signalled( struct pipe_winsys *winsys,
+                       struct pipe_fence_handle *fence,
+                       unsigned flag )
+{
+   struct pipe_winsys *backend = timed_winsys(winsys)->backend;
+   uint64_t start = time_start();
+
+   int ret = backend->fence_signalled( backend, fence, flag );
+
+   time_finish(winsys, start, 11, __FUNCTION__);
+
+   return ret;
+}
+
+static int
+timed_fence_finish( struct pipe_winsys *winsys,
+                    struct pipe_fence_handle *fence,
+                    unsigned flag )
+{
+   struct pipe_winsys *backend = timed_winsys(winsys)->backend;
+   uint64_t start = time_start();
+
+   int ret = backend->fence_finish( backend, fence, flag );
+
+   time_finish(winsys, start, 12, __FUNCTION__);
+
+   return ret;
+}
+
+/** Destroy the backend winsys, then free the wrapper itself (not timed). */
+static void
+timed_winsys_destroy( struct pipe_winsys *winsys )
+{
+   struct pipe_winsys *backend = timed_winsys(winsys)->backend;
+   backend->destroy( backend );
+   FREE(winsys);
+}
+
+
+
+/**
+ * Wrap \p backend in a winsys whose entry points time each backend call.
+ *
+ * The wrapper takes over \p backend: destroying the returned winsys also
+ * destroys the backend.
+ *
+ * \return the wrapping winsys, or NULL if the wrapper cannot be allocated
+ */
+struct pipe_winsys *u_timed_winsys_create( struct pipe_winsys *backend )
+{
+   struct timed_winsys *ws = CALLOC_STRUCT(timed_winsys);
+
+   if (!ws)
+      return NULL;
+
+   ws->base.user_buffer_create = timed_user_buffer_create;
+   ws->base.buffer_map = timed_buffer_map;
+   ws->base.buffer_unmap = timed_buffer_unmap;
+   ws->base.buffer_destroy = timed_buffer_destroy;
+   ws->base.buffer_create = timed_buffer_create;
+   ws->base.flush_frontbuffer = timed_flush_frontbuffer;
+   ws->base.get_name = timed_get_name;
+   ws->base.surface_alloc = timed_surface_alloc;
+   ws->base.surface_alloc_storage = timed_surface_alloc_storage;
+   ws->base.surface_release = timed_surface_release;
+   ws->base.fence_reference = timed_fence_reference;
+   ws->base.fence_signalled = timed_fence_signalled;
+   ws->base.fence_finish = timed_fence_finish;
+   ws->base.destroy = timed_winsys_destroy;
+
+   ws->backend = backend;
+
+   /* Start the reporting interval now; with last_dump left at 0 the very
+    * first timed call would trigger a (nearly empty) report.
+    */
+   ws->last_dump = util_time_micros();
+
+   return &ws->base;
+}
+
diff --git a/src/gallium/auxiliary/util/u_timed_winsys.h b/src/gallium/auxiliary/util/u_timed_winsys.h
new file mode 100644
index 0000000000..542365112d
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_timed_winsys.h
@@ -0,0 +1,41 @@
+/**************************************************************************
+ *
+ * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ *
+ **************************************************************************/
+/*
+ * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com>
+ */
+
+
+#ifndef U_TIMED_WINSYS_H
+#define U_TIMED_WINSYS_H
+
+
+struct pipe_winsys;
+struct pipe_winsys *u_timed_winsys_create( struct pipe_winsys *backend );
+
+
+#endif
diff --git a/src/gallium/drivers/Makefile b/src/gallium/drivers/Makefile
new file mode 100644
index 0000000000..6161cb6ff8
--- /dev/null
+++ b/src/gallium/drivers/Makefile
@@ -0,0 +1,20 @@
+TOP = ../../..
+include $(TOP)/configs/current
+
+
+SUBDIRS = $(GALLIUM_DRIVER_DIRS)
+
+
+default: subdirs
+
+
+subdirs:
+	@for dir in $(SUBDIRS) ; do \
+		if [ -d $$dir ] ; then \
+			(cd $$dir && $(MAKE)) || exit 1 ; \
+		fi \
+	done
+
+
+clean:
+	rm -f `find . -name \*.[oa]`
diff --git a/src/gallium/drivers/cell/Makefile b/src/gallium/drivers/cell/Makefile
new file mode 100644
index 0000000000..47aef7b05f
--- /dev/null
+++ b/src/gallium/drivers/cell/Makefile
@@ -0,0 +1,14 @@
+# Cell Gallium driver Makefile
+#
+# Recurse with $(MAKE) so flags and the jobserver are passed down, and use
+# '&&' so a failed 'cd' aborts instead of re-running make in this directory.
+
+default:
+	( cd spu && $(MAKE) )
+	( cd ppu && $(MAKE) )
+
+
+
+clean:
+	( cd spu && $(MAKE) clean )
+	( cd ppu && $(MAKE) clean )
diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h
new file mode 100644
index 0000000000..23fb0b0831
--- /dev/null
+++ b/src/gallium/drivers/cell/common.h
@@ -0,0 +1,338 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Types and tokens which are common to the SPU and PPU code. 
+ */
+
+
+#ifndef CELL_COMMON_H
+#define CELL_COMMON_H
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+#include "pipe/p_state.h"
+
+
+/**
+ * The standard assert macro doesn't seem to work reliably.
+ *
+ * On failure this prints file, line, function and the stringified
+ * expression to stderr, then writes through a NULL pointer (exit(1) follows
+ * in case that write does not stop the program).
+ *
+ * NOTE(review): the expansion is a bare `if (...) { ... }`, not wrapped in
+ * do { } while (0), so an `else` written after ASSERT(x) would bind to the
+ * macro's own `if`.  Also, #x means the text of the argument is part of the
+ * failure message.
+ */
+#define ASSERT(x) \
+   if (!(x)) { \
+      ubyte *p = NULL; \
+      fprintf(stderr, "%s:%d: %s(): assertion %s failed.\n", \
+              __FILE__, __LINE__, __FUNCTION__, #x); \
+      *p = 0; \
+      exit(1); \
+   }
+
+
+/**
+ * For sanity checking: assert that the low four address bits of \c ptr are
+ * zero.  Note that the expansion already ends with a ';'.
+ */
+#define ASSERT_ALIGN16(ptr) \
+  ASSERT((((unsigned long) (ptr)) & 0xf) == 0);
+
+
+/** round up value to next multiple of 4 */
+#define ROUNDUP4(k) (((k) + 0x3) & ~0x3)
+
+/** round up value to next multiple of 8 */
+#define ROUNDUP8(k) (((k) + 0x7) & ~0x7)
+
+/** round up value to next multiple of 16 */
+#define ROUNDUP16(k) (((k) + 0xf) & ~0xf)
+
+
+#define CELL_MAX_SPUS 8
+
+#define CELL_MAX_SAMPLERS 4
+#define CELL_MAX_TEXTURE_LEVELS 12 /* 2k x 2k */
+#define CELL_MAX_CONSTANTS 32 /**< number of float[4] constants */
+#define CELL_MAX_WIDTH 1024 /**< max framebuffer width */
+#define CELL_MAX_HEIGHT 1024 /**< max framebuffer height */
+
+#define TILE_SIZE 32
+
+
+/**
+ * The low byte of a mailbox word contains the command opcode.
+ * Remaining higher bytes are command specific.
+ */
+#define CELL_CMD_OPCODE_MASK 0xff
+
+#define CELL_CMD_EXIT 1
+#define CELL_CMD_CLEAR_SURFACE 2
+#define CELL_CMD_FINISH 3
+#define CELL_CMD_RENDER 4
+#define CELL_CMD_BATCH 5
+#define CELL_CMD_RELEASE_VERTS 6
+#define CELL_CMD_STATE_FRAMEBUFFER 10
+#define CELL_CMD_STATE_FRAGMENT_OPS 11
+#define CELL_CMD_STATE_SAMPLER 12
+#define CELL_CMD_STATE_TEXTURE 13
+#define CELL_CMD_STATE_VERTEX_INFO 14
+#define CELL_CMD_STATE_VIEWPORT 15
+#define CELL_CMD_STATE_UNIFORMS 16
+#define CELL_CMD_STATE_VS_ARRAY_INFO 17
+#define CELL_CMD_STATE_BIND_VS 18
+#define CELL_CMD_STATE_FRAGMENT_PROGRAM 19
+#define CELL_CMD_STATE_ATTRIB_FETCH 20
+#define CELL_CMD_STATE_FS_CONSTANTS 21
+#define CELL_CMD_STATE_RASTERIZER 22
+#define CELL_CMD_VS_EXECUTE 23
+#define CELL_CMD_FLUSH_BUFFER_RANGE 24
+#define CELL_CMD_FENCE 25
+
+
+/** Command/batch buffers */
+#define CELL_NUM_BUFFERS 4
+#define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */
+
+/** Per-SPU status values for a batch buffer */
+#define CELL_BUFFER_STATUS_FREE 10
+#define CELL_BUFFER_STATUS_USED 20
+
+/** Debug flags */
+#define CELL_DEBUG_CHECKER (1 << 0)
+#define CELL_DEBUG_ASM (1 << 1)
+#define CELL_DEBUG_SYNC (1 << 2)
+#define CELL_DEBUG_FRAGMENT_OPS (1 << 3)
+#define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4)
+#define CELL_DEBUG_CMD (1 << 5)
+#define CELL_DEBUG_CACHE (1 << 6)
+
+/** Max instructions for doing per-fragment operations */
+#define SPU_MAX_FRAGMENT_OPS_INSTS 64
+
+
+
+/** Fence states */
+#define CELL_FENCE_IDLE 0
+#define CELL_FENCE_EMITTED 1
+#define CELL_FENCE_SIGNALLED 2
+
+struct cell_fence
+{
+   /** There's a 16-byte status qword per SPU */
+   volatile uint status[CELL_MAX_SPUS][4];
+};
+
+
+/**
+ * Fence command sent to SPUs. In response, the SPUs will write
+ * CELL_FENCE_SIGNALLED back to the fence status word in main memory.
+ */
+struct cell_command_fence
+{
+   uint64_t opcode; /**< CELL_CMD_FENCE */
+   struct cell_fence *fence;
+};
+
+
+/**
+ * Command to specify per-fragment operations state and generated code.
+ * Note that the dsa, blend, blend_color fields are really only needed + * for the fallback/C per-pixel code. They're not used when we generate + * dynamic SPU fragment code (which is the normal case). + */ +struct cell_command_fragment_ops +{ + uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */ + struct pipe_depth_stencil_alpha_state dsa; + struct pipe_blend_state blend; + struct pipe_blend_color blend_color; + unsigned code[SPU_MAX_FRAGMENT_OPS_INSTS]; +}; + + +/** Max instructions for fragment programs */ +#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 512 + +/** + * Command to send a fragment program to SPUs. + */ +struct cell_command_fragment_program +{ + uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */ + uint num_inst; /**< Number of instructions */ + unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; +}; + + +/** + * Tell SPUs about the framebuffer size, location + */ +struct cell_command_framebuffer +{ + uint64_t opcode; /**< CELL_CMD_STATE_FRAMEBUFFER */ + int width, height; + void *color_start, *depth_start; + enum pipe_format color_format, depth_format; +}; + + +/** + * Tell SPUs about rasterizer state. + */ +struct cell_command_rasterizer +{ + uint64_t opcode; /**< CELL_CMD_STATE_RASTERIZER */ + struct pipe_rasterizer_state rasterizer; +}; + + +/** + * Clear framebuffer to the given value/color. + */ +struct cell_command_clear_surface +{ + uint64_t opcode; /**< CELL_CMD_CLEAR_SURFACE */ + uint surface; /**< Temporary: 0=color, 1=Z */ + uint value; +}; + + +/** + * Array info used by the vertex shader's vertex puller. + */ +struct cell_array_info +{ + uint64_t base; /**< Base address of the 0th element. */ + uint attr; /**< Attribute that this state is for. */ + uint pitch; /**< Byte pitch from one entry to the next. 
*/ + uint size; + uint function_offset; +}; + + +struct cell_attribute_fetch_code +{ + uint64_t base; + uint size; +}; + + +struct cell_buffer_range +{ + uint64_t base; + unsigned size; +}; + + +struct cell_shader_info +{ + uint64_t declarations; + uint64_t instructions; + uint64_t immediates; + + unsigned num_outputs; + unsigned num_declarations; + unsigned num_instructions; + unsigned num_immediates; +}; + + +#define SPU_VERTS_PER_BATCH 64 +struct cell_command_vs +{ + uint64_t opcode; /**< CELL_CMD_VS_EXECUTE */ + uint64_t vOut[SPU_VERTS_PER_BATCH]; + unsigned num_elts; + unsigned elts[SPU_VERTS_PER_BATCH]; + float plane[12][4]; + unsigned nr_planes; + unsigned nr_attrs; +}; + + +struct cell_command_render +{ + uint64_t opcode; /**< CELL_CMD_RENDER */ + uint prim_type; /**< PIPE_PRIM_x */ + uint num_verts; + uint vertex_size; /**< bytes per vertex */ + uint num_indexes; + uint vertex_buf; /**< which cell->buffer[] contains the vertex data */ + float xmin, ymin, xmax, ymax; /* XXX another dummy field */ + uint min_index; + boolean inline_verts; +}; + + +struct cell_command_release_verts +{ + uint64_t opcode; /**< CELL_CMD_RELEASE_VERTS */ + uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ +}; + + +struct cell_command_sampler +{ + uint64_t opcode; /**< CELL_CMD_STATE_SAMPLER */ + uint unit; + struct pipe_sampler_state state; +}; + + +struct cell_command_texture +{ + uint64_t opcode; /**< CELL_CMD_STATE_TEXTURE */ + uint target; /**< PIPE_TEXTURE_x */ + uint unit; + void *start[CELL_MAX_TEXTURE_LEVELS]; /**< Address in main memory */ + ushort width[CELL_MAX_TEXTURE_LEVELS]; + ushort height[CELL_MAX_TEXTURE_LEVELS]; + ushort depth[CELL_MAX_TEXTURE_LEVELS]; +}; + + +#define MAX_SPU_FUNCTIONS 12 +/** + * Used to tell the PPU about the address of particular functions in the + * SPU's address space. 
+ */ +struct cell_spu_function_info +{ + uint num; + char names[MAX_SPU_FUNCTIONS][16]; + uint addrs[MAX_SPU_FUNCTIONS]; + char pad[12]; /**< Pad struct to multiple of 16 bytes (256 currently) */ +}; + + +/** This is the object passed to spe_create_thread() */ +struct cell_init_info +{ + unsigned id; + unsigned num_spus; + unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */ + float inv_timebase; /**< 1.0/timebase, for perf measurement */ + + /** Buffers for command batches, vertex/index data */ + ubyte *buffers[CELL_NUM_BUFFERS]; + uint *buffer_status; /**< points at cell_context->buffer_status */ + + struct cell_spu_function_info *spu_functions; +} ALIGN16_ATTRIB; + + +#endif /* CELL_COMMON_H */ diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile new file mode 100644 index 0000000000..9358a47284 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -0,0 +1,84 @@ +# Gallium3D Cell driver: PPU code + +# This makefile builds the libcell.a library which gets pulled into +# the main libGL.so library + + +TOP = ../../../../.. +include $(TOP)/configs/current + + +# This is the "top-level" cell PPU driver code, will get pulled into libGL.so +# by the winsys Makefile. +CELL_LIB = ../libcell.a + + +# This is the SPU code. We'd like to be able to put this into the libcell.a +# archive with the PPU code, but nesting .a libs doesn't seem to work. 
+# So, it's pulled into libGL.so in gallium/winsys/xlib/Makefile +SPU_CODE_MODULE = ../spu/g3d_spu.a + + +SOURCES = \ + cell_batch.c \ + cell_clear.c \ + cell_context.c \ + cell_draw_arrays.c \ + cell_fence.c \ + cell_flush.c \ + cell_gen_fragment.c \ + cell_gen_fp.c \ + cell_state_derived.c \ + cell_state_emit.c \ + cell_state_shader.c \ + cell_pipe_state.c \ + cell_screen.c \ + cell_state_vertex.c \ + cell_spu.c \ + cell_surface.c \ + cell_texture.c \ + cell_vbuf.c \ + cell_vertex_fetch.c \ + cell_vertex_shader.c \ + cell_winsys.c + + +OBJECTS = $(SOURCES:.c=.o) \ + +INCLUDE_DIRS = \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/auxiliary \ + -I$(TOP)/src/gallium/drivers + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + + + +default: $(CELL_LIB) + + +$(CELL_LIB): $(OBJECTS) $(SPU_CODE_MODULE) +# ar -ru $(CELL_LIB) $(OBJECTS) $(SPU_CODE_MODULE) # doesn't work + ar -ru $(CELL_LIB) $(OBJECTS) + +#$(PROG): $(PPU_OBJECTS) +# $(CC) -o $(PROG) $(PPU_OBJECTS) $(SPU_CODE_MODULE) $(PPU_LFLAGS) + + + +clean: + rm -f *.o *~ $(CELL_LIB) + + + +depend: $(SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null + +include depend + + + diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c new file mode 100644 index 0000000000..448b723d85 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_batch.c @@ -0,0 +1,302 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_fence.h" +#include "cell_spu.h" + + + +/** + * Search the buffer pool for an empty/free buffer and return its index. + * Buffers are used for storing vertex data, state and commands which + * will be sent to the SPUs. + * If no empty buffers are available, wait for one. 
+ * \return buffer index in [0, CELL_NUM_BUFFERS-1]
+ */
+uint
+cell_get_empty_buffer(struct cell_context *cell)
+{
+   static uint prev_buffer = 0;
+   uint candidate = (prev_buffer + 1) % CELL_NUM_BUFFERS;
+
+   /* Walk the pool round-robin, starting just after the buffer handed out
+    * last time, until one is reported free by every SPU.  This spins for
+    * as long as no such buffer exists.
+    */
+   for (;;) {
+      uint spu, num_free = 0;
+
+      /* Count SPUs, in order, that have released the candidate; stop at
+       * the first one that has not.
+       */
+      for (spu = 0; spu < cell->num_spus; spu++) {
+         if (cell->buffer_status[spu][candidate][0] != CELL_BUFFER_STATUS_FREE)
+            break;
+         num_free++;
+      }
+
+      if (num_free > 0 && num_free == cell->num_spus) {
+         /* free everywhere: claim it on behalf of every SPU */
+         for (spu = 0; spu < cell->num_spus; spu++)
+            cell->buffer_status[spu][candidate][0] = CELL_BUFFER_STATUS_USED;
+
+         prev_buffer = candidate;
+
+         /* release tex buffer associated w/ prev use of this batch buf */
+         cell_free_fenced_buffers(cell, &cell->fenced_buffers[candidate]);
+
+         return candidate;
+      }
+
+      /* still in use by at least one SPU, try the next buffer */
+      candidate = (candidate + 1) % CELL_NUM_BUFFERS;
+   }
+}
+
+
+/**
+ * Write a fence command just past the data currently in the current batch
+ * buffer.
+ * Room for it is guaranteed by cell_batch_free_space(), which always holds
+ * back sizeof(struct cell_command_fence) bytes.
+ * Note that cell->buffer_size[] is not changed here.
+ */
+static void
+emit_fence(struct cell_context *cell)
+{
+   const uint cur = cell->cur_batch;
+   const uint size = cell->buffer_size[cur];
+   struct cell_command_fence *cmd;
+
+   ASSERT(size + sizeof(struct cell_command_fence) <= CELL_BUFFER_SIZE);
+
+   cmd = (struct cell_command_fence *) (cell->buffer[cur] + size);
+   cmd->opcode = CELL_CMD_FENCE;
+   cmd->fence = &cell->fenced_buffers[cur].fence;
+}
+
+
+/**
+ * Flush the current batch buffer to the SPUs.
+ * An empty buffer will be found and set as the new current batch buffer
+ * for subsequent commands/data.
+ * Does nothing if the current batch buffer is empty.
+ */
+void
+cell_batch_flush(struct cell_context *cell)
+{
+   static boolean flushing = FALSE;
+   uint batch = cell->cur_batch;
+   uint size = cell->buffer_size[batch];
+   uint spu, cmd_word;
+
+   assert(!flushing);
+
+   if (size == 0)
+      return;
+
+   /* Before we use this batch buffer, make sure any fenced texture buffers
+    * are released.
+    */
+   if (cell->fenced_buffers[batch].head) {
+      emit_fence(cell);
+
+      /* emit_fence() stores the command at offset 'size' (the value read
+       * above).  Count those bytes, otherwise the size sent to the SPUs
+       * stops short of the fence and it is never executed.
+       */
+      size += sizeof(struct cell_command_fence);
+      cell->buffer_size[batch] = size;
+   }
+
+   flushing = TRUE;
+
+   assert(batch < CELL_NUM_BUFFERS);
+
+   /*
+    * Build "BATCH" command and send to all SPUs.
+    * Layout: opcode in bits 0..7, buffer index from bit 8, size from bit 16.
+    */
+   cmd_word = CELL_CMD_BATCH | (batch << 8) | (size << 16);
+
+   for (spu = 0; spu < cell->num_spus; spu++) {
+      assert(cell->buffer_status[spu][batch][0] == CELL_BUFFER_STATUS_USED);
+      send_mbox_message(cell_global.spe_contexts[spu], cmd_word);
+   }
+
+   /* When the SPUs are done copying the buffer into their locals stores
+    * they'll write a BUFFER_STATUS_FREE message into the buffer_status[]
+    * array indicating that the PPU can re-use the buffer.
+    */
+
+   batch = cell_get_empty_buffer(cell);
+
+   cell->buffer_size[batch] = 0; /* empty */
+   cell->cur_batch = batch;
+
+   flushing = FALSE;
+}
+
+
+/**
+ * Return the number of bytes free in the current batch buffer.
+ * Room for one fence command is always held back so that cell_batch_flush()
+ * can append it; the result is 0 (never a wrapped-around huge value) when
+ * less than that is left.
+ */
+uint
+cell_batch_free_space(const struct cell_context *cell)
+{
+   const uint used = cell->buffer_size[cell->cur_batch];
+   const uint reserved = sizeof(struct cell_command_fence);
+
+   if (used >= CELL_BUFFER_SIZE || CELL_BUFFER_SIZE - used <= reserved)
+      return 0;
+
+   return CELL_BUFFER_SIZE - used - reserved;
+}
+
+
+/**
+ * Append data to the current batch buffer.
+ * If the data does not fit in the space reported by
+ * cell_batch_free_space(), the batch is flushed first and the data goes at
+ * the start of the new current buffer.
+ * \param data address of block of bytes to append
+ * \param bytes size of block of bytes (asserted to be a multiple of 8)
+ */
+void
+cell_batch_append(struct cell_context *cell, const void *data, uint bytes)
+{
+   uint size;
+   void *dst;
+
+   ASSERT(bytes % 8 == 0);
+   ASSERT(bytes <= CELL_BUFFER_SIZE);
+   ASSERT(cell->cur_batch >= 0);
+
+#ifdef ASSERT
+   {
+      /* the current batch buffer must be marked "used" for every SPU */
+      uint spu;
+      for (spu = 0; spu < cell->num_spus; spu++) {
+         ASSERT(cell->buffer_status[spu][cell->cur_batch][0]
+                 == CELL_BUFFER_STATUS_USED);
+      }
+   }
+#endif
+
+   if (bytes > cell_batch_free_space(cell)) {
+      /* no room: send what we have; cur_batch changes here */
+      cell_batch_flush(cell);
+      size = 0;
+   }
+   else {
+      size = cell->buffer_size[cell->cur_batch];
+   }
+
+   ASSERT(size + bytes <= CELL_BUFFER_SIZE);
+
+   dst = cell->buffer[cell->cur_batch] + size;
+   memcpy(dst, data, bytes);
+
+   cell->buffer_size[cell->cur_batch] = size + bytes;
+}
+
+
+/**
+ * Reserve 'bytes' bytes in the current batch buffer, with no particular
+ * alignment (alignment of 1).
+ * \return address in batch buffer to put data
+ */
+void *
+cell_batch_alloc(struct cell_context *cell, uint bytes)
+{
+   return cell_batch_alloc_aligned(cell, bytes, 1);
+}
+
+
+/**
+ * Same as \sa cell_batch_alloc, but return an address at a particular
+ * alignment.
+ * The alignment applies to the offset within the batch buffer.  If the
+ * padding plus 'bytes' does not fit, the batch is flushed and the space is
+ * taken from the start of the new current buffer.
+ */
+void *
+cell_batch_alloc_aligned(struct cell_context *cell, uint bytes,
+                         uint alignment)
+{
+   uint size, pad;
+
+   ASSERT(bytes % 8 == 0);
+   ASSERT(bytes <= CELL_BUFFER_SIZE);
+   ASSERT(alignment > 0);
+   ASSERT(cell->cur_batch >= 0);
+
+#ifdef ASSERT
+   {
+      /* the current batch buffer must be marked "used" for every SPU */
+      uint spu;
+      for (spu = 0; spu < cell->num_spus; spu++) {
+         ASSERT(cell->buffer_status[spu][cell->cur_batch][0]
+                 == CELL_BUFFER_STATUS_USED);
+      }
+   }
+#endif
+
+   size = cell->buffer_size[cell->cur_batch];
+
+   /* bytes needed to bring the offset up to a multiple of 'alignment' */
+   pad = size % alignment;
+   if (pad != 0)
+      pad = alignment - pad;
+
+   if (pad + bytes <= cell_batch_free_space(cell)) {
+      size += pad;
+   }
+   else {
+      /* no room: send what we have; cur_batch changes here */
+      cell_batch_flush(cell);
+      size = 0;
+   }
+
+   ASSERT(size % alignment == 0);
+   ASSERT(size + bytes <= CELL_BUFFER_SIZE);
+
+   cell->buffer_size[cell->cur_batch] = size + bytes;
+
+   return (void *) (cell->buffer[cell->cur_batch] + size);
+}
+
+
+/**
+ * One-time init of batch buffers: every buffer is emptied, buffer 0 is
+ * marked as used for each SPU and all other buffers as free.
+ */
+void
+cell_init_batch_buffers(struct cell_context *cell)
+{
+   uint buf;
+
+   /* init command, vertex/index buffer info */
+   for (buf = 0; buf < CELL_NUM_BUFFERS; buf++) {
+      /* mark 0th buffer as used, rest as free */
+      const uint status = (buf == 0) ? CELL_BUFFER_STATUS_USED
+                                     : CELL_BUFFER_STATUS_FREE;
+      uint spu;
+
+      cell->buffer_size[buf] = 0;
+
+      for (spu = 0; spu < cell->num_spus; spu++)
+         cell->buffer_status[spu][buf][0] = status;
+   }
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h
new file mode 100644
index 0000000000..f74dd60079
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_batch.h
@@ -0,0 +1,61 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef CELL_BATCH_H +#define CELL_BATCH_H + +#include "pipe/p_compiler.h" + + +struct cell_context; + + +extern uint +cell_get_empty_buffer(struct cell_context *cell); + +extern void +cell_batch_flush(struct cell_context *cell); + +extern uint +cell_batch_free_space(const struct cell_context *cell); + +extern void +cell_batch_append(struct cell_context *cell, const void *data, uint bytes); + +extern void * +cell_batch_alloc(struct cell_context *cell, uint bytes); + +extern void * +cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, + uint alignment); + +extern void +cell_init_batch_buffers(struct cell_context *cell); + + +#endif /* CELL_BATCH_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c new file mode 100644 index 0000000000..c9c0c721bb --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -0,0 +1,109 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Authors + * Brian Paul + */ + +#include <stdio.h> +#include <assert.h> +#include <stdint.h> +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" +#include "cell/common.h" +#include "cell_clear.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_flush.h" +#include "cell_spu.h" +#include "cell_state.h" + + +/** + * Convert packed pixel from one format to another. 
+ */ +static unsigned +convert_color(enum pipe_format srcFormat, unsigned srcColor, + enum pipe_format dstFormat) +{ + ubyte r, g, b, a; + unsigned dstColor; + + util_unpack_color_ub(srcFormat, &srcColor, &r, &g, &b, &a); + util_pack_color_ub(r, g, b, a, dstFormat, &dstColor); + + return dstColor; +} + + + +/** + * Called via pipe->clear() + */ +void +cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + struct pipe_screen *screen = pipe->screen; + struct cell_context *cell = cell_context(pipe); + uint surfIndex; + + if (cell->dirty) + cell_update_derived(cell); + + + if (!cell->cbuf_map[0]) + cell->cbuf_map[0] = screen->surface_map(screen, ps, + PIPE_BUFFER_USAGE_GPU_WRITE); + + if (ps == cell->framebuffer.zsbuf) { + /* clear z/stencil buffer */ + surfIndex = 1; + } + else { + /* clear color buffer */ + surfIndex = 0; + + if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) { + clearValue = convert_color(PIPE_FORMAT_A8R8G8B8_UNORM, clearValue, + ps->format); + } + } + + + /* Build a CLEAR command and place it in the current batch buffer */ + { + struct cell_command_clear_surface *clr + = (struct cell_command_clear_surface *) + cell_batch_alloc(cell, sizeof(*clr)); + clr->opcode = CELL_CMD_CLEAR_SURFACE; + clr->surface = surfIndex; + clr->value = clearValue; + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_clear.h b/src/gallium/drivers/cell/ppu/cell_clear.h new file mode 100644 index 0000000000..ff47d43f4c --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_clear.h @@ -0,0 +1,43 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_CLEAR_H +#define CELL_CLEAR_H + + +struct pipe_context; +struct pipe_surface; + + +extern void +cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + + + +#endif /* CELL_CLEAR_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c new file mode 100644 index 0000000000..22d552d8e3 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -0,0 +1,183 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * Authors + * Brian Paul + */ + + +#include <stdio.h> + +#include "pipe/p_defines.h" +#include "pipe/p_format.h" +#include "util/u_memory.h" +#include "pipe/p_winsys.h" +#include "pipe/p_screen.h" + +#include "draw/draw_context.h" +#include "draw/draw_private.h" + +#include "cell/common.h" +#include "cell_batch.h" +#include "cell_clear.h" +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_fence.h" +#include "cell_flush.h" +#include "cell_state.h" +#include "cell_surface.h" +#include "cell_spu.h" +#include "cell_pipe_state.h" +#include "cell_texture.h" +#include "cell_vbuf.h" + + + +static void +cell_destroy_context( struct pipe_context *pipe ) +{ + struct cell_context *cell = cell_context(pipe); + + util_delete_keymap(cell->fragment_ops_cache, NULL); + + cell_spu_exit(cell); + + align_free(cell); +} + + +static struct draw_context * +cell_draw_create(struct cell_context *cell) +{ + struct draw_context *draw = draw_create(); + +#if 0 /* broken */ + if (getenv("GALLIUM_CELL_VS")) { + /* plug in SPU-based vertex transformation code */ + draw->shader_queue_flush = cell_vertex_shader_queue_flush; + draw->driver_private = cell; + } +#endif + + return draw; +} + + +static const struct debug_named_value cell_debug_flags[] = { + {"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */ + {"asm", CELL_DEBUG_ASM}, /**< dump SPU asm code */ + {"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */ + {"fragops", CELL_DEBUG_FRAGMENT_OPS}, /**< SPUs emit fragment ops debug messages*/ + {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK}, /**< SPUs use reference implementation for fragment ops*/ + {"cmd", CELL_DEBUG_CMD}, /**< SPUs dump command buffer info */ + {"cache", CELL_DEBUG_CACHE}, /**< report texture cache stats on exit */ + {NULL, 0} +}; + + +struct pipe_context * +cell_create_context(struct pipe_screen *screen, + struct cell_winsys *cws) 
+{ + struct cell_context *cell; + uint i; + + /* some fields need to be 16-byte aligned, so align the whole object */ + cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); + if (!cell) + return NULL; + + memset(cell, 0, sizeof(*cell)); + + cell->winsys = cws; + cell->pipe.winsys = screen->winsys; + cell->pipe.screen = screen; + cell->pipe.destroy = cell_destroy_context; + + cell->pipe.clear = cell_clear_surface; + cell->pipe.flush = cell_flush; + +#if 0 + cell->pipe.begin_query = cell_begin_query; + cell->pipe.end_query = cell_end_query; + cell->pipe.wait_query = cell_wait_query; +#endif + + cell_init_draw_functions(cell); + cell_init_state_functions(cell); + cell_init_shader_functions(cell); + cell_init_surface_functions(cell); + cell_init_vertex_functions(cell); + + cell->draw = cell_draw_create(cell); + + /* Create cache of fragment ops generated code */ + cell->fragment_ops_cache = + util_new_keymap(sizeof(struct cell_fragment_ops_key), ~0, NULL); + + cell_init_vbuf(cell); + + draw_set_rasterize_stage(cell->draw, cell->vbuf); + + /* convert all points/lines to tris for the time being */ + draw_wide_point_threshold(cell->draw, 0.0); + draw_wide_line_threshold(cell->draw, 0.0); + + /* get env vars or read config file to get debug flags */ + cell->debug_flags = debug_get_flags_option("CELL_DEBUG", + cell_debug_flags, + 0 ); + + for (i = 0; i < CELL_NUM_BUFFERS; i++) + cell_fence_init(&cell->fenced_buffers[i].fence); + + + /* + * SPU stuff + */ + /* This call only works with SDK 3.0. Anyone still using 2.1??? 
*/ + cell->num_cells = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1); + cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, 0); + if (cell->debug_flags) { + printf("Cell: found %d Cell(s) with %u SPUs\n", + cell->num_cells, cell->num_spus); + } + if (getenv("CELL_NUM_SPUS")) { + cell->num_spus = atoi(getenv("CELL_NUM_SPUS")); + assert(cell->num_spus > 0); + } + + cell_start_spus(cell); + + cell_init_batch_buffers(cell); + + /* make sure SPU initializations are done before proceeding */ + cell_flush_int(cell, CELL_FLUSH_WAIT); + + return &cell->pipe; +} diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h new file mode 100644 index 0000000000..4491ae8cdf --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -0,0 +1,205 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_CONTEXT_H +#define CELL_CONTEXT_H + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "draw/draw_vertex.h" +#include "draw/draw_vbuf.h" +#include "cell_winsys.h" +#include "cell/common.h" +#include "rtasm/rtasm_ppc_spe.h" +#include "tgsi/tgsi_scan.h" +#include "util/u_keymap.h" + + +struct cell_vbuf_render; + + +/** + * Cell vertex shader state, subclass of pipe_shader_state. + */ +struct cell_vertex_shader_state +{ + struct pipe_shader_state shader; + struct tgsi_shader_info info; + void *draw_data; +}; + + +/** + * Cell fragment shader state, subclass of pipe_shader_state. + */ +struct cell_fragment_shader_state +{ + struct pipe_shader_state shader; + struct tgsi_shader_info info; + struct spe_function code; + void *data; +}; + + +/** + * Key for mapping per-fragment state to cached SPU machine code. + * keymap(cell_fragment_ops_key) => cell_command_fragment_ops + */ +struct cell_fragment_ops_key +{ + struct pipe_blend_state blend; + struct pipe_blend_color blend_color; + struct pipe_depth_stencil_alpha_state dsa; + enum pipe_format color_format; + enum pipe_format zs_format; +}; + + +struct cell_buffer_node; + +/** + * Fenced buffer list. List of buffers which can be unreferenced after + * the fence has been executed/signalled. + */ +struct cell_buffer_list +{ + struct cell_fence fence; + struct cell_buffer_node *head; +}; + + +/** + * Per-context state, subclass of pipe_context. 
+ */ +struct cell_context +{ + struct pipe_context pipe; + + struct cell_winsys *winsys; + + const struct pipe_blend_state *blend; + const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + uint num_samplers; + const struct pipe_depth_stencil_alpha_state *depth_stencil; + const struct pipe_rasterizer_state *rasterizer; + const struct cell_vertex_shader_state *vs; + const struct cell_fragment_shader_state *fs; + + struct spe_function logic_op; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[2]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct cell_texture *texture[PIPE_MAX_SAMPLERS]; + uint num_textures; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + uint num_vertex_buffers; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + uint num_vertex_elements; + + ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS]; + ubyte *zsbuf_map; + + struct pipe_surface *tex_surf; + uint *tex_map; + + uint dirty; + uint dirty_textures; /* bitmask of texture units */ + uint dirty_samplers; /* bitmask of sampler units */ + + /** Cache of code generated for per-fragment ops */ + struct keymap *fragment_ops_cache; + + /** The primitive drawing context */ + struct draw_context *draw; + struct draw_stage *render_stage; + + /** For post-transformed vertex buffering: */ + struct cell_vbuf_render *vbuf_render; + struct draw_stage *vbuf; + + struct vertex_info vertex_info; + + /** Mapped constant buffers */ + void *mapped_constants[PIPE_SHADER_TYPES]; + + struct cell_spu_function_info spu_functions ALIGN16_ATTRIB; + + uint num_cells, num_spus; + + /** Buffers for command batches, vertex/index data */ + uint buffer_size[CELL_NUM_BUFFERS]; + ubyte buffer[CELL_NUM_BUFFERS][CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + + int cur_batch; /**< which buffer is being filled w/ commands */ + + /** [4] to ensure 
16-byte alignment for each status word */ + uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + + + /** Associated with each command/batch buffer is a list of pipe_buffers + * that are fenced. When the last command in a buffer is executed, the + * fence will be signalled, indicating that any pipe_buffers preceding + * that fence can be unreferenced (and probably freed). + */ + struct cell_buffer_list fenced_buffers[CELL_NUM_BUFFERS]; + + + struct spe_function attrib_fetch; + unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS]; + + unsigned debug_flags; +}; + + + + +static INLINE struct cell_context * +cell_context(struct pipe_context *pipe) +{ + return (struct cell_context *) pipe; +} + + +extern struct pipe_context * +cell_create_context(struct pipe_screen *screen, struct cell_winsys *cws); + +extern void +cell_vertex_shader_queue_flush(struct draw_context *draw); + + +/* XXX find a better home for this */ +extern void cell_update_vertex_fetch(struct draw_context *draw); + + +#endif /* CELL_CONTEXT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c new file mode 100644 index 0000000000..880d535320 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -0,0 +1,191 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Author: + * Brian Paul + * Keith Whitwell + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" + +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_state.h" +#include "cell_flush.h" + +#include "draw/draw_context.h" + + + +static void +cell_map_constant_buffers(struct cell_context *sp) +{ + struct pipe_winsys *ws = sp->pipe.winsys; + uint i; + for (i = 0; i < 2; i++) { + if (sp->constants[i].size) { + sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + cell_flush_buffer_range(sp, sp->mapped_constants[i], + sp->constants[i].buffer->size); + } + } + + draw_set_mapped_constant_buffer(sp->draw, + sp->mapped_constants[PIPE_SHADER_VERTEX], + sp->constants[PIPE_SHADER_VERTEX].size); +} + +static void +cell_unmap_constant_buffers(struct cell_context *sp) +{ + struct pipe_winsys *ws = sp->pipe.winsys; + uint i; + for (i = 0; i < 2; i++) { + if (sp->constants[i].size) + ws->buffer_unmap(ws, sp->constants[i].buffer); + sp->mapped_constants[i] = NULL; + } +} + + + +/** + * Draw vertex arrays, with optional indexing. + * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. + * + * XXX should the element buffer be specified/bound with a separate function? 
+ */ +static boolean +cell_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count) +{ + struct cell_context *sp = cell_context(pipe); + struct draw_context *draw = sp->draw; + unsigned i; + + if (sp->dirty) + cell_update_derived( sp ); + +#if 0 + cell_map_surfaces(sp); +#endif + cell_map_constant_buffers(sp); + + /* + * Map vertex buffers + */ + for (i = 0; i < sp->num_vertex_buffers; i++) { + void *buf = pipe_buffer_map(pipe->screen, + sp->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + cell_flush_buffer_range(sp, buf, sp->vertex_buffer[i].buffer->size); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes = pipe_buffer_map(pipe->screen, + indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + + /* draw! 
*/ + draw_arrays(draw, mode, start, count); + + /* + * unmap vertex/index buffers - will cause draw module to flush + */ + for (i = 0; i < sp->num_vertex_buffers; i++) { + draw_set_mapped_vertex_buffer(draw, i, NULL); + pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); + } + if (indexBuffer) { + draw_set_mapped_element_buffer(draw, 0, NULL); + pipe_buffer_unmap(pipe->screen, indexBuffer); + } + + /* Note: leave drawing surfaces mapped */ + cell_unmap_constant_buffers(sp); + + return TRUE; +} + + +static boolean +cell_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + return cell_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); +} + + +static boolean +cell_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + return cell_draw_elements(pipe, NULL, 0, mode, start, count); +} + + +static void +cell_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) +{ + struct cell_context *cell = cell_context(pipe); + draw_set_edgeflags(cell->draw, edgeflags); +} + + + +void +cell_init_draw_functions(struct cell_context *cell) +{ + cell->pipe.draw_arrays = cell_draw_arrays; + cell->pipe.draw_elements = cell_draw_elements; + cell->pipe.draw_range_elements = cell_draw_range_elements; + cell->pipe.set_edgeflags = cell_set_edgeflags; +} + diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.h b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h new file mode 100644 index 0000000000..148873aa67 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.h @@ -0,0 +1,36 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_DRAW_ARRAYS_H +#define CELL_DRAW_ARRAYS_H + + +extern void +cell_init_draw_functions(struct cell_context *cell); + + +#endif /* CELL_DRAW_ARRAYS_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c new file mode 100644 index 0000000000..ffb3bea12b --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_fence.c @@ -0,0 +1,158 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include <unistd.h> +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_fence.h" +#include "cell_texture.h" + + +void +cell_fence_init(struct cell_fence *fence) +{ + uint i; + for (i = 0; i < CELL_MAX_SPUS; i++) { + fence->status[i][0] = CELL_FENCE_IDLE; + } +} + + +boolean +cell_fence_signalled(const struct cell_context *cell, + const struct cell_fence *fence) +{ + uint i; + for (i = 0; i < cell->num_spus; i++) { + //ASSERT(fence->status[i][0] != CELL_FENCE_IDLE); + if (fence->status[i][0] == CELL_FENCE_EMITTED) + return FALSE; + } + return TRUE; +} + + +void +cell_fence_finish(const struct cell_context *cell, + const struct cell_fence *fence) +{ + while (!cell_fence_signalled(cell, fence)) { + usleep(10); + } +} + + + + +struct cell_buffer_node +{ + struct pipe_buffer *buffer; + struct cell_buffer_node *next; +}; + + +static void +cell_add_buffer_to_list(struct cell_context *cell, + struct cell_buffer_list *list, + struct pipe_buffer *buffer) +{ + struct pipe_screen *ps = cell->pipe.screen; + struct cell_buffer_node *node = CALLOC_STRUCT(cell_buffer_node); + /* create new list node which references the buffer, insert at head */ + if (node) { + pipe_buffer_reference(ps, &node->buffer, buffer); + node->next = list->head; + list->head = node; + } +} + + +/** + * Wait for completion of the given fence, then unreference any buffers + * on the list. + * This typically unrefs/frees texture buffers after any rendering which uses + * them has completed. 
+ */ +void +cell_free_fenced_buffers(struct cell_context *cell, + struct cell_buffer_list *list) +{ + if (list->head) { + struct pipe_screen *ps = cell->pipe.screen; + struct cell_buffer_node *node; + + cell_fence_finish(cell, &list->fence); + + /* traverse the list, unreferencing buffers, freeing nodes */ + node = list->head; + while (node) { + struct cell_buffer_node *next = node->next; + assert(node->buffer); + pipe_buffer_unmap(ps, node->buffer); +#if 0 + printf("Unref buffer %p\n", node->buffer); + if (node->buffer->refcount == 1) + printf(" Delete!\n"); +#endif + pipe_buffer_reference(ps, &node->buffer, NULL); + FREE(node); + node = next; + } + list->head = NULL; + } +} + + +/** + * This should be called for each render command. + * Any texture buffers that are currently bound will be added to a fenced + * list to be freed later when the fence is executed/signalled. + */ +void +cell_add_fenced_textures(struct cell_context *cell) +{ + struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch]; + uint i; + + for (i = 0; i < cell->num_textures; i++) { + struct cell_texture *ct = cell->texture[i]; + if (ct) { + uint level; + for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { + if (ct->tiled_buffer[level]) { +#if 0 + printf("Adding texture %p buffer %p to list\n", + ct, ct->tiled_buffer[level]); +#endif + cell_add_buffer_to_list(cell, list, ct->tiled_buffer[level]); + } + } + } + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h new file mode 100644 index 0000000000..536b4ba411 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_fence.h @@ -0,0 +1,57 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef CELL_FENCE_H +#define CELL_FENCE_H + + +extern void +cell_fence_init(struct cell_fence *fence); + + +extern boolean +cell_fence_signalled(const struct cell_context *cell, + const struct cell_fence *fence); + + +extern void +cell_fence_finish(const struct cell_context *cell, + const struct cell_fence *fence); + + + +extern void +cell_free_fenced_buffers(struct cell_context *cell, + struct cell_buffer_list *list); + + +extern void +cell_add_fenced_textures(struct cell_context *cell); + + +#endif /* CELL_FENCE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c new file mode 100644 index 0000000000..6596b72010 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -0,0 +1,111 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_flush.h" +#include "cell_spu.h" +#include "cell_render.h" +#include "draw/draw_context.h" + + +/** + * Called via pipe->flush() + */ +void +cell_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct cell_context *cell = cell_context(pipe); + + if (fence) { + *fence = NULL; + /* XXX: Implement real fencing */ + flags |= CELL_FLUSH_WAIT; + } + + if (flags & PIPE_FLUSH_SWAPBUFFERS) + flags |= CELL_FLUSH_WAIT; + + draw_flush( cell->draw ); + cell_flush_int(cell, flags); +} + + +/** + * Cell internal flush function. Send the current batch buffer to all SPUs. + * If flags & CELL_FLUSH_WAIT, do not return until the SPUs are idle. 
+ * \param flags bitmask of flags CELL_FLUSH_WAIT, or zero + */ +void +cell_flush_int(struct cell_context *cell, unsigned flags) +{ + static boolean flushing = FALSE; /* recursion catcher */ + uint i; + + ASSERT(!flushing); + flushing = TRUE; + + if (flags & CELL_FLUSH_WAIT) { + uint64_t *cmd = (uint64_t *) cell_batch_alloc(cell, sizeof(uint64_t)); + *cmd = CELL_CMD_FINISH; + } + + cell_batch_flush(cell); + +#if 0 + /* Send CMD_FINISH to all SPUs */ + for (i = 0; i < cell->num_spus; i++) { + send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_FINISH); + } +#endif + + if (flags & CELL_FLUSH_WAIT) { + /* Wait for ack */ + for (i = 0; i < cell->num_spus; i++) { + uint k = wait_mbox_message(cell_global.spe_contexts[i]); + assert(k == CELL_CMD_FINISH); + } + } + + flushing = FALSE; +} + + +void +cell_flush_buffer_range(struct cell_context *cell, void *ptr, + unsigned size) +{ + uint64_t batch[1 + (ROUNDUP8(sizeof(struct cell_buffer_range)) / 8)]; + struct cell_buffer_range *br = (struct cell_buffer_range *) & batch[1]; + + batch[0] = CELL_CMD_FLUSH_BUFFER_RANGE; + br->base = (uintptr_t) ptr; + br->size = size; + cell_batch_append(cell, batch, sizeof(batch)); +} diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h new file mode 100644 index 0000000000..509ae6239a --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_flush.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_FLUSH +#define CELL_FLUSH + +#define CELL_FLUSH_WAIT 0x80000000 + +extern void +cell_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence); + +extern void +cell_flush_int(struct cell_context *cell, unsigned flags); + +extern void +cell_flush_buffer_range(struct cell_context *cell, void *ptr, + unsigned size); + +#endif diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c new file mode 100644 index 0000000000..d4d644d6e8 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -0,0 +1,1986 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +/** + * Generate SPU fragment program/shader code. + * + * Note that we generate SOA-style code here. So each TGSI instruction + * operates on four pixels (and is translated into four SPU instructions, + * generally speaking). 
+ * + * \author Brian Paul + */ + +#include <math.h> +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_dump.h" +#include "rtasm/rtasm_ppc_spe.h" +#include "util/u_memory.h" +#include "cell_context.h" +#include "cell_gen_fp.h" + + +#define MAX_TEMPS 16 +#define MAX_IMMED 8 + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + +/** + * Context needed during code generation. + */ +struct codegen +{ + struct cell_context *cell; + int inputs_reg; /**< 1st function parameter */ + int outputs_reg; /**< 2nd function parameter */ + int constants_reg; /**< 3rd function parameter */ + int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */ + int imm_regs[MAX_IMMED][4]; /**< maps TGSI immediates to SPE registers */ + + int num_imm; /**< number of immediates */ + + int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */ + + /** Per-instruction temps / intermediate temps */ + int num_itemps; + int itemps[12]; + + /** Current IF/ELSE/ENDIF nesting level */ + int if_nesting; + /** Index of execution mask register */ + int exec_mask_reg; + + /** KIL mask: indicates which fragments have been killed */ + int kill_mask_reg; + + int frame_size; /**< Stack frame size, in words */ + + struct spe_function *f; + boolean error; +}; + + +/** + * Allocate an intermediate temporary register. + */ +static int +get_itemp(struct codegen *gen) +{ + int t = spe_allocate_available_register(gen->f); + assert(gen->num_itemps < Elements(gen->itemps)); + gen->itemps[gen->num_itemps++] = t; + return t; +} + +/** + * Free all intermediate temporary registers. To be called after each + * instruction has been emitted. 
+ */ +static void +free_itemps(struct codegen *gen) +{ + int i; + for (i = 0; i < gen->num_itemps; i++) { + spe_release_register(gen->f, gen->itemps[i]); + } + gen->num_itemps = 0; +} + + +/** + * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}. + * The register is allocated and initialized upon the first call. + */ +static int +get_const_one_reg(struct codegen *gen) +{ + if (gen->one_reg <= 0) { + gen->one_reg = spe_allocate_available_register(gen->f); + + spe_indent(gen->f, 4); + spe_comment(gen->f, -4, "INIT CONSTANT 1.0:"); + + /* one = {1.0, 1.0, 1.0, 1.0} */ + spe_load_float(gen->f, gen->one_reg, 1.0f); + + spe_indent(gen->f, -4); + } + + return gen->one_reg; +} + + +/** + * Return index of the pixel execution mask. + * The register is allocated an initialized upon the first call. + * + * The pixel execution mask controls which pixels in a quad are + * modified, according to surrounding conditionals, loops, etc. + */ +static int +get_exec_mask_reg(struct codegen *gen) +{ + if (gen->exec_mask_reg <= 0) { + gen->exec_mask_reg = spe_allocate_available_register(gen->f); + + spe_indent(gen->f, 4); + spe_comment(gen->f, -4, "INIT EXEC MASK = ~0:"); + + /* exec_mask = {~0, ~0, ~0, ~0} */ + spe_load_int(gen->f, gen->exec_mask_reg, ~0); + + spe_indent(gen->f, -4); + } + + return gen->exec_mask_reg; +} + + +static boolean +is_register_src(struct codegen *gen, int channel, + const struct tgsi_full_src_register *src) +{ + int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel); + int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); + + if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) { + return FALSE; + } + if (src->SrcRegister.File == TGSI_FILE_TEMPORARY || + src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { + return TRUE; + } + return FALSE; +} + + +static boolean +is_memory_dst(struct codegen *gen, int channel, + const struct tgsi_full_dst_register *dst) +{ + if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { 
+ return TRUE; + } + else { + return FALSE; + } +} + + +/** + * Return the index of the SPU temporary containing the named TGSI + * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we + * just return the corresponding SPE register. If the TGIS register + * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register + * and emit an SPE load instruction. + */ +static int +get_src_reg(struct codegen *gen, + int channel, + const struct tgsi_full_src_register *src) +{ + int reg = -1; + int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel); + boolean reg_is_itemp = FALSE; + uint sign_op; + + assert(swizzle >= TGSI_SWIZZLE_X); + assert(swizzle <= TGSI_EXTSWIZZLE_ONE); + + if (swizzle == TGSI_EXTSWIZZLE_ONE) { + /* Load const one float and early out */ + reg = get_const_one_reg(gen); + } + else if (swizzle == TGSI_EXTSWIZZLE_ZERO) { + /* Load const zero float and early out */ + reg = get_itemp(gen); + spe_xor(gen->f, reg, reg, reg); + } + else { + assert(swizzle < 4); + + switch (src->SrcRegister.File) { + case TGSI_FILE_TEMPORARY: + reg = gen->temp_regs[src->SrcRegister.Index][swizzle]; + break; + case TGSI_FILE_INPUT: + { + /* offset is measured in quadwords, not bytes */ + int offset = src->SrcRegister.Index * 4 + swizzle; + reg = get_itemp(gen); + reg_is_itemp = TRUE; + /* Load: reg = memory[(machine_reg) + offset] */ + spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16); + } + break; + case TGSI_FILE_IMMEDIATE: + reg = gen->imm_regs[src->SrcRegister.Index][swizzle]; + break; + case TGSI_FILE_CONSTANT: + { + /* offset is measured in quadwords, not bytes */ + int offset = src->SrcRegister.Index * 4 + swizzle; + reg = get_itemp(gen); + reg_is_itemp = TRUE; + /* Load: reg = memory[(machine_reg) + offset] */ + spe_lqd(gen->f, reg, gen->constants_reg, offset * 16); + } + break; + default: + assert(0); + } + } + + /* + * Handle absolute value, negate or set-negative of src register. 
+ */ + sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); + if (sign_op != TGSI_UTIL_SIGN_KEEP) { + /* + * All sign ops are done by manipulating bit 31, the IEEE float sign bit. + */ + const int bit31mask_reg = get_itemp(gen); + int result_reg; + + if (reg_is_itemp) { + /* re-use 'reg' for the result */ + result_reg = reg; + } + else { + /* alloc a new reg for the result */ + result_reg = get_itemp(gen); + } + + /* mask with bit 31 set, the rest cleared */ + spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); + + if (sign_op == TGSI_UTIL_SIGN_CLEAR) { + spe_andc(gen->f, result_reg, reg, bit31mask_reg); + } + else if (sign_op == TGSI_UTIL_SIGN_SET) { + spe_and(gen->f, result_reg, reg, bit31mask_reg); + } + else { + assert(sign_op == TGSI_UTIL_SIGN_TOGGLE); + spe_xor(gen->f, result_reg, reg, bit31mask_reg); + } + + reg = result_reg; + } + + return reg; +} + + +/** + * Return the index of an SPE register to use for the given TGSI register. + * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the + * corresponding SPE register is returned. If the TGSI register is + * TGSI_FILE_OUTPUT we allocate an intermediate temporary register. + * See store_dest_reg() below... + */ +static int +get_dst_reg(struct codegen *gen, + int channel, + const struct tgsi_full_dst_register *dest) +{ + int reg = -1; + + switch (dest->DstRegister.File) { + case TGSI_FILE_TEMPORARY: + if (gen->if_nesting > 0) + reg = get_itemp(gen); + else + reg = gen->temp_regs[dest->DstRegister.Index][channel]; + break; + case TGSI_FILE_OUTPUT: + reg = get_itemp(gen); + break; + default: + assert(0); + } + + return reg; +} + + +/** + * When a TGSI instruction is writing to an output register, this + * function emits the SPE store instruction to store the value_reg. + * \param value_reg the SPE register containing the value to store. + * This would have been returned by get_dst_reg(). 
+ */ +static void +store_dest_reg(struct codegen *gen, + int value_reg, int channel, + const struct tgsi_full_dst_register *dest) +{ + /* + * XXX need to implement dst reg clamping/saturation + */ +#if 0 + switch (inst->Instruction.Saturate) { + case TGSI_SAT_NONE: + break; + case TGSI_SAT_ZERO_ONE: + break; + case TGSI_SAT_MINUS_PLUS_ONE: + break; + default: + assert( 0 ); + } +#endif + + switch (dest->DstRegister.File) { + case TGSI_FILE_TEMPORARY: + if (gen->if_nesting > 0) { + int d_reg = gen->temp_regs[dest->DstRegister.Index][channel]; + int exec_reg = get_exec_mask_reg(gen); + /* Mix d with new value according to exec mask: + * d[i] = mask_reg[i] ? value_reg : d_reg + */ + spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg); + } + else { + /* we're not inside a condition or loop: do nothing special */ + + } + break; + case TGSI_FILE_OUTPUT: + { + /* offset is measured in quadwords, not bytes */ + int offset = dest->DstRegister.Index * 4 + channel; + if (gen->if_nesting > 0) { + int exec_reg = get_exec_mask_reg(gen); + int curval_reg = get_itemp(gen); + /* First read the current value from memory: + * Load: curval = memory[(machine_reg) + offset] + */ + spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); + /* Mix curval with newvalue according to exec mask: + * d[i] = mask_reg[i] ? 
value_reg : d_reg + */ + spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg); + /* Store: memory[(machine_reg) + offset] = curval */ + spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); + } + else { + /* Store: memory[(machine_reg) + offset] = reg */ + spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16); + } + } + break; + default: + assert(0); + } +} + + + +static void +emit_prologue(struct codegen *gen) +{ + gen->frame_size = 1024; /* XXX temporary, should be dynamic */ + + spe_comment(gen->f, -4, "Function prologue:"); + + /* save $lr on stack # stqd $lr,16($sp) */ + spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); + + if (gen->frame_size >= 512) { + /* offset is too large for ai instruction */ + int offset_reg = spe_allocate_available_register(gen->f); + int sp_reg = spe_allocate_available_register(gen->f); + /* offset = -framesize */ + spe_load_int(gen->f, offset_reg, -gen->frame_size); + /* sp = $sp */ + spe_move(gen->f, sp_reg, SPE_REG_SP); + /* $sp = $sp + offset_reg */ + spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg); + /* save $sp in stack frame */ + spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0); + /* clean up */ + spe_release_register(gen->f, offset_reg); + spe_release_register(gen->f, sp_reg); + } + else { + /* save stack pointer # stqd $sp,-frameSize($sp) */ + spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); + + /* adjust stack pointer # ai $sp,$sp,-frameSize */ + spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); + } +} + + +static void +emit_epilogue(struct codegen *gen) +{ + const int return_reg = 3; + + spe_comment(gen->f, -4, "Function epilogue:"); + + spe_comment(gen->f, 0, "return the killed mask"); + if (gen->kill_mask_reg > 0) { + /* shader called KIL, return the "alive" mask */ + spe_move(gen->f, return_reg, gen->kill_mask_reg); + } + else { + /* return {0,0,0,0} */ + spe_load_uint(gen->f, return_reg, 0); + } + + spe_comment(gen->f, 0, "restore stack and return"); + if (gen->frame_size >= 512) { + /* 
offset is too large for ai instruction */ + int offset_reg = spe_allocate_available_register(gen->f); + /* offset = framesize */ + spe_load_int(gen->f, offset_reg, gen->frame_size); + /* $sp = $sp + offset */ + spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg); + /* clean up */ + spe_release_register(gen->f, offset_reg); + } + else { + /* restore stack pointer # ai $sp,$sp,frameSize */ + spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size); + } + + /* restore $lr # lqd $lr,16($sp) */ + spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); + + /* return from function call */ + spe_bi(gen->f, SPE_REG_RA, 0, 0); +} + + +static boolean +emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, src_reg[4], dst_reg[4]; + + spe_comment(gen->f, -4, "MOV:"); + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + } + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) && + is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) { + /* special-case: register to memory store */ + store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]); + } + else { + spe_move(gen->f, dst_reg[ch], src_reg[ch]); + store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]); + } + free_itemps(gen); + } + } + return true; +} + +/** + * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD + * becomes (up to) four SPU "fa" instructions because we're doing SOA + * processing. 
+ */ +static boolean +emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], s2_reg[4], d_reg[4]; + + spe_comment(gen->f, -4, "ADD:"); + /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */ + for (ch = 0; ch < 4; ch++) { + /* If the dest R, G, B or A writemask is enabled... */ + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + } + /* Loop over Red/Green/Blue/Alpha channels, do the add, store results */ + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + /* Emit actual SPE instruction: d = s1 + s2 */ + spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + /* Free any intermediate temps we allocated */ + free_itemps(gen); + } + } + return true; +} + +/** + * Emit subtract. See emit_ADD for comments. + */ +static boolean +emit_SUB(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], s2_reg[4], d_reg[4]; + spe_comment(gen->f, -4, "SUB:"); + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + } + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + /* d = s1 - s2 */ + spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + return true; +} + +/** + * Emit multiply add. See emit_ADD for comments. 
+ */ +static boolean +emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4]; + spe_comment(gen->f, -4, "MAD:"); + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + } + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + /* d = s1 * s2 + s3 */ + spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]); + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + return true; +} + + +/** + * Emit linear interpolate. See emit_ADD for comments. + */ +static boolean +emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4]; + spe_comment(gen->f, -4, "LERP:"); + /* setup/get src/dst/temp regs */ + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); + } + } + + /* d = s3 + s1(s2 - s3) */ + /* do all subtracts, then all fma, then all stores to better pipeline */ + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]); + } + } + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]); + } + } + 
for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + } + free_itemps(gen); + return true; +} + +/** + * Emit multiply. See emit_ADD for comments. + */ +static boolean +emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], s2_reg[4], d_reg[4]; + spe_comment(gen->f, -4, "MUL:"); + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + } + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + /* d = s1 * s2 */ + spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + return true; +} + +/** + * Emit reciprocal. See emit_ADD for comments. + */ +static boolean +emit_RCP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + spe_comment(gen->f, -4, "RCP:"); + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + /* d = 1/s1 */ + spe_frest(gen->f, d_reg, s1_reg); + spe_fi(gen->f, d_reg, s1_reg, d_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + return true; +} + +/** + * Emit reciprocal sqrt. See emit_ADD for comments. 
+ */ +static boolean +emit_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + spe_comment(gen->f, -4, "RSQ:"); + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + /* d = 1/s1 */ + spe_frsqest(gen->f, d_reg, s1_reg); + spe_fi(gen->f, d_reg, s1_reg, d_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + return true; +} + +/** + * Emit absolute value. See emit_ADD for comments. + */ +static boolean +emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + spe_comment(gen->f, -4, "ABS:"); + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + const int bit31mask_reg = get_itemp(gen); + + /* mask with bit 31 set, the rest cleared */ + spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); + + /* d = sign bit cleared in s1 */ + spe_andc(gen->f, d_reg, s1_reg, bit31mask_reg); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + return true; +} + +/** + * Emit 3 component dot product. See emit_ADD for comments. 
+ */ +static boolean +emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + int s1x_reg, s1y_reg, s1z_reg; + int s2x_reg, s2y_reg, s2z_reg; + int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); + + spe_comment(gen->f, -4, "DP3:"); + + s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + s2x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s2y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + s2z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + + /* t0 = x0 * x1 */ + spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg); + + /* t1 = y0 * y1 */ + spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg); + + /* t0 = z0 * z1 + t0 */ + spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg); + + /* t0 = t0 + t1 */ + spe_fa(gen->f, t0_reg, t0_reg, t1_reg); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, t0_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + } + } + + free_itemps(gen); + return true; +} + +/** + * Emit 4 component dot product. See emit_ADD for comments. 
+ */ +static boolean +emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + int s0x_reg, s0y_reg, s0z_reg, s0w_reg; + int s1x_reg, s1y_reg, s1z_reg, s1w_reg; + int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); + + spe_comment(gen->f, -4, "DP4:"); + + s0x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + s0y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + s0z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + s0w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]); + s1w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); + + /* t0 = x0 * x1 */ + spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg); + + /* t1 = y0 * y1 */ + spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg); + + /* t0 = z0 * z1 + t0 */ + spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg); + + /* t1 = w0 * w1 + t1 */ + spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg); + + /* t0 = t0 + t1 */ + spe_fa(gen->f, t0_reg, t0_reg, t1_reg); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, t0_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + } + } + + free_itemps(gen); + return true; +} + +/** + * Emit homogeneous dot product. See emit_ADD for comments. 
+ */ +static boolean +emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + /* XXX rewrite this function to look more like DP3/DP4 */ + int ch; + spe_comment(gen->f, -4, "DPH:"); + + int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + int tmp_reg = get_itemp(gen); + + /* t = x0 * x1 */ + spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + + s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + /* t = y0 * y1 + t */ + spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + + s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + /* t = z0 * z1 + t */ + spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + + s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); + /* t = w1 + t */ + spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, tmp_reg); + store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); + } + } + + free_itemps(gen); + return true; +} + +/** + * Emit cross product. See emit_ADD for comments. 
+ */ +static boolean +emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + spe_comment(gen->f, -4, "XPD:"); + + int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + int tmp_reg = get_itemp(gen); + + /* t = z0 * y1 */ + spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + + s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + /* t = y0 * z1 - t */ + spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) { + store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]); + } + + s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + /* t = x0 * z1 */ + spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + + s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + /* t = z0 * x1 - t */ + spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) { + store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]); + } + + s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + /* t = y0 * x1 */ + spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + + s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + /* t = x0 * y1 - t */ + spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) { + store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return true; +} + +/** + * Emit set-if-greater-than. 
+ * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as + * the result but OpenGL/TGSI needs 0.0 and 1.0 results. + * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND. + */ +static boolean +emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "SGT:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* d = (s1 > s2) */ + spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); + + /* convert d from 0x0/0xffffffff to 0.0/1.0 */ + /* d = d & one_reg */ + spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + +/** + * Emit set-if_less-then. See emit_SGT for comments. + */ +static boolean +emit_SLT(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "SLT:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* d = (s1 < s2) */ + spe_fcgt(gen->f, d_reg, s2_reg, s1_reg); + + /* convert d from 0x0/0xffffffff to 0.0/1.0 */ + /* d = d & one_reg */ + spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + +/** + * Emit set-if_greater-then-or-equal. See emit_SGT for comments. 
+ */ +static boolean +emit_SGE(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "SGE:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* d = (s1 >= s2) */ + spe_fcgt(gen->f, d_reg, s2_reg, s1_reg); + + /* convert d from 0x0/0xffffffff to 0.0/1.0 */ + /* d = ~d & one_reg */ + spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + +/** + * Emit set-if_less-then-or-equal. See emit_SGT for comments. + */ +static boolean +emit_SLE(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "SLE:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* d = (s1 <= s2) */ + spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); + + /* convert d from 0x0/0xffffffff to 0.0/1.0 */ + /* d = ~d & one_reg */ + spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + +/** + * Emit set-if_equal. See emit_SGT for comments. 
+ */ +static boolean +emit_SEQ(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "SEQ:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* d = (s1 == s2) */ + spe_fceq(gen->f, d_reg, s1_reg, s2_reg); + + /* convert d from 0x0/0xffffffff to 0.0/1.0 */ + /* d = d & one_reg */ + spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + +/** + * Emit set-if_not_equal. See emit_SGT for comments. + */ +static boolean +emit_SNE(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "SNE:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* d = (s1 != s2) */ + spe_fceq(gen->f, d_reg, s1_reg, s2_reg); + spe_nor(gen->f, d_reg, d_reg, d_reg); + + /* convert d from 0x0/0xffffffff to 0.0/1.0 */ + /* d = d & one_reg */ + spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + +/** + * Emit compare. See emit_SGT for comments. 
+ */ +static boolean +emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "CMP:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int zero_reg = get_itemp(gen); + + spe_xor(gen->f, zero_reg, zero_reg, zero_reg); + + /* d = (s1 < 0) ? s2 : s3 */ + spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); + spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + +/** + * Emit trunc. + * Convert float to signed int + * Convert signed int to float + */ +static boolean +emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "TRUNC:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + /* Convert float to int */ + spe_cflts(gen->f, d_reg, s1_reg, 0); + + /* Convert int to float */ + spe_csflt(gen->f, d_reg, d_reg, 0); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + +/** + * Emit floor. 
+ * If negative int subtract one + * Convert float to signed int + * Convert signed int to float + */ +static boolean +emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "FLR:"); + + int zero_reg = get_itemp(gen); + spe_xor(gen->f, zero_reg, zero_reg, zero_reg); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int tmp_reg = get_itemp(gen); + + /* If negative, subtract 1.0 */ + spe_fcgt(gen->f, tmp_reg, zero_reg, s1_reg); + spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), tmp_reg); + spe_fs(gen->f, tmp_reg, s1_reg, tmp_reg); + + /* Convert float to int */ + spe_cflts(gen->f, tmp_reg, tmp_reg, 0); + + /* Convert int to float */ + spe_csflt(gen->f, d_reg, tmp_reg, 0); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + +/** + * Compute frac = Input - FLR(Input) + */ +static boolean +emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + + spe_comment(gen->f, -4, "FRC:"); + + int zero_reg = get_itemp(gen); + spe_xor(gen->f, zero_reg, zero_reg, zero_reg); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int tmp_reg = get_itemp(gen); + + /* If negative, subtract 1.0 */ + spe_fcgt(gen->f, tmp_reg, zero_reg, s1_reg); + spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), tmp_reg); + spe_fs(gen->f, tmp_reg, s1_reg, tmp_reg); + + /* Convert float to int */ + spe_cflts(gen->f, tmp_reg, tmp_reg, 0); + + /* Convert int to float */ + spe_csflt(gen->f, tmp_reg, tmp_reg, 0); + + /* d = s1 - FLR(s1) */ + spe_fs(gen->f, d_reg, s1_reg, tmp_reg); + + 
store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + + +#if 0 +static void +print_functions(struct cell_context *cell) +{ + struct cell_spu_function_info *funcs = &cell->spu_functions; + uint i; + for (i = 0; i < funcs->num; i++) { + printf("SPU func %u: %s at %u\n", + i, funcs->names[i], funcs->addrs[i]); + } +} +#endif + + +static uint +lookup_function(struct cell_context *cell, const char *funcname) +{ + const struct cell_spu_function_info *funcs = &cell->spu_functions; + uint i, addr = 0; + for (i = 0; i < funcs->num; i++) { + if (strcmp(funcs->names[i], funcname) == 0) { + addr = funcs->addrs[i]; + } + } + assert(addr && "spu function not found"); + return addr / 4; /* discard 2 least significant bits */ +} + + +/** + * Emit code to call a SPU function. + * Used to implement instructions like SIN/COS/POW/TEX/etc. + */ +static boolean +emit_function_call(struct codegen *gen, + const struct tgsi_full_instruction *inst, + char *funcname, uint num_args) +{ + const uint addr = lookup_function(gen->cell, funcname); + char comment[100]; + int ch; + + assert(num_args <= 3); + + snprintf(comment, sizeof(comment), "CALL %s:", funcname); + spe_comment(gen->f, -4, comment); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s_regs[3], d_reg; + ubyte usedRegs[SPE_NUM_REGS]; + uint a, i, numUsed; + + for (a = 0; a < num_args; a++) { + s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]); + } + d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + numUsed = spe_get_registers_used(gen->f, usedRegs); + assert(numUsed < gen->frame_size / 16 - 2); + + /* save registers to stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + int offset = 2 + i; + spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } + + /* setup function arguments */ + for (a = 0; a < num_args; a++) { + spe_move(gen->f, 3 + a, s_regs[a]); + } + + /* branch to function, 
save return addr */ + spe_brasl(gen->f, SPE_REG_RA, addr); + + /* save function's return value */ + spe_move(gen->f, d_reg, 3); + + /* restore registers from stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + if (reg != d_reg) { + int offset = 2 + i; + spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } + } + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return true; +} + + +static boolean +emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const uint target = inst->InstructionExtTexture.Texture; + const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + uint addr; + int ch; + int coord_regs[4], d_regs[4]; + + switch (target) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_2D: + addr = lookup_function(gen->cell, "spu_tex_2d"); + break; + case TGSI_TEXTURE_3D: + addr = lookup_function(gen->cell, "spu_tex_3d"); + break; + case TGSI_TEXTURE_CUBE: + addr = lookup_function(gen->cell, "spu_tex_cube"); + break; + default: + ASSERT(0 && "unsupported texture target"); + return FALSE; + } + + assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER); + + spe_comment(gen->f, -4, "CALL tex:"); + + /* get src/dst reg info */ + for (ch = 0; ch < 4; ch++) { + coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + + { + ubyte usedRegs[SPE_NUM_REGS]; + uint i, numUsed; + + numUsed = spe_get_registers_used(gen->f, usedRegs); + assert(numUsed < gen->frame_size / 16 - 2); + + /* save registers to stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + int offset = 2 + i; + spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } + + /* setup function arguments (XXX depends on target) */ + for (i = 0; i < 4; i++) { + spe_move(gen->f, 3 + i, coord_regs[i]); + } + spe_load_uint(gen->f, 7, unit); /* sampler unit */ + + /* branch to function, save return addr */ + spe_brasl(gen->f, 
SPE_REG_RA, addr); + + /* save function's return values (four pixel's colors) */ + for (i = 0; i < 4; i++) { + spe_move(gen->f, d_regs[i], 3 + i); + } + + /* restore registers from stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + if (reg != d_regs[0] && + reg != d_regs[1] && + reg != d_regs[2] && + reg != d_regs[3]) { + int offset = 2 + i; + spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } + } + } + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + } + + return TRUE; +} + + +/** + * KILL if any of src reg values are less than zero. + */ +static boolean +emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + int s_regs[4], kil_reg = -1, cmp_reg, zero_reg; + + spe_comment(gen->f, -4, "CALL kil:"); + + /* zero = {0,0,0,0} */ + zero_reg = get_itemp(gen); + spe_load_uint(gen->f, zero_reg, 0); + + cmp_reg = get_itemp(gen); + + /* get src regs */ + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + } + } + + /* test if any src regs are < 0 */ + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + if (kil_reg >= 0) { + /* cmp = 0 > src ? : ~0 : 0 */ + spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]); + /* kil = kil | cmp */ + spe_or(gen->f, kil_reg, kil_reg, cmp_reg); + } + else { + kil_reg = get_itemp(gen); + /* kil = 0 > src ? 
: ~0 : 0 */ + spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]); + } + } + } + + if (gen->if_nesting) { + /* may have been a conditional kil */ + spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg); + } + + /* allocate the kill mask reg if needed */ + if (gen->kill_mask_reg <= 0) { + gen->kill_mask_reg = spe_allocate_available_register(gen->f); + spe_move(gen->f, gen->kill_mask_reg, kil_reg); + } + else { + spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg); + } + + free_itemps(gen); + + return TRUE; +} + + + +/** + * Emit max. See emit_SGT for comments. + */ +static boolean +emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4]; + + spe_comment(gen->f, -4, "MAX:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); + } + } + + /* d = (s0 > s1) ? s0 : s1 */ + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]); + } + } + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]); + } + } + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + } + + free_itemps(gen); + return true; +} + +/** + * Emit max. See emit_SGT for comments. 
+ */ +static boolean +emit_MIN(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4]; + + spe_comment(gen->f, -4, "MIN:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); + } + } + + /* d = (s1 > s0) ? s0 : s1 */ + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]); + } + } + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]); + } + } + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + } + + free_itemps(gen); + return true; +} + +static boolean +emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const int channel = 0; + const int exec_reg = get_exec_mask_reg(gen); + + spe_comment(gen->f, -4, "IF:"); + + /* update execution mask with the predicate register */ + int tmp_reg = get_itemp(gen); + int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]); + + /* tmp = (s1_reg == 0) */ + spe_ceqi(gen->f, tmp_reg, s1_reg, 0); + /* tmp = !tmp */ + spe_complement(gen->f, tmp_reg, tmp_reg); + /* exec_mask = exec_mask & tmp */ + spe_and(gen->f, exec_reg, exec_reg, tmp_reg); + + gen->if_nesting++; + + free_itemps(gen); + + return true; +} + + +static boolean +emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const int exec_reg = get_exec_mask_reg(gen); + + spe_comment(gen->f, -4, "ELSE:"); + + /* exec_mask = !exec_mask */ + spe_complement(gen->f, 
exec_reg, exec_reg); + + return true; +} + + +static boolean +emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const int exec_reg = get_exec_mask_reg(gen); + + spe_comment(gen->f, -4, "ENDIF:"); + + /* XXX todo: pop execution mask */ + + spe_load_int(gen->f, exec_reg, ~0x0); + + gen->if_nesting--; + return true; +} + + +static boolean +emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst, + boolean ddx) +{ + int ch; + + spe_comment(gen->f, -4, ddx ? "DDX:" : "DDY:"); + + for (ch = 0; ch < 4; ch++) { + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + int t1_reg = get_itemp(gen); + int t2_reg = get_itemp(gen); + + spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */ + if (ddx) { + spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */ + } + else { + spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */ + } + spe_fs(gen->f, d_reg, t2_reg, t1_reg); + + free_itemps(gen); + } + } + + return true; +} + + + + +/** + * Emit END instruction. + * We just return from the shader function at this point. + * + * Note that there may be more code after this that would be + * called by TGSI_OPCODE_CALL. + */ +static boolean +emit_END(struct codegen *gen) +{ + spe_comment(gen->f, -4, "END:"); + emit_epilogue(gen); + return true; +} + + +/** + * Emit code for the given instruction. Just a big switch stmt. 
+ */ +static boolean +emit_instruction(struct codegen *gen, + const struct tgsi_full_instruction *inst) +{ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + return emit_MOV(gen, inst); + case TGSI_OPCODE_MUL: + return emit_MUL(gen, inst); + case TGSI_OPCODE_ADD: + return emit_ADD(gen, inst); + case TGSI_OPCODE_SUB: + return emit_SUB(gen, inst); + case TGSI_OPCODE_MAD: + return emit_MAD(gen, inst); + case TGSI_OPCODE_LERP: + return emit_LERP(gen, inst); + case TGSI_OPCODE_DP3: + return emit_DP3(gen, inst); + case TGSI_OPCODE_DP4: + return emit_DP4(gen, inst); + case TGSI_OPCODE_DPH: + return emit_DPH(gen, inst); + case TGSI_OPCODE_XPD: + return emit_XPD(gen, inst); + case TGSI_OPCODE_RCP: + return emit_RCP(gen, inst); + case TGSI_OPCODE_RSQ: + return emit_RSQ(gen, inst); + case TGSI_OPCODE_ABS: + return emit_ABS(gen, inst); + case TGSI_OPCODE_SGT: + return emit_SGT(gen, inst); + case TGSI_OPCODE_SLT: + return emit_SLT(gen, inst); + case TGSI_OPCODE_SGE: + return emit_SGE(gen, inst); + case TGSI_OPCODE_SLE: + return emit_SLE(gen, inst); + case TGSI_OPCODE_SEQ: + return emit_SEQ(gen, inst); + case TGSI_OPCODE_SNE: + return emit_SNE(gen, inst); + case TGSI_OPCODE_CMP: + return emit_CMP(gen, inst); + case TGSI_OPCODE_MAX: + return emit_MAX(gen, inst); + case TGSI_OPCODE_MIN: + return emit_MIN(gen, inst); + case TGSI_OPCODE_TRUNC: + return emit_TRUNC(gen, inst); + case TGSI_OPCODE_FLR: + return emit_FLR(gen, inst); + case TGSI_OPCODE_FRC: + return emit_FRC(gen, inst); + case TGSI_OPCODE_END: + return emit_END(gen); + + case TGSI_OPCODE_COS: + return emit_function_call(gen, inst, "spu_cos", 1); + case TGSI_OPCODE_SIN: + return emit_function_call(gen, inst, "spu_sin", 1); + case TGSI_OPCODE_POW: + return emit_function_call(gen, inst, "spu_pow", 2); + case TGSI_OPCODE_EXPBASE2: + return emit_function_call(gen, inst, "spu_exp2", 1); + case TGSI_OPCODE_LOGBASE2: + return emit_function_call(gen, inst, "spu_log2", 1); + case 
TGSI_OPCODE_TEX: + /* fall-through for now */ + case TGSI_OPCODE_TXD: + /* fall-through for now */ + case TGSI_OPCODE_TXB: + /* fall-through for now */ + case TGSI_OPCODE_TXL: + /* fall-through for now */ + case TGSI_OPCODE_TXP: + return emit_TEX(gen, inst); + case TGSI_OPCODE_KIL: + return emit_KIL(gen, inst); + + case TGSI_OPCODE_IF: + return emit_IF(gen, inst); + case TGSI_OPCODE_ELSE: + return emit_ELSE(gen, inst); + case TGSI_OPCODE_ENDIF: + return emit_ENDIF(gen, inst); + + case TGSI_OPCODE_DDX: + return emit_DDX_DDY(gen, inst, true); + case TGSI_OPCODE_DDY: + return emit_DDX_DDY(gen, inst, false); + + /* XXX lots more cases to do... */ + + default: + fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n", + inst->Instruction.Opcode); + return false; + } + + return true; +} + + + +/** + * Emit code for a TGSI immediate value (vector of four floats). + * This involves register allocation and initialization. + * XXX the initialization should be done by a "prepare" stage, not + * per quad execution! + */ +static boolean +emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed) +{ + int ch; + + assert(gen->num_imm < MAX_TEMPS); + + spe_comment(gen->f, -4, "IMMEDIATE:"); + + for (ch = 0; ch < 4; ch++) { + float val = immed->u.ImmediateFloat32[ch].Float; + + if (ch > 0 && val == immed->u.ImmediateFloat32[ch - 1].Float) { + /* re-use previous register */ + gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1]; + } + else { + int reg = spe_allocate_available_register(gen->f); + + if (reg < 0) + return false; + + /* update immediate map */ + gen->imm_regs[gen->num_imm][ch] = reg; + + /* emit initializer instruction */ + spe_load_float(gen->f, reg, val); + } + } + + gen->num_imm++; + + return true; +} + + + +/** + * Emit "code" for a TGSI declaration. + * We only care about TGSI TEMPORARY register declarations at this time. + * For each TGSI TEMPORARY we allocate four SPE registers. 
+ */ +static boolean +emit_declaration(struct cell_context *cell, + struct codegen *gen, const struct tgsi_full_declaration *decl) +{ + int i, ch; + + switch (decl->Declaration.File) { + case TGSI_FILE_TEMPORARY: + for (i = decl->DeclarationRange.First; + i <= decl->DeclarationRange.Last; + i++) { + assert(i < MAX_TEMPS); + for (ch = 0; ch < 4; ch++) { + gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f); + if (gen->temp_regs[i][ch] < 0) + return false; /* out of regs */ + } + + /* XXX if we run out of SPE registers, we need to spill + * to SPU memory. someday... + */ + + { + char buf[100]; + sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i, + gen->temp_regs[i][0], gen->temp_regs[i][1], + gen->temp_regs[i][2], gen->temp_regs[i][3]); + spe_comment(gen->f, -4, buf); + } + } + break; + default: + ; /* ignore */ + } + + return true; +} + + + +/** + * Translate TGSI shader code to SPE instructions. This is done when + * the state tracker gives us a new shader (via pipe->create_fs_state()). + * + * \param cell the rendering context (in) + * \param tokens the TGSI shader (in) + * \param f the generated function (out) + */ +boolean +cell_gen_fragment_program(struct cell_context *cell, + const struct tgsi_token *tokens, + struct spe_function *f) +{ + struct tgsi_parse_context parse; + struct codegen gen; + + memset(&gen, 0, sizeof(gen)); + gen.cell = cell; + gen.f = f; + + /* For SPE function calls: reg $3 = first param, $4 = second param, etc. 
*/ + gen.inputs_reg = 3; /* pointer to inputs array */ + gen.outputs_reg = 4; /* pointer to outputs array */ + gen.constants_reg = 5; /* pointer to constants array */ + + spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); + spe_allocate_register(f, gen.inputs_reg); + spe_allocate_register(f, gen.outputs_reg); + spe_allocate_register(f, gen.constants_reg); + + if (cell->debug_flags & CELL_DEBUG_ASM) { + spe_print_code(f, true); + spe_indent(f, 8); + printf("Begin %s\n", __FUNCTION__); + tgsi_dump(tokens, 0); + } + + tgsi_parse_init(&parse, tokens); + + emit_prologue(&gen); + + while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + if (!emit_immediate(&gen, &parse.FullToken.FullImmediate)) + gen.error = true; + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration)) + gen.error = true; + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (!emit_instruction(&gen, &parse.FullToken.FullInstruction)) + gen.error = true; + break; + + default: + assert(0); + } + } + + if (gen.error) { + /* terminate the SPE code */ + return emit_END(&gen); + } + + if (cell->debug_flags & CELL_DEBUG_ASM) { + printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst); + printf("End %s\n", __FUNCTION__); + } + + tgsi_parse_free( &parse ); + + return !gen.error; +} diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.h b/src/gallium/drivers/cell/ppu/cell_gen_fp.h new file mode 100644 index 0000000000..99faea7046 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +#ifndef CELL_GEN_FP_H +#define CELL_GEN_FP_H + + + +extern boolean +cell_gen_fragment_program(struct cell_context *cell, + const struct tgsi_token *tokens, + struct spe_function *f); + + +#endif /* CELL_GEN_FP_H */ + diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c new file mode 100644 index 0000000000..4e1e53ecdc --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -0,0 +1,2154 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +/** + * Generate SPU per-fragment code (actually per-quad code). + * \author Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "rtasm/rtasm_ppc_spe.h" +#include "cell_context.h" +#include "cell_gen_fragment.h" + + + +/** Do extra optimizations? */ +#define OPTIMIZATIONS 1 + + +/** + * Generate SPE code to perform Z/depth testing. + * + * \param dsa Gallium depth/stencil/alpha state to gen code for + * \param f SPE function to append instruction onto. 
+ * \param mask_reg register containing quad/pixel "alive" mask (in/out) + * \param ifragZ_reg register containing integer fragment Z values (in) + * \param ifbZ_reg register containing integer frame buffer Z values (in/out) + * \param zmask_reg register containing result of Z test/comparison (out) + * + * Returns true if the Z-buffer needs to be updated. + */ +static boolean +gen_depth_test(struct spe_function *f, + const struct pipe_depth_stencil_alpha_state *dsa, + int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg) +{ + /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_ + * quantities. This only makes a difference for 32-bit Z values though. + */ + ASSERT(dsa->depth.enabled); + + switch (dsa->depth.func) { + case PIPE_FUNC_EQUAL: + /* zmask = (ifragZ == ref) */ + spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & zmask) */ + spe_and(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_NOTEQUAL: + /* zmask = (ifragZ == ref) */ + spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & ~zmask) */ + spe_andc(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_GREATER: + /* zmask = (ifragZ > ref) */ + spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & zmask) */ + spe_and(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_LESS: + /* zmask = (ref > ifragZ) */ + spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); + /* mask = (mask & zmask) */ + spe_and(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_LEQUAL: + /* zmask = (ifragZ > ref) */ + spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); + /* mask = (mask & ~zmask) */ + spe_andc(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_GEQUAL: + /* zmask = (ref > ifragZ) */ + spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); + /* mask = (mask & ~zmask) */ + spe_andc(f, mask_reg, mask_reg, zmask_reg); + break; + + case PIPE_FUNC_NEVER: + spe_il(f, mask_reg, 0); /* mask = {0,0,0,0} */ + spe_move(f, zmask_reg, 
mask_reg); /* zmask = mask */ + break; + + case PIPE_FUNC_ALWAYS: + /* mask unchanged */ + spe_il(f, zmask_reg, ~0); /* zmask = {~0,~0,~0,~0} */ + break; + + default: + ASSERT(0); + break; + } + + if (dsa->depth.writemask) { + /* + * If (ztest passed) { + * framebufferZ = fragmentZ; + * } + * OR, + * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ; + */ + spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg); + return true; + } + + return false; +} + + +/** + * Generate SPE code to perform alpha testing. + * + * \param dsa Gallium depth/stencil/alpha state to gen code for + * \param f SPE function to append instruction onto. + * \param mask_reg register containing quad/pixel "alive" mask (in/out) + * \param fragA_reg register containing four fragment alpha values (in) + */ +static void +gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, int mask_reg, int fragA_reg) +{ + int ref_reg = spe_allocate_available_register(f); + int amask_reg = spe_allocate_available_register(f); + + ASSERT(dsa->alpha.enabled); + + if ((dsa->alpha.func != PIPE_FUNC_NEVER) && + (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { + /* load/splat the alpha reference float value */ + spe_load_float(f, ref_reg, dsa->alpha.ref); + } + + /* emit code to do the alpha comparison, updating 'mask' */ + switch (dsa->alpha.func) { + case PIPE_FUNC_EQUAL: + /* amask = (fragA == ref) */ + spe_fceq(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & amask) */ + spe_and(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_NOTEQUAL: + /* amask = (fragA == ref) */ + spe_fceq(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & ~amask) */ + spe_andc(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_GREATER: + /* amask = (fragA > ref) */ + spe_fcgt(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & amask) */ + spe_and(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_LESS: + /* amask = (ref > fragA) */ + spe_fcgt(f, amask_reg, 
ref_reg, fragA_reg); + /* mask = (mask & amask) */ + spe_and(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_LEQUAL: + /* amask = (fragA > ref) */ + spe_fcgt(f, amask_reg, fragA_reg, ref_reg); + /* mask = (mask & ~amask) */ + spe_andc(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_GEQUAL: + /* amask = (ref > fragA) */ + spe_fcgt(f, amask_reg, ref_reg, fragA_reg); + /* mask = (mask & ~amask) */ + spe_andc(f, mask_reg, mask_reg, amask_reg); + break; + + case PIPE_FUNC_NEVER: + spe_il(f, mask_reg, 0); /* mask = [0,0,0,0] */ + break; + + case PIPE_FUNC_ALWAYS: + /* no-op, mask unchanged */ + break; + + default: + ASSERT(0); + break; + } + +#if OPTIMIZATIONS + /* if mask == {0,0,0,0} we're all done, return */ + { + /* re-use amask reg here */ + int tmp_reg = amask_reg; + /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */ + spe_orx(f, tmp_reg, mask_reg); + /* if tmp[0] == 0 then return from function call */ + spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0); + } +#endif + + spe_release_register(f, ref_reg); + spe_release_register(f, amask_reg); +} + +/* This pair of functions is used inline to allocate and deallocate + * optional constant registers. Once a constant is discovered to be + * needed, we will likely need it again, so we don't want to deallocate + * it and have to allocate and load it again unnecessarily. 
+ */ +static inline void +setup_optional_register(struct spe_function *f, boolean *is_already_set, unsigned int *r) +{ + if (*is_already_set) return; + *r = spe_allocate_available_register(f); + *is_already_set = true; +} + +static inline void +release_optional_register(struct spe_function *f, boolean *is_already_set, unsigned int r) +{ + if (!*is_already_set) return; + spe_release_register(f, r); + *is_already_set = false; +} + +static inline void +setup_const_register(struct spe_function *f, boolean *is_already_set, unsigned int *r, float value) +{ + if (*is_already_set) return; + setup_optional_register(f, is_already_set, r); + spe_load_float(f, *r, value); +} + +static inline void +release_const_register(struct spe_function *f, boolean *is_already_set, unsigned int r) +{ + release_optional_register(f, is_already_set, r); +} + +/** + * Generate SPE code to implement the given blend mode for a quad of pixels. + * \param f SPE function to append instruction onto. + * \param fragR_reg register with fragment red values (float) (in/out) + * \param fragG_reg register with fragment green values (float) (in/out) + * \param fragB_reg register with fragment blue values (float) (in/out) + * \param fragA_reg register with fragment alpha values (float) (in/out) + * \param fbRGBA_reg register with packed framebuffer colors (integer) (in) + */ +static void +gen_blend(const struct pipe_blend_state *blend, + const struct pipe_blend_color *blend_color, + struct spe_function *f, + enum pipe_format color_format, + int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg, + int fbRGBA_reg) +{ + int term1R_reg = spe_allocate_available_register(f); + int term1G_reg = spe_allocate_available_register(f); + int term1B_reg = spe_allocate_available_register(f); + int term1A_reg = spe_allocate_available_register(f); + + int term2R_reg = spe_allocate_available_register(f); + int term2G_reg = spe_allocate_available_register(f); + int term2B_reg = spe_allocate_available_register(f); + int 
term2A_reg = spe_allocate_available_register(f); + + int fbR_reg = spe_allocate_available_register(f); + int fbG_reg = spe_allocate_available_register(f); + int fbB_reg = spe_allocate_available_register(f); + int fbA_reg = spe_allocate_available_register(f); + + int tmp_reg = spe_allocate_available_register(f); + + /* Optional constant registers we might or might not end up using; + * if we do use them, make sure we only allocate them once by + * keeping a flag on each one. + */ + boolean one_reg_set = false; + unsigned int one_reg; + boolean constR_reg_set = false, constG_reg_set = false, + constB_reg_set = false, constA_reg_set = false; + unsigned int constR_reg, constG_reg, constB_reg, constA_reg; + + ASSERT(blend->blend_enable); + + /* Unpack/convert framebuffer colors from four 32-bit packed colors + * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA). + * Each 8-bit color component is expanded into a float in [0.0, 1.0]. + */ + { + int mask_reg = spe_allocate_available_register(f); + + /* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */ + spe_load_int(f, mask_reg, 0xff); + + /* XXX there may be more clever ways to implement the following code */ + switch (color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + /* fbB = fbB & mask */ + spe_and(f, fbB_reg, fbRGBA_reg, mask_reg); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbG = fbRGBA & mask */ + spe_and(f, fbG_reg, fbRGBA_reg, mask_reg); + /* fbG = fbG >> 8 */ + spe_roti(f, fbG_reg, fbG_reg, -8); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbR = fbRGBA & mask */ + spe_and(f, fbR_reg, fbRGBA_reg, mask_reg); + /* fbR = fbR >> 16 */ + spe_roti(f, fbR_reg, fbR_reg, -16); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbA = fbRGBA & mask */ + spe_and(f, fbA_reg, fbRGBA_reg, mask_reg); + /* fbA = fbA >> 24 */ + spe_roti(f, fbA_reg, fbA_reg, -24); + break; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + /* fbA = fbA & mask */ + spe_and(f, 
fbA_reg, fbRGBA_reg, mask_reg); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbR = fbRGBA & mask */ + spe_and(f, fbR_reg, fbRGBA_reg, mask_reg); + /* fbR = fbR >> 8 */ + spe_roti(f, fbR_reg, fbR_reg, -8); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbG = fbRGBA & mask */ + spe_and(f, fbG_reg, fbRGBA_reg, mask_reg); + /* fbG = fbG >> 16 */ + spe_roti(f, fbG_reg, fbG_reg, -16); + /* mask = mask << 8 */ + spe_roti(f, mask_reg, mask_reg, 8); + + /* fbB = fbRGBA & mask */ + spe_and(f, fbB_reg, fbRGBA_reg, mask_reg); + /* fbB = fbB >> 24 */ + spe_roti(f, fbB_reg, fbB_reg, -24); + break; + + default: + ASSERT(0); + } + + /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */ + spe_cuflt(f, fbR_reg, fbR_reg, 8); + spe_cuflt(f, fbG_reg, fbG_reg, 8); + spe_cuflt(f, fbB_reg, fbB_reg, 8); + spe_cuflt(f, fbA_reg, fbA_reg, 8); + + spe_release_register(f, mask_reg); + } + + /* + * Compute Src RGB terms. We're actually looking for the value + * of (the appropriate RGB factors) * (the incoming source RGB color), + * because in some cases (like PIPE_BLENDFACTOR_ONE and + * PIPE_BLENDFACTOR_ZERO) we can avoid doing unnecessary math. 
+ */ + switch (blend->rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + /* factors = (1,1,1), so term = (R,G,B) */ + spe_move(f, term1R_reg, fragR_reg); + spe_move(f, term1G_reg, fragG_reg); + spe_move(f, term1B_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_ZERO: + /* factors = (0,0,0), so term = (0,0,0) */ + spe_load_float(f, term1R_reg, 0.0f); + spe_load_float(f, term1G_reg, 0.0f); + spe_load_float(f, term1B_reg, 0.0f); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* factors = (R,G,B), so term = (R*R, G*G, B*B) */ + spe_fm(f, term1R_reg, fragR_reg, fragR_reg); + spe_fm(f, term1G_reg, fragG_reg, fragG_reg); + spe_fm(f, term1B_reg, fragB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + /* factors = (A,A,A), so term = (R*A, G*A, B*A) */ + spe_fm(f, term1R_reg, fragR_reg, fragA_reg); + spe_fm(f, term1G_reg, fragG_reg, fragA_reg); + spe_fm(f, term1B_reg, fragB_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) + * or in other words term = (R-R*R, G-G*G, B-B*B) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fragR_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fragG_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fragB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */ + spe_fm(f, term1R_reg, fragR_reg, fbR_reg); + spe_fm(f, term1G_reg, fragG_reg, fbG_reg); + spe_fm(f, term1B_reg, fragB_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) + * or term = (R-R*Rfb, G-G*Gfb, B-B*Bfb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fbR_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fbG_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fbB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + /* factors = 
(1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) + * or term = (R-R*A,G-G*A,B-B*A) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fragA_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fragA_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fragA_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */ + spe_fm(f, term1R_reg, fragR_reg, fbA_reg); + spe_fm(f, term1G_reg, fragG_reg, fbA_reg); + spe_fm(f, term1B_reg, fragB_reg, fbA_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) + * or term = (R-R*Afb,G-G*Afb,b-B*Afb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fbA_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fbA_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fbA_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]); + /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */ + spe_fm(f, term1R_reg, fragR_reg, constR_reg); + spe_fm(f, term1G_reg, fragG_reg, constG_reg); + spe_fm(f, term1B_reg, fragB_reg, constB_reg); + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + /* we'll need the optional constant alpha register */ + setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]); + /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */ + spe_fm(f, term1R_reg, fragR_reg, constA_reg); + spe_fm(f, term1G_reg, fragG_reg, constA_reg); + spe_fm(f, term1B_reg, fragB_reg, constA_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need the optional constant color registers */ + setup_const_register(f, 
&constR_reg_set, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]); + /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) + * or term = (R-R*Rc, G-G*Gc, B-B*Bc) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, constR_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, constG_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, constB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]); + /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac)) + * or term = (R-R*Ac,G-G*Ac,B-B*Ac) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, constA_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, constA_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, constA_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + /* We'll need the optional {1,1,1,1} register */ + setup_const_register(f, &one_reg_set, &one_reg, 1.0f); + /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so + * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb)) + * We could expand the term (as a*min(b,c) == min(a*b,a*c) + * as long as a is positive), but then we'd have to do three + * spe_float_min() functions instead of one, so this is simpler. 
+ */ + /* tmp = 1 - Afb */ + spe_fs(f, tmp_reg, one_reg, fbA_reg); + /* tmp = min(A,tmp) */ + spe_float_min(f, tmp_reg, fragA_reg, tmp_reg); + /* term = R*tmp */ + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + + default: + ASSERT(0); + } + + /* + * Compute Src Alpha term. Like the above, we're looking for + * the full term A*factor, not just the factor itself, because + * in many cases we can avoid doing unnecessary multiplies. + */ + switch (blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ZERO: + /* factor = 0, so term = 0 */ + spe_load_float(f, term1A_reg, 0.0f); + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* fall through */ + case PIPE_BLENDFACTOR_ONE: + /* factor = 1, so term = A */ + spe_move(f, term1A_reg, fragA_reg); + break; + + case PIPE_BLENDFACTOR_SRC_COLOR: + /* factor = A, so term = A*A */ + spe_fm(f, term1A_reg, fragA_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + spe_fm(f, term1A_reg, fragA_reg, fragA_reg); + break; + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factor = 1-A, so term = A*(1-A) = A-A*A */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term1A_reg, fragA_reg, fragA_reg, fragA_reg); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_DST_COLOR: + /* factor = Afb, so term = A*Afb */ + spe_fm(f, term1A_reg, fragA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factor = 1-Afb, so term = A*(1-Afb) = A - A*Afb */ + /* fnms(a,b,c,d) computes 
a = d - b*c */ + spe_fnms(f, term1A_reg, fragA_reg, fbA_reg, fragA_reg); + break; + + case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]); + /* factor = Ac, so term = A*Ac */ + spe_fm(f, term1A_reg, fragA_reg, constA_reg); + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]); + /* factor = 1-Ac, so term = A*(1-Ac) = A-A*Ac */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term1A_reg, fragA_reg, constA_reg, fragA_reg); + break; + + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: + ASSERT(0); + } + + /* + * Compute Dest RGB term. Like the above, we're looking for + * the full term (Rfb,Gfb,Bfb)*(factor), not just the factor itself, because + * in many cases we can avoid doing unnecessary multiplies. 
+ */ + switch (blend->rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* factors = (1,1,1), so term = (Rfb,Gfb,Bfb) */ + spe_move(f, term2R_reg, fbR_reg); + spe_move(f, term2G_reg, fbG_reg); + spe_move(f, term2B_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_ZERO: + /* factor s= (0,0,0), so term = (0,0,0) */ + spe_load_float(f, term2R_reg, 0.0f); + spe_load_float(f, term2G_reg, 0.0f); + spe_load_float(f, term2B_reg, 0.0f); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* factors = (R,G,B), so term = (R*Rfb, G*Gfb, B*Bfb) */ + spe_fm(f, term2R_reg, fbR_reg, fragR_reg); + spe_fm(f, term2G_reg, fbG_reg, fragG_reg); + spe_fm(f, term2B_reg, fbB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factors = (1-R,1-G,1-B), so term = (Rfb*(1-R), Gfb*(1-G), Bfb*(1-B)) + * or in other words term = (Rfb-Rfb*R, Gfb-Gfb*G, Bfb-Bfb*B) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fragR_reg, fbR_reg, fbR_reg); + spe_fnms(f, term2G_reg, fragG_reg, fbG_reg, fbG_reg); + spe_fnms(f, term2B_reg, fragB_reg, fbB_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + /* factors = (A,A,A), so term = (Rfb*A, Gfb*A, Bfb*A) */ + spe_fm(f, term2R_reg, fbR_reg, fragA_reg); + spe_fm(f, term2G_reg, fbG_reg, fragA_reg); + spe_fm(f, term2B_reg, fbB_reg, fragA_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + /* factors = (1-A,1-A,1-A) so term = (Rfb-Rfb*A,Gfb-Gfb*A,Bfb-Bfb*A) */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term2R_reg, fbR_reg, fragA_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, fragA_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, fragA_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* factors = (Rfb,Gfb,Bfb), so term = (Rfb*Rfb, Gfb*Gfb, Bfb*Bfb) */ + spe_fm(f, term2R_reg, fbR_reg, fbR_reg); + spe_fm(f, term2G_reg, fbG_reg, fbG_reg); + spe_fm(f, term2B_reg, fbB_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = 
(Rfb*(1-Rfb),Gfb*(1-Gfb),Bfb*(1-Bfb)) + * or term = (Rfb-Rfb*Rfb, Gfb-Gfb*Gfb, Bfb-Bfb*Bfb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, fbR_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, fbG_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, fbB_reg, fbB_reg); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: + /* factors = (Afb, Afb, Afb), so term = (Rfb*Afb, Gfb*Afb, Bfb*Afb) */ + spe_fm(f, term2R_reg, fbR_reg, fbA_reg); + spe_fm(f, term2G_reg, fbG_reg, fbA_reg); + spe_fm(f, term2B_reg, fbB_reg, fbA_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (Rfb*(1-Afb),Gfb*(1-Afb),Bfb*(1-Afb)) + * or term = (Rfb-Rfb*Afb,Gfb-Gfb*Afb,Bfb-Bfb*Afb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, fbA_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, fbA_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, fbA_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]); + /* now, factor = (Rc,Gc,Bc), so term = (Rfb*Rc,Gfb*Gc,Bfb*Bc) */ + spe_fm(f, term2R_reg, fbR_reg, constR_reg); + spe_fm(f, term2G_reg, fbG_reg, constG_reg); + spe_fm(f, term2B_reg, fbB_reg, constB_reg); + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + /* we'll need the optional constant alpha register */ + setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]); + /* factor = (Ac,Ac,Ac), so term = (Rfb*Ac,Gfb*Ac,Bfb*Ac) */ + spe_fm(f, term2R_reg, fbR_reg, constA_reg); + spe_fm(f, term2G_reg, fbG_reg, constA_reg); + spe_fm(f, term2B_reg, fbB_reg, constA_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need the optional constant color registers */ + setup_const_register(f, 
&constR_reg_set, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]); + /* factor = (1-Rc,1-Gc,1-Bc), so term = (Rfb*(1-Rc),Gfb*(1-Gc),Bfb*(1-Bc)) + * or term = (Rfb-Rfb*Rc, Gfb-Gfb*Gc, Bfb-Bfb*Bc) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, constR_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, constG_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, constB_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]); + /* factor = (1-Ac,1-Ac,1-Ac), so term = (Rfb*(1-Ac),Gfb*(1-Ac),Bfb*(1-Ac)) + * or term = (Rfb-Rfb*Ac,Gfb-Gfb*Ac,Bfb-Bfb*Ac) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, constA_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, constA_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, constA_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest RGB */ + ASSERT(0); + break; + + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + + default: + ASSERT(0); + } + + /* + * Compute Dest Alpha term. Like the above, we're looking for + * the full term Afb*factor, not just the factor itself, because + * in many cases we can avoid doing unnecessary multiplies. 
+ */ + switch (blend->alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* factor = 1, so term = Afb */ + spe_move(f, term2A_reg, fbA_reg); + break; + case PIPE_BLENDFACTOR_ZERO: + /* factor = 0, so term = 0 */ + spe_load_float(f, term2A_reg, 0.0f); + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_SRC_COLOR: + /* factor = A, so term = Afb*A */ + spe_fm(f, term2A_reg, fbA_reg, fragA_reg); + break; + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factor = 1-A, so term = Afb*(1-A) = Afb-Afb*A */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term2A_reg, fbA_reg, fragA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_DST_COLOR: + /* factor = Afb, so term = Afb*Afb */ + spe_fm(f, term2A_reg, fbA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factor = 1-Afb, so term = Afb*(1-Afb) = Afb - Afb*Afb */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term2A_reg, fbA_reg, fbA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]); + /* factor = Ac, so term = Afb*Ac */ + spe_fm(f, term2A_reg, fbA_reg, constA_reg); + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]); + /* factor = 1-Ac, so term = Afb*(1-Ac) = Afb-Afb*Ac */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term2A_reg, fbA_reg, constA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest alpha */ + ASSERT(0); + break; + + /* These are special D3D 
cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: + ASSERT(0); + } + + /* + * Combine Src/Dest RGB terms as per the blend equation. + */ + switch (blend->rgb_func) { + case PIPE_BLEND_ADD: + spe_fa(f, fragR_reg, term1R_reg, term2R_reg); + spe_fa(f, fragG_reg, term1G_reg, term2G_reg); + spe_fa(f, fragB_reg, term1B_reg, term2B_reg); + break; + case PIPE_BLEND_SUBTRACT: + spe_fs(f, fragR_reg, term1R_reg, term2R_reg); + spe_fs(f, fragG_reg, term1G_reg, term2G_reg); + spe_fs(f, fragB_reg, term1B_reg, term2B_reg); + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + spe_fs(f, fragR_reg, term2R_reg, term1R_reg); + spe_fs(f, fragG_reg, term2G_reg, term1G_reg); + spe_fs(f, fragB_reg, term2B_reg, term1B_reg); + break; + case PIPE_BLEND_MIN: + spe_float_min(f, fragR_reg, term1R_reg, term2R_reg); + spe_float_min(f, fragG_reg, term1G_reg, term2G_reg); + spe_float_min(f, fragB_reg, term1B_reg, term2B_reg); + break; + case PIPE_BLEND_MAX: + spe_float_max(f, fragR_reg, term1R_reg, term2R_reg); + spe_float_max(f, fragG_reg, term1G_reg, term2G_reg); + spe_float_max(f, fragB_reg, term1B_reg, term2B_reg); + break; + default: + ASSERT(0); + } + + /* + * Combine Src/Dest A term + */ + switch (blend->alpha_func) { + case PIPE_BLEND_ADD: + spe_fa(f, fragA_reg, term1A_reg, term2A_reg); + break; + case PIPE_BLEND_SUBTRACT: + spe_fs(f, fragA_reg, term1A_reg, term2A_reg); + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + spe_fs(f, fragA_reg, term2A_reg, term1A_reg); + break; + case PIPE_BLEND_MIN: + spe_float_min(f, fragA_reg, term1A_reg, term2A_reg); + break; + case PIPE_BLEND_MAX: + spe_float_max(f, fragA_reg, term1A_reg, term2A_reg); + break; + default: + ASSERT(0); + } + + spe_release_register(f, term1R_reg); + spe_release_register(f, term1G_reg); + 
spe_release_register(f, term1B_reg); + spe_release_register(f, term1A_reg); + + spe_release_register(f, term2R_reg); + spe_release_register(f, term2G_reg); + spe_release_register(f, term2B_reg); + spe_release_register(f, term2A_reg); + + spe_release_register(f, fbR_reg); + spe_release_register(f, fbG_reg); + spe_release_register(f, fbB_reg); + spe_release_register(f, fbA_reg); + + spe_release_register(f, tmp_reg); + + /* Free any optional registers that actually got used */ + release_const_register(f, &one_reg_set, one_reg); + release_const_register(f, &constR_reg_set, constR_reg); + release_const_register(f, &constG_reg_set, constG_reg); + release_const_register(f, &constB_reg_set, constB_reg); + release_const_register(f, &constA_reg_set, constA_reg); +} + + +static void +gen_logicop(const struct pipe_blend_state *blend, + struct spe_function *f, + int fragRGBA_reg, int fbRGBA_reg) +{ + /* We've got four 32-bit RGBA packed pixels in each of + * fragRGBA_reg and fbRGBA_reg, not sets of floating-point + * reds, greens, blues, and alphas. 
+ * */ + ASSERT(blend->logicop_enable); + + switch(blend->logicop_func) { + case PIPE_LOGICOP_CLEAR: /* 0 */ + spe_zero(f, fragRGBA_reg); + break; + case PIPE_LOGICOP_NOR: /* ~(s | d) */ + spe_nor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_AND_INVERTED: /* ~s & d */ + /* andc R, A, B computes R = A & ~B */ + spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); + break; + case PIPE_LOGICOP_COPY_INVERTED: /* ~s */ + spe_complement(f, fragRGBA_reg, fragRGBA_reg); + break; + case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */ + /* andc R, A, B computes R = A & ~B */ + spe_andc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_INVERT: /* ~d */ + /* Note that (A nor A) == ~(A|A) == ~A */ + spe_nor(f, fragRGBA_reg, fbRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_XOR: /* s ^ d */ + spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_NAND: /* ~(s & d) */ + spe_nand(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_AND: /* s & d */ + spe_and(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */ + spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + spe_complement(f, fragRGBA_reg, fragRGBA_reg); + break; + case PIPE_LOGICOP_NOOP: /* d */ + spe_move(f, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_OR_INVERTED: /* ~s | d */ + /* orc R, A, B computes R = A | ~B */ + spe_orc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); + break; + case PIPE_LOGICOP_COPY: /* s */ + break; + case PIPE_LOGICOP_OR_REVERSE: /* s | ~d */ + /* orc R, A, B computes R = A | ~B */ + spe_orc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_OR: /* s | d */ + spe_or(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_SET: /* 1 */ + spe_load_int(f, fragRGBA_reg, 0xffffffff); + break; + default: + ASSERT(0); + } +} + + +/** + * Generate code to pack a quad of float colors into four 32-bit integers. 
+ * + * \param f SPE function to append instruction onto. + * \param color_format the dest color packing format + * \param r_reg register containing four red values (in/clobbered) + * \param g_reg register containing four green values (in/clobbered) + * \param b_reg register containing four blue values (in/clobbered) + * \param a_reg register containing four alpha values (in/clobbered) + * \param rgba_reg register to store the packed RGBA colors (out) + */ +static void +gen_pack_colors(struct spe_function *f, + enum pipe_format color_format, + int r_reg, int g_reg, int b_reg, int a_reg, + int rgba_reg) +{ + int rg_reg = spe_allocate_available_register(f); + int ba_reg = spe_allocate_available_register(f); + + /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */ + spe_cfltu(f, r_reg, r_reg, 32); + spe_cfltu(f, g_reg, g_reg, 32); + spe_cfltu(f, b_reg, b_reg, 32); + spe_cfltu(f, a_reg, a_reg, 32); + + /* Shift the most significant bytes to the least significant positions. + * I.e.: reg = reg >> 24 + */ + spe_rotmi(f, r_reg, r_reg, -24); + spe_rotmi(f, g_reg, g_reg, -24); + spe_rotmi(f, b_reg, b_reg, -24); + spe_rotmi(f, a_reg, a_reg, -24); + + /* Shift the color bytes according to the surface format */ + if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) { + spe_roti(f, g_reg, g_reg, 8); /* green <<= 8 */ + spe_roti(f, r_reg, r_reg, 16); /* red <<= 16 */ + spe_roti(f, a_reg, a_reg, 24); /* alpha <<= 24 */ + } + else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) { + spe_roti(f, r_reg, r_reg, 8); /* red <<= 8 */ + spe_roti(f, g_reg, g_reg, 16); /* green <<= 16 */ + spe_roti(f, b_reg, b_reg, 24); /* blue <<= 24 */ + } + else { + ASSERT(0); + } + + /* Merge red, green, blue, alpha registers to make packed RGBA colors. 
+ * Eg: after shifting according to color_format we might have: + * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000} + * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600} + * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099} + * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000} + * OR-ing all those together gives us four packed colors: + * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699} + */ + spe_or(f, rg_reg, r_reg, g_reg); + spe_or(f, ba_reg, a_reg, b_reg); + spe_or(f, rgba_reg, rg_reg, ba_reg); + + spe_release_register(f, rg_reg); + spe_release_register(f, ba_reg); +} + +static void +gen_colormask(struct spe_function *f, + uint colormask, + enum pipe_format color_format, + int fragRGBA_reg, int fbRGBA_reg) +{ + /* We've got four 32-bit RGBA packed pixels in each of + * fragRGBA_reg and fbRGBA_reg, not sets of floating-point + * reds, greens, blues, and alphas. Further, the pixels + * are packed according to the given color format, not + * necessarily RGBA... + */ + unsigned int r_mask; + unsigned int g_mask; + unsigned int b_mask; + unsigned int a_mask; + + /* Calculate exactly where the bits for any particular color + * end up, so we can mask them correctly. + */ + switch(color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + /* ARGB */ + a_mask = 0xff000000; + r_mask = 0x00ff0000; + g_mask = 0x0000ff00; + b_mask = 0x000000ff; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + /* BGRA */ + b_mask = 0xff000000; + g_mask = 0x00ff0000; + r_mask = 0x0000ff00; + a_mask = 0x000000ff; + break; + default: + ASSERT(0); + } + + /* For each R, G, B, and A component we're supposed to mask out, + * clear its bits. Then our mask operation later will work + * as expected. 
+ */ + if (!(colormask & PIPE_MASK_R)) { + r_mask = 0; + } + if (!(colormask & PIPE_MASK_G)) { + g_mask = 0; + } + if (!(colormask & PIPE_MASK_B)) { + b_mask = 0; + } + if (!(colormask & PIPE_MASK_A)) { + a_mask = 0; + } + + /* Get a temporary register to hold the mask that will be applied to the fragment */ + int colormask_reg = spe_allocate_available_register(f); + + /* The actual mask we're going to use is an OR of the remaining R, G, B, and A + * masks. Load the result value into our temporary register. + */ + spe_load_uint(f, colormask_reg, r_mask | g_mask | b_mask | a_mask); + + /* Use the mask register to select between the fragment color + * values and the frame buffer color values. Wherever the + * mask has a 0 bit, the current frame buffer color should override + * the fragment color. Wherever the mask has a 1 bit, the + * fragment color should persevere. The Select Bits (selb rt, rA, rB, rM) + * instruction will select bits from its first operand rA wherever the + * the mask bits rM are 0, and from its second operand rB wherever the + * mask bits rM are 1. That means that the frame buffer color is the + * first operand, and the fragment color the second. + */ + spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg); + + /* Release the temporary register and we're done */ + spe_release_register(f, colormask_reg); +} + +/* This function is annoyingly similar to gen_depth_test(), above, except + * that instead of comparing two varying values (i.e. fragment and buffer), + * we're comparing a varying value with a static value. As such, we have + * access to the Compare Immediate instructions where we don't in + * gen_depth_test(), which is what makes us very different. + * + * The return value in the stencil_pass_reg is a bitmask of valid + * fragments that also passed the stencil test. The bitmask of valid + * fragments that failed would be found in (mask_reg & ~stencil_pass_reg). 
+ */ +static void +gen_stencil_test(struct spe_function *f, const struct pipe_stencil_state *state, + unsigned int mask_reg, unsigned int fbS_reg, + unsigned int stencil_pass_reg) +{ + /* Generate code that puts the set of passing fragments into the stencil_pass_reg + * register, taking into account whether each fragment was active to begin with. + */ + switch (state->func) { + case PIPE_FUNC_EQUAL: + /* stencil_pass = mask & (s == reference) */ + spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_and(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + break; + + case PIPE_FUNC_NOTEQUAL: + /* stencil_pass = mask & ~(s == reference) */ + spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_andc(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + break; + + case PIPE_FUNC_GREATER: + /* stencil_pass = mask & (s > reference) */ + spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_and(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + break; + + case PIPE_FUNC_LESS: { + /* stencil_pass = mask & (reference > s) */ + /* There's no convenient Compare Less Than Immediate instruction, so + * we'll have to do this one the harder way, by loading a register and + * comparing directly. Compare Logical Greater Than Word (clgt) + * treats its operands as unsigned - no sign extension. 
+ */ + unsigned int tmp_reg = spe_allocate_available_register(f); + spe_load_uint(f, tmp_reg, state->ref_value); + spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); + spe_and(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_reg); + break; + } + + case PIPE_FUNC_LEQUAL: + /* stencil_pass = mask & (s <= reference) = mask & ~(s > reference) */ + spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_andc(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + break; + + case PIPE_FUNC_GEQUAL: { + /* stencil_pass = mask & (s >= reference) = mask & ~(reference > s) */ + /* As above, we have to do this by loading a register */ + unsigned int tmp_reg = spe_allocate_available_register(f); + spe_load_uint(f, tmp_reg, state->ref_value); + spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); + spe_andc(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_reg); + break; + } + + case PIPE_FUNC_NEVER: + /* stencil_pass = mask & 0 = 0 */ + spe_load_uint(f, stencil_pass_reg, 0); + break; + + case PIPE_FUNC_ALWAYS: + /* stencil_pass = mask & 1 = mask */ + spe_move(f, stencil_pass_reg, mask_reg); + break; + } + + /* The fragments that passed the stencil test are now in stencil_pass_reg. + * The fragments that failed would be (mask_reg & ~stencil_pass_reg). + */ +} + +/* This function generates code that calculates a set of new stencil values + * given the earlier values and the operation to apply. It does not + * apply any tests. It is intended to be called up to 3 times + * (for the stencil fail operation, for the stencil pass-z fail operation, + * and for the stencil pass-z pass operation) to collect up to three + * possible sets of values, and for the caller to combine them based + * on the result of the tests. + * + * stencil_max_value should be (2^n - 1) where n is the number of bits + * in the stencil buffer - in other words, it should be usable as a mask. 
+ */ +static void +gen_stencil_values(struct spe_function *f, unsigned int stencil_op, + unsigned int stencil_ref_value, unsigned int stencil_max_value, + unsigned int fbS_reg, unsigned int newS_reg) +{ + /* The code below assumes that newS_reg and fbS_reg are not the same + * register; if they can be, the calculations below will have to use + * an additional temporary register. For now, mark the assumption + * with an assertion that will fail if they are the same. + */ + ASSERT(fbS_reg != newS_reg); + + /* The code also assumes the the stencil_max_value is of the form + * 2^n-1 and can therefore be used as a mask for the valid bits in + * addition to a maximum. Make sure this is the case as well. + * The clever math below exploits the fact that incrementing a + * binary number serves to flip all the bits of a number starting at + * the LSB and continuing to (and including) the first zero bit + * found. That means that a number and its increment will always + * have at least one bit in common (the high order bit, if nothing + * else) *unless* the number is zero, *or* the number is of a form + * consisting of some number of 1s in the low-order bits followed + * by nothing but 0s in the high-order bits. The latter case + * implies it's of the form 2^n-1. + */ + ASSERT(stencil_max_value > 0 && ((stencil_max_value + 1) & stencil_max_value) == 0); + + switch(stencil_op) { + case PIPE_STENCIL_OP_KEEP: + /* newS = S */ + spe_move(f, newS_reg, fbS_reg); + break; + + case PIPE_STENCIL_OP_ZERO: + /* newS = 0 */ + spe_zero(f, newS_reg); + break; + + case PIPE_STENCIL_OP_REPLACE: + /* newS = stencil reference value */ + spe_load_uint(f, newS_reg, stencil_ref_value); + break; + + case PIPE_STENCIL_OP_INCR: { + /* newS = (s == max ? 
max : s + 1) */ + unsigned int equals_reg = spe_allocate_available_register(f); + + spe_compare_equal_uint(f, equals_reg, fbS_reg, stencil_max_value); + /* Add Word Immediate computes rT = rA + 10-bit signed immediate */ + spe_ai(f, newS_reg, fbS_reg, 1); + /* Select from the current value or the new value based on the equality test */ + spe_selb(f, newS_reg, fbS_reg, newS_reg, equals_reg); + + spe_release_register(f, equals_reg); + break; + } + case PIPE_STENCIL_OP_DECR: { + /* newS = (s == 0 ? 0 : s - 1) */ + unsigned int equals_reg = spe_allocate_available_register(f); + + spe_compare_equal_uint(f, equals_reg, fbS_reg, 0); + /* Add Word Immediate with a (-1) value works */ + spe_ai(f, newS_reg, fbS_reg, -1); + /* Select from the current value or the new value based on the equality test */ + spe_selb(f, newS_reg, fbS_reg, newS_reg, equals_reg); + + spe_release_register(f, equals_reg); + break; + } + case PIPE_STENCIL_OP_INCR_WRAP: + /* newS = (s == max ? 0 : s + 1), but since max is 2^n-1, we can + * do a normal add and mask off the correct bits + */ + spe_ai(f, newS_reg, fbS_reg, 1); + spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); + break; + + case PIPE_STENCIL_OP_DECR_WRAP: + /* newS = (s == 0 ? max : s - 1), but we'll pull the same mask trick as above */ + spe_ai(f, newS_reg, fbS_reg, -1); + spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); + break; + + case PIPE_STENCIL_OP_INVERT: + /* newS = ~s. We take advantage of the mask/max value to invert only + * the valid bits for the field so we don't have to do an extra "and". + */ + spe_xor_uint(f, newS_reg, fbS_reg, stencil_max_value); + break; + + default: + ASSERT(0); + } +} + + +/* This function generates code to get all the necessary possible + * stencil values. 
For each of the output registers (fail_reg, + * zfail_reg, and zpass_reg), it either allocates a new register + * and calculates a new set of values based on the stencil operation, + * or it reuses a register allocation and calculation done for an + * earlier (matching) operation, or it reuses the fbS_reg register + * (if the stencil operation is KEEP, which doesn't change the + * stencil buffer). + * + * Since this function allocates a variable number of registers, + * to avoid incurring complex logic to free them, they should + * be allocated after a spe_allocate_register_set() call + * and released by the corresponding spe_release_register_set() call. + */ +static void +gen_get_stencil_values(struct spe_function *f, const struct pipe_depth_stencil_alpha_state *dsa, + unsigned int fbS_reg, + unsigned int *fail_reg, unsigned int *zfail_reg, + unsigned int *zpass_reg, unsigned int *back_fail_reg, + unsigned int *back_zfail_reg, unsigned int *back_zpass_reg) +{ + unsigned zfail_op, back_zfail_op; + + /* Stenciling had better be enabled here */ + ASSERT(dsa->stencil[0].enabled); + + /* If the depth test is not enabled, it is treated as though it always + * passes. In particular, that means that the "zfail_op" (and the backfacing + * counterpart, if active) are not considered - a failing stencil test will + * trigger the "fail_op", and a passing stencil test will trigger the + * "zpass_op". + * + * By overriding the operations in this case to be PIPE_STENCIL_OP_KEEP, + * we keep them from being calculated. 
+ */ + if (dsa->depth.enabled) { + zfail_op = dsa->stencil[0].zfail_op; + back_zfail_op = dsa->stencil[1].zfail_op; + } + else { + zfail_op = PIPE_STENCIL_OP_KEEP; + back_zfail_op = PIPE_STENCIL_OP_KEEP; + } + + /* One-sided or front-facing stencil */ + if (dsa->stencil[0].fail_op == PIPE_STENCIL_OP_KEEP) { + *fail_reg = fbS_reg; + } + else { + *fail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[0].fail_op, dsa->stencil[0].ref_value, + 0xff, fbS_reg, *fail_reg); + } + + if (zfail_op == PIPE_STENCIL_OP_KEEP) { + *zfail_reg = fbS_reg; + } + else if (zfail_op == dsa->stencil[0].fail_op) { + *zfail_reg = *fail_reg; + } + else { + *zfail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[0].zfail_op, dsa->stencil[0].ref_value, + 0xff, fbS_reg, *zfail_reg); + } + + if (dsa->stencil[0].zpass_op == PIPE_STENCIL_OP_KEEP) { + *zpass_reg = fbS_reg; + } + else if (dsa->stencil[0].zpass_op == dsa->stencil[0].fail_op) { + *zpass_reg = *fail_reg; + } + else if (dsa->stencil[0].zpass_op == zfail_op) { + *zpass_reg = *zfail_reg; + } + else { + *zpass_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[0].zpass_op, dsa->stencil[0].ref_value, + 0xff, fbS_reg, *zpass_reg); + } + + /* If two-sided stencil is enabled, we have more work to do. 
*/ + if (!dsa->stencil[1].enabled) { + /* This just flags that the registers need not be deallocated later */ + *back_fail_reg = fbS_reg; + *back_zfail_reg = fbS_reg; + *back_zpass_reg = fbS_reg; + } + else { + /* Same calculations as above, but for the back stencil */ + if (dsa->stencil[1].fail_op == PIPE_STENCIL_OP_KEEP) { + *back_fail_reg = fbS_reg; + } + else if (dsa->stencil[1].fail_op == dsa->stencil[0].fail_op) { + *back_fail_reg = *fail_reg; + } + else if (dsa->stencil[1].fail_op == zfail_op) { + *back_fail_reg = *zfail_reg; + } + else if (dsa->stencil[1].fail_op == dsa->stencil[0].zpass_op) { + *back_fail_reg = *zpass_reg; + } + else { + *back_fail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[1].fail_op, dsa->stencil[1].ref_value, + 0xff, fbS_reg, *back_fail_reg); + } + + if (back_zfail_op == PIPE_STENCIL_OP_KEEP) { + *back_zfail_reg = fbS_reg; + } + else if (back_zfail_op == dsa->stencil[0].fail_op) { + *back_zfail_reg = *fail_reg; + } + else if (back_zfail_op == zfail_op) { + *back_zfail_reg = *zfail_reg; + } + else if (back_zfail_op == dsa->stencil[0].zpass_op) { + *back_zfail_reg = *zpass_reg; + } + else if (back_zfail_op == dsa->stencil[1].fail_op) { + *back_zfail_reg = *back_fail_reg; + } + else { + *back_zfail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[1].zfail_op, dsa->stencil[1].ref_value, + 0xff, fbS_reg, *back_zfail_reg); + } + + if (dsa->stencil[1].zpass_op == PIPE_STENCIL_OP_KEEP) { + *back_zpass_reg = fbS_reg; + } + else if (dsa->stencil[1].zpass_op == dsa->stencil[0].fail_op) { + *back_zpass_reg = *fail_reg; + } + else if (dsa->stencil[1].zpass_op == zfail_op) { + *back_zpass_reg = *zfail_reg; + } + else if (dsa->stencil[1].zpass_op == dsa->stencil[0].zpass_op) { + *back_zpass_reg = *zpass_reg; + } + else if (dsa->stencil[1].zpass_op == dsa->stencil[1].fail_op) { + *back_zpass_reg = *back_fail_reg; + } + else if (dsa->stencil[1].zpass_op == back_zfail_op) { + 
*back_zpass_reg = *back_zfail_reg; + } + else { + *back_zfail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[1].zpass_op, dsa->stencil[1].ref_value, + 0xff, fbS_reg, *back_zpass_reg); + } + } /* End of calculations for back-facing stencil */ +} + +/* Note that fbZ_reg may *not* be set on entry, if in fact + * the depth test is not enabled. This function must not use + * the register if depth is not enabled. + */ +static boolean +gen_stencil_depth_test(struct spe_function *f, + const struct pipe_depth_stencil_alpha_state *dsa, + const int const facing_reg, + const int mask_reg, const int fragZ_reg, + const int fbZ_reg, const int fbS_reg) +{ + /* True if we've generated code that could require writeback to the + * depth and/or stencil buffers + */ + boolean modified_buffers = false; + + boolean need_to_calculate_stencil_values; + boolean need_to_writemask_stencil_values; + + /* Registers. We may or may not actually allocate these, depending + * on whether the state values indicate that we need them. + */ + unsigned int stencil_pass_reg, stencil_fail_reg; + unsigned int stencil_fail_values, stencil_pass_depth_fail_values, stencil_pass_depth_pass_values; + unsigned int stencil_writemask_reg; + unsigned int zmask_reg; + unsigned int newS_reg; + + /* Stenciling is quite complex: up to six different configurable stencil + * operations/calculations can be required (three each for front-facing + * and back-facing fragments). Many of those operations will likely + * be identical, so there's good reason to try to avoid calculating + * the same values more than once (which unfortunately makes the code less + * straightforward). + * + * To make register management easier, we start a new + * register set; we can release all the registers in the set at + * once, and avoid having to keep track of exactly which registers + * we allocate. 
We can still allocate and free registers as + * desired (if we know we no longer need a register), but we don't + * have to spend the complexity to track the more difficult variant + * register usage scenarios. + */ + spe_comment(f, 0, "Allocating stencil register set"); + spe_allocate_register_set(f); + + /* Calculate the writemask. If the writemask is trivial (either + * all 0s, meaning that we don't need to calculate any stencil values + * because they're not going to change the stencil anyway, or all 1s, + * meaning that we have to calculate the stencil values but do not + * need to mask them), we can avoid generating code. Don't forget + * that we need to consider backfacing stencil, if enabled. + */ + if (dsa->stencil[0].write_mask == 0x0 && (!dsa->stencil[1].enabled || dsa->stencil[1].write_mask == 0x00)) { + /* Trivial: don't need to calculate stencil values, and don't need to + * write them back to the framebuffer. + */ + need_to_calculate_stencil_values = false; + need_to_writemask_stencil_values = false; + } + else if (dsa->stencil[0].write_mask == 0xff && (!dsa->stencil[1].enabled || dsa->stencil[1].write_mask == 0xff)) { + /* Still trivial, but a little less so. We need to write the stencil + * values, but we don't need to mask them. + */ + need_to_calculate_stencil_values = true; + need_to_writemask_stencil_values = false; + } + else { + /* The general case: calculate, mask, and write */ + need_to_calculate_stencil_values = true; + need_to_writemask_stencil_values = true; + + /* While we're here, generate code that calculates what the + * writemask should be. If backface stenciling is enabled, + * and the backface writemask is not the same as the frontface + * writemask, we'll have to generate code that merges the + * two masks into a single effective mask based on fragment facing. 
+ */ + spe_comment(f, 0, "Computing stencil writemask"); + stencil_writemask_reg = spe_allocate_available_register(f); + spe_load_uint(f, stencil_writemask_reg, dsa->stencil[0].write_mask); + if (dsa->stencil[1].enabled && dsa->stencil[0].write_mask != dsa->stencil[1].write_mask) { + unsigned int back_write_mask_reg = spe_allocate_available_register(f); + spe_comment(f, 0, "Resolving two-sided stencil writemask"); + spe_load_uint(f, back_write_mask_reg, dsa->stencil[1].write_mask); + spe_selb(f, stencil_writemask_reg, stencil_writemask_reg, back_write_mask_reg, facing_reg); + spe_release_register(f, back_write_mask_reg); + } + } + + /* At least one-sided stenciling must be on. Generate code that + * runs the stencil test on the basic/front-facing stencil, leaving + * the mask of passing stencil bits in stencil_pass_reg. This mask will + * be used both to mask the set of active pixels, and also to + * determine how the stencil buffer changes. + * + * This test will *not* change the value in mask_reg (because we don't + * yet know whether to apply the two-sided stencil or one-sided stencil). + */ + spe_comment(f, 0, "Running basic stencil test"); + stencil_pass_reg = spe_allocate_available_register(f); + gen_stencil_test(f, &dsa->stencil[0], mask_reg, fbS_reg, stencil_pass_reg); + + /* If two-sided stenciling is on, generate code to run the stencil + * test on the backfacing stencil as well, and combine the two results + * into the one correct result based on facing. 
+ */ + if (dsa->stencil[1].enabled) { + unsigned int temp_reg = spe_allocate_available_register(f); + spe_comment(f, 0, "Running backface stencil test"); + gen_stencil_test(f, &dsa->stencil[1], mask_reg, fbS_reg, temp_reg); + spe_selb(f, stencil_pass_reg, stencil_pass_reg, temp_reg, facing_reg); + spe_release_register(f, temp_reg); + } + + /* Generate code that, given the mask of valid fragments and the + * mask of valid fragments that passed the stencil test, computes + * the mask of valid fragments that failed the stencil test. We + * have to do this before we run a depth test (because the + * depth test should not be performed on fragments that failed the + * stencil test, and because the depth test will update the + * mask of valid fragments based on the results of the depth test). + */ + spe_comment(f, 0, "Computing stencil fail mask and updating fragment mask"); + stencil_fail_reg = spe_allocate_available_register(f); + spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg); + /* Now remove the stenciled-out pixels from the valid fragment mask, + * so we can later use the valid fragment mask in the depth test. + */ + spe_and(f, mask_reg, mask_reg, stencil_pass_reg); + + /* We may not need to calculate stencil values, if the writemask is off */ + if (need_to_calculate_stencil_values) { + unsigned int back_stencil_fail_values, back_stencil_pass_depth_fail_values, back_stencil_pass_depth_pass_values; + unsigned int front_stencil_fail_values, front_stencil_pass_depth_fail_values, front_stencil_pass_depth_pass_values; + + /* Generate code that calculates exactly which stencil values we need, + * without calculating the same value twice (say, if two different + * stencil ops have the same value). 
This code will work for one-sided + * and two-sided stenciling (so that we take into account that operations + * may match between front and back stencils), and will also take into + * account whether the depth test is enabled (if the depth test is off, + * we don't need any of the zfail results, because the depth test always + * is considered to pass if it is disabled). Any register value that + * does not need to be calculated will come back with the same value + * that's in fbS_reg. + * + * This function will allocate a variant number of registers that + * will be released as part of the register set. + */ + spe_comment(f, 0, "Computing stencil values"); + gen_get_stencil_values(f, dsa, fbS_reg, + &front_stencil_fail_values, &front_stencil_pass_depth_fail_values, + &front_stencil_pass_depth_pass_values, &back_stencil_fail_values, + &back_stencil_pass_depth_fail_values, &back_stencil_pass_depth_pass_values); + + /* Tricky, tricky, tricky - the things we do to create optimal + * code... + * + * The various stencil values registers may overlap with each other + * and with fbS_reg arbitrarily (as any particular operation is + * only calculated once and stored in one register, no matter + * how many times it is used). So we can't change the values + * within those registers directly - if we change a value in a + * register that's being referenced by two different calculations, + * we've just unwittingly changed the second value as well... + * + * Avoid this by allocating new registers to hold the results + * (there may be 2, if the depth test is off, or 3, if it is on). + * These will be released as part of the register set. + */ + if (!dsa->stencil[1].enabled) { + /* The easy case: if two-sided stenciling is *not* enabled, we + * just use the front-sided values. 
+ */ + stencil_fail_values = front_stencil_fail_values; + stencil_pass_depth_fail_values = front_stencil_pass_depth_fail_values; + stencil_pass_depth_pass_values = front_stencil_pass_depth_pass_values; + } + else { /* two-sided stencil enabled */ + spe_comment(f, 0, "Resolving backface stencil values"); + /* Allocate new registers for the needed merged values */ + stencil_fail_values = spe_allocate_available_register(f); + spe_selb(f, stencil_fail_values, front_stencil_fail_values, back_stencil_fail_values, facing_reg); + if (dsa->depth.enabled) { + stencil_pass_depth_fail_values = spe_allocate_available_register(f); + spe_selb(f, stencil_pass_depth_fail_values, front_stencil_pass_depth_fail_values, back_stencil_pass_depth_fail_values, facing_reg); + } + else { + stencil_pass_depth_fail_values = fbS_reg; + } + stencil_pass_depth_pass_values = spe_allocate_available_register(f); + spe_selb(f, stencil_pass_depth_pass_values, front_stencil_pass_depth_pass_values, back_stencil_pass_depth_pass_values, facing_reg); + } + } + + /* We now have all the stencil values we need. We also need + * the results of the depth test to figure out which + * stencil values will become the new stencil values. (Even if + * we aren't actually calculating stencil values, we need to apply + * the depth test if it's enabled.) + * + * The code generated by gen_depth_test() returns the results of the + * test in the given register, but also alters the mask_reg based + * on the results of the test. + */ + if (dsa->depth.enabled) { + spe_comment(f, 0, "Running stencil depth test"); + zmask_reg = spe_allocate_available_register(f); + modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); + } + + if (need_to_calculate_stencil_values) { + + /* If we need to writemask the stencil values before going into + * the stencil buffer, we'll have to use a new register to + * hold the new values. If not, we can just keep using the + * current register. 
+ */ + if (need_to_writemask_stencil_values) { + newS_reg = spe_allocate_available_register(f); + spe_comment(f, 0, "Saving current stencil values for writemasking"); + spe_move(f, newS_reg, fbS_reg); + } + else { + newS_reg = fbS_reg; + } + + /* Merge in the selected stencil fail values */ + if (stencil_fail_values != fbS_reg) { + spe_comment(f, 0, "Loading stencil fail values"); + spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg); + modified_buffers = true; + } + + /* Same for the stencil pass/depth fail values. If this calculation + * is not needed (say, if depth test is off), then the + * stencil_pass_depth_fail_values register will be equal to fbS_reg + * and we'll skip the calculation. + */ + if (stencil_pass_depth_fail_values != fbS_reg) { + /* We don't actually have a stencil pass/depth fail mask yet. + * Calculate it here from the stencil passing mask and the + * depth passing mask. Note that zmask_reg *must* have been + * set above if we're here. + */ + unsigned int stencil_pass_depth_fail_mask = spe_allocate_available_register(f); + spe_comment(f, 0, "Loading stencil pass/depth fail values"); + spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg); + + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values, stencil_pass_depth_fail_mask); + + spe_release_register(f, stencil_pass_depth_fail_mask); + modified_buffers = true; + } + + /* Same for the stencil pass/depth pass mask. Note that we + * *can* get here with zmask_reg being unset (if the depth + * test is off but the stencil test is on). In this case, + * we assume the depth test passes, and don't need to mask + * the stencil pass mask with the Z mask. 
+ */ + if (stencil_pass_depth_pass_values != fbS_reg) { + if (dsa->depth.enabled) { + unsigned int stencil_pass_depth_pass_mask = spe_allocate_available_register(f); + /* We'll need a separate register */ + spe_comment(f, 0, "Loading stencil pass/depth pass values"); + spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg); + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask); + spe_release_register(f, stencil_pass_depth_pass_mask); + } + else { + /* We can use the same stencil-pass register */ + spe_comment(f, 0, "Loading stencil pass values"); + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_reg); + } + modified_buffers = true; + } + + /* Almost done. If we need to writemask, do it now, leaving the + * results in the fbS_reg register passed in. If we don't need + * to writemask, then the results are *already* in the fbS_reg, + * so there's nothing more to do. + */ + + if (need_to_writemask_stencil_values && modified_buffers) { + /* The Select Bytes command makes a fine writemask. Where + * the mask is 0, the first (original) values are retained, + * effectively masking out changes. Where the mask is 1, the + * second (new) values are retained, incorporating changes. + */ + spe_comment(f, 0, "Writemasking new stencil values"); + spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg); + } + + } /* done calculating stencil values */ + + /* The stencil and/or depth values have been applied, and the + * mask_reg, fbS_reg, and fbZ_reg values have been updated. + * We're all done, except that we've allocated a fair number + * of registers that we didn't bother tracking. Release all + * those registers as part of the register set, and go home. + */ + spe_comment(f, 0, "Releasing stencil register set"); + spe_release_register_set(f); + + /* Return true if we could have modified the stencil and/or + * depth buffers. 
+ */ + return modified_buffers; +} + + +/** + * Generate SPE code to implement the fragment operations (alpha test, + * depth test, stencil test, blending, colormask, and final + * framebuffer write) as specified by the current context state. + * + * Logically, this code will be called after running the fragment + * shader. But under some circumstances we could run some of this + * code before the fragment shader to cull fragments/quads that are + * totally occluded/discarded. + * + * XXX we only support PIPE_FORMAT_Z24S8_UNORM z/stencil buffer right now. + * + * See the spu_default_fragment_ops() function to see how the per-fragment + * operations would be done with ordinary C code. + * The code we generate here though has no branches, is SIMD, etc and + * should be much faster. + * + * \param cell the rendering context (in) + * \param f the generated function (out) + */ +void +cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) +{ + const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil; + const struct pipe_blend_state *blend = cell->blend; + const struct pipe_blend_color *blend_color = &cell->blend_color; + const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; + + /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ + const int x_reg = 3; /* uint */ + const int y_reg = 4; /* uint */ + const int color_tile_reg = 5; /* tile_t * */ + const int depth_tile_reg = 6; /* tile_t * */ + const int fragZ_reg = 7; /* vector float */ + const int fragR_reg = 8; /* vector float */ + const int fragG_reg = 9; /* vector float */ + const int fragB_reg = 10; /* vector float */ + const int fragA_reg = 11; /* vector float */ + const int mask_reg = 12; /* vector uint */ + const int facing_reg = 13; /* uint */ + + /* offset of quad from start of tile + * XXX assuming 4-byte pixels for color AND Z/stencil!!!! 
+ */ + int quad_offset_reg; + + int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */ + int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */ + + spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + + if (cell->debug_flags & CELL_DEBUG_ASM) { + spe_print_code(f, true); + spe_indent(f, 8); + spe_comment(f, -4, "Begin per-fragment ops"); + } + + spe_allocate_register(f, x_reg); + spe_allocate_register(f, y_reg); + spe_allocate_register(f, color_tile_reg); + spe_allocate_register(f, depth_tile_reg); + spe_allocate_register(f, fragZ_reg); + spe_allocate_register(f, fragR_reg); + spe_allocate_register(f, fragG_reg); + spe_allocate_register(f, fragB_reg); + spe_allocate_register(f, fragA_reg); + spe_allocate_register(f, mask_reg); + spe_allocate_register(f, facing_reg); + + quad_offset_reg = spe_allocate_available_register(f); + fbRGBA_reg = spe_allocate_available_register(f); + fbZS_reg = spe_allocate_available_register(f); + + /* compute offset of quad from start of tile, in bytes */ + { + int x2_reg = spe_allocate_available_register(f); + int y2_reg = spe_allocate_available_register(f); + + ASSERT(TILE_SIZE == 32); + + spe_comment(f, 0, "Compute quad offset within tile"); + spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */ + spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ + spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */ + spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */ + spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */ + + spe_release_register(f, x2_reg); + spe_release_register(f, y2_reg); + } + + if (dsa->alpha.enabled) { + gen_alpha_test(dsa, f, mask_reg, fragA_reg); + } + + /* If we need the stencil buffers (because one- or two-sided stencil is + * enabled) or the depth buffer (because the depth test is enabled), + * go grab them. Note that if either one- or two-sided stencil is + * enabled, dsa->stencil[0].enabled will be true. 
+ */ + if (dsa->depth.enabled || dsa->stencil[0].enabled) { + const enum pipe_format zs_format = cell->framebuffer.zsbuf->format; + boolean write_depth_stencil; + + /* We may or may not need to allocate a register for Z or stencil values */ + boolean fbS_reg_set = false, fbZ_reg_set = false; + unsigned int fbS_reg, fbZ_reg = 0; + + spe_comment(f, 0, "Fetching Z/stencil quad from tile"); + + /* fetch quad of depth/stencil values from tile at (x,y) */ + /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ + /* XXX Not sure this is allowed if we've only got a 16-bit Z buffer... */ + spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + + /* From the Z/stencil buffer format, pull out the bits we need for + * Z and/or stencil. We'll also convert the incoming fragment Z + * value in fragZ_reg from a floating point value in [0.0..1.0] to + * an unsigned integer value with the appropriate resolution. + */ + switch(zs_format) { + + case PIPE_FORMAT_S8Z24_UNORM: /* fall through */ + case PIPE_FORMAT_X8Z24_UNORM: + if (dsa->depth.enabled) { + /* We need the Z part at least */ + setup_optional_register(f, &fbZ_reg_set, &fbZ_reg); + /* four 24-bit Z values in the low-order bits */ + spe_and_uint(f, fbZ_reg, fbZS_reg, 0x00ffffff); + + /* Incoming fragZ_reg value is a float in 0.0...1.0; convert + * to a 24-bit unsigned integer + */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + spe_rotmi(f, fragZ_reg, fragZ_reg, -8); + } + if (dsa->stencil[0].enabled) { + setup_optional_register(f, &fbS_reg_set, &fbS_reg); + /* four 8-bit Z values in the high-order bits */ + spe_rotmi(f, fbS_reg, fbZS_reg, -24); + } + break; + + case PIPE_FORMAT_Z24S8_UNORM: /* fall through */ + case PIPE_FORMAT_Z24X8_UNORM: + if (dsa->depth.enabled) { + setup_optional_register(f, &fbZ_reg_set, &fbZ_reg); + /* shift by 8 to get the upper 24-bit values */ + spe_rotmi(f, fbS_reg, fbZS_reg, -8); + + /* Incoming fragZ_reg value is a float in 0.0...1.0; convert + * to a 24-bit unsigned integer + */ + 
spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + spe_rotmi(f, fragZ_reg, fragZ_reg, -8); + } + if (dsa->stencil[0].enabled) { + setup_optional_register(f, &fbS_reg_set, &fbS_reg); + /* 8-bit stencil in the low-order bits - mask them out */ + spe_and_uint(f, fbS_reg, fbZS_reg, 0x000000ff); + } + break; + + case PIPE_FORMAT_Z32_UNORM: + if (dsa->depth.enabled) { + setup_optional_register(f, &fbZ_reg_set, &fbZ_reg); + /* Copy over 4 32-bit values */ + spe_move(f, fbZ_reg, fbZS_reg); + + /* Incoming fragZ_reg value is a float in 0.0...1.0; convert + * to a 32-bit unsigned integer + */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + } + /* No stencil, so can't do anything there */ + break; + + case PIPE_FORMAT_Z16_UNORM: + if (dsa->depth.enabled) { + /* XXX Not sure this is correct, but it was here before, so we're + * going with it for now + */ + setup_optional_register(f, &fbZ_reg_set, &fbZ_reg); + /* Copy over 4 32-bit values */ + spe_move(f, fbZ_reg, fbZS_reg); + + /* Incoming fragZ_reg value is a float in 0.0...1.0; convert + * to a 16-bit unsigned integer + */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + spe_rotmi(f, fragZ_reg, fragZ_reg, -16); + } + /* No stencil */ + break; + + default: + ASSERT(0); /* invalid format */ + } + + /* If stencil is enabled, use the stencil-specific code + * generator to generate both the stencil and depth (if needed) + * tests. Otherwise, if only depth is enabled, generate + * a quick depth test. The test generators themselves will + * report back whether the depth/stencil buffer has to be + * written back. + */ + if (dsa->stencil[0].enabled) { + /* This will perform the stencil and depth tests, and update + * the mask_reg, fbZ_reg, and fbS_reg as required by the + * tests. + */ + ASSERT(fbS_reg_set); + spe_comment(f, 0, "Perform stencil test"); + + /* Note that fbZ_reg may not be set on entry, if stenciling + * is enabled but there's no Z-buffer. 
The + * gen_stencil_depth_test() function must ignore the + * fbZ_reg register if depth is not enabled. + */ + write_depth_stencil = gen_stencil_depth_test(f, dsa, facing_reg, mask_reg, fragZ_reg, fbZ_reg, fbS_reg); + } + else if (dsa->depth.enabled) { + int zmask_reg = spe_allocate_available_register(f); + ASSERT(fbZ_reg_set); + spe_comment(f, 0, "Perform depth test"); + write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); + spe_release_register(f, zmask_reg); + } + else { + write_depth_stencil = false; + } + + if (write_depth_stencil) { + /* Merge latest Z and Stencil values into fbZS_reg. + * fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. + * fbS_reg has four 8-bit Z values in bits [7..0]. + */ + spe_comment(f, 0, "Store quad's depth/stencil values in tile"); + if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM) { + if (fbS_reg_set && fbZ_reg_set) { + spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ + spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + } + else if (fbS_reg_set) { + spe_shli(f, fbZS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ + } + else { + spe_move(f, fbZS_reg, fbZ_reg); + } + } + else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || + zs_format == PIPE_FORMAT_Z24X8_UNORM) { + if (fbS_reg_set && fbZ_reg_set) { + spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ + spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + } + else if (fbS_reg_set) { + spe_move(f, fbZS_reg, fbS_reg); + } + else { + spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ + } + } + else if (zs_format == PIPE_FORMAT_Z32_UNORM) { + if (fbZ_reg_set) { + spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ + } + } + else if (zs_format == PIPE_FORMAT_Z16_UNORM) { + if (fbZ_reg_set) { + spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ + } + } + else if (zs_format == PIPE_FORMAT_S8_UNORM) { + ASSERT(0); /* XXX to do */ + } + else { + ASSERT(0); /* bad zs_format */ + } + + /* Store: 
memory[depth_tile_reg + quad_offset_reg] = fbZS */ + spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + } + + release_optional_register(f, &fbZ_reg_set, fbZ_reg); + release_optional_register(f, &fbS_reg_set, fbS_reg); + } + + /* Get framebuffer quad/colors. We'll need these for blending, + * color masking, and to obey the quad/pixel mask. + * Load: fbRGBA_reg = memory[color_tile + quad_offset] + * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking + * we could skip this load. + */ + spe_comment(f, 0, "Fetch quad colors from tile"); + spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); + + if (blend->blend_enable) { + spe_comment(f, 0, "Perform blending"); + gen_blend(blend, blend_color, f, color_format, + fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); + } + + /* + * Write fragment colors to framebuffer/tile. + * This involves converting the fragment colors from float[4] to the + * tile's specific format and obeying the quad/pixel mask. + */ + { + int rgba_reg = spe_allocate_available_register(f); + + /* Pack four float colors as four 32-bit int colors */ + spe_comment(f, 0, "Convert float quad colors to packed int framebuffer colors"); + gen_pack_colors(f, color_format, + fragR_reg, fragG_reg, fragB_reg, fragA_reg, + rgba_reg); + + if (blend->logicop_enable) { + spe_comment(f, 0, "Compute logic op"); + gen_logicop(blend, f, rgba_reg, fbRGBA_reg); + } + + if (blend->colormask != PIPE_MASK_RGBA) { + spe_comment(f, 0, "Compute color mask"); + gen_colormask(f, blend->colormask, color_format, rgba_reg, fbRGBA_reg); + } + + /* Mix fragment colors with framebuffer colors using the quad/pixel mask: + * if (mask[i]) + * rgba[i] = rgba[i]; + * else + * rgba[i] = framebuffer[i]; + */ + spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg); + + /* Store updated quad in tile: + * memory[color_tile + quad_offset] = rgba_reg; + */ + spe_comment(f, 0, "Store quad colors into color tile"); + spe_stqx(f, rgba_reg, color_tile_reg, 
quad_offset_reg); + + spe_release_register(f, rgba_reg); + } + + //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst); + + spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */ + + spe_release_register(f, fbRGBA_reg); + spe_release_register(f, fbZS_reg); + spe_release_register(f, quad_offset_reg); + + if (cell->debug_flags & CELL_DEBUG_ASM) { + spe_comment(f, -4, "End per-fragment ops"); + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h new file mode 100644 index 0000000000..b59de198dc --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef CELL_GEN_FRAGMENT_H +#define CELL_GEN_FRAGMENT_H + + +extern void +cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f); + + +#endif /* CELL_GEN_FRAGMENT_H */ + diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c new file mode 100644 index 0000000000..825110c62b --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -0,0 +1,357 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ **************************************************************************/
+
+/* Authors:
+ *  Keith Whitwell <keith@tungstengraphics.com>
+ *  Brian Paul
+ */
+
+#include "util/u_memory.h"
+#include "pipe/p_inlines.h"
+#include "draw/draw_context.h"
+#include "cell_context.h"
+#include "cell_flush.h"
+#include "cell_state.h"
+#include "cell_texture.h"
+
+
+/* Create a blend state object: returns mem_dup() of *blend (presumably a heap copy, later released by cell_delete_blend_state). */
+static void *
+cell_create_blend_state(struct pipe_context *pipe,
+                        const struct pipe_blend_state *blend)
+{
+   return mem_dup(blend, sizeof(*blend));
+}
+
+/* Make 'blend' the current blend state: flush the draw module first, then flag CELL_NEW_BLEND. */
+static void
+cell_bind_blend_state(struct pipe_context *pipe, void *blend)
+{
+   struct cell_context *cell = cell_context(pipe);
+
+   draw_flush(cell->draw);
+
+   cell->blend = (struct pipe_blend_state *) blend;
+   cell->dirty |= CELL_NEW_BLEND;
+}
+
+/* Release a blend state object with FREE(); 'pipe' is not used. */
+static void
+cell_delete_blend_state(struct pipe_context *pipe, void *blend)
+{
+   FREE(blend);
+}
+
+/* Copy the blend color into the context after a draw flush; it is flagged with CELL_NEW_BLEND (same bit as the blend state). */
+static void
+cell_set_blend_color(struct pipe_context *pipe,
+                     const struct pipe_blend_color *blend_color)
+{
+   struct cell_context *cell = cell_context(pipe);
+
+   draw_flush(cell->draw);
+
+   cell->blend_color = *blend_color;
+
+   cell->dirty |= CELL_NEW_BLEND;
+}
+
+
+
+/* Create a depth/stencil/alpha state object: returns mem_dup() of *dsa. */
+static void *
+cell_create_depth_stencil_alpha_state(struct pipe_context *pipe,
+                                      const struct pipe_depth_stencil_alpha_state *dsa)
+{
+   return mem_dup(dsa, sizeof(*dsa));
+}
+
+/* Make 'dsa' the current depth/stencil/alpha state: flush the draw module, then flag CELL_NEW_DEPTH_STENCIL. */
+static void
+cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe,
+                                    void *dsa)
+{
+   struct cell_context *cell = cell_context(pipe);
+
+   draw_flush(cell->draw);
+
+   cell->depth_stencil = (struct pipe_depth_stencil_alpha_state *) dsa;
+   cell->dirty |= CELL_NEW_DEPTH_STENCIL;
+}
+
+/* Release a depth/stencil/alpha state object with FREE(); 'pipe' is not used. */
+static void
+cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *dsa)
+{
+   FREE(dsa);
+}
+
+/* Clip state is not stored in the cell context here; it is only forwarded to the draw module. */
+static void
+cell_set_clip_state(struct pipe_context *pipe,
+                    const struct pipe_clip_state *clip)
+{
+   struct cell_context *cell = cell_context(pipe);
+
+   /* pass the clip state to the draw module */
+   draw_set_clip_state(cell->draw, clip);
+}
+
+
+
+/* Called when driver state tracker notices changes to the viewport
+ * matrix:
+ * The viewport is copied into the context, flagged with CELL_NEW_VIEWPORT
+ * and also handed to the draw module.
+ */
+static void
+cell_set_viewport_state( struct pipe_context *pipe,
+                         const struct pipe_viewport_state *viewport )
+{
+   struct cell_context *cell = cell_context(pipe);
+
+   cell->viewport = *viewport; /* struct copy */
+   cell->dirty |= CELL_NEW_VIEWPORT;
+
+   /* pass the viewport info to the draw module */
+   draw_set_viewport_state(cell->draw, viewport);
+
+   /* Using tnl/ and vf/ modules is temporary while getting started.
+    * Full pipe will have vertex shader, vertex fetch of its own.
+    */
+}
+
+/* Copy the scissor rectangle into the context and flag CELL_NEW_SCISSOR. */
+static void
+cell_set_scissor_state( struct pipe_context *pipe,
+                        const struct pipe_scissor_state *scissor )
+{
+   struct cell_context *cell = cell_context(pipe);
+
+   memcpy( &cell->scissor, scissor, sizeof(*scissor) );
+   cell->dirty |= CELL_NEW_SCISSOR;
+}
+
+/* Copy the polygon stipple pattern into the context and flag CELL_NEW_STIPPLE. */
+static void
+cell_set_polygon_stipple( struct pipe_context *pipe,
+                          const struct pipe_poly_stipple *stipple )
+{
+   struct cell_context *cell = cell_context(pipe);
+
+   memcpy( &cell->poly_stipple, stipple, sizeof(*stipple) );
+   cell->dirty |= CELL_NEW_STIPPLE;
+}
+
+
+/* Create a rasterizer state object: returns mem_dup() of *rasterizer. */
+static void *
+cell_create_rasterizer_state(struct pipe_context *pipe,
+                             const struct pipe_rasterizer_state *rasterizer)
+{
+   return mem_dup(rasterizer, sizeof(*rasterizer));
+}
+
+/* Make 'rast' current: hand it to the draw module, store it and flag CELL_NEW_RASTERIZER (no draw_flush() here, unlike the blend bind). */
+static void
+cell_bind_rasterizer_state(struct pipe_context *pipe, void *rast)
+{
+   struct pipe_rasterizer_state *rasterizer =
+      (struct pipe_rasterizer_state *) rast;
+   struct cell_context *cell = cell_context(pipe);
+
+   /* pass-through to draw module */
+   draw_set_rasterizer_state(cell->draw, rasterizer);
+
+   cell->rasterizer = rasterizer;
+
+   cell->dirty |= CELL_NEW_RASTERIZER;
+}
+
+/* Release a rasterizer state object with FREE(); 'pipe' is not used. */
+static void
+cell_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer)
+{
+   FREE(rasterizer);
+}
+
+
+/* Create a sampler state object: returns mem_dup() of *sampler. */
+static void *
+cell_create_sampler_state(struct pipe_context *pipe,
+                          const struct pipe_sampler_state *sampler)
+{
+   return mem_dup(sampler, sizeof(*sampler));
+}
+
+/* Bind 'num' sampler states; slots >= num are set to NULL. Only slots whose pointer changed are recorded in cell->dirty_samplers. */
+static void
+cell_bind_sampler_states(struct pipe_context *pipe,
+                         unsigned num, void **samplers)
+{
+   struct cell_context *cell = cell_context(pipe);
+   uint i, changed = 0x0;   /* bit i set => slot i changed */
+
+   assert(num <= CELL_MAX_SAMPLERS);
+
+   draw_flush(cell->draw);
+
+   for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
+      struct pipe_sampler_state *new_samp = i < num ? samplers[i] : NULL;
+      if (cell->sampler[i] != new_samp) {
+         cell->sampler[i] = new_samp;
+         changed |= (1 << i);
+      }
+   }
+
+   if (changed) {
+      cell->dirty |= CELL_NEW_SAMPLER;
+      cell->dirty_samplers |= changed;
+   }
+}
+
+/* Release a sampler state object with FREE(); 'pipe' is not used. */
+static void
+cell_delete_sampler_state(struct pipe_context *pipe,
+                          void *sampler)
+{
+   FREE( sampler );
+}
+
+
+/* Bind 'num' textures with the same per-slot change tracking; each changed slot is updated through pipe_texture_reference(). */
+static void
+cell_set_sampler_textures(struct pipe_context *pipe,
+                          unsigned num, struct pipe_texture **texture)
+{
+   struct cell_context *cell = cell_context(pipe);
+   uint i, changed = 0x0;   /* bit i set => slot i changed */
+
+   assert(num <= CELL_MAX_SAMPLERS);
+
+   for (i = 0; i < CELL_MAX_SAMPLERS; i++) {
+      struct pipe_texture *new_tex = i < num ? texture[i] : NULL;
+      if ((struct pipe_texture *) cell->texture[i] != new_tex) {
+         pipe_texture_reference((struct pipe_texture **) &cell->texture[i],
+                                new_tex);
+         changed |= (1 << i);
+      }
+   }
+
+   cell->num_textures = num;   /* stored even when no slot changed */
+
+   if (changed) {
+      cell->dirty |= CELL_NEW_TEXTURE;
+      cell->dirty_textures |= changed;
+   }
+}
+
+
+/* Install a new framebuffer: unmap the old mappings, flush with CELL_FLUSH_WAIT, reference the new surfaces, then map cbufs[0] and zsbuf. */
+static void
+cell_set_framebuffer_state(struct pipe_context *pipe,
+                           const struct pipe_framebuffer_state *fb)
+{
+   struct cell_context *cell = cell_context(pipe);
+
+   if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) {   /* change test disabled: always taken */
+      struct pipe_surface *csurf = fb->cbufs[0];
+      struct pipe_surface *zsurf = fb->zsbuf;
+      uint i;
+      uint flags = (PIPE_BUFFER_USAGE_GPU_WRITE |
+                    PIPE_BUFFER_USAGE_GPU_READ);
+
+      /* unmap old surfaces */
+      for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+         if (cell->framebuffer.cbufs[i] && cell->cbuf_map[i]) {
+            pipe_surface_unmap(cell->framebuffer.cbufs[i]);
+            cell->cbuf_map[i] = NULL;
+         }
+      }
+
+      if (cell->framebuffer.zsbuf && cell->zsbuf_map) {
+         pipe_surface_unmap(cell->framebuffer.zsbuf);
+         cell->zsbuf_map = NULL;
+      }
+
+      /* Finish any pending rendering to the current surface before
+       * installing a new surface!
+       * NOTE(review): the old surfaces were already unmapped above, before
+       * this flush runs -- confirm that ordering is intended.
+       */
+      cell_flush_int(cell, CELL_FLUSH_WAIT);
+
+      /* update my state
+       * (this is also where old surfaces will finally get freed)
+       */
+      cell->framebuffer.width = fb->width;
+      cell->framebuffer.height = fb->height;
+      cell->framebuffer.num_cbufs = fb->num_cbufs;
+      for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+         pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]);
+      }
+      pipe_surface_reference(&cell->framebuffer.zsbuf, fb->zsbuf);
+
+      /* map new surfaces (only color buffer 0 and the Z/stencil buffer) */
+      if (csurf)
+         cell->cbuf_map[0] = pipe_surface_map(csurf, flags);
+
+      if (zsurf)
+         cell->zsbuf_map = pipe_surface_map(zsurf, flags);
+
+      cell->dirty |= CELL_NEW_FRAMEBUFFER;
+   }
+}
+
+
+/* Plug every state callback defined above into cell->pipe. */
+void
+cell_init_state_functions(struct cell_context *cell)
+{
+   cell->pipe.create_blend_state = cell_create_blend_state;
+   cell->pipe.bind_blend_state   = cell_bind_blend_state;
+   cell->pipe.delete_blend_state = cell_delete_blend_state;
+
+   cell->pipe.create_sampler_state = cell_create_sampler_state;
+   cell->pipe.bind_sampler_states  = cell_bind_sampler_states;
+   cell->pipe.delete_sampler_state = cell_delete_sampler_state;
+
+   cell->pipe.set_sampler_textures = cell_set_sampler_textures;
+
+   cell->pipe.create_depth_stencil_alpha_state = cell_create_depth_stencil_alpha_state;
+   cell->pipe.bind_depth_stencil_alpha_state   = cell_bind_depth_stencil_alpha_state;
+   cell->pipe.delete_depth_stencil_alpha_state = cell_delete_depth_stencil_alpha_state;
+
+   cell->pipe.create_rasterizer_state = cell_create_rasterizer_state;
+   cell->pipe.bind_rasterizer_state   = cell_bind_rasterizer_state;
+   cell->pipe.delete_rasterizer_state = cell_delete_rasterizer_state;
+
+   cell->pipe.set_blend_color = cell_set_blend_color;
+   cell->pipe.set_clip_state = cell_set_clip_state;
+
+   cell->pipe.set_framebuffer_state = cell_set_framebuffer_state;
+
+   cell->pipe.set_polygon_stipple = cell_set_polygon_stipple;
+   cell->pipe.set_scissor_state = cell_set_scissor_state;
+   cell->pipe.set_viewport_state = cell_set_viewport_state;
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.h b/src/gallium/drivers/cell/ppu/cell_pipe_state.h
new file mode 100644
index 0000000000..1889bd52ff
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.h
@@ -0,0 +1,39 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * + **************************************************************************/ + + +#ifndef CELL_PIPE_STATE_H +#define CELL_PIPE_STATE_H + + +struct cell_context; + +extern void +cell_init_state_functions(struct cell_context *cell); + + +#endif /* CELL_PIPE_STATE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c new file mode 100644 index 0000000000..79cb8df82f --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_render.c @@ -0,0 +1,211 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Last stage of 'draw' pipeline: send tris to SPUs. 
+ * \author Brian Paul
+ */
+
+#include "cell_context.h"
+#include "cell_render.h"
+#include "cell_spu.h"
+#include "util/u_memory.h"
+#include "draw/draw_private.h"
+
+
+/** Final 'draw' pipeline stage: collects triangles for the SPUs. */
+struct render_stage {
+   struct draw_stage stage; /**< This must be first (base class) */
+
+   struct cell_context *cell; /**< context whose prim_buffer gets filled */
+};
+
+
+/** Downcast a draw_stage to its render_stage (stage is the first member). */
+static INLINE struct render_stage *
+render_stage(struct draw_stage *stage)
+{
+   return (struct render_stage *) stage;
+}
+
+
+/** Stage 'begin' hook: currently a no-op, the old body is compiled out. */
+static void render_begin( struct draw_stage *stage )
+{
+#if 0
+   struct render_stage *render = render_stage(stage);
+   struct cell_context *sp = render->cell;
+   const struct pipe_shader_state *fs = &render->cell->fs->shader;
+   render->quad.nr_attrs = render->cell->nr_frag_attrs;
+
+   render->firstFpInput = fs->input_semantic_name[0];
+
+   sp->quad.first->begin(sp->quad.first);
+#endif
+}
+
+
+/** Stage 'end' hook: nothing to do. */
+static void render_end( struct draw_stage *stage )
+{
+}
+
+
+/** Stage 'reset_stipple_counter' hook: no stipple counter is kept yet. */
+static void reset_stipple_counter( struct draw_stage *stage )
+{
+   (void) stage;   /* unused until the counter below is implemented */
+   /*render->cell->line_stipple_counter = 0;*/
+}
+
+
+/** Stage 'point' hook: points are silently dropped by this stage. */
+static void
+render_point(struct draw_stage *stage, struct prim_header *prim)
+{
+}
+
+
+/** Stage 'line' hook: lines are silently dropped by this stage. */
+static void
+render_line(struct draw_stage *stage, struct prim_header *prim)
+{
+}
+
+
+/**
+ * Write a vertex into the prim buffer.
+ * Copies the first two attributes (4 components each) of 'vert' into slot
+ * 'pos' and grows the buffer's bounding box with the vertex's data[0][0]
+ * and data[0][1] values (presumably window x and y).
+ */
+static void
+save_vertex(struct cell_prim_buffer *buf, uint pos,
+            const struct vertex_header *vert)
+{
+   uint attr, j;
+
+   for (attr = 0; attr < 2; attr++) {
+      for (j = 0; j < 4; j++) {
+         buf->vertex[pos][attr][j] = vert->data[attr][j];
+      }
+   }
+
+   /* update bounding box */
+   if (vert->data[0][0] < buf->xmin)
+      buf->xmin = vert->data[0][0];
+   if (vert->data[0][0] > buf->xmax)
+      buf->xmax = vert->data[0][0];
+   if (vert->data[0][1] < buf->ymin)
+      buf->ymin = vert->data[0][1];
+   if (vert->data[0][1] > buf->ymax)
+      buf->ymax = vert->data[0][1];
+}
+
+
+/**
+ * Stage 'tri' hook: append the triangle's three vertices to the context's
+ * prim buffer, flushing the buffer first if they would not fit.
+ */
+static void
+render_tri(struct draw_stage *stage, struct prim_header *prim)
+{
+   struct render_stage *rs = render_stage(stage);
+   struct cell_context *cell = rs->cell;
+   struct cell_prim_buffer *buf = &cell->prim_buffer;
+   uint i;
+
+   if (buf->num_verts + 3 > CELL_MAX_VERTS) {
+      cell_flush_prim_buffer(cell);
+   }
+
+   i = buf->num_verts;
+   /* i+2 is used as an index below, so it must be strictly in range */
+   assert(i + 2 < CELL_MAX_VERTS);
+   save_vertex(buf, i+0, prim->v[0]);
+   save_vertex(buf, i+1, prim->v[1]);
+   save_vertex(buf, i+2, prim->v[2]);
+   buf->num_verts += 3;
+}
+
+
+/**
+ * Send a RENDER command to all SPUs to have them render the prims
+ * in the current prim_buffer.
+ * Does nothing when the buffer is empty.  Afterwards the vertex count and
+ * bounding box are reset and cell_flush() is called.
+ */
+void
+cell_flush_prim_buffer(struct cell_context *cell)
+{
+   uint i;
+
+   if (cell->prim_buffer.num_verts == 0)
+      return;
+
+   for (i = 0; i < cell->num_spus; i++) {
+      struct cell_command_render *render = &cell_global.command[i].render;
+      render->prim_type = PIPE_PRIM_TRIANGLES;
+      render->num_verts = cell->prim_buffer.num_verts;
+      render->front_winding = cell->rasterizer->front_winding;
+      render->vertex_size = cell->vertex_info->size * 4;
+      render->xmin = cell->prim_buffer.xmin;
+      render->ymin = cell->prim_buffer.ymin;
+      render->xmax = cell->prim_buffer.xmax;
+      render->ymax = cell->prim_buffer.ymax;
+      render->vertex_data = &cell->prim_buffer.vertex;
+      ASSERT_ALIGN16(render->vertex_data);
+      send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_RENDER);
+   }
+
+   cell->prim_buffer.num_verts = 0;
+
+   /* Reset the bounding box to "empty" (min > max).
+    * NOTE(review): 1e100 is larger than FLT_MAX; if these fields are floats
+    * the value is out of range for the conversion -- confirm the field type.
+    */
+   cell->prim_buffer.xmin = 1e100;
+   cell->prim_buffer.ymin = 1e100;
+   cell->prim_buffer.xmax = -1e100;
+   cell->prim_buffer.ymax = -1e100;
+
+   /* XXX temporary, need to double-buffer the prim buffer until we get
+    * a real command buffer/list system.
+    */
+   cell_flush(&cell->pipe, 0x0);
+}
+
+
+
+/** Stage 'destroy' hook: release the stage allocated by cell_draw_render_stage(). */
+static void render_destroy( struct draw_stage *stage )
+{
+   FREE( stage );
+}
+
+
+/**
+ * Create a new draw/render stage.  This will be plugged into the
+ * draw module as the last pipeline stage.
+ *
+ * \return the new stage (released through its destroy hook), or NULL if
+ *         the allocation fails.
+ */
+struct draw_stage *cell_draw_render_stage( struct cell_context *cell )
+{
+   struct render_stage *render = CALLOC_STRUCT(render_stage);
+
+   /* Allocation can fail; do not write through a NULL pointer. */
+   if (!render)
+      return NULL;
+
+   render->cell = cell;
+   render->stage.draw = cell->draw;
+   render->stage.begin = render_begin;
+   render->stage.point = render_point;
+   render->stage.line = render_line;
+   render->stage.tri = render_tri;
+   render->stage.end = render_end;
+   render->stage.reset_stipple_counter = reset_stipple_counter;
+   render->stage.destroy = render_destroy;
+
+   /*
+   render->quad.coef = render->coef;
+   render->quad.posCoef = &render->posCoef;
+   */
+
+   return &render->stage;
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_render.h b/src/gallium/drivers/cell/ppu/cell_render.h
new file mode 100644
index 0000000000..826dcbafeb
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_render.h
@@ -0,0 +1,39 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_RENDER_H +#define CELL_RENDER_H + +struct cell_context; +struct draw_stage; + +extern void +cell_flush_prim_buffer(struct cell_context *cell); + +extern struct draw_stage *cell_draw_render_stage( struct cell_context *cell ); + +#endif /* CELL_RENDER_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c new file mode 100644 index 0000000000..d223557950 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -0,0 +1,170 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_memory.h"
+#include "pipe/p_winsys.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+
+#include "cell/common.h"
+#include "cell_screen.h"
+#include "cell_texture.h"
+#include "cell_winsys.h"
+
+
+/** Return the vendor string (a static literal; 'screen' is unused). */
+static const char *
+cell_get_vendor(struct pipe_screen *screen)
+{
+   return "Tungsten Graphics, Inc.";
+}
+
+
+/** Return the driver name (a static literal; 'screen' is unused). */
+static const char *
+cell_get_name(struct pipe_screen *screen)
+{
+   return "Cell";
+}
+
+
+/**
+ * Report an integer or boolean capability.
+ * \param param  one of the PIPE_CAP_x values handled below
+ * \return the limit or 0/1 flag for known capabilities; 0 for any
+ *         capability this driver does not know about, so that unknown
+ *         features are reported as unsupported.
+ */
+static int
+cell_get_param(struct pipe_screen *screen, int param)
+{
+   switch (param) {
+   case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+      return CELL_MAX_SAMPLERS;
+   case PIPE_CAP_NPOT_TEXTURES:
+      return 1;
+   case PIPE_CAP_TWO_SIDED_STENCIL:
+      return 1;
+   case PIPE_CAP_GLSL:
+      return 1;
+   case PIPE_CAP_S3TC:
+      return 0;
+   case PIPE_CAP_ANISOTROPIC_FILTER:
+      return 0;
+   case PIPE_CAP_POINT_SPRITE:
+      return 1;
+   case PIPE_CAP_MAX_RENDER_TARGETS:
+      return 1;
+   case PIPE_CAP_OCCLUSION_QUERY:
+      return 1;
+   case PIPE_CAP_TEXTURE_SHADOW_MAP:
+      return 1;  /* boolean capability (was 10, which is merely "true") */
+   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+      return CELL_MAX_TEXTURE_LEVELS;
+   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+      return 8;  /* max 128x128x128 */
+   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+      return CELL_MAX_TEXTURE_LEVELS;
+   default:
+      return 0;  /* unknown capability: not supported */
+   }
+}
+
+
+/**
+ * Report a floating-point capability.
+ * \param param  one of the PIPE_CAP_x values handled below
+ * \return the limit for known capabilities; 0.0 for unknown ones.
+ */
+static float
+cell_get_paramf(struct pipe_screen *screen, int param)
+{
+   switch (param) {
+   case PIPE_CAP_MAX_LINE_WIDTH:
+      /* fall-through */
+   case PIPE_CAP_MAX_LINE_WIDTH_AA:
+      return 255.0; /* arbitrary */
+
+   case PIPE_CAP_MAX_POINT_WIDTH:
+      /* fall-through */
+   case PIPE_CAP_MAX_POINT_WIDTH_AA:
+      return 255.0; /* arbitrary */
+
+   case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+      return 0.0;
+
+   case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+      return 16.0; /* arbitrary */
+
+   default:
+      return 0.0; /* unknown capability: no limit to report */
+   }
+}
+
+
+/**
+ * Report whether a format can be used.  Only 'format' is examined; the
+ * target, usage and geometry flags are ignored.
+ */
+static boolean
+cell_is_format_supported( struct pipe_screen *screen,
+                          enum pipe_format format,
+                          enum pipe_texture_target target,
+                          unsigned tex_usage,
+                          unsigned geom_flags )
+{
+   /* cell supports most formats, XXX for now anyway */
+   if (format == PIPE_FORMAT_DXT5_RGBA ||
+       format == PIPE_FORMAT_R8G8B8A8_SRGB)
+      return FALSE;
+   else
+      return TRUE;
+}
+
+
+/**
+ * Destroy the screen: call the winsys destroy hook if one is set, then
+ * free the screen object itself.
+ */
+static void
+cell_destroy_screen( struct pipe_screen *screen )
+{
+   struct pipe_winsys *winsys = screen->winsys;
+
+   if(winsys->destroy)
+      winsys->destroy(winsys);
+
+   FREE(screen);
+}
+
+
+/**
+ * Create a new pipe_screen object
+ * Note: we're not presently subclassing pipe_screen (no cell_screen) but
+ * that would be the place to put SPU thread/context info...
+ *
+ * \return the new screen, or NULL if the allocation fails.
+ */
+struct pipe_screen *
+cell_create_screen(struct pipe_winsys *winsys)
+{
+   struct pipe_screen *screen = CALLOC_STRUCT(pipe_screen);
+
+   if (!screen)
+      return NULL;
+
+   screen->winsys = winsys;
+
+   screen->destroy = cell_destroy_screen;
+
+   screen->get_name = cell_get_name;
+   screen->get_vendor = cell_get_vendor;
+   screen->get_param = cell_get_param;
+   screen->get_paramf = cell_get_paramf;
+   screen->is_format_supported = cell_is_format_supported;
+
+   cell_init_screen_texture_funcs(screen);
+
+   return screen;
+}
diff --git a/src/gallium/drivers/cell/ppu/cell_screen.h b/src/gallium/drivers/cell/ppu/cell_screen.h
new file mode 100644
index 0000000000..c7e15889d6
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_screen.h
@@ -0,0 +1,41 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_SCREEN_H +#define CELL_SCREEN_H + + +struct pipe_screen; +struct pipe_winsys; + + +extern struct pipe_screen * +cell_create_screen(struct pipe_winsys *winsys); + + +#endif /* CELL_SCREEN_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c new file mode 100644 index 0000000000..28e5e6d706 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -0,0 +1,219 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Utility/wrappers for communicating with the SPUs. + */ + + +#include <pthread.h> + +#include "cell_spu.h" +#include "pipe/p_format.h" +#include "pipe/p_state.h" +#include "util/u_memory.h" +#include "cell/common.h" + + +/* +helpful headers: +/opt/ibm/cell-sdk/prototype/src/include/ppu/cbe_mfc.h +*/ + + +/** + * Cell/SPU info that's not per-context. + */ +struct cell_global_info cell_global; + + +/** + * Scan /proc/cpuinfo to determine the timebase for the system. + * This is used by the SPUs to convert 'decrementer' ticks to seconds. + * There may be a better way to get this value... 
+ */ +static unsigned +get_timebase(void) +{ + FILE *f = fopen("/proc/cpuinfo", "r"); + unsigned timebase; + + assert(f); + while (!feof(f)) { + char line[80]; + fgets(line, sizeof(line), f); + if (strncmp(line, "timebase", 8) == 0) { + char *colon = strchr(line, ':'); + if (colon) { + timebase = atoi(colon + 2); + break; + } + } + } + fclose(f); + + return timebase; +} + + +/** + * Write a 1-word message to the given SPE mailbox. + */ +void +send_mbox_message(spe_context_ptr_t ctx, unsigned int msg) +{ + spe_in_mbox_write(ctx, &msg, 1, SPE_MBOX_ALL_BLOCKING); +} + + +/** + * Wait for a 1-word message to arrive in given mailbox. + */ +uint +wait_mbox_message(spe_context_ptr_t ctx) +{ + do { + unsigned data; + int count = spe_out_mbox_read(ctx, &data, 1); + + if (count == 1) { + return data; + } + + if (count < 0) { + /* error */ ; + } + } while (1); +} + + +/** + * Called by pthread_create() to spawn an SPU thread. + */ +static void * +cell_thread_function(void *arg) +{ + struct cell_init_info *init = (struct cell_init_info *) arg; + unsigned entry = SPE_DEFAULT_ENTRY; + + ASSERT_ALIGN16(init); + + if (spe_context_run(cell_global.spe_contexts[init->id], &entry, 0, + init, NULL, NULL) < 0) { + fprintf(stderr, "spe_context_run() failed\n"); + exit(1); + } + + pthread_exit(NULL); +} + + +/** + * Create the SPU threads. This is done once during driver initialization. + * This involves setting the the "init" message which is sent to each SPU. + * The init message specifies an SPU id, total number of SPUs, location + * and number of batch buffers, etc. 
+ */ +void +cell_start_spus(struct cell_context *cell) +{ + static boolean one_time_init = FALSE; + uint i, j; + uint timebase = get_timebase(); + + if (one_time_init) { + fprintf(stderr, "PPU: Multiple rendering contexts not yet supported " + "on Cell.\n"); + abort(); + } + + one_time_init = TRUE; + + assert(cell->num_spus <= CELL_MAX_SPUS); + + ASSERT_ALIGN16(&cell_global.inits[0]); + ASSERT_ALIGN16(&cell_global.inits[1]); + + /* + * Initialize the global 'inits' structure for each SPU. + * A pointer to the init struct will be passed to each SPU. + * The SPUs will then each grab their init info with mfc_get(). + */ + for (i = 0; i < cell->num_spus; i++) { + cell_global.inits[i].id = i; + cell_global.inits[i].num_spus = cell->num_spus; + cell_global.inits[i].debug_flags = cell->debug_flags; + /* NOTE(review): nothing guards against timebase == 0 in this division */ + cell_global.inits[i].inv_timebase = 1000.0f / timebase; + + for (j = 0; j < CELL_NUM_BUFFERS; j++) { + cell_global.inits[i].buffers[j] = cell->buffer[j]; + } + cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0]; + + cell_global.inits[i].spu_functions = &cell->spu_functions; + + cell_global.spe_contexts[i] = spe_context_create(0, NULL); + if (!cell_global.spe_contexts[i]) { + fprintf(stderr, "spe_context_create() failed\n"); + exit(1); + } + + if (spe_program_load(cell_global.spe_contexts[i], &g3d_spu)) { + fprintf(stderr, "spe_program_load() failed\n"); + exit(1); + } + + /* pthread_create() reports failure through a non-zero return value. + * Handle it like the libspe failures above: cell_spu_exit() messages + * and joins every one of the num_spus threads, so a thread that never + * started must not go unnoticed. + */ + if (pthread_create(&cell_global.spe_threads[i], /* returned thread handle */ + NULL, /* pthread attribs */ + &cell_thread_function, /* start routine */ + &cell_global.inits[i]) != 0) { /* thread argument */ + fprintf(stderr, "pthread_create() failed\n"); + exit(1); + } + } +} + + +/** + * Tell all the SPUs to stop/exit. + * This is done when the driver's exiting / cleaning up. 
+ */ +void +cell_spu_exit(struct cell_context *cell) +{ + uint i; + + for (i = 0; i < cell->num_spus; i++) { + send_mbox_message(cell_global.spe_contexts[i], CELL_CMD_EXIT); + } + + /* wait for threads to exit */ + for (i = 0; i < cell->num_spus; i++) { + void *value; + pthread_join(cell_global.spe_threads[i], &value); + cell_global.spe_threads[i] = 0; + cell_global.spe_contexts[i] = 0; + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h new file mode 100644 index 0000000000..b633880c25 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_spu.h @@ -0,0 +1,80 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef CELL_SPU +#define CELL_SPU + + +#include <libspe2.h> +#include <libmisc.h> +#include <pthread.h> +#include "cell/common.h" + +#include "cell_context.h" + + +/** + * Global vars, for now anyway. + */ +struct cell_global_info +{ + /** + * SPU/SPE handles, etc + */ + spe_context_ptr_t spe_contexts[CELL_MAX_SPUS]; + pthread_t spe_threads[CELL_MAX_SPUS]; + + /** + * Data sent to SPUs at start-up + */ + struct cell_init_info inits[CELL_MAX_SPUS]; +}; + + +extern struct cell_global_info cell_global; + + +/** This is the handle for the actual SPE code */ +extern spe_program_handle_t g3d_spu; + + +extern void +send_mbox_message(spe_context_ptr_t ctx, unsigned int msg); + +extern uint +wait_mbox_message(spe_context_ptr_t ctx); + + +extern void +cell_start_spus(struct cell_context *cell); + + +extern void +cell_spu_exit(struct cell_context *cell); + + +#endif /* CELL_SPU */ diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h new file mode 100644 index 0000000000..b193170f9c --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state.h @@ -0,0 +1,65 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef CELL_STATE_H +#define CELL_STATE_H + + +#define CELL_NEW_VIEWPORT 0x1 +#define CELL_NEW_RASTERIZER 0x2 +#define CELL_NEW_FS 0x4 +#define CELL_NEW_BLEND 0x8 +#define CELL_NEW_CLIP 0x10 +#define CELL_NEW_SCISSOR 0x20 +#define CELL_NEW_STIPPLE 0x40 +#define CELL_NEW_FRAMEBUFFER 0x80 +#define CELL_NEW_ALPHA_TEST 0x100 +#define CELL_NEW_DEPTH_STENCIL 0x200 +#define CELL_NEW_SAMPLER 0x400 +#define CELL_NEW_TEXTURE 0x800 +#define CELL_NEW_VERTEX 0x1000 +#define CELL_NEW_VS 0x2000 +#define CELL_NEW_VS_CONSTANTS 0x4000 +#define CELL_NEW_FS_CONSTANTS 0x8000 +#define CELL_NEW_VERTEX_INFO 0x10000 + + +extern void +cell_update_derived( struct cell_context *softpipe ); + + +extern void +cell_init_shader_functions(struct cell_context *cell); + + +extern void +cell_init_vertex_functions(struct cell_context *cell); + + +#endif /* CELL_STATE_H */ + diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c new file mode 100644 index 0000000000..efc4f78364 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -0,0 +1,170 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_state.h" +#include "cell_state_emit.h" + + +/** + * Determine how to map vertex program outputs to fragment program inputs. + * Basically, this will be used when computing the triangle interpolation + * coefficients from the post-transform vertex attributes. + */ +static void +calculate_vertex_layout( struct cell_context *cell ) +{ + const struct cell_fragment_shader_state *fs = cell->fs; + const enum interp_mode colorInterp + = cell->rasterizer->flatshade ? 
INTERP_CONSTANT : INTERP_LINEAR; + struct vertex_info *vinfo = &cell->vertex_info; + uint i; + int src; + +#if 0 + if (cell->vbuf) { + /* if using the post-transform vertex buffer, tell draw_vbuf to + * simply emit the whole post-xform vertex as-is: + */ + struct vertex_info *vinfo_vbuf = &cell->vertex_info_vbuf; + vinfo_vbuf->num_attribs = 0; + draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); + vinfo_vbuf->size = 4 * vs->num_outputs + sizeof(struct vertex_header)/4; + } +#endif + + /* reset vinfo */ + vinfo->num_attribs = 0; + + /* we always want to emit vertex pos */ + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_POSITION, 0); + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); + + + /* + * Loop over fragment shader inputs, searching for the matching output + * from the vertex shader. + */ + for (i = 0; i < fs->info.num_inputs; i++) { + switch (fs->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + /* already done above */ + break; + + case TGSI_SEMANTIC_COLOR: + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_COLOR, + fs->info.input_semantic_index[i]); + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + break; + + case TGSI_SEMANTIC_FOG: + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_FOG, 0); +#if 1 + if (src < 0) /* XXX temp hack, try demos/fogcoord.c with this */ + src = 0; +#endif + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); + break; + + case TGSI_SEMANTIC_GENERIC: + /* this includes texcoords and varying vars */ + src = draw_find_vs_output(cell->draw, TGSI_SEMANTIC_GENERIC, + fs->info.input_semantic_index[i]); + assert(src >= 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + break; + + default: + assert(0); + } + } + + draw_compute_vertex_size(vinfo); + + /* XXX only signal this if format really changes */ + cell->dirty |= CELL_NEW_VERTEX_INFO; +} + + +#if 0 +/** + * Recompute cliprect from scissor 
bounds, scissor enable and surface size. + */ +static void +compute_cliprect(struct cell_context *sp) +{ + uint surfWidth = sp->framebuffer.width; + uint surfHeight = sp->framebuffer.height; + + if (sp->rasterizer->scissor) { + /* clip to scissor rect */ + sp->cliprect.minx = MAX2(sp->scissor.minx, 0); + sp->cliprect.miny = MAX2(sp->scissor.miny, 0); + sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth); + sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight); + } + else { + /* clip to surface bounds */ + sp->cliprect.minx = 0; + sp->cliprect.miny = 0; + sp->cliprect.maxx = surfWidth; + sp->cliprect.maxy = surfHeight; + } +} +#endif + + + +/** + * Update derived state, send current state to SPUs prior to rendering. + */ +void cell_update_derived( struct cell_context *cell ) +{ + if (cell->dirty & (CELL_NEW_RASTERIZER | + CELL_NEW_FS | + CELL_NEW_VS)) + calculate_vertex_layout( cell ); + +#if 0 + if (cell->dirty & (CELL_NEW_SCISSOR | + CELL_NEW_DEPTH_STENCIL_ALPHA | + CELL_NEW_FRAMEBUFFER)) + compute_cliprect(cell); +#endif + + cell_emit_state(cell); + + cell->dirty = 0; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c new file mode 100644 index 0000000000..dd2d7f7d1e --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -0,0 +1,271 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "cell_context.h" +#include "cell_gen_fragment.h" +#include "cell_state.h" +#include "cell_state_emit.h" +#include "cell_batch.h" +#include "cell_texture.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" + + +/** + * Find/create a cell_command_fragment_ops object corresponding to the + * current blend/stencil/z/colormask/etc. state. 
+ */ +static struct cell_command_fragment_ops * +lookup_fragment_ops(struct cell_context *cell) +{ + struct cell_fragment_ops_key key; + struct cell_command_fragment_ops *ops; + + /* + * Build key from the blend, blend color, depth/stencil and framebuffer + * format state. The key is zero-filled first; presumably so that padding + * bytes cannot make otherwise equal keys differ (TODO confirm how the + * keymap compares keys). + */ + memset(&key, 0, sizeof(key)); + key.blend = *cell->blend; + key.blend_color = cell->blend_color; + key.dsa = *cell->depth_stencil; + + if (cell->framebuffer.cbufs[0]) + key.color_format = cell->framebuffer.cbufs[0]->format; + else + key.color_format = PIPE_FORMAT_NONE; + + if (cell->framebuffer.zsbuf) + key.zs_format = cell->framebuffer.zsbuf->format; + else + key.zs_format = PIPE_FORMAT_NONE; + + /* + * Look up key in cache. + */ + ops = (struct cell_command_fragment_ops *) + util_keymap_lookup(cell->fragment_ops_cache, &key); + + /* + * If not found, create/save new fragment ops command. + */ + if (!ops) { + struct spe_function spe_code; + + if (0) + debug_printf("**** Create New Fragment Ops\n"); + + /* Prepare the buffer that will hold the generated code. */ + spe_init_func(&spe_code, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + + /* generate new code */ + cell_gen_fragment_function(cell, &spe_code); + + /* alloc new fragment ops command + * NOTE(review): the result is not checked for NULL before it is written. + */ + ops = CALLOC_STRUCT(cell_command_fragment_ops); + + /* populate the new cell_command_fragment_ops object + * NOTE(review): the copy length comes from spe_code_size(); nothing here + * bounds it against the size of ops->code. + */ + ops->opcode = CELL_CMD_STATE_FRAGMENT_OPS; + memcpy(ops->code, spe_code.store, spe_code_size(&spe_code)); + ops->dsa = *cell->depth_stencil; + ops->blend = *cell->blend; + + /* insert cell_command_fragment_ops object into keymap/cache */ + util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL); + + /* release rtasm buffer; the generated code was already copied into ops */ + spe_release_func(&spe_code); + } + else { + if (0) + debug_printf("**** Re-use Fragment Ops\n"); + } + + return ops; +} + + +/* Emit a generic state command: request ROUNDUP8(8 + state_size) bytes from + * cell_batch_alloc(), store cmd in the leading 64-bit word and copy + * state_size bytes of state immediately after it. + */ +static void +emit_state_cmd(struct cell_context *cell, uint cmd, + const void *state, uint state_size) +{ + uint64_t *dst = (uint64_t *) + cell_batch_alloc(cell, ROUNDUP8(sizeof(uint64_t) + state_size)); + *dst = cmd; + memcpy(dst + 1, state, state_size); +}
+ + +/** + * For state marked as 'dirty', construct a state-update command block + * and insert it into the current batch buffer. + */ +void +cell_emit_state(struct cell_context *cell) +{ + if (cell->dirty & CELL_NEW_FRAMEBUFFER) { + struct pipe_surface *cbuf = cell->framebuffer.cbufs[0]; + struct pipe_surface *zbuf = cell->framebuffer.zsbuf; + struct cell_command_framebuffer *fb + = cell_batch_alloc(cell, sizeof(*fb)); + fb->opcode = CELL_CMD_STATE_FRAMEBUFFER; + fb->color_start = cell->cbuf_map[0]; + fb->color_format = cbuf->format; + fb->depth_start = cell->zsbuf_map; + fb->depth_format = zbuf ? zbuf->format : PIPE_FORMAT_NONE; + fb->width = cell->framebuffer.width; + fb->height = cell->framebuffer.height; +#if 0 + printf("EMIT color format %s\n", pf_name(fb->color_format)); + printf("EMIT depth format %s\n", pf_name(fb->depth_format)); +#endif + } + + if (cell->dirty & (CELL_NEW_RASTERIZER)) { + struct cell_command_rasterizer *rast = + cell_batch_alloc(cell, sizeof(*rast)); + rast->opcode = CELL_CMD_STATE_RASTERIZER; + rast->rasterizer = *cell->rasterizer; + } + + if (cell->dirty & (CELL_NEW_FS)) { + /* Send new fragment program to SPUs */ + struct cell_command_fragment_program *fp + = cell_batch_alloc(cell, sizeof(*fp)); + fp->opcode = CELL_CMD_STATE_FRAGMENT_PROGRAM; + fp->num_inst = cell->fs->code.num_inst; + memcpy(&fp->code, cell->fs->code.store, + SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); + if (0) { + int i; + printf("PPU Emit CELL_CMD_STATE_FRAGMENT_PROGRAM:\n"); + for (i = 0; i < fp->num_inst; i++) { + printf(" %3d: 0x%08x\n", i, fp->code[i]); + } + } + } + + if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) { + const uint shader = PIPE_SHADER_FRAGMENT; + const uint num_const = cell->constants[shader].size / sizeof(float); + uint i, j; + float *buf = cell_batch_alloc(cell, 16 + num_const * sizeof(float)); + uint64_t *ibuf = (uint64_t *) buf; + const float *constants = pipe_buffer_map(cell->pipe.screen, + cell->constants[shader].buffer, + 
PIPE_BUFFER_USAGE_CPU_READ); + ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS; + ibuf[1] = num_const; + j = 4; + for (i = 0; i < num_const; i++) { + buf[j++] = constants[i]; + } + pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader].buffer); + } + + if (cell->dirty & (CELL_NEW_FRAMEBUFFER | + CELL_NEW_DEPTH_STENCIL | + CELL_NEW_BLEND)) { + struct cell_command_fragment_ops *fops, *fops_cmd; + fops_cmd = cell_batch_alloc(cell, sizeof(*fops_cmd)); + fops = lookup_fragment_ops(cell); + memcpy(fops_cmd, fops, sizeof(*fops)); + } + + if (cell->dirty & CELL_NEW_SAMPLER) { + uint i; + for (i = 0; i < CELL_MAX_SAMPLERS; i++) { + if (cell->dirty_samplers & (1 << i)) { + if (cell->sampler[i]) { + struct cell_command_sampler *sampler + = cell_batch_alloc(cell, sizeof(*sampler)); + sampler->opcode = CELL_CMD_STATE_SAMPLER; + sampler->unit = i; + sampler->state = *cell->sampler[i]; + } + } + } + cell->dirty_samplers = 0x0; + } + + if (cell->dirty & CELL_NEW_TEXTURE) { + uint i; + for (i = 0;i < CELL_MAX_SAMPLERS; i++) { + if (cell->dirty_textures & (1 << i)) { + struct cell_command_texture *texture + = cell_batch_alloc(cell, sizeof(*texture)); + texture->opcode = CELL_CMD_STATE_TEXTURE; + texture->unit = i; + if (cell->texture[i]) { + uint level; + for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { + texture->start[level] = cell->texture[i]->tiled_mapped[level]; + texture->width[level] = cell->texture[i]->base.width[level]; + texture->height[level] = cell->texture[i]->base.height[level]; + texture->depth[level] = cell->texture[i]->base.depth[level]; + } + texture->target = cell->texture[i]->base.target; + } + else { + uint level; + for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { + texture->start[level] = NULL; + texture->width[level] = 0; + texture->height[level] = 0; + texture->depth[level] = 0; + } + texture->target = 0; + } + } + } + cell->dirty_textures = 0x0; + } + + if (cell->dirty & CELL_NEW_VERTEX_INFO) { + emit_state_cmd(cell, 
CELL_CMD_STATE_VERTEX_INFO, + &cell->vertex_info, sizeof(struct vertex_info)); + } + +#if 0 + if (cell->dirty & CELL_NEW_VS) { + const struct draw_context *const draw = cell->draw; + struct cell_shader_info info; + + info.num_outputs = draw_num_vs_outputs(draw); + info.declarations = (uintptr_t) draw->vs.machine.Declarations; + info.num_declarations = draw->vs.machine.NumDeclarations; + info.instructions = (uintptr_t) draw->vs.machine.Instructions; + info.num_instructions = draw->vs.machine.NumInstructions; + info.immediates = (uintptr_t) draw->vs.machine.Imms; + info.num_immediates = draw->vs.machine.ImmLimit / 4; + + emit_state_cmd(cell, CELL_CMD_STATE_BIND_VS, &info, sizeof(info)); + } +#endif +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.h b/src/gallium/drivers/cell/ppu/cell_state_emit.h new file mode 100644 index 0000000000..59f8affe8d --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.h @@ -0,0 +1,36 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_STATE_EMIT_H +#define CELL_STATE_EMIT_H + + +extern void +cell_emit_state(struct cell_context *cell); + + +#endif /* CELL_STATE_EMIT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c new file mode 100644 index 0000000000..78cb446c14 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -0,0 +1,1430 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file + * Generate code to perform all per-fragment operations. + * + * Code generated by these functions perform both alpha, depth, and stencil + * testing as well as alpha blending. + * + * \note + * Occlusion query is not supported, but this is the right place to add that + * support. + * + * \author Ian Romanick <idr@us.ibm.com> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "cell_context.h" + +#include "rtasm/rtasm_ppc_spe.h" + + +/** + * Generate code to perform alpha testing. + * + * The code generated by this function uses the register specificed by + * \c mask as both an input and an output. + * + * \param dsa Current alpha-test state + * \param f Function to which code should be appended + * \param mask Index of register containing active fragment mask + * \param alphas Index of register containing per-fragment alpha values + * + * \note Emits a maximum of 6 instructions. + */ +static void +emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, int mask, int alphas) +{ + /* If the alpha function is either NEVER or ALWAYS, there is no need to + * load the reference value into a register. ALWAYS is a fairly common + * case, and this optimization saves 2 instructions. 
+ */ + if (dsa->alpha.enabled + && (dsa->alpha.func != PIPE_FUNC_NEVER) + && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { + int ref = spe_allocate_available_register(f); + int tmp_a = spe_allocate_available_register(f); + int tmp_b = spe_allocate_available_register(f); + union { + float f; + unsigned u; + } ref_val; + boolean complement = FALSE; + + ref_val.f = dsa->alpha.ref; + + spe_il(f, ref, ref_val.u & 0x0000ffff); + spe_ilh(f, ref, ref_val.u >> 16); + + switch (dsa->alpha.func) { + case PIPE_FUNC_NOTEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + + case PIPE_FUNC_EQUAL: + spe_fceq(f, tmp_a, ref, alphas); + break; + + case PIPE_FUNC_LEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + + case PIPE_FUNC_GREATER: + spe_fcgt(f, tmp_a, ref, alphas); + break; + + case PIPE_FUNC_LESS: + complement = TRUE; + /* FALLTHROUGH */ + + case PIPE_FUNC_GEQUAL: + spe_fcgt(f, tmp_a, ref, alphas); + spe_fceq(f, tmp_b, ref, alphas); + spe_or(f, tmp_a, tmp_b, tmp_a); + break; + + case PIPE_FUNC_ALWAYS: + case PIPE_FUNC_NEVER: + default: + assert(0); + break; + } + + if (complement) { + spe_andc(f, mask, mask, tmp_a); + } else { + spe_and(f, mask, mask, tmp_a); + } + + spe_release_register(f, ref); + spe_release_register(f, tmp_a); + spe_release_register(f, tmp_b); + } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) { + spe_il(f, mask, 0); + } +} + + +/** + * Generate code to perform Z testing. Four Z values are tested at once. + * \param dsa Current depth-test state + * \param f Function to which code should be appended + * \param mask Index of register to contain depth-pass mask + * \param stored Index of register containing values from depth buffer + * \param calculated Index of register containing per-fragment depth values + * + * \return + * If the calculated depth comparison mask is the actual mask, \c FALSE is + * returned. If the calculated depth comparison mask is the compliment of + * the actual mask, \c TRUE is returned. 
+ * + * \note Emits a maximum of 3 instructions. + */ +static boolean +emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, int mask, int stored, int calculated) +{ + unsigned func = (dsa->depth.enabled) + ? dsa->depth.func : PIPE_FUNC_ALWAYS; /* a disabled depth test is handled as ALWAYS */ + int tmp = spe_allocate_available_register(f); /* scratch; only the LESS/GEQUAL path writes it */ + boolean compliment = FALSE; + + switch (func) { + case PIPE_FUNC_NEVER: + spe_il(f, mask, 0); + break; + + case PIPE_FUNC_NOTEQUAL: + compliment = TRUE; + /* FALLTHROUGH: emit the EQUAL compare and report it as inverted */ + case PIPE_FUNC_EQUAL: + spe_ceq(f, mask, calculated, stored); + break; + + case PIPE_FUNC_LEQUAL: + compliment = TRUE; + /* FALLTHROUGH: emit the GREATER compare and report it as inverted */ + case PIPE_FUNC_GREATER: + spe_clgt(f, mask, calculated, stored); + break; + + case PIPE_FUNC_LESS: + compliment = TRUE; + /* FALLTHROUGH: emit the GEQUAL sequence and report it as inverted */ + case PIPE_FUNC_GEQUAL: + /* greater-than OR equal, built from two compares joined with spe_or */ + spe_clgt(f, mask, calculated, stored); + spe_ceq(f, tmp, calculated, stored); + spe_or(f, mask, mask, tmp); + break; + + case PIPE_FUNC_ALWAYS: + spe_il(f, mask, ~0); + break; + + default: + /* unknown func: no instruction is emitted, so mask is not written here */ + assert(0); + break; + } + + spe_release_register(f, tmp); + return compliment; +} + + +/** + * Generate code to apply the stencil operation (after testing). + * \note Emits a maximum of 5 instructions. + * + * \warning + * Since \c out and \c in might be the same register, this routine cannot + * generate code that uses \c out as a temporary. 
+ */ +static void +emit_stencil_op(struct spe_function *f, + int out, int in, int mask, unsigned op, unsigned ref) +{ + const int clamp = spe_allocate_available_register(f); + const int clamp_mask = spe_allocate_available_register(f); + const int result = spe_allocate_available_register(f); + + switch(op) { + case PIPE_STENCIL_OP_KEEP: + assert(0); + case PIPE_STENCIL_OP_ZERO: + spe_il(f, result, 0); + break; + case PIPE_STENCIL_OP_REPLACE: + spe_il(f, result, ref); + break; + case PIPE_STENCIL_OP_INCR: + /* clamp = [0xff, 0xff, 0xff, 0xff] */ + spe_il(f, clamp, 0x0ff); + /* result[i] = in[i] + 1 */ + spe_ai(f, result, in, 1); + /* clamp_mask[i] = (result[i] > 0xff) */ + spe_clgti(f, clamp_mask, result, 0x0ff); + /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */ + spe_selb(f, result, result, clamp, clamp_mask); + break; + case PIPE_STENCIL_OP_DECR: + spe_il(f, clamp, 0); + spe_ai(f, result, in, -1); + + /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned + * arithmetic. + */ + spe_clgti(f, clamp_mask, result, 0x0ff); + spe_selb(f, result, result, clamp, clamp_mask); + break; + case PIPE_STENCIL_OP_INCR_WRAP: + spe_ai(f, result, in, 1); + break; + case PIPE_STENCIL_OP_DECR_WRAP: + spe_ai(f, result, in, -1); + break; + case PIPE_STENCIL_OP_INVERT: + spe_nor(f, result, in, in); + break; + default: + assert(0); + } + + spe_selb(f, out, in, result, mask); + + spe_release_register(f, result); + spe_release_register(f, clamp_mask); + spe_release_register(f, clamp); +} + + +/** + * Generate code to do stencil test. Four pixels are tested at once. + * \param dsa Depth / stencil test state + * \param face 0 for front face, 1 for back face + * \param f Function to append instructions to + * \param mask Register containing mask of fragments passing the + * alpha test + * \param depth_mask Register containing mask of fragments passing the + * depth test + * \param depth_compliment Is \c depth_mask the compliment of the actual mask? 
+ * \param stencil Register containing values from stencil buffer + * \param depth_pass Register to store mask of fragments passing stencil test + * and depth test + * + * \note + * Emits a maximum of 10 + (3 * 5) = 25 instructions. + */ +static int +emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, + unsigned face, + struct spe_function *f, + int mask, + int depth_mask, + boolean depth_complement, + int stencil, + int depth_pass) +{ + int stencil_fail = spe_allocate_available_register(f); + int depth_fail = spe_allocate_available_register(f); + int stencil_mask = spe_allocate_available_register(f); + int stencil_pass = spe_allocate_available_register(f); + int face_stencil = spe_allocate_available_register(f); + int stencil_src = stencil; + const unsigned ref = (dsa->stencil[face].ref_value + & dsa->stencil[face].value_mask); + boolean complement = FALSE; + int stored; + int tmp = spe_allocate_available_register(f); + + + if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) + && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS) + && (dsa->stencil[face].value_mask != 0x0ff)) { + stored = spe_allocate_available_register(f); + spe_andi(f, stored, stencil, dsa->stencil[face].value_mask); + } else { + stored = stencil; + } + + + switch (dsa->stencil[face].func) { + case PIPE_FUNC_NEVER: + spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */ + break; + + case PIPE_FUNC_NOTEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_EQUAL: + /* stencil_mask[i] = (stored[i] == ref) */ + spe_ceqi(f, stencil_mask, stored, ref); + break; + + case PIPE_FUNC_LEQUAL: + complement = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_GREATER: + complement = TRUE; + /* stencil_mask[i] = (stored[i] > ref) */ + spe_clgti(f, stencil_mask, stored, ref); + break; + + case PIPE_FUNC_LESS: + complement = TRUE; + /* FALLTHROUGH */ + case PIPE_FUNC_GEQUAL: + /* stencil_mask[i] = (stored[i] > ref) */ + spe_clgti(f, stencil_mask, stored, ref); + /* tmp[i] = (stored[i] == ref) */ 
+ spe_ceqi(f, tmp, stored, ref); + /* stencil_mask[i] = stencil_mask[i] | tmp[i] */ + spe_or(f, stencil_mask, stencil_mask, tmp); + break; + + case PIPE_FUNC_ALWAYS: + /* See comment below. */ + break; + + default: + assert(0); + break; + } + + if (stored != stencil) { + spe_release_register(f, stored); + } + spe_release_register(f, tmp); + + + /* ALWAYS is a very common stencil-test, so some effort is applied to + * optimize that case. The stencil-pass mask is the same as the input + * fragment mask. This makes the stencil-test (above) a no-op, and the + * input fragment mask can be "renamed" the stencil-pass mask. + */ + if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) { + spe_release_register(f, stencil_pass); + stencil_pass = mask; + } else { + if (complement) { + spe_andc(f, stencil_pass, mask, stencil_mask); + } else { + spe_and(f, stencil_pass, mask, stencil_mask); + } + } + + if (depth_complement) { + spe_andc(f, depth_pass, stencil_pass, depth_mask); + } else { + spe_and(f, depth_pass, stencil_pass, depth_mask); + } + + + /* Conditionally emit code to update the stencil value under various + * condititons. Note that there is no need to generate code under the + * following circumstances: + * + * - Stencil write mask is zero. 
+ * - For stencil-fail if the stencil test is ALWAYS + * - For depth-fail if the stencil test is NEVER + * - For depth-pass if the stencil test is NEVER + * - Any of the 3 conditions if the operation is KEEP + */ + if (dsa->stencil[face].write_mask != 0) { + if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS) + && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) { + if (complement) { + spe_and(f, stencil_fail, mask, stencil_mask); + } else { + spe_andc(f, stencil_fail, mask, stencil_mask); + } + + emit_stencil_op(f, face_stencil, stencil_src, stencil_fail, + dsa->stencil[face].fail_op, + dsa->stencil[face].ref_value); + + stencil_src = face_stencil; + } + + if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) + && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) { + if (depth_complement) { + spe_and(f, depth_fail, stencil_pass, depth_mask); + } else { + spe_andc(f, depth_fail, stencil_pass, depth_mask); + } + + emit_stencil_op(f, face_stencil, stencil_src, depth_fail, + dsa->stencil[face].zfail_op, + dsa->stencil[face].ref_value); + stencil_src = face_stencil; + } + + if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) + && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) { + emit_stencil_op(f, face_stencil, stencil_src, depth_pass, + dsa->stencil[face].zpass_op, + dsa->stencil[face].ref_value); + stencil_src = face_stencil; + } + } + + spe_release_register(f, stencil_fail); + spe_release_register(f, depth_fail); + spe_release_register(f, stencil_mask); + if (stencil_pass != mask) { + spe_release_register(f, stencil_pass); + } + + /* If all of the stencil operations were KEEP or the stencil write mask was + * zero, "stencil_src" will still be set to "stencil". In this case + * release the "face_stencil" register. Otherwise apply the stencil write + * mask to select bits from the calculated stencil value and the previous + * stencil value. 
+ */ + if (stencil_src == stencil) { + spe_release_register(f, face_stencil); + } else if (dsa->stencil[face].write_mask != 0x0ff) { + int tmp = spe_allocate_available_register(f); + + spe_il(f, tmp, dsa->stencil[face].write_mask); + spe_selb(f, stencil_src, stencil, stencil_src, tmp); + + spe_release_register(f, tmp); + } + + return stencil_src; +} + + +void +cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) +{ + struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base; + struct spe_function *const f = &cdsa->code; + + /* This code generates a maximum of 6 (alpha test) + 3 (depth test) + * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round + * up to 64 to make it a happy power-of-two. + */ + spe_init_func(f, SPE_INST_SIZE * 64); + + + /* Allocate registers for the function's input parameters. Cleverly (and + * clever code is usually dangerous, but I couldn't resist) the generated + * function returns a structure. Returned structures start with register + * 3, and the structure fields are ordered to match up exactly with the + * input parameters. 
+ */ + int mask = spe_allocate_register(f, 3); + int depth = spe_allocate_register(f, 4); + int stencil = spe_allocate_register(f, 5); + int zvals = spe_allocate_register(f, 6); + int frag_a = spe_allocate_register(f, 7); + int facing = spe_allocate_register(f, 8); + + int depth_mask = spe_allocate_available_register(f); + + boolean depth_complement; + + + emit_alpha_test(dsa, f, mask, frag_a); + + depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals); + + if (dsa->stencil[0].enabled) { + const int front_depth_pass = spe_allocate_available_register(f); + int front_stencil = emit_stencil_test(dsa, 0, f, mask, + depth_mask, depth_complement, + stencil, front_depth_pass); + + if (dsa->stencil[1].enabled) { + const int back_depth_pass = spe_allocate_available_register(f); + int back_stencil = emit_stencil_test(dsa, 1, f, mask, + depth_mask, depth_complement, + stencil, back_depth_pass); + + /* If the front facing stencil value and the back facing stencil + * value are stored in the same register, there is no need to select + * a value based on the facing. This can happen if the stencil value + * was not modified due to the write masks being zero, the stencil + * operations being KEEP, etc. 
+ */ + if (front_stencil != back_stencil) { + spe_selb(f, stencil, back_stencil, front_stencil, facing); + } + + if (back_stencil != stencil) { + spe_release_register(f, back_stencil); + } + + if (front_stencil != stencil) { + spe_release_register(f, front_stencil); + } + + spe_selb(f, mask, back_depth_pass, front_depth_pass, facing); + + spe_release_register(f, back_depth_pass); + } else { + if (front_stencil != stencil) { + spe_or(f, stencil, front_stencil, front_stencil); + spe_release_register(f, front_stencil); + } + spe_or(f, mask, front_depth_pass, front_depth_pass); + } + + spe_release_register(f, front_depth_pass); + } else if (dsa->depth.enabled) { + if (depth_complement) { + spe_andc(f, mask, mask, depth_mask); + } else { + spe_and(f, mask, mask, depth_mask); + } + } + + if (dsa->depth.writemask) { + spe_selb(f, depth, depth, zvals, mask); + } + + spe_bi(f, 0, 0, 0); /* return from function call */ + + +#if 0 + { + const uint32_t *p = f->store; + unsigned i; + + printf("# alpha (%sabled)\n", + (dsa->alpha.enabled) ? "en" : "dis"); + printf("# func: %u\n", dsa->alpha.func); + printf("# ref: %.2f\n", dsa->alpha.ref); + + printf("# depth (%sabled)\n", + (dsa->depth.enabled) ? "en" : "dis"); + printf("# func: %u\n", dsa->depth.func); + + for (i = 0; i < 2; i++) { + printf("# %s stencil (%sabled)\n", + (i == 0) ? "front" : "back", + (dsa->stencil[i].enabled) ? 
"en" : "dis"); + + printf("# func: %u\n", dsa->stencil[i].func); + printf("# op (sf, zf, zp): %u %u %u\n", + dsa->stencil[i].fail_op, + dsa->stencil[i].zfail_op, + dsa->stencil[i].zpass_op); + printf("# ref value / value mask / write mask: %02x %02x %02x\n", + dsa->stencil[i].ref_value, + dsa->stencil[i].value_mask, + dsa->stencil[i].write_mask); + } + + printf("\t.text\n"); + for (/* empty */; p < f->csr; p++) { + printf("\t.long\t0x%04x\n", *p); + } + fflush(stdout); + } +#endif +} + + +/** + * \note Emits a maximum of 3 instructions + */ +static int +emit_alpha_factor_calculation(struct spe_function *f, + unsigned factor, + int src_alpha, int dst_alpha, int const_alpha) +{ + int factor_reg; + int tmp; + + + switch (factor) { + case PIPE_BLENDFACTOR_ONE: + factor_reg = -1; + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA: + factor_reg = spe_allocate_available_register(f); + + spe_or(f, factor_reg, src_alpha, src_alpha); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: + factor_reg = dst_alpha; + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + factor_reg = -1; + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + factor_reg = spe_allocate_available_register(f); + + tmp = spe_allocate_available_register(f); + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor_reg, tmp, const_alpha); + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_CONST_ALPHA: + factor_reg = const_alpha; + break; + + case PIPE_BLENDFACTOR_ZERO: + factor_reg = -1; + break; + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + tmp = spe_allocate_available_register(f); + factor_reg = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor_reg, tmp, src_alpha); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + tmp = spe_allocate_available_register(f); + factor_reg = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor_reg, tmp, dst_alpha); + 
+ spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: + assert(0); + factor_reg = -1; + break; + } + + return factor_reg; +} + + +/** + * \note Emits a maximum of 6 instructions + */ +static void +emit_color_factor_calculation(struct spe_function *f, + unsigned sF, unsigned mask, + const int *src, + const int *dst, + const int *const_color, + int *factor) +{ + int tmp; + unsigned i; + + + factor[0] = -1; + factor[1] = -1; + factor[2] = -1; + factor[3] = -1; + + switch (sF) { + case PIPE_BLENDFACTOR_ONE: + break; + + case PIPE_BLENDFACTOR_SRC_COLOR: + for (i = 0; i < 3; ++i) { + if ((mask & (1U << i)) != 0) { + factor[i] = spe_allocate_available_register(f); + spe_or(f, factor[i], src[i], src[i]); + } + } + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA: + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + spe_or(f, factor[0], src[3], src[3]); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: + factor[0] = dst[3]; + factor[1] = dst[3]; + factor[2] = dst[3]; + break; + + case PIPE_BLENDFACTOR_DST_COLOR: + factor[0] = dst[0]; + factor[1] = dst[1]; + factor[2] = dst[2]; + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + tmp = spe_allocate_available_register(f); + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + /* Alpha saturate means min(As, 1-Ad). 
+ */ + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, tmp, tmp, dst[3]); + spe_fcgt(f, factor[0], tmp, src[3]); + spe_selb(f, factor[0], src[3], tmp, factor[0]); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + tmp = spe_allocate_available_register(f); + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + + for (i = 0; i < 3; i++) { + factor[i] = spe_allocate_available_register(f); + + spe_fs(f, factor[i], tmp, const_color[i]); + } + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_CONST_COLOR: + for (i = 0; i < 3; i++) { + factor[i] = const_color[i]; + } + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + tmp = spe_allocate_available_register(f); + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor[0], tmp, const_color[3]); + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_CONST_ALPHA: + factor[0] = const_color[3]; + factor[1] = factor[0]; + factor[2] = factor[0]; + break; + + case PIPE_BLENDFACTOR_ZERO: + break; + + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + tmp = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + + for (i = 0; i < 3; ++i) { + if ((mask & (1U << i)) != 0) { + factor[i] = spe_allocate_available_register(f); + spe_fs(f, factor[i], tmp, src[i]); + } + } + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + tmp = spe_allocate_available_register(f); + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor[0], tmp, src[3]); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + tmp = spe_allocate_available_register(f); + factor[0] = spe_allocate_available_register(f); + factor[1] = factor[0]; + factor[2] = factor[0]; + + spe_il(f, tmp, 1); + 
spe_cuflt(f, tmp, tmp, 0); + spe_fs(f, factor[0], tmp, dst[3]); + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_INV_DST_COLOR: + tmp = spe_allocate_available_register(f); + + spe_il(f, tmp, 1); + spe_cuflt(f, tmp, tmp, 0); + + for (i = 0; i < 3; ++i) { + if ((mask & (1U << i)) != 0) { + factor[i] = spe_allocate_available_register(f); + spe_fs(f, factor[i], tmp, dst[i]); + } + } + + spe_release_register(f, tmp); + break; + + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: + assert(0); + } +} + + +static void +emit_blend_calculation(struct spe_function *f, + unsigned func, unsigned sF, unsigned dF, + int src, int src_factor, int dst, int dst_factor) +{ + int tmp = spe_allocate_available_register(f); + + switch (func) { + case PIPE_BLEND_ADD: + if (sF == PIPE_BLENDFACTOR_ONE) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + /* Do nothing. */ + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_fa(f, src, src, dst); + } + } else if (sF == PIPE_BLENDFACTOR_ZERO) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, src, 0); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_or(f, src, dst, dst); + } else { + spe_fm(f, src, dst, dst_factor); + } + } else if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_fm(f, src, src, src_factor); + } else { + spe_fm(f, tmp, dst, dst_factor); + spe_fma(f, src, src, src_factor, tmp); + } + break; + + case PIPE_BLEND_SUBTRACT: + if (sF == PIPE_BLENDFACTOR_ONE) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + /* Do nothing. 
*/ + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_fs(f, src, src, dst); + } + } else if (sF == PIPE_BLENDFACTOR_ZERO) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, src, 0); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_il(f, tmp, 0); + spe_fs(f, src, tmp, dst); + } else { + spe_fm(f, src, dst, dst_factor); + } + } else if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_fm(f, src, src, src_factor); + } else { + spe_fm(f, tmp, dst, dst_factor); + spe_fms(f, src, src, src_factor, tmp); + } + break; + + case PIPE_BLEND_REVERSE_SUBTRACT: + if (sF == PIPE_BLENDFACTOR_ONE) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, tmp, 0); + spe_fs(f, src, tmp, src); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_fs(f, src, dst, src); + } + } else if (sF == PIPE_BLENDFACTOR_ZERO) { + if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_il(f, src, 0); + } else if (dF == PIPE_BLENDFACTOR_ONE) { + spe_or(f, src, dst, dst); + } else { + spe_fm(f, src, dst, dst_factor); + } + } else if (dF == PIPE_BLENDFACTOR_ZERO) { + spe_fm(f, src, src, src_factor); + } else { + spe_fm(f, tmp, src, src_factor); + spe_fms(f, src, src, dst_factor, tmp); + } + break; + + case PIPE_BLEND_MIN: + spe_cgt(f, tmp, src, dst); + spe_selb(f, src, src, dst, tmp); + break; + + case PIPE_BLEND_MAX: + spe_cgt(f, tmp, src, dst); + spe_selb(f, src, dst, src, tmp); + break; + + default: + assert(0); + } + + spe_release_register(f, tmp); +} + + +/** + * Generate code to perform alpha blending on the SPE + */ +void +cell_generate_alpha_blend(struct cell_blend_state *cb) +{ + struct pipe_blend_state *const b = &cb->base; + struct spe_function *const f = &cb->code; + + /* This code generates a maximum of 3 (source alpha factor) + * + 3 (destination alpha factor) + (3 * 6) (source color factor) + * + (3 * 6) (destination color factor) + (4 * 2) (blend equation) + * + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to + * make it a happy power-of-two. 
+ */ + spe_init_func(f, SPE_INST_SIZE * 64); + + + const int frag[4] = { + spe_allocate_register(f, 3), + spe_allocate_register(f, 4), + spe_allocate_register(f, 5), + spe_allocate_register(f, 6), + }; + const int pixel[4] = { + spe_allocate_register(f, 7), + spe_allocate_register(f, 8), + spe_allocate_register(f, 9), + spe_allocate_register(f, 10), + }; + const int const_color[4] = { + spe_allocate_register(f, 11), + spe_allocate_register(f, 12), + spe_allocate_register(f, 13), + spe_allocate_register(f, 14), + }; + unsigned func[4]; + unsigned sF[4]; + unsigned dF[4]; + unsigned i; + int src_factor[4]; + int dst_factor[4]; + + + /* Does the selected blend mode make use of the source / destination + * color (RGB) blend factors? + */ + boolean need_color_factor = b->blend_enable + && (b->rgb_func != PIPE_BLEND_MIN) + && (b->rgb_func != PIPE_BLEND_MAX); + + /* Does the selected blend mode make use of the source / destination + * alpha blend factors? + */ + boolean need_alpha_factor = b->blend_enable + && (b->alpha_func != PIPE_BLEND_MIN) + && (b->alpha_func != PIPE_BLEND_MAX); + + + if (b->blend_enable) { + sF[0] = b->rgb_src_factor; + sF[1] = sF[0]; + sF[2] = sF[0]; + switch (b->alpha_src_factor & 0x0f) { + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + sF[3] = PIPE_BLENDFACTOR_ONE; + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + case PIPE_BLENDFACTOR_DST_COLOR: + case PIPE_BLENDFACTOR_CONST_COLOR: + case PIPE_BLENDFACTOR_SRC1_COLOR: + sF[3] = b->alpha_src_factor + 1; + break; + default: + sF[3] = b->alpha_src_factor; + } + + dF[0] = b->rgb_dst_factor; + dF[1] = dF[0]; + dF[2] = dF[0]; + switch (b->alpha_dst_factor & 0x0f) { + case PIPE_BLENDFACTOR_SRC_COLOR: + case PIPE_BLENDFACTOR_DST_COLOR: + case PIPE_BLENDFACTOR_CONST_COLOR: + case PIPE_BLENDFACTOR_SRC1_COLOR: + dF[3] = b->alpha_dst_factor + 1; + break; + default: + dF[3] = b->alpha_dst_factor; + } + + func[0] = b->rgb_func; + func[1] = func[0]; + func[2] = func[0]; + func[3] = b->alpha_func; + } else { + sF[0] = 
PIPE_BLENDFACTOR_ONE; + sF[1] = PIPE_BLENDFACTOR_ONE; + sF[2] = PIPE_BLENDFACTOR_ONE; + sF[3] = PIPE_BLENDFACTOR_ONE; + dF[0] = PIPE_BLENDFACTOR_ZERO; + dF[1] = PIPE_BLENDFACTOR_ZERO; + dF[2] = PIPE_BLENDFACTOR_ZERO; + dF[3] = PIPE_BLENDFACTOR_ZERO; + + func[0] = PIPE_BLEND_ADD; + func[1] = PIPE_BLEND_ADD; + func[2] = PIPE_BLEND_ADD; + func[3] = PIPE_BLEND_ADD; + } + + + /* If alpha writing is enabled and the alpha blend mode requires use of + * the alpha factor, calculate the alpha factor. + */ + if (((b->colormask & 8) != 0) && need_alpha_factor) { + src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3], + frag[3], pixel[3]); + + /* If the alpha destination blend factor is the same as the alpha source + * blend factor, re-use the previously calculated value. + */ + dst_factor[3] = (dF[3] == sF[3]) + ? src_factor[3] + : emit_alpha_factor_calculation(f, dF[3], const_color[3], + frag[3], pixel[3]); + } + + + if (sF[0] == sF[3]) { + src_factor[0] = src_factor[3]; + src_factor[1] = src_factor[3]; + src_factor[2] = src_factor[3]; + } else if (sF[0] == dF[3]) { + src_factor[0] = dst_factor[3]; + src_factor[1] = dst_factor[3]; + src_factor[2] = dst_factor[3]; + } else if (need_color_factor) { + emit_color_factor_calculation(f, + b->rgb_src_factor, + b->colormask, + frag, pixel, const_color, src_factor); + } + + + if (dF[0] == sF[3]) { + dst_factor[0] = src_factor[3]; + dst_factor[1] = src_factor[3]; + dst_factor[2] = src_factor[3]; + } else if (dF[0] == dF[3]) { + dst_factor[0] = dst_factor[3]; + dst_factor[1] = dst_factor[3]; + dst_factor[2] = dst_factor[3]; + } else if (dF[0] == sF[0]) { + dst_factor[0] = src_factor[0]; + dst_factor[1] = src_factor[1]; + dst_factor[2] = src_factor[2]; + } else if (need_color_factor) { + emit_color_factor_calculation(f, + b->rgb_dst_factor, + b->colormask, + frag, pixel, const_color, dst_factor); + } + + + + for (i = 0; i < 4; ++i) { + if ((b->colormask & (1U << i)) != 0) { + emit_blend_calculation(f, + func[i], sF[i], 
dF[i], + frag[i], src_factor[i], + pixel[i], dst_factor[i]); + } + } + + spe_bi(f, 0, 0, 0); + +#if 0 + { + const uint32_t *p = f->store; + + printf("# %u instructions\n", f->csr - f->store); + printf("# blend (%sabled)\n", + (cb->base.blend_enable) ? "en" : "dis"); + printf("# RGB func / sf / df: %u %u %u\n", + cb->base.rgb_func, + cb->base.rgb_src_factor, + cb->base.rgb_dst_factor); + printf("# ALP func / sf / df: %u %u %u\n", + cb->base.alpha_func, + cb->base.alpha_src_factor, + cb->base.alpha_dst_factor); + + printf("\t.text\n"); + for (/* empty */; p < f->csr; p++) { + printf("\t.long\t0x%04x\n", *p); + } + fflush(stdout); + } +#endif +} + + +static int +PC_OFFSET(const struct spe_function *f, const void *d) +{ + const intptr_t pc = (intptr_t) &f->store[f->num_inst]; + const intptr_t ea = ~0x0f & (intptr_t) d; + + return (ea - pc) >> 2; +} + + +/** + * Generate code to perform color conversion and logic op + * + * \bug + * The code generated by this function should also perform dithering. + * + * \bug + * The code generated by this function should also perform color-write + * masking. + * + * \bug + * Only two framebuffer formats are supported at this time. + */ +void +cell_generate_logic_op(struct spe_function *f, + const struct pipe_blend_state *blend, + struct pipe_surface *surf) +{ + const unsigned logic_op = (blend->logicop_enable) + ? blend->logicop_func : PIPE_LOGICOP_COPY; + + /* This code generates a maximum of 37 instructions. An additional 32 + * bytes (equiv. to 8 instructions) are needed for data storage. Round up + * to 64 to make it a happy power-of-two. + */ + spe_init_func(f, SPE_INST_SIZE * 64); + + + /* Pixel colors in framebuffer format in AoS layout. + */ + const int pixel[4] = { + spe_allocate_register(f, 3), + spe_allocate_register(f, 4), + spe_allocate_register(f, 5), + spe_allocate_register(f, 6), + }; + + /* Fragment colors stored as floats in SoA layout. 
+ */ + const int frag[4] = { + spe_allocate_register(f, 7), + spe_allocate_register(f, 8), + spe_allocate_register(f, 9), + spe_allocate_register(f, 10), + }; + + const int mask = spe_allocate_register(f, 11); + + + /* Short-circuit the noop and invert cases. + */ + if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->colormask == 0)) { + spe_bi(f, 0, 0, 0); + return; + } else if (logic_op == PIPE_LOGICOP_INVERT) { + spe_nor(f, pixel[0], pixel[0], pixel[0]); + spe_nor(f, pixel[1], pixel[1], pixel[1]); + spe_nor(f, pixel[2], pixel[2], pixel[2]); + spe_nor(f, pixel[3], pixel[3], pixel[3]); + spe_bi(f, 0, 0, 0); + return; + } + + + const int tmp[4] = { + spe_allocate_available_register(f), + spe_allocate_available_register(f), + spe_allocate_available_register(f), + spe_allocate_available_register(f), + }; + + const int shuf_xpose_hi = spe_allocate_available_register(f); + const int shuf_xpose_lo = spe_allocate_available_register(f); + const int shuf_color = spe_allocate_available_register(f); + + + /* Pointer to the begining of the function's private data area. + */ + uint32_t *const data = ((uint32_t *) f->store) + (64 - 8); + + + /* Convert fragment colors to framebuffer format in AoS layout. 
+ */ + switch (surf->format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + data[0] = 0x00010203; + data[1] = 0x10111213; + data[2] = 0x04050607; + data[3] = 0x14151617; + data[4] = 0x0c000408; + data[5] = 0x80808080; + data[6] = 0x80808080; + data[7] = 0x80808080; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + data[0] = 0x03020100; + data[1] = 0x13121110; + data[2] = 0x07060504; + data[3] = 0x17161514; + data[4] = 0x0804000c; + data[5] = 0x80808080; + data[6] = 0x80808080; + data[7] = 0x80808080; + break; + default: + fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()"); + ASSERT(0); + } + + spe_ilh(f, tmp[0], 0x0808); + spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0)); + spe_lqr(f, shuf_color, PC_OFFSET(f, data+4)); + spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]); + + spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi); + spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo); + spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi); + spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo); + + spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi); + spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo); + spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi); + spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo); + + spe_cfltu(f, frag[0], frag[0], 32); + spe_cfltu(f, frag[1], frag[1], 32); + spe_cfltu(f, frag[2], frag[2], 32); + spe_cfltu(f, frag[3], frag[3], 32); + + spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color); + spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color); + spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color); + spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color); + + + /* If logic op is enabled, perform the requested logical operation on the + * converted fragment colors and the pixel colors. 
+ */ + switch (logic_op) { + case PIPE_LOGICOP_CLEAR: + spe_il(f, frag[0], 0); + spe_il(f, frag[1], 0); + spe_il(f, frag[2], 0); + spe_il(f, frag[3], 0); + break; + case PIPE_LOGICOP_NOR: + spe_nor(f, frag[0], frag[0], pixel[0]); + spe_nor(f, frag[1], frag[1], pixel[1]); + spe_nor(f, frag[2], frag[2], pixel[2]); + spe_nor(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_AND_INVERTED: + spe_andc(f, frag[0], pixel[0], frag[0]); + spe_andc(f, frag[1], pixel[1], frag[1]); + spe_andc(f, frag[2], pixel[2], frag[2]); + spe_andc(f, frag[3], pixel[3], frag[3]); + break; + case PIPE_LOGICOP_COPY_INVERTED: + spe_nor(f, frag[0], frag[0], frag[0]); + spe_nor(f, frag[1], frag[1], frag[1]); + spe_nor(f, frag[2], frag[2], frag[2]); + spe_nor(f, frag[3], frag[3], frag[3]); + break; + case PIPE_LOGICOP_AND_REVERSE: + spe_andc(f, frag[0], frag[0], pixel[0]); + spe_andc(f, frag[1], frag[1], pixel[1]); + spe_andc(f, frag[2], frag[2], pixel[2]); + spe_andc(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_XOR: + spe_xor(f, frag[0], frag[0], pixel[0]); + spe_xor(f, frag[1], frag[1], pixel[1]); + spe_xor(f, frag[2], frag[2], pixel[2]); + spe_xor(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_NAND: + spe_nand(f, frag[0], frag[0], pixel[0]); + spe_nand(f, frag[1], frag[1], pixel[1]); + spe_nand(f, frag[2], frag[2], pixel[2]); + spe_nand(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_AND: + spe_and(f, frag[0], frag[0], pixel[0]); + spe_and(f, frag[1], frag[1], pixel[1]); + spe_and(f, frag[2], frag[2], pixel[2]); + spe_and(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_EQUIV: + spe_eqv(f, frag[0], frag[0], pixel[0]); + spe_eqv(f, frag[1], frag[1], pixel[1]); + spe_eqv(f, frag[2], frag[2], pixel[2]); + spe_eqv(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_OR_INVERTED: + spe_orc(f, frag[0], pixel[0], frag[0]); + spe_orc(f, frag[1], pixel[1], frag[1]); + spe_orc(f, frag[2], pixel[2], frag[2]); + spe_orc(f, frag[3], 
pixel[3], frag[3]); + break; + case PIPE_LOGICOP_COPY: + break; + case PIPE_LOGICOP_OR_REVERSE: + spe_orc(f, frag[0], frag[0], pixel[0]); + spe_orc(f, frag[1], frag[1], pixel[1]); + spe_orc(f, frag[2], frag[2], pixel[2]); + spe_orc(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_OR: + spe_or(f, frag[0], frag[0], pixel[0]); + spe_or(f, frag[1], frag[1], pixel[1]); + spe_or(f, frag[2], frag[2], pixel[2]); + spe_or(f, frag[3], frag[3], pixel[3]); + break; + case PIPE_LOGICOP_SET: + spe_il(f, frag[0], ~0); + spe_il(f, frag[1], ~0); + spe_il(f, frag[2], ~0); + spe_il(f, frag[3], ~0); + break; + + /* These two cases are short-circuited above. + */ + case PIPE_LOGICOP_INVERT: + case PIPE_LOGICOP_NOOP: + default: + assert(0); + } + + + /* Apply fragment mask. + */ + spe_ilh(f, tmp[0], 0x0000); + spe_ilh(f, tmp[1], 0x0404); + spe_ilh(f, tmp[2], 0x0808); + spe_ilh(f, tmp[3], 0x0c0c); + + spe_shufb(f, tmp[0], mask, mask, tmp[0]); + spe_shufb(f, tmp[1], mask, mask, tmp[1]); + spe_shufb(f, tmp[2], mask, mask, tmp[2]); + spe_shufb(f, tmp[3], mask, mask, tmp[3]); + + spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]); + spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]); + spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]); + spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]); + + spe_bi(f, 0, 0, 0); + +#if 0 + { + const uint32_t *p = f->store; + unsigned i; + + printf("# %u instructions\n", f->csr - f->store); + + printf("\t.text\n"); + for (i = 0; i < 64; i++) { + printf("\t.long\t0x%04x\n", p[i]); + } + fflush(stdout); + } +#endif +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h new file mode 100644 index 0000000000..a8267a5133 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.h @@ -0,0 +1,39 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef CELL_STATE_PER_FRAGMENT_H +#define CELL_STATE_PER_FRAGMENT_H + +extern void +cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa); + +extern void +cell_generate_alpha_blend(struct cell_blend_state *cb); + +extern void +cell_generate_logic_op(struct spe_function *f, + const struct pipe_blend_state *blend, + struct pipe_surface *surf); + +#endif /* CELL_STATE_PER_FRAGMENT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c new file mode 100644 index 0000000000..cda39f8d59 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -0,0 +1,221 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "draw/draw_context.h" +#include "tgsi/tgsi_parse.h" + +#include "cell_context.h" +#include "cell_state.h" +#include "cell_gen_fp.h" + + +/** cast wrapper */ +static INLINE struct cell_fragment_shader_state * +cell_fragment_shader_state(void *shader) +{ + return (struct cell_fragment_shader_state *) shader; +} + + +/** cast wrapper */ +static INLINE struct cell_vertex_shader_state * +cell_vertex_shader_state(void *shader) +{ + return (struct cell_vertex_shader_state *) shader; +} + + +/** + * Create fragment shader state. 
+ * Called via pipe->create_fs_state()
+ *
+ * Returns the new state object, or NULL if either allocation fails.
+ */
+static void *
+cell_create_fs_state(struct pipe_context *pipe,
+                     const struct pipe_shader_state *templ)
+{
+   struct cell_context *ctx = cell_context(pipe);
+   struct cell_fragment_shader_state *state =
+      CALLOC_STRUCT(cell_fragment_shader_state);
+
+   if (state == NULL)
+      return NULL;
+
+   /* Duplicate the token stream into the new state object. */
+   state->shader.tokens = tgsi_dup_tokens(templ->tokens);
+   if (state->shader.tokens != NULL) {
+      tgsi_scan_shader(templ->tokens, &state->info);
+
+      /* Generate the fragment program from the duplicated tokens. */
+      cell_gen_fragment_program(ctx, state->shader.tokens, &state->code);
+
+      return state;
+   }
+
+   /* Token duplication failed: undo the state allocation. */
+   FREE(state);
+   return NULL;
+}
+
+
+/**
+ * Called via pipe->bind_fs_state()
+ *
+ * Records the shader as current and flags the fragment shader as dirty.
+ */
+static void
+cell_bind_fs_state(struct pipe_context *pipe, void *fs)
+{
+   struct cell_context *ctx = cell_context(pipe);
+   struct cell_fragment_shader_state *state = cell_fragment_shader_state(fs);
+
+   ctx->fs = state;
+   ctx->dirty |= CELL_NEW_FS;
+}
+
+
+/**
+ * Called via pipe->delete_fs_state()
+ *
+ * Releases the generated code, the duplicated tokens and the state itself.
+ */
+static void
+cell_delete_fs_state(struct pipe_context *pipe, void *fs)
+{
+   struct cell_fragment_shader_state *state = cell_fragment_shader_state(fs);
+
+   spe_release_func(&state->code);
+
+   FREE((void *) state->shader.tokens);
+   FREE(state);
+}
+
+
+/**
+ * Create vertex shader state.
+ * Called via pipe->create_vs_state() + */ +static void * +cell_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct cell_context *cell = cell_context(pipe); + struct cell_vertex_shader_state *cvs; + + cvs = CALLOC_STRUCT(cell_vertex_shader_state); + if (!cvs) + return NULL; + + cvs->shader.tokens = tgsi_dup_tokens(templ->tokens); + if (!cvs->shader.tokens) { + FREE(cvs); + return NULL; + } + + tgsi_scan_shader(templ->tokens, &cvs->info); + + cvs->draw_data = draw_create_vertex_shader(cell->draw, &cvs->shader); + if (cvs->draw_data == NULL) { + FREE( (void *) cvs->shader.tokens ); + FREE( cvs ); + return NULL; + } + + return cvs; +} + + +/** + * Called via pipe->bind_vs_state() + */ +static void +cell_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct cell_context *cell = cell_context(pipe); + + cell->vs = cell_vertex_shader_state(vs); + + draw_bind_vertex_shader(cell->draw, + (cell->vs ? cell->vs->draw_data : NULL)); + + cell->dirty |= CELL_NEW_VS; +} + + +/** + * Called via pipe->delete_vs_state() + */ +static void +cell_delete_vs_state(struct pipe_context *pipe, void *vs) +{ + struct cell_context *cell = cell_context(pipe); + struct cell_vertex_shader_state *cvs = cell_vertex_shader_state(vs); + + draw_delete_vertex_shader(cell->draw, cvs->draw_data); + FREE( (void *) cvs->shader.tokens ); + FREE( cvs ); +} + + +/** + * Called via pipe->set_constant_buffer() + */ +static void +cell_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct cell_context *cell = cell_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + draw_flush(cell->draw); + + /* note: reference counting */ + winsys_buffer_reference(ws, + &cell->constants[shader].buffer, + buf->buffer); + cell->constants[shader].size = buf->size; + + if (shader == PIPE_SHADER_VERTEX) + cell->dirty |= 
CELL_NEW_VS_CONSTANTS; + else if (shader == PIPE_SHADER_FRAGMENT) + cell->dirty |= CELL_NEW_FS_CONSTANTS; +} + + +void +cell_init_shader_functions(struct cell_context *cell) +{ + cell->pipe.create_fs_state = cell_create_fs_state; + cell->pipe.bind_fs_state = cell_bind_fs_state; + cell->pipe.delete_fs_state = cell_delete_fs_state; + + cell->pipe.create_vs_state = cell_create_vs_state; + cell->pipe.bind_vs_state = cell_bind_vs_state; + cell->pipe.delete_vs_state = cell_delete_vs_state; + + cell->pipe.set_constant_buffer = cell_set_constant_buffer; +} diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c new file mode 100644 index 0000000000..fbe55c8472 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -0,0 +1,79 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "cell_context.h" +#include "cell_state.h" + +#include "draw/draw_context.h" + + +static void +cell_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements) +{ + struct cell_context *cell = cell_context(pipe); + + assert(count <= PIPE_MAX_ATTRIBS); + + memcpy(cell->vertex_element, elements, count * sizeof(elements[0])); + cell->num_vertex_elements = count; + + cell->dirty |= CELL_NEW_VERTEX; + + draw_set_vertex_elements(cell->draw, count, elements); +} + + +static void +cell_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct cell_context *cell = cell_context(pipe); + + assert(count <= PIPE_MAX_ATTRIBS); + + memcpy(cell->vertex_buffer, buffers, count * sizeof(buffers[0])); + cell->num_vertex_buffers = count; + + cell->dirty |= CELL_NEW_VERTEX; + + draw_set_vertex_buffers(cell->draw, count, buffers); +} + + +void +cell_init_vertex_functions(struct cell_context *cell) +{ + cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; + cell->pipe.set_vertex_elements = cell_set_vertex_elements; +} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c new file mode 100644 index 0000000000..732c64082e --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -0,0 +1,37 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_rect.h" +#include "cell_context.h" + + +void +cell_init_surface_functions(struct cell_context *cell) +{ + cell->pipe.surface_copy = util_surface_copy; + cell->pipe.surface_fill = util_surface_fill; +} diff --git a/src/gallium/drivers/cell/ppu/cell_surface.h b/src/gallium/drivers/cell/ppu/cell_surface.h new file mode 100644 index 0000000000..9e58f32944 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_surface.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef CELL_SURFACE_H +#define CELL_SURFACE_H + + +struct cell_context; + + +extern void +cell_init_surface_functions(struct cell_context *cell); + + +#endif /* CELL_SURFACE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c new file mode 100644 index 0000000000..9ac2f3bbb9 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -0,0 +1,372 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Michel Dänzer <michel@tungstengraphics.com> + */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "cell_context.h" +#include "cell_state.h" +#include "cell_texture.h" + + +/* Simple, maximally packed layout. 
+ */ + +static unsigned minify( unsigned d ) +{ + return MAX2(1, d>>1); +} + + +static void +cell_texture_layout(struct cell_texture *ct) +{ + struct pipe_texture *pt = &ct->base; + unsigned level; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + + ct->buffer_size = 0; + + for ( level = 0 ; level <= pt->last_level ; level++ ) { + unsigned size; + unsigned w_tile, h_tile; + + assert(level < CELL_MAX_TEXTURE_LEVELS); + + /* width, height, rounded up to tile size */ + w_tile = align(width, TILE_SIZE); + h_tile = align(height, TILE_SIZE); + + pt->width[level] = width; + pt->height[level] = height; + pt->depth[level] = depth; + pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile); + + ct->stride[level] = pt->nblocksx[level] * pt->block.size; + + ct->level_offset[level] = ct->buffer_size; + + size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size; + if (pt->target == PIPE_TEXTURE_CUBE) + size *= 6; + else + size *= depth; + + ct->buffer_size += size; + + width = minify(width); + height = minify(height); + depth = minify(depth); + } +} + + +static struct pipe_texture * +cell_texture_create(struct pipe_screen *screen, + const struct pipe_texture *templat) +{ + struct pipe_winsys *ws = screen->winsys; + struct cell_texture *ct = CALLOC_STRUCT(cell_texture); + if (!ct) + return NULL; + + ct->base = *templat; + ct->base.refcount = 1; + ct->base.screen = screen; + + cell_texture_layout(ct); + + ct->buffer = ws->buffer_create(ws, 32, PIPE_BUFFER_USAGE_PIXEL, + ct->buffer_size); + + if (!ct->buffer) { + FREE(ct); + return NULL; + } + + return &ct->base; +} + + +static void +cell_texture_release(struct pipe_screen *screen, + struct pipe_texture **pt) +{ + if (!*pt) + return; + + /* + DBG("%s %p refcount will be %d\n", + __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); + */ + if (--(*pt)->refcount <= 0) { + /* Delete this texture now. 
+ * But note that the underlying pipe_buffer may linger... + */ + struct cell_texture *ct = cell_texture(*pt); + uint i; + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) ct); + */ + + pipe_buffer_reference(screen, &ct->buffer, NULL); + + for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { + /* Unreference the tiled image buffer. + * It may not actually be deleted until a fence is hit. + */ + if (ct->tiled_buffer[i]) { + ct->tiled_mapped[i] = NULL; + winsys_buffer_reference(screen->winsys, &ct->tiled_buffer[i], NULL); + } + } + + FREE(ct); + } + *pt = NULL; +} + + + +/** + * Convert image from linear layout to tiled layout. 4-byte pixels. + */ +static void +twiddle_image_uint(uint w, uint h, uint tile_size, uint *dst, + uint src_stride, const uint *src) +{ + const uint tile_size2 = tile_size * tile_size; + const uint h_t = (h + tile_size - 1) / tile_size; + const uint w_t = (w + tile_size - 1) / tile_size; + + uint it, jt; /* tile counters */ + uint i, j; /* intra-tile counters */ + + src_stride /= 4; /* convert from bytes to pixels */ + + /* loop over dest tiles */ + for (it = 0; it < h_t; it++) { + for (jt = 0; jt < w_t; jt++) { + /* start of dest tile: */ + uint *tdst = dst + (it * w_t + jt) * tile_size2; + + /* compute size of this tile (may be smaller than tile_size) */ + /* XXX note: a compiler bug was found here. That's why the code + * looks as it does. + */ + uint tile_width = w - jt * tile_size; + tile_width = MIN2(tile_width, tile_size); + uint tile_height = h - it * tile_size; + tile_height = MIN2(tile_height, tile_size); + + /* loop over texels in the tile */ + for (i = 0; i < tile_height; i++) { + for (j = 0; j < tile_width; j++) { + const uint srci = it * tile_size + i; + const uint srcj = jt * tile_size + j; + ASSERT(srci < h); + ASSERT(srcj < w); + tdst[i * tile_size + j] = src[srci * src_stride + srcj]; + } + } + } + } +} + + +/** + * Convert linear texture image data to tiled format for SPU usage. 
+ */ +static void +cell_twiddle_texture(struct pipe_screen *screen, + struct pipe_surface *surface) +{ + struct cell_texture *ct = cell_texture(surface->texture); + const uint level = surface->level; + const uint texWidth = ct->base.width[level]; + const uint texHeight = ct->base.height[level]; + const uint bufWidth = align(texWidth, TILE_SIZE); + const uint bufHeight = align(texHeight, TILE_SIZE); + const void *map = pipe_buffer_map(screen, surface->buffer, + PIPE_BUFFER_USAGE_CPU_READ); + const uint *src = (const uint *) ((const ubyte *) map + surface->offset); + + switch (ct->base.format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + { + int numFaces = ct->base.target == PIPE_TEXTURE_CUBE ? 6 : 1; + int offset = bufWidth * bufHeight * 4 * surface->face; + uint *dst; + + if (!ct->tiled_buffer[level]) { + /* allocate buffer for tiled data now */ + struct pipe_winsys *ws = screen->winsys; + uint bytes = bufWidth * bufHeight * 4 * numFaces; + ct->tiled_buffer[level] = ws->buffer_create(ws, 16, + PIPE_BUFFER_USAGE_PIXEL, + bytes); + /* and map it */ + ct->tiled_mapped[level] = ws->buffer_map(ws, ct->tiled_buffer[level], + PIPE_BUFFER_USAGE_GPU_READ); + } + dst = (uint *) ((ubyte *) ct->tiled_mapped[level] + offset); + + twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, + surface->stride, src); + } + break; + default: + printf("Cell: twiddle unsupported texture format\n"); + ; + } + + pipe_buffer_unmap(screen, surface->buffer); +} + + +static struct pipe_surface * +cell_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned usage) +{ + struct pipe_winsys *ws = screen->winsys; + struct cell_texture *ct = cell_texture(pt); + struct pipe_surface *ps; + + ps = ws->surface_alloc(ws); + if (ps) { + assert(ps->refcount); + assert(ps->winsys); + winsys_buffer_reference(ws, &ps->buffer, ct->buffer); + ps->format = pt->format; + ps->block = pt->block; + ps->width = pt->width[level]; + ps->height = 
pt->height[level]; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = ct->stride[level]; + ps->offset = ct->level_offset[level]; + ps->usage = usage; + + /* XXX may need to override usage flags (see sp_texture.c) */ + + pipe_texture_reference(&ps->texture, pt); + ps->face = face; + ps->level = level; + ps->zslice = zslice; + + if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { + ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * + ps->nblocksy * + ps->stride; + } + else { + assert(face == 0); + assert(zslice == 0); + } + } + return ps; +} + + +static void +cell_tex_surface_release(struct pipe_screen *screen, + struct pipe_surface **s) +{ + /* XXX if done rendering to teximage, re-tile */ + + pipe_texture_reference(&(*s)->texture, NULL); + + screen->winsys->surface_release(screen->winsys, s); +} + + +static void * +cell_surface_map(struct pipe_screen *screen, + struct pipe_surface *surface, + unsigned flags) +{ + ubyte *map; + + if (flags & ~surface->usage) { + assert(0); + return NULL; + } + + map = pipe_buffer_map( screen, surface->buffer, flags ); + if (map == NULL) + return NULL; + else + return (void *) (map + surface->offset); +} + + +static void +cell_surface_unmap(struct pipe_screen *screen, + struct pipe_surface *surface) +{ + struct cell_texture *ct = cell_texture(surface->texture); + + assert(ct); + + if ((ct->base.tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) && + (surface->usage & PIPE_BUFFER_USAGE_CPU_WRITE)) { + /* convert from linear to tiled layout */ + cell_twiddle_texture(screen, surface); + } + + pipe_buffer_unmap( screen, surface->buffer ); +} + + + +void +cell_init_screen_texture_funcs(struct pipe_screen *screen) +{ + screen->texture_create = cell_texture_create; + screen->texture_release = cell_texture_release; + + screen->get_tex_surface = cell_get_tex_surface; + screen->tex_surface_release = cell_tex_surface_release; + + screen->surface_map = cell_surface_map; + 
screen->surface_unmap = cell_surface_unmap; +} diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h new file mode 100644 index 0000000000..2f5fe0dd1b --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -0,0 +1,71 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef CELL_TEXTURE_H +#define CELL_TEXTURE_H + + +struct cell_context; +struct pipe_texture; + + +/** + * Subclass of pipe_texture. cell_texture() below casts a pipe_texture + * pointer straight to this type (presumably relying on base being the + * first member). + * + * level_offset[] and stride[] are filled in per mipmap level by + * cell_texture_layout() in cell_texture.c: the byte offset at which the + * level starts inside buffer, and nblocksx * block.size for that level. + */ +struct cell_texture +{ + struct pipe_texture base; + + unsigned long level_offset[CELL_MAX_TEXTURE_LEVELS]; + unsigned long stride[CELL_MAX_TEXTURE_LEVELS]; + + /* The data is held here; buffer_size is the sum of all level sizes + * computed by cell_texture_layout(): + */ + struct pipe_buffer *buffer; + unsigned long buffer_size; + + /** Texture data in tiled layout is held here (one buffer per level) */ + struct pipe_buffer *tiled_buffer[CELL_MAX_TEXTURE_LEVELS]; + /** Mapped, tiled texture data (one mapping per level) */ + void *tiled_mapped[CELL_MAX_TEXTURE_LEVELS]; +}; + + +/** cast wrapper: view a pipe_texture pointer as its cell_texture subclass */ +static INLINE struct cell_texture * +cell_texture(struct pipe_texture *pt) +{ + return (struct cell_texture *) pt; +} + + + +extern void +cell_init_screen_texture_funcs(struct pipe_screen *screen); + + +#endif /* CELL_TEXTURE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c new file mode 100644 index 0000000000..65ba51b6bb --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -0,0 +1,307 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Vertex buffer code. The draw module transforms vertices to window + * coords, etc. and emits the vertices into buffer supplied by this module. + * When a vertex buffer is full, or we flush, we'll send the vertex data + * to the SPUs. + * + * Authors + * Brian Paul + */ + + +#include "cell_batch.h" +#include "cell_context.h" +#include "cell_fence.h" +#include "cell_flush.h" +#include "cell_spu.h" +#include "cell_vbuf.h" +#include "draw/draw_vbuf.h" +#include "util/u_memory.h" + + +/** Allow vertex data to be inlined after RENDER command */ +#define ALLOW_INLINE_VERTS 1 + + +/** + * Subclass of vbuf_render because we need a cell_context pointer in + * a few places. 
+ */ +struct cell_vbuf_render +{ + struct vbuf_render base; + struct cell_context *cell; + uint prim; /**< PIPE_PRIM_x */ + uint vertex_size; /**< in bytes */ + void *vertex_buffer; /**< just for debug, really */ + uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ +}; + + +/** cast wrapper */ +static struct cell_vbuf_render * +cell_vbuf_render(struct vbuf_render *vbr) +{ + return (struct cell_vbuf_render *) vbr; +} + + + +static const struct vertex_info * +cell_vbuf_get_vertex_info(struct vbuf_render *vbr) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + return &cvbr->cell->vertex_info; +} + + +static void * +cell_vbuf_allocate_vertices(struct vbuf_render *vbr, + ushort vertex_size, ushort nr_vertices) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + /*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/ + + assert(cvbr->vertex_buf == ~0); + cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell); + cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf]; + cvbr->vertex_size = vertex_size; + return cvbr->vertex_buffer; +} + + +static void +cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, + unsigned vertex_size, unsigned vertices_used) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + struct cell_context *cell = cvbr->cell; + + /* + printf("%s vertex_buf = %u count = %u\n", + __FUNCTION__, cvbr->vertex_buf, vertices_used); + */ + + /* Make sure texture buffers aren't released until we're done rendering + * with them. 
+ */ + cell_add_fenced_textures(cell); + + /* Tell SPUs they can release the vert buf */ + if (cvbr->vertex_buf != ~0U) { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) + cell_batch_alloc(cell, sizeof(struct cell_command_release_verts)); + release->opcode = CELL_CMD_RELEASE_VERTS; + release->vertex_buf = cvbr->vertex_buf; + } + + cvbr->vertex_buf = ~0; + cell_flush_int(cell, 0x0); + + assert(vertices == cvbr->vertex_buffer); + cvbr->vertex_buffer = NULL; +} + + + +static boolean +cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + cvbr->prim = prim; + /*printf("cell_set_prim %u\n", prim);*/ + return TRUE; +} + + +static void +cell_vbuf_draw(struct vbuf_render *vbr, + const ushort *indices, + uint nr_indices) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + struct cell_context *cell = cvbr->cell; + float xmin, ymin, xmax, ymax; + uint i; + uint nr_vertices = 0, min_index = ~0; + const void *vertices = cvbr->vertex_buffer; + const uint vertex_size = cvbr->vertex_size; + + for (i = 0; i < nr_indices; i++) { + if (indices[i] > nr_vertices) + nr_vertices = indices[i]; + if (indices[i] < min_index) + min_index = indices[i]; + } + nr_vertices++; + +#if 0 + /*if (min_index > 0)*/ + printf("%s min_index = %u\n", __FUNCTION__, min_index); +#endif + +#if 0 + printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u\n", + nr_indices, nr_vertices); + printf(" "); + for (i = 0; i < nr_indices; i += 3) { + printf("%u %u %u, ", indices[i+0], indices[i+1], indices[i+2]); + } + printf("\n"); +#elif 0 + printf("cell_vbuf_draw() nr_indices = %u nr_verts = %u indexes = [%u %u %u ...]\n", + nr_indices, nr_vertices, + indices[0], indices[1], indices[2]); + printf("ind space = %u, vert space = %u, space = %u\n", + nr_indices * 2, + nr_vertices * 4 * cell->vertex_info.size, + cell_batch_free_space(cell)); +#endif + + /* compute x/y bounding box */ + xmin = ymin = 
1e50; + xmax = ymax = -1e50; + for (i = min_index; i < nr_vertices; i++) { + const float *v = (float *) ((ubyte *) vertices + i * vertex_size); + if (v[0] < xmin) + xmin = v[0]; + if (v[0] > xmax) + xmax = v[0]; + if (v[1] < ymin) + ymin = v[1]; + if (v[1] > ymax) + ymax = v[1]; + } +#if 0 + printf("PPU Bounds %g, %g .. %g, %g\n", xmin, ymin, xmax, ymax); + fflush(stdout); +#endif + + if (cvbr->prim != PIPE_PRIM_TRIANGLES) + return; /* only render tris for now */ + + /* build/insert batch RENDER command */ + { + const uint index_bytes = ROUNDUP8(nr_indices * 2); + const uint vertex_bytes = nr_vertices * 4 * cell->vertex_info.size; + const uint batch_size = sizeof(struct cell_command_render) + index_bytes; + + struct cell_command_render *render + = (struct cell_command_render *) + cell_batch_alloc(cell, batch_size); + + render->opcode = CELL_CMD_RENDER; + render->prim_type = cvbr->prim; + + render->num_indexes = nr_indices; + render->min_index = min_index; + + /* append indices after render command */ + memcpy(render + 1, indices, nr_indices * 2); + + /* if there's room, append vertices after the indices, else leave + * vertices in the original/separate buffer. 
+ */ + render->vertex_size = 4 * cell->vertex_info.size; + render->num_verts = nr_vertices; + if (ALLOW_INLINE_VERTS && + min_index == 0 && + vertex_bytes + 16 <= cell_batch_free_space(cell)) { + /* vertex data inlined, after indices, at 16-byte boundary */ + void *dst = cell_batch_alloc_aligned(cell, vertex_bytes, 16); + memcpy(dst, vertices, vertex_bytes); + render->inline_verts = TRUE; + render->vertex_buf = ~0; + } + else { + /* vertex data in separate buffer */ + render->inline_verts = FALSE; + ASSERT(cvbr->vertex_buf >= 0); + render->vertex_buf = cvbr->vertex_buf; + } + + render->xmin = xmin; + render->ymin = ymin; + render->xmax = xmax; + render->ymax = ymax; + } + +#if 0 + /* helpful for debug */ + cell_flush_int(cell, CELL_FLUSH_WAIT); +#endif +} + + +static void +cell_vbuf_destroy(struct vbuf_render *vbr) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + cvbr->cell->vbuf_render = NULL; + FREE(cvbr); +} + + +/** + * Initialize the post-transform vertex buffer information for the given + * context. + */ +void +cell_init_vbuf(struct cell_context *cell) +{ + assert(cell->draw); + + cell->vbuf_render = CALLOC_STRUCT(cell_vbuf_render); + + /* The max number of indexes is what can fix into a batch buffer, + * minus the render and release-verts commands. 
+ */ + cell->vbuf_render->base.max_indices + = (CELL_BUFFER_SIZE + - sizeof(struct cell_command_render) + - sizeof(struct cell_command_release_verts)) + / sizeof(ushort); + cell->vbuf_render->base.max_vertex_buffer_bytes = CELL_BUFFER_SIZE; + + cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info; + cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices; + cell->vbuf_render->base.set_primitive = cell_vbuf_set_primitive; + cell->vbuf_render->base.draw = cell_vbuf_draw; + cell->vbuf_render->base.release_vertices = cell_vbuf_release_vertices; + cell->vbuf_render->base.destroy = cell_vbuf_destroy; + + cell->vbuf_render->cell = cell; +#if 1 + cell->vbuf_render->vertex_buf = ~0; +#endif + + cell->vbuf = draw_vbuf_stage(cell->draw, &cell->vbuf_render->base); +} diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.h b/src/gallium/drivers/cell/ppu/cell_vbuf.h new file mode 100644 index 0000000000..d265cbf770 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef CELL_VBUF_H +#define CELL_VBUF_H + + +struct cell_context; + +extern void +cell_init_vbuf(struct cell_context *cell); + + +#endif /* CELL_VBUF_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c new file mode 100644 index 0000000000..18969005b0 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -0,0 +1,343 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <inttypes.h> +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_format.h" + +#include "../auxiliary/draw/draw_context.h" +#include "../auxiliary/draw/draw_private.h" + +#include "cell_context.h" +#include "rtasm/rtasm_ppc_spe.h" + + +/** + * Emit a 4x4 matrix transpose operation + * + * \param p Function that the transpose operation is to be appended to + * \param row0 Register containing row 0 of the source matrix + * \param row1 Register containing row 1 of the source matrix + * \param row2 Register containing row 2 of the source matrix + * \param row3 Register containing row 3 of the source matrix + * \param dest_ptr Register containing the address of the destination matrix + * \param shuf_ptr Register containing the address of the shuffled data + * \param count Number of colums to actually be written to the destination + * + * \note + * This function assumes that the registers named by \c row0, \c row1, + * \c row2, and \c row3 are scratch and can be modified by the generated code. + * Furthermore, these registers will be released, via calls to + * \c release_register, by this function. + * + * \note + * This function requires that four temporary are available on entry. 
+ */ +static void +emit_matrix_transpose(struct spe_function *p, + unsigned row0, unsigned row1, unsigned row2, + unsigned row3, unsigned dest_ptr, + unsigned shuf_ptr, unsigned count) +{ + int shuf_hi = spe_allocate_available_register(p); + int shuf_lo = spe_allocate_available_register(p); + int t1 = spe_allocate_available_register(p); + int t2 = spe_allocate_available_register(p); + int t3; + int t4; + int col0; + int col1; + int col2; + int col3; + + + spe_lqd(p, shuf_hi, shuf_ptr, 3*16); + spe_lqd(p, shuf_lo, shuf_ptr, 4*16); + spe_shufb(p, t1, row0, row2, shuf_hi); + spe_shufb(p, t2, row0, row2, shuf_lo); + + + /* row0 and row2 are now no longer needed. Re-use those registers as + * temporaries. + */ + t3 = row0; + t4 = row2; + + spe_shufb(p, t3, row1, row3, shuf_hi); + spe_shufb(p, t4, row1, row3, shuf_lo); + + + /* row1 and row3 are now no longer needed. Re-use those registers as + * temporaries. + */ + col0 = row1; + col1 = row3; + + spe_shufb(p, col0, t1, t3, shuf_hi); + if (count > 1) { + spe_shufb(p, col1, t1, t3, shuf_lo); + } + + /* t1 and t3 are now no longer needed. Re-use those registers as + * temporaries. + */ + col2 = t1; + col3 = t3; + + if (count > 2) { + spe_shufb(p, col2, t2, t4, shuf_hi); + } + + if (count > 3) { + spe_shufb(p, col3, t2, t4, shuf_lo); + } + + + /* Store the results. Remember that the stqd instruction is encoded using + * the qword offset (stand-alone assemblers to the byte-offset to + * qword-offset conversion for you), so the byte-offset needs be divided by + * 16. + */ + switch (count) { + case 4: + spe_stqd(p, col3, dest_ptr, 3 * 16); + case 3: + spe_stqd(p, col2, dest_ptr, 2 * 16); + case 2: + spe_stqd(p, col1, dest_ptr, 1 * 16); + case 1: + spe_stqd(p, col0, dest_ptr, 0 * 16); + } + + + /* Release all of the temporary registers used. 
+ */ + spe_release_register(p, col0); + spe_release_register(p, col1); + spe_release_register(p, col2); + spe_release_register(p, col3); + spe_release_register(p, shuf_hi); + spe_release_register(p, shuf_lo); + spe_release_register(p, t2); + spe_release_register(p, t4); +} + + +static void +emit_fetch(struct spe_function *p, + unsigned in_ptr, unsigned *offset, + unsigned out_ptr, unsigned shuf_ptr, + enum pipe_format format) +{ + const unsigned count = (pf_size_x(format) != 0) + (pf_size_y(format) != 0) + + (pf_size_z(format) != 0) + (pf_size_w(format) != 0); + const unsigned type = pf_type(format); + const unsigned bytes = pf_size_x(format); + + int v0 = spe_allocate_available_register(p); + int v1 = spe_allocate_available_register(p); + int v2 = spe_allocate_available_register(p); + int v3 = spe_allocate_available_register(p); + int tmp = spe_allocate_available_register(p); + int float_zero = -1; + int float_one = -1; + float scale_signed = 0.0; + float scale_unsigned = 0.0; + + spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16); + spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16); + spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16); + spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16); + offset[0] += 4; + + switch (bytes) { + case 1: + scale_signed = 1.0f / 127.0f; + scale_unsigned = 1.0f / 255.0f; + spe_lqd(p, tmp, shuf_ptr, 1 * 16); + spe_shufb(p, v0, v0, v0, tmp); + spe_shufb(p, v1, v1, v1, tmp); + spe_shufb(p, v2, v2, v2, tmp); + spe_shufb(p, v3, v3, v3, tmp); + break; + case 2: + scale_signed = 1.0f / 32767.0f; + scale_unsigned = 1.0f / 65535.0f; + spe_lqd(p, tmp, shuf_ptr, 2 * 16); + spe_shufb(p, v0, v0, v0, tmp); + spe_shufb(p, v1, v1, v1, tmp); + spe_shufb(p, v2, v2, v2, tmp); + spe_shufb(p, v3, v3, v3, tmp); + break; + case 4: + scale_signed = 1.0f / 2147483647.0f; + scale_unsigned = 1.0f / 4294967295.0f; + break; + default: + assert(0); + break; + } + + switch (type) { + case PIPE_FORMAT_TYPE_FLOAT: + break; + case PIPE_FORMAT_TYPE_UNORM: + spe_ilhu(p, tmp, ((unsigned) 
scale_unsigned) >> 16); + spe_iohl(p, tmp, ((unsigned) scale_unsigned) & 0x0ffff); + spe_cuflt(p, v0, v0, 0); + spe_fm(p, v0, v0, tmp); + break; + case PIPE_FORMAT_TYPE_SNORM: + spe_ilhu(p, tmp, ((unsigned) scale_signed) >> 16); + spe_iohl(p, tmp, ((unsigned) scale_signed) & 0x0ffff); + spe_csflt(p, v0, v0, 0); + spe_fm(p, v0, v0, tmp); + break; + case PIPE_FORMAT_TYPE_USCALED: + spe_cuflt(p, v0, v0, 0); + break; + case PIPE_FORMAT_TYPE_SSCALED: + spe_csflt(p, v0, v0, 0); + break; + } + + + if (count < 4) { + float_one = spe_allocate_available_register(p); + spe_il(p, float_one, 1); + spe_cuflt(p, float_one, float_one, 0); + + if (count < 3) { + float_zero = spe_allocate_available_register(p); + spe_il(p, float_zero, 0); + } + } + + spe_release_register(p, tmp); + + emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count); + + switch (count) { + case 1: + spe_stqd(p, float_zero, out_ptr, 1 * 16); + case 2: + spe_stqd(p, float_zero, out_ptr, 2 * 16); + case 3: + spe_stqd(p, float_one, out_ptr, 3 * 16); + } + + if (float_zero != -1) { + spe_release_register(p, float_zero); + } + + if (float_one != -1) { + spe_release_register(p, float_one); + } +} + + +void cell_update_vertex_fetch(struct draw_context *draw) +{ +#if 0 + struct cell_context *const cell = + (struct cell_context *) draw->driver_private; + struct spe_function *p = &cell->attrib_fetch; + unsigned function_index[PIPE_MAX_ATTRIBS]; + unsigned unique_attr_formats; + int out_ptr; + int in_ptr; + int shuf_ptr; + unsigned i; + unsigned j; + + + /* Determine how many unique input attribute formats there are. At the + * same time, store the index of the lowest numbered attribute that has + * the same format as any non-unique format. 
+ */ + unique_attr_formats = 1; + function_index[0] = 0; + for (i = 1; i < draw->vertex_fetch.nr_attrs; i++) { + const enum pipe_format curr_fmt = draw->vertex_element[i].src_format; + + for (j = 0; j < i; j++) { + if (curr_fmt == draw->vertex_element[j].src_format) { + break; + } + } + + if (j == i) { + unique_attr_formats++; + } + + function_index[i] = j; + } + + + /* Each fetch function can be a maximum of 34 instructions (note: this is + * actually a slight over-estimate). + */ + spe_init_func(p, 34 * SPE_INST_SIZE * unique_attr_formats); + + + /* Allocate registers for the function's input parameters. + */ + out_ptr = spe_allocate_register(p, 3); + in_ptr = spe_allocate_register(p, 4); + shuf_ptr = spe_allocate_register(p, 5); + + + /* Generate code for the individual attribute fetch functions. + */ + for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) { + unsigned offset; + + if (function_index[i] == i) { + cell->attrib_fetch_offsets[i] = (unsigned) ((void *) p->csr + - (void *) p->store); + + offset = 0; + emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr, + draw->vertex_element[i].src_format); + spe_bi(p, 0, 0, 0); + + /* Round up to the next 16-byte boundary. + */ + if ((((unsigned) p->store) & 0x0f) != 0) { + const unsigned align = ((unsigned) p->store) & 0x0f; + p->store = (uint32_t *) (((void *) p->store) + align); + } + } else { + /* Use the same function entry-point as a previously seen attribute + * with the same format. + */ + cell->attrib_fetch_offsets[i] = + cell->attrib_fetch_offsets[function_index[i]]; + } + } +#else + assert(0); +#endif +} diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c new file mode 100644 index 0000000000..2b10c116fa --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -0,0 +1,146 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file cell_vertex_shader.c + * Vertex shader interface routines for Cell. + * + * \author Ian Romanick <idr@us.ibm.com> + */ + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" +#include "util/u_math.h" + +#include "cell_context.h" +#include "cell_draw_arrays.h" +#include "cell_flush.h" +#include "cell_spu.h" +#include "cell_batch.h" + +#include "cell/common.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" + +/** + * Run the vertex shader on all vertices in the vertex queue. + * Called by the draw module when the vertx cache needs to be flushed. 
*/
void
cell_vertex_shader_queue_flush(struct draw_context *draw)
{
   /* The SPU vertex-shader path below is compiled out; as built, calling
    * this function only trips the assert in the #else branch.
    */
#if 0
   struct cell_context *const cell =
       (struct cell_context *) draw->driver_private;
   struct cell_command_vs *const vs = &cell_global.command[0].vs;
   uint64_t *batch;
   struct cell_array_info *array_info;
   unsigned i, j;
   struct cell_attribute_fetch_code *cf;

   assert(draw->vs.queue_nr != 0);

   /* XXX: do this on statechange:
    */
   draw_update_vertex_fetch(draw);
   cell_update_vertex_fetch(draw);


   /* Tell the SPUs where the generated attribute-fetch code lives. */
   batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*cf));
   batch[0] = CELL_CMD_STATE_ATTRIB_FETCH;
   cf = (struct cell_attribute_fetch_code *) (&batch[1]);
   cf->base = (uint64_t) cell->attrib_fetch.store;
   cf->size = ROUNDUP16((unsigned)((void *) cell->attrib_fetch.csr
				   - (void *) cell->attrib_fetch.store));


   /* One array-info command per vertex attribute. */
   for (i = 0; i < draw->vertex_fetch.nr_attrs; i++) {
      const enum pipe_format format = draw->vertex_element[i].src_format;
      const unsigned count = ((pf_size_x(format) != 0)
			      + (pf_size_y(format) != 0)
			      + (pf_size_z(format) != 0)
			      + (pf_size_w(format) != 0));
      const unsigned size = pf_size_x(format) * count;

      batch = cell_batch_alloc(cell, sizeof(batch[0]) + sizeof(*array_info));

      batch[0] = CELL_CMD_STATE_VS_ARRAY_INFO;

      array_info = (struct cell_array_info *) &batch[1];
      assert(draw->vertex_fetch.src_ptr[i] != NULL);
      array_info->base = (uintptr_t) draw->vertex_fetch.src_ptr[i];
      array_info->attr = i;
      array_info->pitch = draw->vertex_fetch.pitch[i];
      array_info->size = size;
      array_info->function_offset = cell->attrib_fetch_offsets[i];
   }

   batch = cell_batch_alloc(cell, sizeof(batch[0])
                            + sizeof(struct pipe_viewport_state));
   batch[0] = CELL_CMD_STATE_VIEWPORT;
   (void) memcpy(&batch[1], &draw->viewport,
                 sizeof(struct pipe_viewport_state));

   {
      uint64_t uniforms = (uintptr_t) draw->user.constants;

      batch = cell_batch_alloc(cell, 2 *sizeof(batch[0]));
      batch[0] = CELL_CMD_STATE_UNIFORMS;
      batch[1] = uniforms;
   }

   cell_batch_flush(cell);

   vs->opcode = CELL_CMD_VS_EXECUTE;
   vs->nr_attrs = draw->vertex_fetch.nr_attrs;

   (void) memcpy(vs->plane, draw->plane, sizeof(draw->plane));
   vs->nr_planes = draw->nr_planes;

   /* Hand the queue to SPU 0 in chunks of SPU_VERTS_PER_BATCH and wait
    * for each chunk.
    */
   for (i = 0; i < draw->vs.queue_nr; i += SPU_VERTS_PER_BATCH) {
      const unsigned n = MIN2(SPU_VERTS_PER_BATCH, draw->vs.queue_nr - i);

      for (j = 0; j < n; j++) {
         vs->elts[j] = draw->vs.queue[i + j].elt;
         vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex;
      }

      /* Pad a short final chunk.
       * NOTE(review): the padding still reads draw->vs.queue[i + j] for
       * entries at or past queue_nr -- check the queue capacity before
       * re-enabling this code.
       */
      for (/* empty */; j < SPU_VERTS_PER_BATCH; j++) {
         vs->elts[j] = vs->elts[0];
         vs->vOut[j] = (uintptr_t) draw->vs.queue[i + j].vertex;
      }

      vs->num_elts = n;
      send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE);

      cell_flush_int(cell, CELL_FLUSH_WAIT);
   }

   draw->vs.post_nr = draw->vs.queue_nr;
   draw->vs.queue_nr = 0;
#else
   assert(0);
#endif
}
diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.c b/src/gallium/drivers/cell/ppu/cell_winsys.c
new file mode 100644
index 0000000000..d570bbd2f9
--- /dev/null
+++ b/src/gallium/drivers/cell/ppu/cell_winsys.c
@@ -0,0 +1,40 @@
/**************************************************************************
 *
 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "cell_winsys.h" + + +struct cell_winsys * +cell_get_winsys(uint format) +{ + struct cell_winsys *cws = CALLOC_STRUCT(cell_winsys); + if (cws) + cws->preferredFormat = format; + return cws; +} diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.h b/src/gallium/drivers/cell/ppu/cell_winsys.h new file mode 100644 index 0000000000..ae2af5696b --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_winsys.h @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CELL_WINSYS_H +#define CELL_WINSYS_H + +#include "pipe/p_compiler.h" + + +/** + * Very simple winsys at this time. + * Will probably eventually add SPU control info. + */ +struct cell_winsys +{ + uint preferredFormat; +}; + + +extern struct cell_winsys * +cell_get_winsys(uint format); + + + +#endif diff --git a/src/gallium/drivers/cell/spu/.gitignore b/src/gallium/drivers/cell/spu/.gitignore new file mode 100644 index 0000000000..2be9a2d324 --- /dev/null +++ b/src/gallium/drivers/cell/spu/.gitignore @@ -0,0 +1 @@ +g3d_spu diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile new file mode 100644 index 0000000000..116453b79c --- /dev/null +++ b/src/gallium/drivers/cell/spu/Makefile @@ -0,0 +1,82 @@ +# Gallium3D Cell driver: SPU code + +# This makefile builds the g3d_spu.a file that's linked into the +# PPU code/library. + + +TOP = ../../../../.. 
+include $(TOP)/configs/current + + +PROG = g3d + +PROG_SPU = $(PROG)_spu +PROG_SPU_A = $(PROG)_spu.a +PROG_SPU_EMBED_O = $(PROG)_spu-embed.o + + +SOURCES = \ + spu_command.c \ + spu_dcache.c \ + spu_funcs.c \ + spu_main.c \ + spu_per_fragment_op.c \ + spu_render.c \ + spu_texture.c \ + spu_tile.c \ + spu_tri.c + +OLD_SOURCES = \ + spu_exec.c \ + spu_util.c \ + spu_vertex_fetch.c \ + spu_vertex_shader.c + + +SPU_OBJECTS = $(SOURCES:.c=.o) \ + +SPU_ASM_OUT = $(SOURCES:.c=.s) \ + +INCLUDE_DIRS = \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/auxiliary \ + -I$(TOP)/src/gallium/drivers + + +.c.o: + $(SPU_CC) $(SPU_CFLAGS) -c $< + +.c.s: + $(SPU_CC) $(SPU_CFLAGS) -O3 -S $< + + +# The .a file will be linked into the main/PPU executable +default: $(PROG_SPU_A) + +$(PROG_SPU_A): $(PROG_SPU_EMBED_O) + $(SPU_AR) $(SPU_AR_FLAGS) $(PROG_SPU_A) $(PROG_SPU_EMBED_O) + +$(PROG_SPU_EMBED_O): $(PROG_SPU) + $(SPU_EMBED) $(SPU_EMBED_FLAGS) $(PROG_SPU) $(PROG_SPU) $(PROG_SPU_EMBED_O) + +$(PROG_SPU): $(SPU_OBJECTS) + $(SPU_CC) -o $(PROG_SPU) $(SPU_OBJECTS) $(SPU_LFLAGS) + + + +asmfiles: $(SPU_ASM_OUT) + + +clean: + rm -f *~ *.o *.a *.d *.s $(PROG_SPU) + + + +depend: $(SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(SOURCES) 2> /dev/null + +include depend + diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h new file mode 100644 index 0000000000..d7ce005524 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_colorpack.h @@ -0,0 +1,145 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + + +#ifndef SPU_COLORPACK_H +#define SPU_COLORPACK_H + + +#include <transpose_matrix4x4.h> +#include <spu_intrinsics.h> + + +static INLINE unsigned int +spu_pack_R8G8B8A8(vector float rgba) +{ + vector unsigned int out = spu_convtu(rgba, 32); + + out = spu_shuffle(out, out, ((vector unsigned char) { + 0, 4, 8, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 }) ); + + return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_A8R8G8B8(vector float rgba) +{ + vector unsigned int out = spu_convtu(rgba, 32); + out = spu_shuffle(out, out, ((vector unsigned char) { + 12, 0, 4, 8, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}) ); + return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_B8G8R8A8(vector float rgba) +{ + vector unsigned int out = spu_convtu(rgba, 32); + out = spu_shuffle(out, out, ((vector unsigned char) { + 8, 4, 0, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}) ); + return spu_extract(out, 0); +} + + +static INLINE unsigned int +spu_pack_color_shuffle(vector float rgba, vector unsigned char shuffle) +{ + vector unsigned int out = spu_convtu(rgba, 32); + out = spu_shuffle(out, out, shuffle); + return spu_extract(out, 0); +} + + +static INLINE vector float +spu_unpack_B8G8R8A8(uint color) +{ + vector unsigned int color_u4 = spu_splats(color); + color_u4 = spu_shuffle(color_u4, color_u4, + ((vector unsigned char) { + 2, 2, 2, 2, + 1, 1, 1, 1, + 0, 0, 0, 0, + 3, 3, 3, 3}) ); + return spu_convtf(color_u4, 32); +} + + +static INLINE vector float +spu_unpack_A8R8G8B8(uint color) +{ + vector unsigned int color_u4 = spu_splats(color); + color_u4 = spu_shuffle(color_u4, color_u4, + ((vector unsigned char) { + 1, 1, 1, 1, + 2, 2, 2, 2, + 3, 3, 3, 3, + 0, 0, 0, 0}) ); + return spu_convtf(color_u4, 32); +} + + +/** + * \param color_in - array of 32-bit packed ARGB colors + * \param color_out - returns float colors in RRRR, GGGG, BBBB, AAAA order + */ +static INLINE void 
+spu_unpack_A8R8G8B8_transpose4(const vector unsigned int color_in[4], + vector float color_out[4]) +{ + vector unsigned int c0; + + c0 = spu_shuffle(color_in[0], color_in[0], + ((vector unsigned char) { + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); + color_out[0] = spu_convtf(c0, 32); + + c0 = spu_shuffle(color_in[1], color_in[1], + ((vector unsigned char) { + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); + color_out[1] = spu_convtf(c0, 32); + + c0 = spu_shuffle(color_in[2], color_in[2], + ((vector unsigned char) { + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); + color_out[2] = spu_convtf(c0, 32); + + c0 = spu_shuffle(color_in[3], color_in[3], + ((vector unsigned char) { + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); + color_out[3] = spu_convtf(c0, 32); + + _transpose_matrix4x4(color_out, color_out); +} + + + +#endif /* SPU_COLORPACK_H */ diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c new file mode 100644 index 0000000000..a6ed29ea63 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -0,0 +1,757 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * SPU command processing code + */ + + +#include <stdio.h> +#include <libmisc.h> + +#include "pipe/p_defines.h" + +#include "spu_command.h" +#include "spu_main.h" +#include "spu_render.h" +#include "spu_per_fragment_op.h" +#include "spu_texture.h" +#include "spu_tile.h" +#include "spu_vertex_shader.h" +#include "spu_dcache.h" +#include "cell/common.h" + + +struct spu_vs_context draw; + + +/** + * Buffers containing dynamically generated SPU code: + */ +static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] + ALIGN16_ATTRIB; + + + +static INLINE int +align(int value, int alignment) +{ + return (value + alignment - 1) & ~(alignment - 1); +} + + + +/** + * Tell the PPU that this SPU has finished copying a buffer to + * local store and that it may be reused by the PPU. + * This is done by writting a 16-byte batch-buffer-status block back into + * main memory (in cell_context->buffer_status[]). 
+ */ +static void +release_buffer(uint buffer) +{ + /* Evidently, using less than a 16-byte status doesn't work reliably */ + static const vector unsigned int status = {CELL_BUFFER_STATUS_FREE, + CELL_BUFFER_STATUS_FREE, + CELL_BUFFER_STATUS_FREE, + CELL_BUFFER_STATUS_FREE}; + const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); + uint *dst = spu.init.buffer_status + index; + + ASSERT(buffer < CELL_NUM_BUFFERS); + + mfc_put((void *) &status, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + sizeof(status), /* size */ + TAG_MISC, /* tag is unimportant */ + 0, /* tid */ + 0 /* rid */); +} + + +/** + * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory. + * There's a qword of status per SPU. + */ +static void +cmd_fence(struct cell_command_fence *fence_cmd) +{ + static const vector unsigned int status = {CELL_FENCE_SIGNALLED, + CELL_FENCE_SIGNALLED, + CELL_FENCE_SIGNALLED, + CELL_FENCE_SIGNALLED}; + uint *dst = (uint *) fence_cmd->fence; + dst += 4 * spu.init.id; /* main store/memory address, not local store */ + + mfc_put((void *) &status, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + sizeof(status), /* size */ + TAG_FENCE, /* tag */ + 0, /* tid */ + 0 /* rid */); +} + + +static void +cmd_clear_surface(const struct cell_command_clear_surface *clear) +{ + D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); + + if (clear->surface == 0) { + spu.fb.color_clear_value = clear->value; + if (spu.init.debug_flags & CELL_DEBUG_CHECKER) { + uint x = (spu.init.id << 4) | (spu.init.id << 12) | + (spu.init.id << 20) | (spu.init.id << 28); + spu.fb.color_clear_value ^= x; + } + } + else { + spu.fb.depth_clear_value = clear->value; + } + +#define CLEAR_OPT 1 +#if CLEAR_OPT + + /* Simply set all tiles' status to CLEAR. + * When we actually begin rendering into a tile, we'll initialize it to + * the clear value. 
If any tiles go untouched during the frame, + * really_clear_tiles() will set them to the clear value. + */ + if (clear->surface == 0) { + memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); + } + else { + memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); + } + +#else + + /* + * This path clears the whole framebuffer to the clear color right now. + */ + + /* + printf("SPU: %s num=%d w=%d h=%d\n", + __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles); + */ + + /* init a single tile to the clear value */ + if (clear->surface == 0) { + clear_c_tile(&spu.ctile); + } + else { + clear_z_tile(&spu.ztile); + } + + /* walk over my tiles, writing the 'clear' tile's data */ + { + const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + uint i; + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (clear->surface == 0) + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); + else + put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); + } + } + + if (spu.init.debug_flags & CELL_DEBUG_SYNC) { + wait_on_mask(1 << TAG_SURFACE_CLEAR); + } + +#endif /* CLEAR_OPT */ + + D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n"); +} + + +static void +cmd_release_verts(const struct cell_command_release_verts *release) +{ + D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", release->vertex_buf); + ASSERT(release->vertex_buf != ~0U); + release_buffer(release->vertex_buf); +} + + +/** + * Process a CELL_CMD_STATE_FRAGMENT_OPS command. + * This involves installing new fragment ops SPU code. + * If this function is never called, we'll use a regular C fallback function + * for fragment processing. 
+ */ +static void +cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) +{ + static int warned = 0; + + D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n"); + /* Copy SPU code from batch buffer to spu buffer */ + memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); + /* Copy state info (for fallback case only) */ + memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); + memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); + memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color)); + + /* Parity twist! For now, always use the fallback code by default, + * only switching to codegen when specifically requested. This + * allows us to develop freely without risking taking down the + * branch. + * + * Later, the parity of this check will be reversed, so that + * codegen is *always* used, unless we specifically indicate that + * we don't want it. + * + * Eventually, the option will be removed completely, because in + * final code we'll always use codegen and won't even provide the + * raw state records that the fallback code requires. 
+ */ + if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) { + spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; + } + else { + /* otherwise, the default fallback code remains in place */ + if (!warned) { + fprintf(stderr, "Cell Warning: using fallback per-fragment code\n"); + warned = 1; + } + } + + spu.read_depth = spu.depth_stencil_alpha.depth.enabled; + spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled; +} + + +static void +cmd_state_fragment_program(const struct cell_command_fragment_program *fp) +{ + D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n"); + /* Copy SPU code from batch buffer to spu buffer */ + memcpy(spu.fragment_program_code, fp->code, + SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); +#if 01 + /* Point function pointer at new code */ + spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code; +#endif +} + + +static uint +cmd_state_fs_constants(const uint64_t *buffer, uint pos) +{ + const uint num_const = buffer[pos + 1]; + const float *constants = (const float *) &buffer[pos + 2]; + uint i; + + D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const); + + /* Expand each float to float[4] for SOA execution */ + for (i = 0; i < num_const; i++) { + D_PRINTF(CELL_DEBUG_CMD, " const[%u] = %f\n", i, constants[i]); + spu.constants[i] = spu_splats(constants[i]); + } + + /* return new buffer pos (in 8-byte words) */ + return pos + 2 + num_const / 2; +} + + +static void +cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) +{ + D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", + cmd->width, + cmd->height, + cmd->color_start, + cmd->color_format, + cmd->depth_format); + + ASSERT_ALIGN16(cmd->color_start); + ASSERT_ALIGN16(cmd->depth_start); + + spu.fb.color_start = cmd->color_start; + spu.fb.depth_start = cmd->depth_start; + spu.fb.color_format = cmd->color_format; + spu.fb.depth_format = cmd->depth_format; + spu.fb.width = cmd->width; + 
spu.fb.height = cmd->height; + spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE; + spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE; + + switch (spu.fb.depth_format) { + case PIPE_FORMAT_Z32_UNORM: + spu.fb.zsize = 4; + spu.fb.zscale = (float) 0xffffffffu; + break; + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + spu.fb.zsize = 4; + spu.fb.zscale = (float) 0x00ffffffu; + break; + case PIPE_FORMAT_Z16_UNORM: + spu.fb.zsize = 2; + spu.fb.zscale = (float) 0xffffu; + break; + default: + spu.fb.zsize = 0; + break; + } +} + + +/** + * Tex texture mask_s/t and scale_s/t fields depend on the texture size and + * sampler wrap modes. + */ +static void +update_tex_masks(struct spu_texture *texture, + const struct pipe_sampler_state *sampler) +{ + uint i; + + for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { + int width = texture->level[i].width; + int height = texture->level[i].height; + + if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) + texture->level[i].mask_s = spu_splats(width - 1); + else + texture->level[i].mask_s = spu_splats(~0); + + if (sampler->wrap_t == PIPE_TEX_WRAP_REPEAT) + texture->level[i].mask_t = spu_splats(height - 1); + else + texture->level[i].mask_t = spu_splats(~0); + + if (sampler->normalized_coords) { + texture->level[i].scale_s = spu_splats((float) width); + texture->level[i].scale_t = spu_splats((float) height); + } + else { + texture->level[i].scale_s = spu_splats(1.0f); + texture->level[i].scale_t = spu_splats(1.0f); + } + } +} + + +static void +cmd_state_sampler(const struct cell_command_sampler *sampler) +{ + uint unit = sampler->unit; + + D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit); + + spu.sampler[unit] = sampler->state; + + switch (spu.sampler[unit].min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear; + break; + case PIPE_TEX_FILTER_ANISO: + /* fall-through, for now */ + 
case PIPE_TEX_FILTER_NEAREST: + spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest; + break; + default: + ASSERT(0); + } + + switch (spu.sampler[sampler->unit].mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear; + break; + case PIPE_TEX_FILTER_ANISO: + /* fall-through, for now */ + case PIPE_TEX_FILTER_NEAREST: + spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest; + break; + default: + ASSERT(0); + } + + switch (spu.sampler[sampler->unit].min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + case PIPE_TEX_MIPFILTER_LINEAR: + spu.sample_texture_2d[unit] = sample_texture_2d_lod; + break; + case PIPE_TEX_MIPFILTER_NONE: + spu.sample_texture_2d[unit] = spu.mag_sample_texture_2d[unit]; + break; + default: + ASSERT(0); + } + + update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); +} + + +static void +cmd_state_texture(const struct cell_command_texture *texture) +{ + const uint unit = texture->unit; + uint i; + + D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit); + + spu.texture[unit].max_level = 0; + spu.texture[unit].target = texture->target; + + for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { + uint width = texture->width[i]; + uint height = texture->height[i]; + uint depth = texture->depth[i]; + + D_PRINTF(CELL_DEBUG_CMD, " LEVEL %u: at %p size[0] %u x %u\n", i, + texture->start[i], texture->width[i], texture->height[i]); + + spu.texture[unit].level[i].start = texture->start[i]; + spu.texture[unit].level[i].width = width; + spu.texture[unit].level[i].height = height; + spu.texture[unit].level[i].depth = depth; + + spu.texture[unit].level[i].tiles_per_row = + (width + TILE_SIZE - 1) / TILE_SIZE; + + spu.texture[unit].level[i].bytes_per_image = + 4 * align(width, TILE_SIZE) * align(height, TILE_SIZE) * depth; + + spu.texture[unit].level[i].max_s = spu_splats((int) width - 1); + spu.texture[unit].level[i].max_t = spu_splats((int) height - 1); + + if (texture->start[i]) + 
spu.texture[unit].max_level = i; + } + + update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); +} + + +static void +cmd_state_vertex_info(const struct vertex_info *vinfo) +{ + D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); + ASSERT(vinfo->num_attribs >= 1); + ASSERT(vinfo->num_attribs <= 8); + memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); +} + + +static void +cmd_state_vs_array_info(const struct cell_array_info *vs_info) +{ + const unsigned attr = vs_info->attr; + + ASSERT(attr < PIPE_MAX_ATTRIBS); + draw.vertex_fetch.src_ptr[attr] = vs_info->base; + draw.vertex_fetch.pitch[attr] = vs_info->pitch; + draw.vertex_fetch.size[attr] = vs_info->size; + draw.vertex_fetch.code_offset[attr] = vs_info->function_offset; + draw.vertex_fetch.dirty = 1; +} + + +static void +cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code) +{ + mfc_get(attribute_fetch_code_buffer, + (unsigned int) code->base, /* src */ + code->size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + draw.vertex_fetch.code = attribute_fetch_code_buffer; +} + + +static void +cmd_finish(void) +{ + D_PRINTF(CELL_DEBUG_CMD, "FINISH\n"); + really_clear_tiles(0); + /* wait for all outstanding DMAs to finish */ + mfc_write_tag_mask(~0); + mfc_read_tag_status_all(); + /* send mbox message to PPU */ + spu_write_out_mbox(CELL_CMD_FINISH); +} + + +/** + * Execute a batch of commands which was sent to us by the PPU. + * See the cell_emit_state.c code to see where the commands come from. + * + * The opcode param encodes the location of the buffer and its size. 
+ */ +static void +cmd_batch(uint opcode) +{ + const uint buf = (opcode >> 8) & 0xff; + uint size = (opcode >> 16); + uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB; + const unsigned usize = size / sizeof(buffer[0]); + uint pos; + + D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n", + buf, size, spu.init.buffers[buf]); + + ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); + + ASSERT_ALIGN16(spu.init.buffers[buf]); + + size = ROUNDUP16(size); + + ASSERT_ALIGN16(spu.init.buffers[buf]); + + mfc_get(buffer, /* dest */ + (unsigned int) spu.init.buffers[buf], /* src */ + size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + /* Tell PPU we're done copying the buffer to local store */ + D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf); + release_buffer(buf); + + /* + * Loop over commands in the batch buffer + */ + for (pos = 0; pos < usize; /* no incr */) { + switch (buffer[pos]) { + /* + * rendering commands + */ + case CELL_CMD_CLEAR_SURFACE: + { + struct cell_command_clear_surface *clr + = (struct cell_command_clear_surface *) &buffer[pos]; + cmd_clear_surface(clr); + pos += sizeof(*clr) / 8; + } + break; + case CELL_CMD_RENDER: + { + struct cell_command_render *render + = (struct cell_command_render *) &buffer[pos]; + uint pos_incr; + cmd_render(render, &pos_incr); + pos += pos_incr; + } + break; + /* + * state-update commands + */ + case CELL_CMD_STATE_FRAMEBUFFER: + { + struct cell_command_framebuffer *fb + = (struct cell_command_framebuffer *) &buffer[pos]; + cmd_state_framebuffer(fb); + pos += sizeof(*fb) / 8; + } + break; + case CELL_CMD_STATE_FRAGMENT_OPS: + { + struct cell_command_fragment_ops *fops + = (struct cell_command_fragment_ops *) &buffer[pos]; + cmd_state_fragment_ops(fops); + pos += sizeof(*fops) / 8; + } + break; + case CELL_CMD_STATE_FRAGMENT_PROGRAM: + { + struct cell_command_fragment_program *fp + = (struct cell_command_fragment_program *) &buffer[pos]; + 
cmd_state_fragment_program(fp); + pos += sizeof(*fp) / 8; + } + break; + case CELL_CMD_STATE_FS_CONSTANTS: + pos = cmd_state_fs_constants(buffer, pos); + break; + case CELL_CMD_STATE_RASTERIZER: + { + struct cell_command_rasterizer *rast = + (struct cell_command_rasterizer *) &buffer[pos]; + spu.rasterizer = rast->rasterizer; + pos += sizeof(*rast) / 8; + } + break; + case CELL_CMD_STATE_SAMPLER: + { + struct cell_command_sampler *sampler + = (struct cell_command_sampler *) &buffer[pos]; + cmd_state_sampler(sampler); + pos += sizeof(*sampler) / 8; + } + break; + case CELL_CMD_STATE_TEXTURE: + { + struct cell_command_texture *texture + = (struct cell_command_texture *) &buffer[pos]; + cmd_state_texture(texture); + pos += sizeof(*texture) / 8; + } + break; + case CELL_CMD_STATE_VERTEX_INFO: + cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8); + break; + case CELL_CMD_STATE_VIEWPORT: + (void) memcpy(& draw.viewport, &buffer[pos+1], + sizeof(struct pipe_viewport_state)); + pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8); + break; + case CELL_CMD_STATE_UNIFORMS: + draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1]; + pos += 2; + break; + case CELL_CMD_STATE_VS_ARRAY_INFO: + cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); + break; + case CELL_CMD_STATE_BIND_VS: +#if 0 + spu_bind_vertex_shader(&draw, + (struct cell_shader_info *) &buffer[pos+1]); +#endif + pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8); + break; + case CELL_CMD_STATE_ATTRIB_FETCH: + cmd_state_attrib_fetch((struct cell_attribute_fetch_code *) + &buffer[pos+1]); + pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8); + break; + /* + * misc commands + */ + case CELL_CMD_FINISH: + cmd_finish(); + pos += 1; + break; + case CELL_CMD_FENCE: + { + struct cell_command_fence *fence_cmd = + (struct 
cell_command_fence *) &buffer[pos]; + cmd_fence(fence_cmd); + pos += sizeof(*fence_cmd) / 8; + } + break; + case CELL_CMD_RELEASE_VERTS: + { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) &buffer[pos]; + cmd_release_verts(release); + pos += sizeof(*release) / 8; + } + break; + case CELL_CMD_FLUSH_BUFFER_RANGE: { + struct cell_buffer_range *br = (struct cell_buffer_range *) + &buffer[pos+1]; + + spu_dcache_mark_dirty((unsigned) br->base, br->size); + pos += (1 + ROUNDUP8(sizeof(struct cell_buffer_range)) / 8); + break; + } + default: + printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]); + ASSERT(0); + break; + } + } + + D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n"); +} + + +#define PERF 0 + + +/** + * Main loop for SPEs: Get a command, execute it, repeat. + */ +void +command_loop(void) +{ + int exitFlag = 0; + uint t0, t1; + + D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n"); + + while (!exitFlag) { + unsigned opcode; + + D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n"); + + if (PERF) + spu_write_decrementer(~0); + + /* read/wait from mailbox */ + opcode = (unsigned int) spu_read_in_mbox(); + D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode); + + if (PERF) + t0 = spu_read_decrementer(); + + switch (opcode & CELL_CMD_OPCODE_MASK) { + case CELL_CMD_EXIT: + D_PRINTF(CELL_DEBUG_CMD, "EXIT\n"); + exitFlag = 1; + break; + case CELL_CMD_VS_EXECUTE: +#if 0 + spu_execute_vertex_shader(&draw, &cmd.vs); +#endif + break; + case CELL_CMD_BATCH: + cmd_batch(opcode); + break; + default: + printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK); + } + + if (PERF) { + t1 = spu_read_decrementer(); + printf("wait mbox time: %gms batch time: %gms\n", + (~0u - t0) * spu.init.inv_timebase, + (t0 - t1) * spu.init.inv_timebase); + } + } + + D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n"); + + if (spu.init.debug_flags & CELL_DEBUG_CACHE) + spu_dcache_report(); +} diff --git a/src/gallium/drivers/cell/spu/spu_command.h 
b/src/gallium/drivers/cell/spu/spu_command.h new file mode 100644 index 0000000000..853e9aa549 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_command.h @@ -0,0 +1,7 @@ + + + +extern void +command_loop(void); + + diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c new file mode 100644 index 0000000000..a6d67634fd --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_dcache.c @@ -0,0 +1,145 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "cell/common.h" +#include "spu_main.h" +#include "spu_dcache.h" + +#define CACHELINE_LOG2SIZE 7 +#define LINE_SIZE (1U << 7) +#define ALIGN_MASK (~(LINE_SIZE - 1)) + +#define CACHE_NAME data +#define CACHED_TYPE qword +#define CACHE_TYPE CACHE_TYPE_RO +#define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0) +#define CACHE_LOG2NNWAY 2 +#define CACHE_LOG2NSETS 6 +#ifdef DEBUG +#define CACHE_STATS 1 +#endif +#include <cache-api.h> + +/* Yes folks, this is ugly. + */ +#undef CACHE_NWAY +#undef CACHE_NSETS +#define CACHE_NAME data +#define CACHE_NWAY 4 +#define CACHE_NSETS (1U << 6) + + +/** + * Fetch between arbitrary number of bytes from an unaligned address + * + * \param dst Destination data buffer + * \param ea Main memory effective address of source data + * \param size Number of bytes to read + * + * \warning + * As is hinted by the type of the \c dst pointer, this function writes + * multiples of 16-bytes. + */ +void +spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size) +{ + const int shift = ea & 0x0f; + const unsigned read_size = ROUNDUP16(size + shift); + const unsigned last_read = ROUNDUP16(ea + size); + const qword *const last_write = dst + (ROUNDUP16(size) / 16); + unsigned i; + + + if (shift == 0) { + /* Data is already aligned. Fetch directly into the destination buffer. + */ + for (i = 0; i < size; i += 16) { + *(dst++) = cache_rd(data, ea + i); + } + } else { + qword hi; + + + /* Please exercise extreme caution when modifying this code. This code + * must not read past the end of the page containing the source data, + * and it must not write more than ((size + 15) / 16) qwords to the + * destination buffer. 
+ */ + ea &= ~0x0f; + hi = cache_rd(data, ea); + for (i = 16; i < read_size; i += 16) { + qword lo = cache_rd(data, ea + i); + + *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), + (qword) spu_rlmaskqwbyte(lo, shift - 16)); + hi = lo; + } + + if (dst != last_write) { + *(dst++) = si_or((qword) spu_slqwbyte(hi, shift), si_il(0)); + } + } + + ASSERT((ea + i) == last_read); + ASSERT(dst == last_write); +} + + +/** + * Notify the cache that a range of main memory may have been modified + */ +void +spu_dcache_mark_dirty(unsigned ea, unsigned size) +{ + unsigned i; + const unsigned aligned_start = (ea & ALIGN_MASK); + const unsigned aligned_end = (ea + size + (LINE_SIZE - 1)) + & ALIGN_MASK; + + + for (i = 0; i < (CACHE_NWAY * CACHE_NSETS); i++) { + const unsigned entry = __cache_dir[i]; + const unsigned addr = entry & ~0x0f; + + __cache_dir[i] = ((addr >= aligned_start) && (addr < aligned_end)) + ? (entry & ~CACHELINE_VALID) : entry; + } +} + + +/** + * Print cache utilization report + */ +void +spu_dcache_report(void) +{ +#ifdef CACHE_STATS + if (spu.init.id == 0) { + printf("SPU 0: Texture cache report:\n"); + cache_pr_stats(data); + } +#endif +} + + diff --git a/src/gallium/drivers/cell/spu/spu_dcache.h b/src/gallium/drivers/cell/spu/spu_dcache.h new file mode 100644 index 0000000000..39a19eb31b --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_dcache.h @@ -0,0 +1,37 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef SPU_DCACHE_H +#define SPU_DCACHE_H + +extern void +spu_dcache_fetch_unaligned(qword *dst, unsigned ea, unsigned size); + +extern void +spu_dcache_mark_dirty(unsigned ea, unsigned size); + +extern void +spu_dcache_report(void); + +#endif /* SPU_DCACHE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c new file mode 100644 index 0000000000..e27df2dfb3 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -0,0 +1,1933 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI interpreter/executor. + * + * Flow control information: + * + * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) + * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special + * care since a condition may be true for some quad components but false + * for other components. + * + * We basically execute all statements (even if they're in the part of + * an IF/ELSE clause that's "not taken") and use a special mask to + * control writing to destination registers. This is the ExecMask. + * See store_dest(). + * + * The ExecMask is computed from three other masks (CondMask, LoopMask and + * ContMask) which are controlled by the flow control instructions (namely: + * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 
+ * + * + * Authors: + * Michal Krol + * Brian Paul + */ + +#include <transpose_matrix4x4.h> +#include <simdmath/ceilf4.h> +#include <simdmath/cosf4.h> +#include <simdmath/divf4.h> +#include <simdmath/floorf4.h> +#include <simdmath/log2f4.h> +#include <simdmath/powf4.h> +#include <simdmath/sinf4.h> +#include <simdmath/sqrtf4.h> +#include <simdmath/truncf4.h> + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "spu_exec.h" +#include "spu_main.h" +#include "spu_vertex_shader.h" +#include "spu_dcache.h" +#include "cell/common.h" + +#define TILE_TOP_LEFT 0 +#define TILE_TOP_RIGHT 1 +#define TILE_BOTTOM_LEFT 2 +#define TILE_BOTTOM_RIGHT 3 + +/* + * Shorthand locations of various utility registers (_I = Index, _C = Channel) + */ +#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I +#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C +#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I +#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C +#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I +#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C +#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I +#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C +#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I +#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C +#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I +#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C +#define TEMP_128_I TGSI_EXEC_TEMP_128_I +#define TEMP_128_C TGSI_EXEC_TEMP_128_C +#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I +#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C +#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I +#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C +#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I +#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C +#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I +#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C +#define TEMP_R0 TGSI_EXEC_TEMP_R0 + +#define FOR_EACH_CHANNEL(CHAN)\ + for (CHAN = 0; CHAN < 4; CHAN++) + +#define IS_CHANNEL_ENABLED(INST, CHAN)\ + 
((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IS_CHANNEL_ENABLED2(INST, CHAN)\ + ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + +#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED2( INST, CHAN )) + + +/** The execution mask depends on the conditional mask and the loop mask */ +#define UPDATE_EXEC_MASK(MACH) \ + MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask + + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + + + +/** + * Initialize machine state by expanding tokens to full instructions, + * allocating temporary storage, setting up constants, etc. + * After this, we can call spu_exec_machine_run() many times. + */ +void +spu_exec_machine_init(struct spu_exec_machine *mach, + uint numSamplers, + struct spu_sampler *samplers, + unsigned processor) +{ + const qword zero = si_il(0); + const qword not_zero = si_il(~0); + + (void) numSamplers; + mach->Samplers = samplers; + mach->Processor = processor; + mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; + + /* Setup constants. 
*/ + mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q = zero; + mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].q = not_zero; + mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].q = si_shli(not_zero, -1); + mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].q = si_shli(not_zero, 31); + + mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q = (qword) spu_splats(1.0f); + mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q = (qword) spu_splats(2.0f); + mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q = (qword) spu_splats(128.0f); + mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q = (qword) spu_splats(-128.0f); +} + + +static INLINE qword +micro_abs(qword src) +{ + return si_rotmi(si_shli(src, 1), -1); +} + +static INLINE qword +micro_ceil(qword src) +{ + return (qword) _ceilf4((vec_float4) src); +} + +static INLINE qword +micro_cos(qword src) +{ + return (qword) _cosf4((vec_float4) src); +} + +static const qword br_shuf = { + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, + TILE_BOTTOM_RIGHT + 0, TILE_BOTTOM_RIGHT + 1, + TILE_BOTTOM_RIGHT + 2, TILE_BOTTOM_RIGHT + 3, +}; + +static const qword bl_shuf = { + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, + TILE_BOTTOM_LEFT + 0, TILE_BOTTOM_LEFT + 1, + TILE_BOTTOM_LEFT + 2, TILE_BOTTOM_LEFT + 3, +}; + +static const qword tl_shuf = { + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT + 3, + TILE_TOP_LEFT + 0, TILE_TOP_LEFT + 1, + TILE_TOP_LEFT + 2, TILE_TOP_LEFT 
+ 3, +}; + +static qword +micro_ddx(qword src) +{ + qword bottom_right = si_shufb(src, src, br_shuf); + qword bottom_left = si_shufb(src, src, bl_shuf); + + return si_fs(bottom_right, bottom_left); +} + +static qword +micro_ddy(qword src) +{ + qword top_left = si_shufb(src, src, tl_shuf); + qword bottom_left = si_shufb(src, src, bl_shuf); + + return si_fs(top_left, bottom_left); +} + +static INLINE qword +micro_div(qword src0, qword src1) +{ + return (qword) _divf4((vec_float4) src0, (vec_float4) src1); +} + +static qword +micro_flr(qword src) +{ + return (qword) _floorf4((vec_float4) src); +} + +static qword +micro_frc(qword src) +{ + return si_fs(src, (qword) _floorf4((vec_float4) src)); +} + +static INLINE qword +micro_ge(qword src0, qword src1) +{ + return si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); +} + +static qword +micro_lg2(qword src) +{ + return (qword) _log2f4((vec_float4) src); +} + +static INLINE qword +micro_lt(qword src0, qword src1) +{ + const qword tmp = si_or(si_fceq(src0, src1), si_fcgt(src0, src1)); + + return si_xori(tmp, 0xff); +} + +static INLINE qword +micro_max(qword src0, qword src1) +{ + return si_selb(src1, src0, si_fcgt(src0, src1)); +} + +static INLINE qword +micro_min(qword src0, qword src1) +{ + return si_selb(src0, src1, si_fcgt(src0, src1)); +} + +static qword +micro_neg(qword src) +{ + return si_xor(src, (qword) spu_splats(0x80000000)); +} + +static qword +micro_set_sign(qword src) +{ + return si_or(src, (qword) spu_splats(0x80000000)); +} + +static qword +micro_pow(qword src0, qword src1) +{ + return (qword) _powf4((vec_float4) src0, (vec_float4) src1); +} + +static qword +micro_rnd(qword src) +{ + const qword half = (qword) spu_splats(0.5f); + + /* May be able to use _roundf4. There may be some difference, though. 
+ */ + return (qword) _floorf4((vec_float4) si_fa(src, half)); +} + +static INLINE qword +micro_ishr(qword src0, qword src1) +{ + return si_rotma(src0, si_sfi(src1, 0)); +} + +static qword +micro_trunc(qword src) +{ + return (qword) _truncf4((vec_float4) src); +} + +static qword +micro_sin(qword src) +{ + return (qword) _sinf4((vec_float4) src); +} + +static INLINE qword +micro_sqrt(qword src) +{ + return (qword) _sqrtf4((vec_float4) src); +} + +static void +fetch_src_file_channel( + const struct spu_exec_machine *mach, + const uint file, + const uint swizzle, + const union spu_exec_channel *index, + union spu_exec_channel *chan ) +{ + switch( swizzle ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch( file ) { + case TGSI_FILE_CONSTANT: { + unsigned i; + + for (i = 0; i < 4; i++) { + const float *ptr = mach->Consts[index->i[i]]; + float tmp[4]; + + spu_dcache_fetch_unaligned((qword *) tmp, + (uintptr_t)(ptr + swizzle), + sizeof(float)); + + chan->f[i] = tmp[0]; + } + break; + } + + case TGSI_FILE_INPUT: + chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_TEMPORARY: + chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_IMMEDIATE: + ASSERT( index->i[0] < (int) mach->ImmLimit ); + ASSERT( index->i[1] < (int) mach->ImmLimit ); + ASSERT( index->i[2] < (int) mach->ImmLimit ); + ASSERT( index->i[3] < (int) mach->ImmLimit ); + + chan->f[0] = mach->Imms[index->i[0]][swizzle]; + chan->f[1] = mach->Imms[index->i[1]][swizzle]; + chan->f[2] = mach->Imms[index->i[2]][swizzle]; + 
chan->f[3] = mach->Imms[index->i[3]][swizzle]; + break; + + case TGSI_FILE_ADDRESS: + chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_OUTPUT: + /* vertex/fragment output vars can be read too */ + chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + default: + ASSERT( 0 ); + } + break; + + case TGSI_EXTSWIZZLE_ZERO: + *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; + break; + + case TGSI_EXTSWIZZLE_ONE: + *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; + break; + + default: + ASSERT( 0 ); + } +} + +static void +fetch_source( + const struct spu_exec_machine *mach, + union spu_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const uint chan_index ) +{ + union spu_exec_channel index; + uint swizzle; + + index.i[0] = + index.i[1] = + index.i[2] = + index.i[3] = reg->SrcRegister.Index; + + if (reg->SrcRegister.Indirect) { + union spu_exec_channel index2; + union spu_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle(®->SrcRegisterInd, + CHAN_X); + fetch_src_file_channel( + mach, + reg->SrcRegisterInd.File, + swizzle, + &index2, + &indir_index ); + + index.q = si_a(index.q, indir_index.q); + } + + if( reg->SrcRegister.Dimension ) { + switch( reg->SrcRegister.File ) { + case TGSI_FILE_INPUT: + index.q = si_mpyi(index.q, 17); + break; + case TGSI_FILE_CONSTANT: + index.q = si_shli(index.q, 12); + break; + default: + ASSERT( 0 ); + } + + index.i[0] += reg->SrcRegisterDim.Index; + index.i[1] += reg->SrcRegisterDim.Index; + 
index.i[2] += reg->SrcRegisterDim.Index; + index.i[3] += reg->SrcRegisterDim.Index; + + if (reg->SrcRegisterDim.Indirect) { + union spu_exec_channel index2; + union spu_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterDimInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); + fetch_src_file_channel( + mach, + reg->SrcRegisterDimInd.File, + swizzle, + &index2, + &indir_index ); + + index.q = si_a(index.q, indir_index.q); + } + } + + swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + fetch_src_file_channel( + mach, + reg->SrcRegister.File, + swizzle, + &index, + chan ); + + switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { + case TGSI_UTIL_SIGN_CLEAR: + chan->q = micro_abs(chan->q); + break; + + case TGSI_UTIL_SIGN_SET: + chan->q = micro_set_sign(chan->q); + break; + + case TGSI_UTIL_SIGN_TOGGLE: + chan->q = micro_neg(chan->q); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + } + + if (reg->SrcRegisterExtMod.Complement) { + chan->q = si_fs(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, chan->q); + } +} + +static void +store_dest( + struct spu_exec_machine *mach, + const union spu_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + uint chan_index ) +{ + union spu_exec_channel *dst; + + switch( reg->DstRegister.File ) { + case TGSI_FILE_NULL: + return; + + case TGSI_FILE_OUTPUT: + dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] + + reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_TEMPORARY: + dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_ADDRESS: + dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; + break; + + default: + ASSERT( 0 ); + return; + } + + switch (inst->Instruction.Saturate) + { + case TGSI_SAT_NONE: + if (mach->ExecMask & 0x1) + dst->i[0] = chan->i[0]; + if 
(mach->ExecMask & 0x2) + dst->i[1] = chan->i[1]; + if (mach->ExecMask & 0x4) + dst->i[2] = chan->i[2]; + if (mach->ExecMask & 0x8) + dst->i[3] = chan->i[3]; + break; + + case TGSI_SAT_ZERO_ONE: + /* XXX need to obey ExecMask here */ + dst->q = micro_max(chan->q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + dst->q = micro_min(dst->q, mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q); + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + ASSERT( 0 ); + break; + + default: + ASSERT( 0 ); + } +} + +#define FETCH(VAL,INDEX,CHAN)\ + fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + +#define STORE(VAL,INDEX,CHAN)\ + store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) + + +/** + * Execute ARB-style KIL which is predicated by a src register. + * Kill fragment if any of the four values is less than zero. + */ +static void +exec_kil(struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint uniquemask; + uint chan_index; + uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + union spu_exec_channel r[1]; + + /* This mask stores component bits that were already tested. Note that + * we test if the value is less than zero, so 1.0 and 0.0 need not to be + * tested. */ + uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + + for (chan_index = 0; chan_index < 4; chan_index++) + { + uint swizzle; + uint i; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_extswizzle ( + &inst->FullSrcRegisters[0], + chan_index); + + /* check if the component has not been already tested */ + if (uniquemask & (1 << swizzle)) + continue; + uniquemask |= 1 << swizzle; + + FETCH(&r[0], 0, chan_index); + for (i = 0; i < 4; i++) + if (r[0].f[i] < 0.0f) + kilmask |= 1 << i; + } + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} + +/** + * Execute NVIDIA-style KIL which is predicated by a condition code. + * Kill fragment if the condition code is TRUE. 
+ */ +static void +exec_kilp(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + + /* TODO: build kilmask from CC mask */ + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} + +/* + * Fetch a texel using STR texture coordinates. + */ +static void +fetch_texel( struct spu_sampler *sampler, + const union spu_exec_channel *s, + const union spu_exec_channel *t, + const union spu_exec_channel *p, + float lodbias, /* XXX should be float[4] */ + union spu_exec_channel *r, + union spu_exec_channel *g, + union spu_exec_channel *b, + union spu_exec_channel *a ) +{ + qword rgba[4]; + qword out[4]; + + sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, + (float (*)[4]) rgba); + + _transpose_matrix4x4((vec_float4 *) out, (vec_float4 *) rgba); + r->q = out[0]; + g->q = out[1]; + b->q = out[2]; + a->q = out[3]; +} + + +static void +exec_tex(struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst, + boolean biasLod, boolean projected) +{ + const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + union spu_exec_channel r[8]; + uint chan_index; + float lodBias; + + /* printf("Sampler %u unit %u\n", sampler, unit); */ + + switch (inst->InstructionExtTexture.Texture) { + case TGSI_TEXTURE_1D: + + FETCH(&r[0], 0, CHAN_X); + + if (projected) { + FETCH(&r[1], 0, CHAN_W); + r[0].q = micro_div(r[0].q, r[1].q); + } + + if (biasLod) { + FETCH(&r[1], 0, CHAN_W); + lodBias = r[2].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (projected) { + FETCH(&r[3], 0, CHAN_W); + r[0].q = micro_div(r[0].q, r[3].q); + r[1].q = micro_div(r[1].q, r[3].q); + r[2].q = micro_div(r[2].q, r[3].q); + 
} + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, /* inputs */ + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (projected) { + FETCH(&r[3], 0, CHAN_W); + r[0].q = micro_div(r[0].q, r[3].q); + r[1].q = micro_div(r[1].q, r[3].q); + r[2].q = micro_div(r[2].q, r[3].q); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + ASSERT (0); + } + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[chan_index], 0, chan_index ); + } +} + + + +static void +constant_interpolation( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; + } +} + +static void +linear_interpolation( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + mach->Inputs[attrib].xyzw[chan].f[0] = a0; + mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; + mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; + mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; +} + +static void +perspective_interpolation( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = 
mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + const float *w = mach->QuadPos.xyzw[3].f; + /* divide by W here */ + mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; + mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; + mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; + mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; +} + + +typedef void (* interpolation_func)( + struct spu_exec_machine *mach, + unsigned attrib, + unsigned chan ); + +static void +exec_declaration(struct spu_exec_machine *mach, + const struct tgsi_full_declaration *decl) +{ + if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + interpolation_func interp; + + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + switch( decl->Declaration.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + interp = constant_interpolation; + break; + + case TGSI_INTERPOLATE_LINEAR: + interp = linear_interpolation; + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + interp = perspective_interpolation; + break; + + default: + ASSERT( 0 ); + } + + if( mask == TGSI_WRITEMASK_XYZW ) { + unsigned i, j; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + interp( mach, i, j ); + } + } + } + else { + unsigned i, j; + + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + for( i = first; i <= last; i++ ) { + interp( mach, i, j ); + } + } + } + } + } + } +} + +static void +exec_instruction( + struct spu_exec_machine *mach, + const struct tgsi_full_instruction *inst, + int *pc ) +{ + uint chan_index; + union spu_exec_channel r[8]; + + (*pc)++; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index 
); + r[0].q = si_cflts(r[0].q, 0); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_X ); + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + r[0].q = micro_max(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[1], 0, CHAN_Y ); + r[1].q = micro_max(r[1].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + + FETCH( &r[2], 0, CHAN_W ); + r[2].q = micro_min(r[2].q, mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].q); + r[2].q = micro_max(r[2].q, mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].q); + r[1].q = micro_pow(r[1].q, r[2].q); + + /* r0 = (r0 > 0.0) ? 
r1 : 0.0 + */ + r[0].q = si_fcgt(r[0].q, mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q); + r[0].q = si_selb(mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].q, r[1].q, + r[0].q); + STORE( &r[0], 0, CHAN_Z ); + } + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_sqrt(r[0].q); + r[0].q = micro_div(mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].q, r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXP: + ASSERT (0); + break; + + case TGSI_OPCODE_LOG: + ASSERT (0); + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) + { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = si_fm(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_fa(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + + FETCH( &r[1], 0, CHAN_Z ); + FETCH( &r[2], 1, CHAN_Z ); + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH(&r[1], 0, 
CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 0, CHAN_W); + FETCH(&r[2], 1, CHAN_W); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DST: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + FETCH( &r[0], 0, CHAN_Y ); + FETCH( &r[1], 1, CHAN_Y); + r[0].q = si_fm(r[0].q, r[1].q); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_Z ); + STORE( &r[0], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + FETCH( &r[0], 1, CHAN_W ); + STORE( &r[0], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = micro_min(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = micro_max(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = micro_ge(r[0].q, r[1].q); + r[0].q = si_xori(r[0].q, 0xff); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = micro_ge(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_ENABLED_CHANNEL( *inst, 
chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + FETCH( &r[2], 2, chan_index ); + r[0].q = si_fma(r[0].q, r[1].q, r[2].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + r[0].q = si_fs(r[0].q, r[1].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_LERP: + /* TGSI_OPCODE_LRP */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + r[1].q = si_fs(r[1].q, r[2].q); + r[0].q = si_fma(r[0].q, r[1].q, r[2].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_CND: + ASSERT (0); + break; + + case TGSI_OPCODE_CND0: + ASSERT (0); + break; + + case TGSI_OPCODE_DOT2ADD: + /* TGSI_OPCODE_DP2A */ + ASSERT (0); + break; + + case TGSI_OPCODE_INDEX: + ASSERT (0); + break; + + case TGSI_OPCODE_NEGATE: + ASSERT (0); + break; + + case TGSI_OPCODE_FRAC: + /* TGSI_OPCODE_FRC */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_frc(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CLAMP: + ASSERT (0); + break; + + case TGSI_OPCODE_FLOOR: + /* TGSI_OPCODE_FLR */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_flr(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_ROUND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_rnd(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXPBASE2: + /* TGSI_OPCODE_EX2 */ + FETCH(&r[0], 0, CHAN_X); + + r[0].q = micro_pow(mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LOGBASE2: + /* TGSI_OPCODE_LG2 */ + FETCH( 
&r[0], 0, CHAN_X ); + r[0].q = micro_lg2(r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_POWER: + /* TGSI_OPCODE_POW */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + r[0].q = micro_pow(r[0].q, r[1].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CROSSPRODUCT: + /* TGSI_OPCODE_XPD */ + FETCH(&r[0], 0, CHAN_Y); + FETCH(&r[1], 1, CHAN_Z); + FETCH(&r[3], 0, CHAN_Z); + FETCH(&r[4], 1, CHAN_Y); + + /* r2 = (r0 * r1) - (r3 * r5) + */ + r[2].q = si_fm(r[3].q, r[5].q); + r[2].q = si_fms(r[0].q, r[1].q, r[2].q); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &r[2], 0, CHAN_X ); + } + + FETCH(&r[2], 1, CHAN_X); + FETCH(&r[5], 0, CHAN_X); + + /* r3 = (r3 * r2) - (r1 * r5) + */ + r[1].q = si_fm(r[1].q, r[5].q); + r[3].q = si_fms(r[3].q, r[2].q, r[1].q); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + STORE( &r[3], 0, CHAN_Y ); + } + + /* r5 = (r5 * r4) - (r0 * r2) + */ + r[0].q = si_fm(r[0].q, r[2].q); + r[5].q = si_fms(r[5].q, r[4].q, r[0].q); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( &r[5], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MULTIPLYMATRIX: + ASSERT (0); + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + + r[0].q = micro_abs(r[0].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_RCC: + ASSERT (0); + break; + + case TGSI_OPCODE_DPH: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FETCH(&r[1], 1, CHAN_W); + + r[0].q = si_fa(r[0].q, r[1].q); 
+ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_COS: + FETCH(&r[0], 0, CHAN_X); + + r[0].q = micro_cos(r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_ddx(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDY: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_ddy(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_KILP: + exec_kilp (mach, inst); + break; + + case TGSI_OPCODE_KIL: + exec_kil (mach, inst); + break; + + case TGSI_OPCODE_PK2H: + ASSERT (0); + break; + + case TGSI_OPCODE_PK2US: + ASSERT (0); + break; + + case TGSI_OPCODE_PK4B: + ASSERT (0); + break; + + case TGSI_OPCODE_PK4UB: + ASSERT (0); + break; + + case TGSI_OPCODE_RFL: + ASSERT (0); + break; + + case TGSI_OPCODE_SEQ: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_fceq(r[0].q, r[1].q); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SFL: + ASSERT (0); + break; + + case TGSI_OPCODE_SGT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_fcgt(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SIN: + FETCH( &r[0], 0, CHAN_X ); + r[0].q = micro_sin(r[0].q); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_fcgt(r[0].q, r[1].q); + r[0].q = si_xori(r[0].q, 0xff); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SNE: + 
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_fceq(r[0].q, r[1].q); + r[0].q = si_xori(r[0].q, 0xff); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_STR: + ASSERT (0); + break; + + case TGSI_OPCODE_TEX: + /* simple texture lookup */ + /* src[0] = texcoord */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, FALSE, FALSE); + break; + + case TGSI_OPCODE_TXB: + /* Texture lookup with lod bias */ + /* src[0] = texcoord (src[0].w = load bias) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE, FALSE); + break; + + case TGSI_OPCODE_TXD: + /* Texture lookup with explict partial derivatives */ + /* src[0] = texcoord */ + /* src[1] = d[strq]/dx */ + /* src[2] = d[strq]/dy */ + /* src[3] = sampler unit */ + ASSERT (0); + break; + + case TGSI_OPCODE_TXL: + /* Texture lookup with explit LOD */ + /* src[0] = texcoord (src[0].w = load bias) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE, FALSE); + break; + + case TGSI_OPCODE_TXP: + /* Texture lookup with projection */ + /* src[0] = texcoord (src[0].w = projection) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE, TRUE); + break; + + case TGSI_OPCODE_UP2H: + ASSERT (0); + break; + + case TGSI_OPCODE_UP2US: + ASSERT (0); + break; + + case TGSI_OPCODE_UP4B: + ASSERT (0); + break; + + case TGSI_OPCODE_UP4UB: + ASSERT (0); + break; + + case TGSI_OPCODE_X2D: + ASSERT (0); + break; + + case TGSI_OPCODE_ARA: + ASSERT (0); + break; + + case TGSI_OPCODE_ARR: + ASSERT (0); + break; + + case TGSI_OPCODE_BRA: + ASSERT (0); + break; + + case TGSI_OPCODE_CAL: + /* skip the call if no execution channels are enabled */ + if (mach->ExecMask) { + /* do the call */ + + /* push the Cond, Loop, Cont stacks */ + ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + 
mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + + ASSERT(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; + + /* note that PC was already incremented above */ + mach->CallStack[mach->CallStackTop++] = *pc; + *pc = inst->InstructionExtLabel.Label; + } + break; + + case TGSI_OPCODE_RET: + mach->FuncMask &= ~mach->ExecMask; + UPDATE_EXEC_MASK(mach); + + if (mach->ExecMask == 0x0) { + /* really return now (otherwise, keep executing */ + + if (mach->CallStackTop == 0) { + /* returning from main() */ + *pc = -1; + return; + } + *pc = mach->CallStack[--mach->CallStackTop]; + + /* pop the Cond, Loop, Cont stacks */ + ASSERT(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + ASSERT(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + ASSERT(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + ASSERT(mach->FuncStackTop > 0); + mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; + + UPDATE_EXEC_MASK(mach); + } + break; + + case TGSI_OPCODE_SSG: + ASSERT (0); + break; + + case TGSI_OPCODE_CMP: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + /* r0 = (r0 < 0.0) ? 
r1 : r2 + */ + r[3].q = si_xor(r[3].q, r[3].q); + r[0].q = micro_lt(r[0].q, r[3].q); + r[0].q = si_selb(r[1].q, r[2].q, r[0].q); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_SCS: + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( &r[0], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { + r[1].q = micro_cos(r[0].q); + STORE( &r[1], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + r[1].q = micro_sin(r[0].q); + STORE( &r[1], 0, CHAN_Y ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_NRM: + ASSERT (0); + break; + + case TGSI_OPCODE_DIV: + ASSERT( 0 ); + break; + + case TGSI_OPCODE_DP2: + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + r[0].q = si_fm(r[0].q, r[1].q); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + r[0].q = si_fma(r[1].q, r[2].q, r[0].q); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_IF: + /* push CondMask */ + ASSERT(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + FETCH( &r[0], 0, CHAN_X ); + /* update CondMask */ + if( ! r[0].u[0] ) { + mach->CondMask &= ~0x1; + } + if( ! r[0].u[1] ) { + mach->CondMask &= ~0x2; + } + if( ! r[0].u[2] ) { + mach->CondMask &= ~0x4; + } + if( ! 
r[0].u[3] ) { + mach->CondMask &= ~0x8; + } + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ELSE */ + break; + + case TGSI_OPCODE_ELSE: + /* invert CondMask wrt previous mask */ + { + uint prevMask; + ASSERT(mach->CondStackTop > 0); + prevMask = mach->CondStack[mach->CondStackTop - 1]; + mach->CondMask = ~mach->CondMask & prevMask; + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ENDIF */ + } + break; + + case TGSI_OPCODE_ENDIF: + /* pop CondMask */ + ASSERT(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_END: + /* halt execution */ + *pc = -1; + break; + + case TGSI_OPCODE_REP: + ASSERT (0); + break; + + case TGSI_OPCODE_ENDREP: + ASSERT (0); + break; + + case TGSI_OPCODE_PUSHA: + ASSERT (0); + break; + + case TGSI_OPCODE_POPA: + ASSERT (0); + break; + + case TGSI_OPCODE_CEIL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_ceil(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_I2F: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = si_csflt(r[0].q, 0); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_NOT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = si_xorbi(r[0].q, 0xff); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_TRUNC: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + r[0].q = micro_trunc(r[0].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + + r[0].q = si_shl(r[0].q, r[1].q); + + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); 
+ r[0].q = micro_ishr(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_AND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_and(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_OR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_or(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOD: + ASSERT (0); + break; + + case TGSI_OPCODE_XOR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + r[0].q = si_xor(r[0].q, r[1].q); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SAD: + ASSERT (0); + break; + + case TGSI_OPCODE_TXF: + ASSERT (0); + break; + + case TGSI_OPCODE_TXQ: + ASSERT (0); + break; + + case TGSI_OPCODE_EMIT: + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + break; + + case TGSI_OPCODE_ENDPRIM: + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; + break; + + case TGSI_OPCODE_LOOP: + /* fall-through (for now) */ + case TGSI_OPCODE_BGNLOOP2: + /* push LoopMask and ContMasks */ + ASSERT(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + ASSERT(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + break; + + case TGSI_OPCODE_ENDLOOP: + /* fall-through (for now at least) */ + case TGSI_OPCODE_ENDLOOP2: + /* Restore ContMask, but don't pop */ + ASSERT(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; + if (mach->LoopMask) { + /* repeat loop: jump to instruction just past BGNLOOP */ + *pc 
= inst->InstructionExtLabel.Label + 1; + } + else { + /* exit loop: pop LoopMask */ + ASSERT(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + /* pop ContMask */ + ASSERT(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + } + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BRK: + /* turn off loop channels for each enabled exec channel */ + mach->LoopMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_CONT: + /* turn off cont channels for each enabled exec channel */ + mach->ContMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BGNSUB: + /* no-op */ + break; + + case TGSI_OPCODE_ENDSUB: + /* no-op */ + break; + + case TGSI_OPCODE_NOISE1: + ASSERT( 0 ); + break; + + case TGSI_OPCODE_NOISE2: + ASSERT( 0 ); + break; + + case TGSI_OPCODE_NOISE3: + ASSERT( 0 ); + break; + + case TGSI_OPCODE_NOISE4: + ASSERT( 0 ); + break; + + case TGSI_OPCODE_NOP: + break; + + default: + ASSERT( 0 ); + } +} + + +/** + * Run TGSI interpreter. 
+ * \return bitmask of "alive" quad components + */ +uint +spu_exec_machine_run( struct spu_exec_machine *mach ) +{ + uint i; + int pc = 0; + + mach->CondMask = 0xf; + mach->LoopMask = 0xf; + mach->ContMask = 0xf; + mach->FuncMask = 0xf; + mach->ExecMask = 0xf; + + mach->CondStackTop = 0; /* temporarily subvert this ASSERTion */ + ASSERT(mach->CondStackTop == 0); + ASSERT(mach->LoopStackTop == 0); + ASSERT(mach->ContStackTop == 0); + ASSERT(mach->CallStackTop == 0); + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; + + if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; + mach->Primitives[0] = 0; + } + + + /* execute declarations (interpolants) */ + if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { + for (i = 0; i < mach->NumDeclarations; i++) { + union { + struct tgsi_full_declaration decl; + qword buffer[ROUNDUP16(sizeof(struct tgsi_full_declaration)) / 16]; + } d ALIGN16_ATTRIB; + unsigned ea = (unsigned) (mach->Declarations + pc); + + spu_dcache_fetch_unaligned(d.buffer, ea, sizeof(d.decl)); + + exec_declaration( mach, &d.decl ); + } + } + + /* execute instructions, until pc is set to -1 */ + while (pc != -1) { + union { + struct tgsi_full_instruction inst; + qword buffer[ROUNDUP16(sizeof(struct tgsi_full_instruction)) / 16]; + } i ALIGN16_ATTRIB; + unsigned ea = (unsigned) (mach->Instructions + pc); + + spu_dcache_fetch_unaligned(i.buffer, ea, sizeof(i.inst)); + exec_instruction( mach, & i.inst, &pc ); + } + +#if 0 + /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + /* + * Scale back depth component. 
+ */ + for (i = 0; i < 4; i++) + mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; + } +#endif + + return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; +} + + diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h new file mode 100644 index 0000000000..8605679940 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -0,0 +1,172 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#if !defined SPU_EXEC_H +#define SPU_EXEC_H + +#include "pipe/p_compiler.h" +#include "tgsi/tgsi_exec.h" + +#if defined __cplusplus +extern "C" { +#endif + +/** + * Registers may be treated as float, signed int or unsigned int. + */ +union spu_exec_channel +{ + float f[QUAD_SIZE]; + int i[QUAD_SIZE]; + unsigned u[QUAD_SIZE]; + qword q; +}; + +/** + * A vector[RGBA] of channels[4 pixels] + */ +struct spu_exec_vector +{ + union spu_exec_channel xyzw[NUM_CHANNELS]; +}; + +/** + * For fragment programs, information for computing fragment input + * values from plane equation of the triangle/line. + */ +struct spu_interp_coef +{ + float a0[NUM_CHANNELS]; /* in an xyzw layout */ + float dadx[NUM_CHANNELS]; + float dady[NUM_CHANNELS]; +}; + + +struct softpipe_tile_cache; /**< Opaque to TGSI */ + +/** + * Information for sampling textures, which must be implemented + * by code outside the TGSI executor. + */ +struct spu_sampler +{ + const struct pipe_sampler_state *state; + struct pipe_texture *texture; + /** Get samples for four fragments in a quad */ + void (*get_samples)(struct spu_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); + void *pipe; /*XXX temporary*/ + struct softpipe_tile_cache *cache; +}; + + +/** + * Run-time virtual machine state for executing TGSI shader. + */ +struct spu_exec_machine +{ + /* + * 32 program temporaries + * 4 internal temporaries + * 1 address + */ + struct spu_exec_vector Temps[TGSI_EXEC_NUM_TEMPS + + TGSI_EXEC_NUM_TEMP_EXTRAS + 1] + ALIGN16_ATTRIB; + + struct spu_exec_vector *Addrs; + + struct spu_sampler *Samplers; + + float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; + unsigned ImmLimit; + float (*Consts)[4]; + struct spu_exec_vector *Inputs; + struct spu_exec_vector *Outputs; + unsigned Processor; + + /* GEOMETRY processor only. 
*/ + unsigned *Primitives; + + /* FRAGMENT processor only. */ + const struct spu_interp_coef *InterpCoefs; + struct spu_exec_vector QuadPos; + + /* Conditional execution masks */ + uint CondMask; /**< For IF/ELSE/ENDIF */ + uint LoopMask; /**< For BGNLOOP/ENDLOOP */ + uint ContMask; /**< For loop CONT statements */ + uint FuncMask; /**< For function calls */ + uint ExecMask; /**< = CondMask & LoopMask */ + + /** Condition mask stack (for nested conditionals) */ + uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; + int CondStackTop; + + /** Loop mask stack (for nested loops) */ + uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopStackTop; + + /** Loop continue mask stack (see comments in tgsi_exec.c) */ + uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int ContStackTop; + + /** Function execution mask stack (for executing subroutine code) */ + uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; + int FuncStackTop; + + /** Function call stack for saving/restoring the program counter */ + uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; + int CallStackTop; + + struct tgsi_full_instruction *Instructions; + uint NumInstructions; + + struct tgsi_full_declaration *Declarations; + uint NumDeclarations; +}; + + +extern void +spu_exec_machine_init(struct spu_exec_machine *mach, + uint numSamplers, + struct spu_sampler *samplers, + unsigned processor); + +extern uint +spu_exec_machine_run( struct spu_exec_machine *mach ); + + +#if defined __cplusplus +} /* extern "C" */ +#endif + +#endif /* SPU_EXEC_H */ diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c new file mode 100644 index 0000000000..3534b35000 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -0,0 +1,189 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * SPU functions accessed by shaders. 
+ * + * Authors: Brian Paul + */ + + +#include <string.h> +#include <libmisc.h> +#include <math.h> +#include <cos14_v.h> +#include <sin14_v.h> +#include <transpose_matrix4x4.h> + +#include "cell/common.h" +#include "spu_main.h" +#include "spu_funcs.h" +#include "spu_texture.h" + + +/** Bundle of four vectors so they can be returned by value */ +struct vec_4x4 +{ + vector float v[4]; +}; + + +/** Cosine of x, computed by _cos14_v() */ +static vector float +spu_cos(vector float x) +{ + const vector float cosine = _cos14_v(x); + return cosine; +} + +/** Sine of x, computed by _sin14_v() */ +static vector float +spu_sin(vector float x) +{ + const vector float sine = _sin14_v(x); + return sine; +} + +/** Raise each of the four elements of x to the power of the matching element of y */ +static vector float +spu_pow(vector float x, vector float y) +{ + return (vector float) { + powf(spu_extract(x,0), spu_extract(y,0)), + powf(spu_extract(x,1), spu_extract(y,1)), + powf(spu_extract(x,2), spu_extract(y,2)), + powf(spu_extract(x,3), spu_extract(y,3)) + }; +} + +/** Compute 2 raised to each of the four elements of x */ +static vector float +spu_exp2(vector float x) +{ + return (vector float) { + powf(2.0f, spu_extract(x,0)), + powf(2.0f, spu_extract(x,1)), + powf(2.0f, spu_extract(x,2)), + powf(2.0f, spu_extract(x,3)) + }; +} + +static vector float +spu_log2(vector float x) +{ + /* + * log_base_2(x) = log(x) / log(2) + * 1.442695 = 1/log(2).
+ */ + static const vector float k = {1.442695F, 1.442695F, 1.442695F, 1.442695F}; + float z0 = logf(spu_extract(x,0)); + float z1 = logf(spu_extract(x,1)); + float z2 = logf(spu_extract(x,2)); + float z3 = logf(spu_extract(x,3)); + vector float v = (vector float) {z0, z1, z2, z3}; + return spu_mul(v, k); +} + + +/** + * Sample texture 'unit' at (s, t) through the unit's sample_texture_2d + * function (level 0, face 0). r and q are unused. + */ +static struct vec_4x4 +spu_tex_2d(vector float s, vector float t, vector float r, vector float q, + unsigned unit) +{ + struct vec_4x4 colors; + (void) r; + (void) q; + spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); + return colors; +} + +/** + * 3D texture entry point. + * XXX r is ignored: this currently goes through the unit's 2D sampler, + * exactly like spu_tex_2d(). + */ +static struct vec_4x4 +spu_tex_3d(vector float s, vector float t, vector float r, vector float q, + unsigned unit) +{ + struct vec_4x4 colors; + (void) r; + (void) q; + spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); + return colors; +} + +/** + * Sample cube texture 'unit' at (s, t, r) with sample_texture_cube(). + * q is unused. + */ +static struct vec_4x4 +spu_tex_cube(vector float s, vector float t, vector float r, vector float q, + unsigned unit) +{ + struct vec_4x4 colors; + (void) q; + sample_texture_cube(s, t, r, unit, colors.v); + return colors; +} + + +/** + * Add named function to list of "exported" functions that will be + * made available to the PPU-hosted code generator. + * + * \param spu_functions table being filled in; the entry at index 'num' is written and 'num' is incremented + * \param name function name, must be shorter than 16 characters + * \param addr local-store address of the function + */ +static void +export_func(struct cell_spu_function_info *spu_functions, + const char *name, void *addr) +{ + uint n = spu_functions->num; + /* check capacity before touching names[n]/addrs[n], so a full table + * trips the assertion instead of being overrun first + */ + ASSERT(n < 16); + ASSERT(strlen(name) < 16); + strcpy(spu_functions->names[n], name); + spu_functions->addrs[n] = (uint) addr; + spu_functions->num = n + 1; +} + + +/** + * Return info about the SPU's function to the PPU / main memory. + * The PPU needs to know the address of some SPU-side functions so + * that we can generate shader code with function calls.
+ */ +void +return_function_info(void) +{ + struct cell_spu_function_info funcs ALIGN16_ATTRIB; + int tag = TAG_MISC; + + ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ + + /* build the name/address table in a local (local-store) struct */ funcs.num = 0; + export_func(&funcs, "spu_cos", &spu_cos); + export_func(&funcs, "spu_sin", &spu_sin); + export_func(&funcs, "spu_pow", &spu_pow); + export_func(&funcs, "spu_exp2", &spu_exp2); + export_func(&funcs, "spu_log2", &spu_log2); + export_func(&funcs, "spu_tex_2d", &spu_tex_2d); + export_func(&funcs, "spu_tex_3d", &spu_tex_3d); + export_func(&funcs, "spu_tex_cube", &spu_tex_cube); + + /* Send the function info back to the PPU / main memory */ + mfc_put((void *) &funcs, /* src in local store */ + (unsigned int) spu.init.spu_functions, /* dst in main memory */ + sizeof(funcs), /* bytes */ + tag, + 0, /* tid */ + 0 /* rid */); + /* wait on the TAG_MISC tag group used for the put above; funcs is a local, so this has to happen before returning */ wait_on_mask(1 << tag); +} + + + diff --git a/src/gallium/drivers/cell/spu/spu_funcs.h b/src/gallium/drivers/cell/spu/spu_funcs.h new file mode 100644 index 0000000000..3adb6ae99f --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_funcs.h @@ -0,0 +1,35 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SPU_FUNCS_H +#define SPU_FUNCS_H + +extern void +return_function_info(void); + +#endif + diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c new file mode 100644 index 0000000000..c8bb251905 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -0,0 +1,122 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/* main() for Cell SPU code */ + + +#include <stdio.h> +#include <libmisc.h> + +#include "pipe/p_defines.h" + +#include "spu_funcs.h" +#include "spu_command.h" +#include "spu_main.h" +#include "spu_per_fragment_op.h" +#include "spu_texture.h" +//#include "spu_test.h" +#include "cell/common.h" + + +/* +helpful headers: +/usr/lib/gcc/spu/4.1.1/include/spu_mfcio.h +/opt/cell/sdk/usr/include/libmisc.h +*/ + +struct spu_global spu; + + +/** Startup initialization of the global 'spu' state: marks every color and Z tile status as TILE_STATUS_DEFINED, invalidates the texture cache and installs the fallback fragment-ops function. */ static void +one_time_init(void) +{ + memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); + memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); + invalidate_tex_cache(); + + /* Install default/fallback fragment processing function. + * This will normally be overridden by a code-gen'd function + * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set. + */ + spu.fragment_ops = spu_fallback_fragment_ops; +} + + + +/* In some versions of the SDK the SPE main takes 'unsigned long' as a + * parameter. In others it takes 'unsigned long long'. Use a define to + * select between the two. + */ +#ifdef SPU_MAIN_PARAM_LONG_LONG +typedef unsigned long long main_param_t; +#else +typedef unsigned long main_param_t; +#endif + +/** + * SPE entrypoint.
+ */ +int +main(main_param_t speid, main_param_t argp) +{ + int tag = 0; + + (void) speid; + + ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); + ASSERT(sizeof(struct cell_command_render) % 8 == 0); + ASSERT(((unsigned long) &spu.fragment_ops_code) % 8 == 0); + ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0); + + one_time_init(); + + D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid); + D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n"); + + /* get initialization data */ + mfc_get(&spu.init, /* dest */ + (unsigned int) argp, /* src */ + sizeof(struct cell_init_info), /* bytes */ + tag, + 0, /* tid */ + 0 /* rid */); + wait_on_mask( 1 << tag ); + + if (spu.init.id == 0) { + return_function_info(); + } + +#if 0 + if (spu.init.id==0) + spu_test_misc(spu.init.id); +#endif + + command_loop(); + + return 0; +} diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h new file mode 100644 index 0000000000..668af10be2 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -0,0 +1,255 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SPU_MAIN_H +#define SPU_MAIN_H + + +#include <spu_mfcio.h> + +#include "cell/common.h" +#include "draw/draw_vertex.h" +#include "pipe/p_state.h" + + +#if DEBUG +/* These debug macros use the unusual construction ", ##__VA_ARGS__" + * which expands to the expected comma + args if variadic arguments + * are supplied, but swallows the comma if there are no variadic + * arguments (which avoids syntax errors that would otherwise occur). + * + * The do { } while (0) wrapper makes the expansion a single statement, + * so D_PRINTF(...); is safe inside an unbraced if/else (a bare "if" + * expansion would capture a following "else"). + */ +#define D_PRINTF(flag, format,...) \ + do { \ + if (spu.init.debug_flags & (flag)) \ + printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__); \ + } while (0) +#else +#define D_PRINTF(...) +#endif + + +/** + * A tile is basically a TILE_SIZE x TILE_SIZE block of 4-byte pixels. + * The data may be addressed through several different types.
+ */ +typedef union { + ushort us[TILE_SIZE][TILE_SIZE]; + uint ui[TILE_SIZE][TILE_SIZE]; + vector unsigned short us8[TILE_SIZE/2][TILE_SIZE/4]; + vector unsigned int ui4[TILE_SIZE/2][TILE_SIZE/2]; +} tile_t; + + +#define TILE_STATUS_CLEAR 1 +#define TILE_STATUS_DEFINED 2 /**< defined in FB, but not in local store */ +#define TILE_STATUS_CLEAN 3 /**< in local store, but not changed */ +#define TILE_STATUS_DIRTY 4 /**< modified locally, but not put back yet */ +#define TILE_STATUS_GETTING 5 /**< mfc_get() called but not yet arrived */ + + +/** Function for sampling textures */ +typedef void (*spu_sample_texture_2d_func)(vector float s, + vector float t, + uint unit, uint level, uint face, + vector float colors[4]); + + +/** Function for performing per-fragment ops */ +typedef void (*spu_fragment_ops_func)(uint x, uint y, + tile_t *colorTile, + tile_t *depthStencilTile, + vector float fragZ, + vector float fragRed, + vector float fragGreen, + vector float fragBlue, + vector float fragAlpha, + vector unsigned int mask, + uint facing); + +/** Function for running fragment program */ +typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs, + vector float *outputs, + vector float *constants); + + +struct spu_framebuffer +{ + void *color_start; /**< addr of color surface in main memory */ + void *depth_start; /**< addr of depth surface in main memory */ + enum pipe_format color_format; + enum pipe_format depth_format; + uint width, height; /**< size in pixels */ + uint width_tiles, height_tiles; /**< width and height in tiles */ + + uint color_clear_value; + uint depth_clear_value; + + uint zsize; /**< 0, 2 or 4 bytes per Z */ + float zscale; /**< 65535.0, 2^24-1 or 2^32-1 */ +} ALIGN16_ATTRIB; + + +/** per-texture level info */ +struct spu_texture_level +{ + void *start; + ushort width, height, depth; + ushort tiles_per_row; + uint bytes_per_image; + /** texcoord scale factors */ + vector float scale_s, scale_t, scale_r; + /** texcoord masks (if 
REPEAT then size-1, else ~0) */ + vector signed int mask_s, mask_t, mask_r; + /** texcoord clamp limits */ + vector signed int max_s, max_t, max_r; +} ALIGN16_ATTRIB; + + +struct spu_texture +{ + struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; + uint max_level; + uint target; /**< PIPE_TEXTURE_x */ +} ALIGN16_ATTRIB; + + +/** + * All SPU global/context state will be in a singleton object of this type: + */ +struct spu_global +{ + /** One-time init/constant info */ + struct cell_init_info init; + + /* + * Current state + */ + struct spu_framebuffer fb; + struct pipe_depth_stencil_alpha_state depth_stencil_alpha; + struct pipe_blend_state blend; + struct pipe_blend_color blend_color; + struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; + struct pipe_rasterizer_state rasterizer; + struct spu_texture texture[PIPE_MAX_SAMPLERS]; + struct vertex_info vertex_info; + + /** Current color and Z tiles */ + tile_t ctile ALIGN16_ATTRIB; + tile_t ztile ALIGN16_ATTRIB; + + /** Read depth/stencil tiles? 
+ */ + boolean read_depth; + boolean read_stencil; + + /** Current tiles' status */ + ubyte cur_ctile_status, cur_ztile_status; + + /** Status of all tiles in framebuffer */ + ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + + /** Current fragment ops machine code, at 8-byte boundary */ + uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB; + /** Current fragment ops function */ + spu_fragment_ops_func fragment_ops; + + /** Current fragment program machine code, at 8-byte boundary */ + uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB; + /** Current fragment program function */ + spu_fragment_program_func fragment_program; + + /** Current texture sampler function */ + spu_sample_texture_2d_func sample_texture_2d[CELL_MAX_SAMPLERS]; + spu_sample_texture_2d_func min_sample_texture_2d[CELL_MAX_SAMPLERS]; + spu_sample_texture_2d_func mag_sample_texture_2d[CELL_MAX_SAMPLERS]; + + /** Fragment program constants */ + vector float constants[4 * CELL_MAX_CONSTANTS]; + +} ALIGN16_ATTRIB; + + +extern struct spu_global spu; + + + +/* DMA TAGS */ + +#define TAG_SURFACE_CLEAR 10 +#define TAG_VERTEX_BUFFER 11 +#define TAG_READ_TILE_COLOR 12 +#define TAG_READ_TILE_Z 13 +#define TAG_WRITE_TILE_COLOR 14 +#define TAG_WRITE_TILE_Z 15 +#define TAG_INDEX_BUFFER 16 +#define TAG_BATCH_BUFFER 17 +#define TAG_MISC 18 +#define TAG_DCACHE0 20 +#define TAG_DCACHE1 21 +#define TAG_DCACHE2 22 +#define TAG_DCACHE3 23 +#define TAG_FENCE 24 + + +/** Set the MFC tag mask to tagMask (a bitmask of 1 << TAG_x) and wait with mfc_read_tag_status_any(). */ static INLINE void +wait_on_mask(unsigned tagMask) +{ + mfc_write_tag_mask( tagMask ); + /* wait for completion of _any_ DMAs specified by tagMask */ + mfc_read_tag_status_any(); +} + + +/** Set the MFC tag mask to tagMask (a bitmask of 1 << TAG_x) and wait with mfc_read_tag_status_all(). */ static INLINE void +wait_on_mask_all(unsigned tagMask) +{ + mfc_write_tag_mask( tagMask ); + /* wait for completion of _all_ DMAs specified by tagMask */ + mfc_read_tag_status_all(); +} + + + + + +static INLINE void
+memset16(ushort *d, ushort value, uint count) +{ + /* store 'value' into each of the 'count' 16-bit elements starting at d */ + uint remaining; + for (remaining = count; remaining != 0; remaining--) { + *d++ = value; + } +} + + +static INLINE void +memset32(uint *d, uint value, uint count) +{ + /* store 'value' into each of the 'count' 32-bit elements starting at d */ + uint remaining; + for (remaining = count; remaining != 0; remaining--) { + *d++ = value; + } +} + + +#endif /* SPU_MAIN_H */ diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c new file mode 100644 index 0000000000..f8ffc70492 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -0,0 +1,632 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * + **************************************************************************/ + +/** + * \author Brian Paul + */ + + +#include <transpose_matrix4x4.h> +#include "pipe/p_format.h" +#include "spu_main.h" +#include "spu_colorpack.h" +#include "spu_per_fragment_op.h" + + +#define LINEAR_QUAD_LAYOUT 1 + + +/** Per-element minimum of a and b */ +static INLINE vector float +spu_min(vector float a, vector float b) +{ + /* a_gt_b = a > b ? ~0 : 0 */ + const vector unsigned int a_gt_b = spu_cmpgt(a, b); + /* take b where a is the larger value, a elsewhere */ + return spu_sel(a, b, a_gt_b); +} + + +/** Per-element maximum of a and b */ +static INLINE vector float +spu_max(vector float a, vector float b) +{ + /* a_gt_b = a > b ? ~0 : 0 */ + const vector unsigned int a_gt_b = spu_cmpgt(a, b); + /* take a where a is the larger value, b elsewhere */ + return spu_sel(b, a, a_gt_b); +} + + +/** + * Called by rasterizer for each quad after the shader has run. Do + * all the per-fragment operations including alpha test, z test, + * stencil test, blend, colormask and logicops. This is a + * fallback/debug function. In reality we'll use a generated function + * produced by the PPU. But this function is useful for + * debug/validation.
+ */ +void +spu_fallback_fragment_ops(uint x, uint y, + tile_t *colorTile, + tile_t *depthStencilTile, + vector float fragZ, + vector float fragR, + vector float fragG, + vector float fragB, + vector float fragA, + vector unsigned int mask, + uint facing) +{ + vector float frag_aos[4]; + unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */ + unsigned int fragc0, fragc1, fragc2, fragc3; /* fragment colors */ + + /* + * Do alpha test + */ + if (spu.depth_stencil_alpha.alpha.enabled) { + vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref); + vector unsigned int amask; + + switch (spu.depth_stencil_alpha.alpha.func) { + case PIPE_FUNC_LESS: + amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */ + break; + case PIPE_FUNC_GREATER: + amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */ + break; + case PIPE_FUNC_GEQUAL: + amask = spu_cmpgt(ref, fragA); + amask = spu_nor(amask, amask); + break; + case PIPE_FUNC_LEQUAL: + amask = spu_cmpgt(fragA, ref); + amask = spu_nor(amask, amask); + break; + case PIPE_FUNC_EQUAL: + amask = spu_cmpeq(ref, fragA); + break; + case PIPE_FUNC_NOTEQUAL: + amask = spu_cmpeq(ref, fragA); + amask = spu_nor(amask, amask); + break; + case PIPE_FUNC_ALWAYS: + amask = spu_splats(0xffffffffU); + break; + case PIPE_FUNC_NEVER: + amask = spu_splats( 0x0U); + break; + default: + ; + } + + mask = spu_and(mask, amask); + } + + + /* + * Z and/or stencil testing... 
+ */ + if (spu.depth_stencil_alpha.depth.enabled || + spu.depth_stencil_alpha.stencil[0].enabled) { + + /* get four Z/Stencil values from tile */ + vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU); + vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2]; + vector unsigned int ifbZ = spu_and(ifbZS, mask24); + vector unsigned int ifbS = spu_andc(ifbZS, mask24); + + if (spu.depth_stencil_alpha.stencil[0].enabled) { + /* do stencil test */ + ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM); + + } + else if (spu.depth_stencil_alpha.depth.enabled) { + /* do depth test */ + + ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM || + spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM); + + vector unsigned int ifragZ; + vector unsigned int zmask; + + /* convert four fragZ from float to uint */ + fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff)); + ifragZ = spu_convtu(fragZ, 0); + + /* do depth comparison, setting zmask with results */ + switch (spu.depth_stencil_alpha.depth.func) { + case PIPE_FUNC_LESS: + zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */ + break; + case PIPE_FUNC_GREATER: + zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifbZ > ifragZ) */ + break; + case PIPE_FUNC_GEQUAL: + zmask = spu_cmpgt(ifbZ, ifragZ); + zmask = spu_nor(zmask, zmask); + break; + case PIPE_FUNC_LEQUAL: + zmask = spu_cmpgt(ifragZ, ifbZ); + zmask = spu_nor(zmask, zmask); + break; + case PIPE_FUNC_EQUAL: + zmask = spu_cmpeq(ifbZ, ifragZ); + break; + case PIPE_FUNC_NOTEQUAL: + zmask = spu_cmpeq(ifbZ, ifragZ); + zmask = spu_nor(zmask, zmask); + break; + case PIPE_FUNC_ALWAYS: + zmask = spu_splats(0xffffffffU); + break; + case PIPE_FUNC_NEVER: + zmask = spu_splats( 0x0U); + break; + default: + ; + } + + mask = spu_and(mask, zmask); + + /* merge framebuffer Z and fragment Z according to the mask */ + ifbZ = spu_or(spu_and(ifragZ, mask), + spu_andc(ifbZ, mask)); + } + + if (spu_extract(spu_orx(mask), 0)) { + /* put new fragment Z/Stencil values back into 
Z/Stencil tile */ + depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS); + + spu.cur_ztile_status = TILE_STATUS_DIRTY; + } + } + + + /* + * If we'll need the current framebuffer/tile colors for blending + * or logicop or colormask, fetch them now. + */ + if (spu.blend.blend_enable || + spu.blend.logicop_enable || + spu.blend.colormask != 0xf) { + +#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */ + fbc0 = colorTile->ui[y][x*2+0]; + fbc1 = colorTile->ui[y][x*2+1]; + fbc2 = colorTile->ui[y][x*2+2]; + fbc3 = colorTile->ui[y][x*2+3]; +#else + fbc0 = colorTile->ui[y+0][x+0]; + fbc1 = colorTile->ui[y+0][x+1]; + fbc2 = colorTile->ui[y+1][x+0]; + fbc3 = colorTile->ui[y+1][x+1]; +#endif + } + + + /* + * Do blending + */ + if (spu.blend.blend_enable) { + /* blending terms, misc regs */ + vector float term1r, term1g, term1b, term1a; + vector float term2r, term2g, term2b, term2a; + vector float one, tmp; + + vector float fbRGBA[4]; /* current framebuffer colors */ + + /* convert framebuffer colors from packed int to vector float */ + { + vector float temp[4]; /* float colors in AOS form */ + switch (spu.fb.color_format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + temp[0] = spu_unpack_B8G8R8A8(fbc0); + temp[1] = spu_unpack_B8G8R8A8(fbc1); + temp[2] = spu_unpack_B8G8R8A8(fbc2); + temp[3] = spu_unpack_B8G8R8A8(fbc3); + break; + case PIPE_FORMAT_A8R8G8B8_UNORM: + temp[0] = spu_unpack_A8R8G8B8(fbc0); + temp[1] = spu_unpack_A8R8G8B8(fbc1); + temp[2] = spu_unpack_A8R8G8B8(fbc2); + temp[3] = spu_unpack_A8R8G8B8(fbc3); + break; + default: + ASSERT(0); + } + _transpose_matrix4x4(fbRGBA, temp); /* fbRGBA = transpose(temp) */ + } + + /* + * Compute Src RGB terms (fragment color * factor) + */ + switch (spu.blend.rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + term1r = fragR; + term1g = fragG; + term1b = fragB; + break; + case PIPE_BLENDFACTOR_ZERO: + term1r = + term1g = + term1b = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term1r = spu_mul(fragR, fragR); + term1g = 
spu_mul(fragG, fragG); + term1b = spu_mul(fragB, fragB); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term1r = spu_mul(fragR, fragA); + term1g = spu_mul(fragG, fragA); + term1b = spu_mul(fragB, fragA); + break; + case PIPE_BLENDFACTOR_DST_COLOR: + term1r = spu_mul(fragR, fbRGBA[0]); + term1g = spu_mul(fragG, fbRGBA[1]); + term1b = spu_mul(fragB, fbRGBA[1]); + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + term1r = spu_mul(fragR, fbRGBA[3]); + term1g = spu_mul(fragG, fbRGBA[3]); + term1b = spu_mul(fragB, fbRGBA[3]); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[0])); + term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[1])); + term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[2])); + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[3])); + term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[3])); + term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[3])); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Src Alpha term (fragment alpha * factor) + */ + switch (spu.blend.alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + term1a = fragA; + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term1a = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term1a = spu_mul(fragA, fragA); + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + term1a = spu_mul(fragA, fbRGBA[3]); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + term1a = spu_mul(fragR, spu_splats(spu.blend_color.color[3])); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest RGB terms (framebuffer color * factor) + */ + switch (spu.blend.rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + term2r = fbRGBA[0]; + term2g = fbRGBA[1]; + term2b = fbRGBA[2]; + break; + case PIPE_BLENDFACTOR_ZERO: + term2r = + term2g = + 
term2b = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term2r = spu_mul(fbRGBA[0], fragR); + term2g = spu_mul(fbRGBA[1], fragG); + term2b = spu_mul(fbRGBA[2], fragB); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term2r = spu_mul(fbRGBA[0], fragA); + term2g = spu_mul(fbRGBA[1], fragA); + term2b = spu_mul(fbRGBA[2], fragA); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + one = spu_splats(1.0f); + tmp = spu_sub(one, fragA); + term2r = spu_mul(fbRGBA[0], tmp); + term2g = spu_mul(fbRGBA[1], tmp); + term2b = spu_mul(fbRGBA[2], tmp); + break; + case PIPE_BLENDFACTOR_DST_COLOR: + term2r = spu_mul(fbRGBA[0], fbRGBA[0]); + term2g = spu_mul(fbRGBA[1], fbRGBA[1]); + term2b = spu_mul(fbRGBA[2], fbRGBA[2]); + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + term2r = spu_mul(fbRGBA[0], fbRGBA[3]); + term2g = spu_mul(fbRGBA[1], fbRGBA[3]); + term2b = spu_mul(fbRGBA[2], fbRGBA[3]); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[0])); + term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[1])); + term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[2])); + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[3])); + term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[3])); + term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[3])); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Compute Dest Alpha term (framebuffer alpha * factor) + */ + switch (spu.blend.alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + term2a = fbRGBA[3]; + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + term2a = spu_splats(0.0f); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + term2a = spu_mul(fbRGBA[3], fragA); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + one = spu_splats(1.0f); + tmp = spu_sub(one, fragA); + term2a = spu_mul(fbRGBA[3], tmp); + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case 
PIPE_BLENDFACTOR_DST_ALPHA: + term2a = spu_mul(fbRGBA[3], fbRGBA[3]); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + term2a = spu_mul(fbRGBA[3], spu_splats(spu.blend_color.color[3])); + break; + /* XXX more cases */ + default: + ASSERT(0); + } + + /* + * Combine Src/Dest RGB terms + */ + switch (spu.blend.rgb_func) { + case PIPE_BLEND_ADD: + fragR = spu_add(term1r, term2r); + fragG = spu_add(term1g, term2g); + fragB = spu_add(term1b, term2b); + break; + case PIPE_BLEND_SUBTRACT: + fragR = spu_sub(term1r, term2r); + fragG = spu_sub(term1g, term2g); + fragB = spu_sub(term1b, term2b); + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + fragR = spu_sub(term2r, term1r); + fragG = spu_sub(term2g, term1g); + fragB = spu_sub(term2b, term1b); + break; + case PIPE_BLEND_MIN: + fragR = spu_min(term1r, term2r); + fragG = spu_min(term1g, term2g); + fragB = spu_min(term1b, term2b); + break; + case PIPE_BLEND_MAX: + fragR = spu_max(term1r, term2r); + fragG = spu_max(term1g, term2g); + fragB = spu_max(term1b, term2b); + break; + default: + ASSERT(0); + } + + /* + * Combine Src/Dest A term + */ + switch (spu.blend.alpha_func) { + case PIPE_BLEND_ADD: + fragA = spu_add(term1a, term2a); + break; + case PIPE_BLEND_SUBTRACT: + fragA = spu_sub(term1a, term2a); + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + fragA = spu_sub(term2a, term1a); + break; + case PIPE_BLEND_MIN: + fragA = spu_min(term1a, term2a); + break; + case PIPE_BLEND_MAX: + fragA = spu_max(term1a, term2a); + break; + default: + ASSERT(0); + } + } + + + /* + * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA. 
+ */ +#if 0 + /* original code */ + { + vector float frag_soa[4]; + frag_soa[0] = fragR; + frag_soa[1] = fragG; + frag_soa[2] = fragB; + frag_soa[3] = fragA; + _transpose_matrix4x4(frag_aos, frag_soa); + } +#else + /* short-cut relying on function parameter layout: */ + _transpose_matrix4x4(frag_aos, &fragR); + (void) fragG; + (void) fragB; +#endif + + /* + * Pack fragment float colors into 32-bit RGBA words. + */ + switch (spu.fb.color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + fragc0 = spu_pack_A8R8G8B8(frag_aos[0]); + fragc1 = spu_pack_A8R8G8B8(frag_aos[1]); + fragc2 = spu_pack_A8R8G8B8(frag_aos[2]); + fragc3 = spu_pack_A8R8G8B8(frag_aos[3]); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + fragc0 = spu_pack_B8G8R8A8(frag_aos[0]); + fragc1 = spu_pack_B8G8R8A8(frag_aos[1]); + fragc2 = spu_pack_B8G8R8A8(frag_aos[2]); + fragc3 = spu_pack_B8G8R8A8(frag_aos[3]); + break; + default: + fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n"); + ASSERT(0); + } + + + /* + * Do color masking + */ + if (spu.blend.colormask != 0xf) { + uint cmask = 0x0; /* each byte corresponds to a color channel */ + + /* Form bitmask depending on color buffer format and colormask bits */ + switch (spu.fb.color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + if (spu.blend.colormask & PIPE_MASK_R) + cmask |= 0x00ff0000; /* red */ + if (spu.blend.colormask & PIPE_MASK_G) + cmask |= 0x0000ff00; /* green */ + if (spu.blend.colormask & PIPE_MASK_B) + cmask |= 0x000000ff; /* blue */ + if (spu.blend.colormask & PIPE_MASK_A) + cmask |= 0xff000000; /* alpha */ + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + if (spu.blend.colormask & PIPE_MASK_R) + cmask |= 0x0000ff00; /* red */ + if (spu.blend.colormask & PIPE_MASK_G) + cmask |= 0x00ff0000; /* green */ + if (spu.blend.colormask & PIPE_MASK_B) + cmask |= 0xff000000; /* blue */ + if (spu.blend.colormask & PIPE_MASK_A) + cmask |= 0x000000ff; /* alpha */ + break; + default: + ASSERT(0); + } + + /* + * Apply color mask to the 32-bit packed 
colors. + * if (cmask[i]) + * frag color[i] = frag color[i]; + * else + * frag color[i] = framebuffer color[i]; + */ + fragc0 = (fragc0 & cmask) | (fbc0 & ~cmask); + fragc1 = (fragc1 & cmask) | (fbc1 & ~cmask); + fragc2 = (fragc2 & cmask) | (fbc2 & ~cmask); + fragc3 = (fragc3 & cmask) | (fbc3 & ~cmask); + } + + + /* + * Do logic ops + */ + if (spu.blend.logicop_enable) { + /* XXX to do */ + /* apply logicop to 32-bit packed colors (fragcx and fbcx) */ + } + + + /* + * If mask is non-zero, mark tile as dirty. + */ + if (spu_extract(spu_orx(mask), 0)) { + spu.cur_ctile_status = TILE_STATUS_DIRTY; + } + else { + /* write no fragments */ + return; + } + + + /* + * Write new fragment/quad colors to the framebuffer/tile. + * Only write pixels where the corresponding mask word is set. + */ +#if LINEAR_QUAD_LAYOUT + /* + * Quad layout: + * +--+--+--+--+ + * |p0|p1|p2|p3|... + * +--+--+--+--+ + */ + if (spu_extract(mask, 0)) + colorTile->ui[y][x*2] = fragc0; + if (spu_extract(mask, 1)) + colorTile->ui[y][x*2+1] = fragc1; + if (spu_extract(mask, 2)) + colorTile->ui[y][x*2+2] = fragc2; + if (spu_extract(mask, 3)) + colorTile->ui[y][x*2+3] = fragc3; +#else + /* + * Quad layout: + * +--+--+ + * |p0|p1|... + * +--+--+ + * |p2|p3|... + * +--+--+ + */ + if (spu_extract(mask, 0)) + colorTile->ui[y+0][x+0] = fragc0; + if (spu_extract(mask, 1)) + colorTile->ui[y+0][x+1] = fragc1; + if (spu_extract(mask, 2)) + colorTile->ui[y+1][x+0] = fragc2; + if (spu_extract(mask, 3)) + colorTile->ui[y+1][x+1] = fragc3; +#endif +} diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h new file mode 100644 index 0000000000..a61689c83a --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SPU_PER_FRAGMENT_OP +#define SPU_PER_FRAGMENT_OP + + +extern void +spu_fallback_fragment_ops(uint x, uint y, + tile_t *colorTile, + tile_t *depthStencilTile, + vector float fragZ, + vector float fragRed, + vector float fragGreen, + vector float fragBlue, + vector float fragAlpha, + vector unsigned int mask, + uint facing); + + +#endif /* SPU_PER_FRAGMENT_OP */ diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c new file mode 100644 index 0000000000..5515bb55c9 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -0,0 +1,295 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <stdio.h> +#include <libmisc.h> +#include <spu_mfcio.h> + +#include "spu_main.h" +#include "spu_render.h" +#include "spu_tri.h" +#include "spu_tile.h" +#include "cell/common.h" +#include "util/u_memory.h" + + +/** + * Given a rendering command's bounding box (in pixels) compute the + * location of the corresponding screen tile bounding box. 
+ */ +static INLINE void +tile_bounding_box(const struct cell_command_render *render, + uint *txmin, uint *tymin, + uint *box_num_tiles, uint *box_width_tiles) +{ +#if 0 + /* Debug: full-window bounding box */ + uint txmax = spu.fb.width_tiles - 1; + uint tymax = spu.fb.height_tiles - 1; + *txmin = 0; + *tymin = 0; + *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + *box_width_tiles = spu.fb.width_tiles; + (void) render; + (void) txmax; + (void) tymax; +#else + uint txmax, tymax, box_height_tiles; + + *txmin = (uint) render->xmin / TILE_SIZE; + *tymin = (uint) render->ymin / TILE_SIZE; + txmax = (uint) render->xmax / TILE_SIZE; + tymax = (uint) render->ymax / TILE_SIZE; + if (txmax >= spu.fb.width_tiles) + txmax = spu.fb.width_tiles-1; + if (tymax >= spu.fb.height_tiles) + tymax = spu.fb.height_tiles-1; + *box_width_tiles = txmax - *txmin + 1; + box_height_tiles = tymax - *tymin + 1; + *box_num_tiles = *box_width_tiles * box_height_tiles; +#endif +#if 0 + printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu.init.id, + render->xmin, render->ymin, render->xmax, render->ymax); + printf("SPU %u: tiles: %u, %u .. 
%u, %u\n", + spu.init.id, *txmin, *tymin, txmax, tymax); + ASSERT(render->xmin <= render->xmax); + ASSERT(render->ymin <= render->ymax); +#endif +} + + +/** Check if the tile at (tx,ty) belongs to this SPU */ +static INLINE boolean +my_tile(uint tx, uint ty) +{ + return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id; +} + + +/** + * Start fetching non-clear color/Z tiles from main memory + */ +static INLINE void +get_cz_tiles(uint tx, uint ty) +{ + if (spu.read_depth) { + if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { + //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty); + get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); + spu.cur_ztile_status = TILE_STATUS_GETTING; + } + } + + if (spu.cur_ctile_status != TILE_STATUS_CLEAR) { + //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty); + get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0); + spu.cur_ctile_status = TILE_STATUS_GETTING; + } +} + + +/** + * Start putting dirty color/Z tiles back to main memory + */ +static INLINE void +put_cz_tiles(uint tx, uint ty) +{ + if (spu.cur_ztile_status == TILE_STATUS_DIRTY) { + /* tile was modified and needs to be written back */ + //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty); + put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1); + spu.cur_ztile_status = TILE_STATUS_DEFINED; + } + else if (spu.cur_ztile_status == TILE_STATUS_GETTING) { + /* tile was never used */ + spu.cur_ztile_status = TILE_STATUS_DEFINED; + //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty); + } + + if (spu.cur_ctile_status == TILE_STATUS_DIRTY) { + /* tile was modified and needs to be written back */ + //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty); + put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0); + spu.cur_ctile_status = TILE_STATUS_DEFINED; + } + else if (spu.cur_ctile_status == TILE_STATUS_GETTING) { + /* tile was never used */ + spu.cur_ctile_status = TILE_STATUS_DEFINED; + //printf("SPU %u: 
put getting C tile %u, %u\n", spu.init.id, tx, ty); + } +} + + +/** + * Wait for 'put' of color/z tiles to complete. + */ +static INLINE void +wait_put_cz_tiles(void) +{ + wait_on_mask(1 << TAG_WRITE_TILE_COLOR); + if (spu.read_depth) { + wait_on_mask(1 << TAG_WRITE_TILE_Z); + } +} + + +/** + * Render primitives + * \param pos_incr returns value indicating how may words to skip after + * this command in the batch buffer + */ +void +cmd_render(const struct cell_command_render *render, uint *pos_incr) +{ + /* we'll DMA into these buffers */ + ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB; + const uint vertex_size = render->vertex_size; /* in bytes */ + /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size; + uint index_bytes; + const ubyte *vertices; + const ushort *indexes; + uint i, j; + uint num_tiles; + + D_PRINTF(CELL_DEBUG_CMD, + "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n", + render->prim_type, + render->num_verts, + render->num_indexes, + render->inline_verts); + + ASSERT(sizeof(*render) % 4 == 0); + ASSERT(total_vertex_bytes % 16 == 0); + ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES); + ASSERT(render->num_indexes % 3 == 0); + + + /* indexes are right after the render command in the batch buffer */ + indexes = (const ushort *) (render + 1); + index_bytes = ROUNDUP8(render->num_indexes * 2); + *pos_incr = index_bytes / 8 + sizeof(*render) / 8; + + + if (render->inline_verts) { + /* Vertices are after indexes in batch buffer at next 16-byte addr */ + vertices = (const ubyte *) render + (*pos_incr * 8); + vertices = (const ubyte *) align_pointer((void *) vertices, 16); + ASSERT_ALIGN16(vertices); + *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8; + } + else { + /* Begin DMA fetch of vertex buffer */ + ubyte *src = spu.init.buffers[render->vertex_buf]; + ubyte *dest = vertex_data; + + /* skip vertex data we won't use */ +#if 01 + src += render->min_index * vertex_size; + dest += render->min_index * 
vertex_size; + total_vertex_bytes -= render->min_index * vertex_size; +#endif + ASSERT(total_vertex_bytes % 16 == 0); + ASSERT_ALIGN16(dest); + ASSERT_ALIGN16(src); + + mfc_get(dest, /* in vertex_data[] array */ + (unsigned int) src, /* src in main memory */ + total_vertex_bytes, /* size */ + TAG_VERTEX_BUFFER, + 0, /* tid */ + 0 /* rid */); + + vertices = vertex_data; + + wait_on_mask(1 << TAG_VERTEX_BUFFER); + } + + + /** + ** find tiles which intersect the prim bounding box + **/ + uint txmin, tymin, box_width_tiles, box_num_tiles; + tile_bounding_box(render, &txmin, &tymin, + &box_num_tiles, &box_width_tiles); + + + /* make sure any pending clears have completed */ + wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */ + + + num_tiles = 0; + + /** + ** loop over tiles, rendering tris + **/ + for (i = 0; i < box_num_tiles; i++) { + const uint tx = txmin + i % box_width_tiles; + const uint ty = tymin + i / box_width_tiles; + + ASSERT(tx < spu.fb.width_tiles); + ASSERT(ty < spu.fb.height_tiles); + + if (!my_tile(tx, ty)) + continue; + + num_tiles++; + + spu.cur_ctile_status = spu.ctile_status[ty][tx]; + spu.cur_ztile_status = spu.ztile_status[ty][tx]; + + get_cz_tiles(tx, ty); + + uint drawn = 0; + + /* loop over tris */ + for (j = 0; j < render->num_indexes; j += 3) { + const float *v0, *v1, *v2; + + v0 = (const float *) (vertices + indexes[j+0] * vertex_size); + v1 = (const float *) (vertices + indexes[j+1] * vertex_size); + v2 = (const float *) (vertices + indexes[j+2] * vertex_size); + + drawn += tri_draw(v0, v1, v2, tx, ty); + } + + //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); + + /* write color/z tiles back to main framebuffer, if dirtied */ + put_cz_tiles(tx, ty); + + wait_put_cz_tiles(); /* XXX seems unnecessary... 
*/ + + spu.ctile_status[ty][tx] = spu.cur_ctile_status; + spu.ztile_status[ty][tx] = spu.cur_ztile_status; + } + + D_PRINTF(CELL_DEBUG_CMD, + "RENDER done (%u tiles hit)\n", + num_tiles); +} diff --git a/src/gallium/drivers/cell/spu/spu_render.h b/src/gallium/drivers/cell/spu/spu_render.h new file mode 100644 index 0000000000..493434f087 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_render.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef SPU_RENDER_H +#define SPU_RENDER_H + +#include "cell/common.h" + +extern void +cmd_render(const struct cell_command_render *render, uint *pos_incr); + +#endif /* SPU_RENDER_H */ + diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c new file mode 100644 index 0000000000..69784c8978 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -0,0 +1,641 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include <math.h> + +#include "pipe/p_compiler.h" +#include "spu_main.h" +#include "spu_texture.h" +#include "spu_tile.h" +#include "spu_colorpack.h" +#include "spu_dcache.h" + + +/** + * Mark all tex cache entries as invalid. + */ +void +invalidate_tex_cache(void) +{ + uint lvl; + for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) { + uint unit = 0; + uint bytes = 4 * spu.texture[unit].level[lvl].width + * spu.texture[unit].level[lvl].height; + + if (spu.texture[unit].target == PIPE_TEXTURE_CUBE) + bytes *= 6; + else if (spu.texture[unit].target == PIPE_TEXTURE_3D) + bytes *= spu.texture[unit].level[lvl].depth; + + spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes); + } +} + + +/** + * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ... + * + * NOTE: in the typical case of bilinear filtering, the four texels + * are in a 2x2 group so we could get by with just two dcache fetches + * (two side-by-side texels per fetch). But when bilinear filtering + * wraps around a texture edge, we'll probably need code like we have + * now. + * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time, + * it's quite likely that the four pixels in a quad will need some of the + * same texels. So look into doing texture fetches for four pixels at + * a time. 
+ */ +static void +get_four_texels(const struct spu_texture_level *tlevel, uint face, + vec_int4 x, vec_int4 y, + vec_uint4 *texels) +{ + unsigned texture_ea = (uintptr_t) tlevel->start; + const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ + const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ + const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */ + const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */ + + const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row); + const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t)); + + qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); + tile_offset = si_mpy((qword) tile_offset, tile_size); + + qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x); + texel_offset = si_mpyui(texel_offset, 4); + + vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset); + + texture_ea = texture_ea + face * tlevel->bytes_per_image; + + spu_dcache_fetch_unaligned((qword *) & texels[0], + texture_ea + spu_extract(offset, 0), 4); + spu_dcache_fetch_unaligned((qword *) & texels[1], + texture_ea + spu_extract(offset, 1), 4); + spu_dcache_fetch_unaligned((qword *) & texels[2], + texture_ea + spu_extract(offset, 2), 4); + spu_dcache_fetch_unaligned((qword *) & texels[3], + texture_ea + spu_extract(offset, 3), 4); +} + + +/** clamp vec to [0, max] */ +static INLINE vector signed int +spu_clamp(vector signed int vec, vector signed int max) +{ + static const vector signed int zero = {0,0,0,0}; + vector unsigned int c; + c = spu_cmpgt(vec, zero); /* c = vec > zero ? ~0 : 0 */ + vec = spu_sel(zero, vec, c); + c = spu_cmpgt(vec, max); /* c = vec > max ? ~0 : 0 */ + vec = spu_sel(vec, max, c); + return vec; +} + + + +/** + * Do nearest texture sampling for four pixels. + * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). 
+ */
+void
+sample_texture_2d_nearest(vector float s, vector float t,
+                          uint unit, uint level, uint face,
+                          vector float colors[4])
+{
+   const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
+   /* scale texcoords by the per-level scale factors, then truncate to int */
+   vector float ss = spu_mul(s, tlevel->scale_s);
+   vector float tt = spu_mul(t, tlevel->scale_t);
+   vector signed int is = spu_convts(ss, 0);
+   vector signed int it = spu_convts(tt, 0);
+   vec_uint4 texels[4];
+
+   /* Both wrap steps are always applied; presumably mask_* / max_* are set
+    * up elsewhere so that only the active wrap mode has an effect -- TODO
+    * confirm against the code that fills in spu_texture_level.
+    */
+
+   /* PIPE_TEX_WRAP_REPEAT */
+   is = spu_and(is, tlevel->mask_s);
+   it = spu_and(it, tlevel->mask_t);
+
+   /* PIPE_TEX_WRAP_CLAMP */
+   is = spu_clamp(is, tlevel->max_s);
+   it = spu_clamp(it, tlevel->max_t);
+
+   get_four_texels(tlevel, face, is, it, texels);
+
+   /* convert four packed A8R8G8B8 pixels to float RRRR,GGGG,BBBB,AAAA */
+   spu_unpack_A8R8G8B8_transpose4(texels, colors);
+}
+
+
+/**
+ * Do bilinear texture sampling for four pixels.
+ * Weights are computed with float arithmetic; four texels (2x2 footprint)
+ * are fetched for each of the four pixels.
+ * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
+ */
+void
+sample_texture_2d_bilinear(vector float s, vector float t,
+                           uint unit, uint level, uint face,
+                           vector float colors[4])
+{
+   const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
+   /* note: despite its name this holds -0.5, the half-texel bias */
+   static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
+
+   vector float ss = spu_madd(s, tlevel->scale_s, half);
+   vector float tt = spu_madd(t, tlevel->scale_t, half);
+
+   vector signed int is0 = spu_convts(ss, 0);
+   vector signed int it0 = spu_convts(tt, 0);
+
+   /* is + 1, it + 1 */
+   vector signed int is1 = spu_add(is0, 1);
+   vector signed int it1 = spu_add(it0, 1);
+
+   /* PIPE_TEX_WRAP_REPEAT */
+   is0 = spu_and(is0, tlevel->mask_s);
+   it0 = spu_and(it0, tlevel->mask_t);
+   is1 = spu_and(is1, tlevel->mask_s);
+   it1 = spu_and(it1, tlevel->mask_t);
+
+   /* PIPE_TEX_WRAP_CLAMP */
+   is0 = spu_clamp(is0, tlevel->max_s);
+   it0 = spu_clamp(it0, tlevel->max_t);
+   is1 = spu_clamp(is1, tlevel->max_s);
+   it1 = spu_clamp(it1, tlevel->max_t);
+
+   /* get packed int texels */
+   vector unsigned int texels[16];
+   get_four_texels(tlevel, face, is0, it0, texels + 0);  /* upper-left */
+   get_four_texels(tlevel, face, is1, it0, texels + 4);  /* upper-right */
+   get_four_texels(tlevel, face, is0, it1, texels + 8);  /* lower-left */
+   get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */
+
+   /* convert packed int texels to float colors */
+   vector float ftexels[16];
+   spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0);
+   spu_unpack_A8R8G8B8_transpose4(texels + 4, ftexels + 4);
+   spu_unpack_A8R8G8B8_transpose4(texels + 8, ftexels + 8);
+   spu_unpack_A8R8G8B8_transpose4(texels + 12, ftexels + 12);
+
+   /* Compute weighting factors in [0,1]
+    * Multiply texcoord by 1024, AND with 1023, convert back to float.
+    */
+   vector float ss1024 = spu_mul(ss, spu_splats(1024.0f));
+   vector signed int iss1024 = spu_convts(ss1024, 0);
+   iss1024 = spu_and(iss1024, 1023);
+   vector float sWeights0 = spu_convtf(iss1024, 10);
+
+   vector float tt1024 = spu_mul(tt, spu_splats(1024.0f));
+   vector signed int itt1024 = spu_convts(tt1024, 0);
+   itt1024 = spu_and(itt1024, 1023);
+   vector float tWeights0 = spu_convtf(itt1024, 10);
+
+   /* 1 - sWeight and 1 - tWeight */
+   vector float sWeights1 = spu_sub(spu_splats(1.0f), sWeights0);
+   vector float tWeights1 = spu_sub(spu_splats(1.0f), tWeights0);
+
+   /* Each channel below is the weighted sum of the four footprint texels:
+    * ul*(1-s)(1-t) + ur*s(1-t) + ll*(1-s)t + lr*s*t
+    */
+
+   /* reds, for four pixels */
+   ftexels[ 0] = spu_mul(ftexels[ 0], spu_mul(sWeights1, tWeights1)); /*ul*/
+   ftexels[ 4] = spu_mul(ftexels[ 4], spu_mul(sWeights0, tWeights1)); /*ur*/
+   ftexels[ 8] = spu_mul(ftexels[ 8], spu_mul(sWeights1, tWeights0)); /*ll*/
+   ftexels[12] = spu_mul(ftexels[12], spu_mul(sWeights0, tWeights0)); /*lr*/
+   colors[0] = spu_add(spu_add(ftexels[0], ftexels[4]),
+                       spu_add(ftexels[8], ftexels[12]));
+
+   /* greens, for four pixels */
+   ftexels[ 1] = spu_mul(ftexels[ 1], spu_mul(sWeights1, tWeights1)); /*ul*/
+   ftexels[ 5] = spu_mul(ftexels[ 5], spu_mul(sWeights0, tWeights1)); /*ur*/
+   ftexels[ 9] = spu_mul(ftexels[ 9], spu_mul(sWeights1, tWeights0)); /*ll*/
+   ftexels[13] = spu_mul(ftexels[13], spu_mul(sWeights0, tWeights0)); /*lr*/
+   colors[1] = spu_add(spu_add(ftexels[1], ftexels[5]),
+                       spu_add(ftexels[9], ftexels[13]));
+
+   /* blues, for four pixels */
+   ftexels[ 2] = spu_mul(ftexels[ 2], spu_mul(sWeights1, tWeights1)); /*ul*/
+   ftexels[ 6] = spu_mul(ftexels[ 6], spu_mul(sWeights0, tWeights1)); /*ur*/
+   ftexels[10] = spu_mul(ftexels[10], spu_mul(sWeights1, tWeights0)); /*ll*/
+   ftexels[14] = spu_mul(ftexels[14], spu_mul(sWeights0, tWeights0)); /*lr*/
+   colors[2] = spu_add(spu_add(ftexels[2], ftexels[6]),
+                       spu_add(ftexels[10], ftexels[14]));
+
+   /* alphas, for four pixels */
+   ftexels[ 3] = spu_mul(ftexels[ 3], spu_mul(sWeights1, tWeights1)); /*ul*/
+   ftexels[ 7] = spu_mul(ftexels[ 7], spu_mul(sWeights0, tWeights1)); /*ur*/
+   ftexels[11] = spu_mul(ftexels[11], spu_mul(sWeights1, tWeights0)); /*ll*/
+   ftexels[15] = spu_mul(ftexels[15], spu_mul(sWeights0, tWeights0)); /*lr*/
+   colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]),
+                       spu_add(ftexels[11], ftexels[15]));
+}
+
+
+
+/**
+ * Transpose the 4x4 matrix of 32-bit words stored in mIn[0..3]; the four
+ * resulting rows are written to *mOut0 .. *mOut3.
+ * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h
+ */
+static INLINE void
+transpose(vector unsigned int *mOut0,
+          vector unsigned int *mOut1,
+          vector unsigned int *mOut2,
+          vector unsigned int *mOut3,
+          vector unsigned int *mIn)
+{
+   vector unsigned int abcd, efgh, ijkl, mnop;	/* input vectors */
+   vector unsigned int aeim, bfjn, cgko, dhlp;	/* output vectors */
+   vector unsigned int aibj, ckdl, emfn, gohp;	/* intermediate vectors */
+
+   /* shufflehi interleaves words 0,1 of the two operands; shufflelo
+    * interleaves words 2,3 (bytes 0x00-0x0f select from the first operand,
+    * 0x10-0x1f from the second).
+    */
+   vector unsigned char shufflehi = ((vector unsigned char) {
+					0x00, 0x01, 0x02, 0x03,
+					0x10, 0x11, 0x12, 0x13,
+					0x04, 0x05, 0x06, 0x07,
+					0x14, 0x15, 0x16, 0x17});
+   vector unsigned char shufflelo = ((vector unsigned char) {
+					0x08, 0x09, 0x0A, 0x0B,
+					0x18, 0x19, 0x1A, 0x1B,
+					0x0C, 0x0D, 0x0E, 0x0F,
+					0x1C, 0x1D, 0x1E, 0x1F});
+   abcd = *(mIn+0);
+   efgh = *(mIn+1);
+   ijkl = *(mIn+2);
+   mnop = *(mIn+3);
+
+   aibj = spu_shuffle(abcd, ijkl, shufflehi);
+   ckdl = spu_shuffle(abcd, ijkl, shufflelo);
+   emfn = spu_shuffle(efgh, mnop, shufflehi);
+   gohp = spu_shuffle(efgh, mnop, shufflelo);
+
+   aeim = spu_shuffle(aibj, emfn, shufflehi);
+   bfjn = spu_shuffle(aibj, emfn, shufflelo);
+   cgko = spu_shuffle(ckdl, gohp, shufflehi);
+   dhlp = spu_shuffle(ckdl, gohp, shufflelo);
+
+   *mOut0 = aeim;
+   *mOut1 = bfjn;
+   *mOut2 = cgko;
+   *mOut3 = dhlp;
+}
+
+
+/**
+ * Bilinear filtering, using int instead of float arithmetic for computing
+ * sample weights.
+ * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
+ */
+void
+sample_texture_2d_bilinear_int(vector float s, vector float t,
+                               uint unit, uint level, uint face,
+                               vector float colors[4])
+{
+   const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
+   /* note: despite its name this holds -0.5, the half-texel bias */
+   static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
+
+   /* Scale texcoords by size of texture, and add half pixel bias */
+   vector float ss = spu_madd(s, tlevel->scale_s, half);
+   vector float tt = spu_madd(t, tlevel->scale_t, half);
+
+   /* convert float coords to fixed-pt coords with 7 fraction bits */
+   vector signed int is = spu_convts(ss, 7);  /* XXX really need floor() here */
+   vector signed int it = spu_convts(tt, 7);  /* XXX really need floor() here */
+
+   /* compute integer texel weights in [0, 127]
+    * (note: weight0 + weight1 == 127, not 128)
+    */
+   vector signed int sWeights0 = spu_and(is, 127);
+   vector signed int tWeights0 = spu_and(it, 127);
+   vector signed int sWeights1 = spu_sub(127, sWeights0);
+   vector signed int tWeights1 = spu_sub(127, tWeights0);
+
+   /* texel coords: is0 = is / 128, it0 = it / 128 */
+   vector signed int is0 = spu_rlmask(is, -7);
+   vector signed int it0 = spu_rlmask(it, -7);
+
+   /* texel coords: is1 = is0 + 1, it1 = it0 + 1 */
+   vector signed int is1 = spu_add(is0, 1);
+   vector signed int it1 = spu_add(it0, 1);
+
+   /* PIPE_TEX_WRAP_REPEAT */
+   is0 = spu_and(is0, tlevel->mask_s);
+   it0 = spu_and(it0, tlevel->mask_t);
+   is1 = spu_and(is1, tlevel->mask_s);
+   it1 = spu_and(it1, tlevel->mask_t);
+
+   /* PIPE_TEX_WRAP_CLAMP */
+   is0 = spu_clamp(is0, tlevel->max_s);
+   it0 = spu_clamp(it0, tlevel->max_t);
+   is1 = spu_clamp(is1, tlevel->max_s);
+   it1 = spu_clamp(it1, tlevel->max_t);
+
+   /* get packed int texels */
+   vector unsigned int texels[16];
+   get_four_texels(tlevel, face, is0, it0, texels + 0);  /* upper-left */
+   get_four_texels(tlevel, face, is1, it0, texels + 4);  /* upper-right */
+   get_four_texels(tlevel, face, is0, it1, texels + 8);  /* lower-left */
+   get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */
+
+   /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints
+    * (shuffle selector 0x80 produces a zero byte, so each output word holds
+    * one 8-bit channel in its low byte)
+    */
+   {
+      static const unsigned char ZERO = 0x80;
+      int i;
+      for (i = 0; i < 16; i++) {
+         texels[i] = spu_shuffle(texels[i], texels[i],
+                                 ((vector unsigned char) {
+                                    ZERO, ZERO, ZERO, 1,
+                                    ZERO, ZERO, ZERO, 2,
+                                    ZERO, ZERO, ZERO, 3,
+                                    ZERO, ZERO, ZERO, 0}));
+      }
+   }
+
+   /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */
+   vector unsigned int texel0, texel1, texel2, texel3, texel4, texel5, texel6, texel7,
+      texel8, texel9, texel10, texel11, texel12, texel13, texel14, texel15;
+   transpose(&texel0, &texel1, &texel2, &texel3, texels + 0);
+   transpose(&texel4, &texel5, &texel6, &texel7, texels + 4);
+   transpose(&texel8, &texel9, &texel10, &texel11, texels + 8);
+   transpose(&texel12, &texel13, &texel14, &texel15, texels + 12);
+
+   /* compute weighted colors
+    * Each product is an 8-bit channel times two 7-bit weights, so the sum
+    * is scaled by 2^22 (8 + 7 + 7 bits); spu_convtf(x, 22) divides that
+    * scale back out when converting to float.
+    */
+   vector unsigned int c0, c1, c2, c3, cSum;
+
+   /* red */
+   c0 = (vector unsigned int) si_mpy((qword) texel0, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+   c1 = (vector unsigned int) si_mpy((qword) texel4, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+   c2 = (vector unsigned int) si_mpy((qword) texel8, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+   c3 = (vector unsigned int) si_mpy((qword) texel12, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+   cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+   colors[0] = spu_convtf(cSum, 22);
+
+   /* green */
+   c0 = (vector unsigned int) si_mpy((qword) texel1, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+   c1 = (vector unsigned int) si_mpy((qword) texel5, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+   c2 = (vector unsigned int) si_mpy((qword) texel9, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+   c3 = (vector unsigned int) si_mpy((qword) texel13, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+   cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+   colors[1] = spu_convtf(cSum, 22);
+
+   /* blue */
+   c0 = (vector unsigned int) si_mpy((qword) texel2, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+   c1 = (vector unsigned int) si_mpy((qword) texel6, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+   c2 = (vector unsigned int) si_mpy((qword) texel10, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+   c3 = (vector unsigned int) si_mpy((qword) texel14, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+   cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+   colors[2] = spu_convtf(cSum, 22);
+
+   /* alpha */
+   c0 = (vector unsigned int) si_mpy((qword) texel3, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
+   c1 = (vector unsigned int) si_mpy((qword) texel7, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
+   c2 = (vector unsigned int) si_mpy((qword) texel11, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
+   c3 = (vector unsigned int) si_mpy((qword) texel15, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
+   cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
+   colors[3] = spu_convtf(cSum, 22);
+}
+
+
+
+/**
+ * Compute level of detail factor from texcoords.
+ */
+/*
+ * The texcoord vectors hold one 2x2 quad: element 0 is the top-left pixel,
+ * element 1 its right neighbour and element 2 the pixel below it, so the
+ * differences against element 0 approximate the X and Y derivatives.
+ * Returns log2(rho); one value is used for the whole quad.
+ */
+static INLINE float
+compute_lambda_2d(uint unit, vector float s, vector float t)
+{
+   uint baseLevel = 0;
+   float width = spu.texture[unit].level[baseLevel].width;
+   /* was '.width': T derivatives must be scaled by the texture height */
+   float height = spu.texture[unit].level[baseLevel].height;
+   float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0));
+   float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0));
+   float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0));
+   float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0));
+#if 0
+   /* ideal value */
+   float x = dsdx * dsdx + dtdx * dtdx;
+   float y = dsdy * dsdy + dtdy * dtdy;
+   float rho = x > y ? x : y;
+   rho = sqrtf(rho);
+#else
+   /* approximation */
+   dsdx = fabsf(dsdx);
+   dsdy = fabsf(dsdy);
+   dtdx = fabsf(dtdx);
+   dtdy = fabsf(dtdy);
+   float rho = (dsdx + dsdy + dtdx + dtdy) * 0.5f;
+#endif
+   float lambda = logf(rho) * 1.442695f; /* compute logbase2(rho) */
+   return lambda;
+}
+
+
+/**
+ * Blend two sets of colors according to weight.
+ * Computes c0 = c0 + (c1 - c0) * weight for each of the four SOA channel
+ * vectors; the result is written back into c0.
+ */
+static void
+blend_colors(vector float c0[4], const vector float c1[4], float weight)
+{
+   vector float t = spu_splats(weight);
+   vector float dc0 = spu_sub(c1[0], c0[0]);
+   vector float dc1 = spu_sub(c1[1], c0[1]);
+   vector float dc2 = spu_sub(c1[2], c0[2]);
+   vector float dc3 = spu_sub(c1[3], c0[3]);
+   c0[0] = spu_madd(dc0, t, c0[0]);
+   c0[1] = spu_madd(dc1, t, c0[1]);
+   c0[2] = spu_madd(dc2, t, c0[2]);
+   c0[3] = spu_madd(dc3, t, c0[3]);
+}
+
+
+/**
+ * Texture sampling with level of detail selection and possibly mipmap
+ * interpolation.
+ * \param level_ignored  unused; the level is derived from the texcoords
+ * \param colors  returned colors in SOA format (rrrr, gggg, bbbb, aaaa)
+ */
+void
+sample_texture_2d_lod(vector float s, vector float t,
+                      uint unit, uint level_ignored, uint face,
+                      vector float colors[4])
+{
+   /*
+    * Note that we're computing a lambda/lod here that's used for all
+    * four pixels in the quad.
+    */
+   float lambda = compute_lambda_2d(unit, s, t);
+
+   (void) level_ignored;
+
+   /* apply lod bias */
+   lambda += spu.sampler[unit].lod_bias;
+
+   /* clamp */
+   if (lambda < spu.sampler[unit].min_lod)
+      lambda = spu.sampler[unit].min_lod;
+   else if (lambda > spu.sampler[unit].max_lod)
+      lambda = spu.sampler[unit].max_lod;
+
+   if (lambda <= 0.0f) {
+      /* magnify */
+      spu.mag_sample_texture_2d[unit](s, t, unit, 0, face, colors);
+   }
+   else {
+      /* minify */
+      /* NOTE(review): this tests the image filter; choosing between one
+       * and two mipmap levels looks like it should depend on the sampler's
+       * mip filter instead -- confirm against the sampler state definition.
+       */
+      if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) {
+         /* sample two mipmap levels and interpolate */
+         int level = (int) lambda;
+         if (level > (int) spu.texture[unit].max_level)
+            level = spu.texture[unit].max_level;
+         spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors);
+
+         /* sample second mipmap level, if there is one, and blend */
+         if (level < (int) spu.texture[unit].max_level) {
+            const float weight = lambda - (float) level;
+            vector float colors2[4];
+            spu.min_sample_texture_2d[unit](s, t, unit, level + 1, face, colors2);
+            blend_colors(colors, colors2, weight);
+         }
+      }
+      else {
+         /* sample one mipmap level */
+         int level = (int) (lambda + 0.5f);
+         if (level > (int) spu.texture[unit].max_level)
+            level = spu.texture[unit].max_level;
+         spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors);
+      }
+   }
+}
+
+
+/**
+ * Select the cube face addressed by direction (rx, ry, rz) and compute the
+ * 2D coordinates within that face.
+ * \param newS  returns the face S coordinate in [0, 1]
+ * \param newT  returns the face T coordinate in [0, 1]
+ * \return one of the PIPE_TEX_FACE_x values
+ *
+ * XXX need a SIMD version of this
+ */
+static unsigned
+choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
+{
+   /*
+      major axis
+      direction     target                             sc     tc    ma
+      ----------    -------------------------------    ---    ---   ---
+       +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
+       -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
+       +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
+       -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
+       +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
+       -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
+   */
+   const float arx = fabsf(rx);
+   const float ary = fabsf(ry);
+   const float arz = fabsf(rz);
+   unsigned face;
+   float sc, tc, ma;
+
+   /* Use >= so that ties between axes are resolved by the X, then Y
+    * branch.  With strict > a direction such as (1, 1, 0) fell through to
+    * the Z branch with ma == 0 and divided by zero below.
+    */
+   if (arx >= ary && arx >= arz) {
+      if (rx >= 0.0F) {
+         face = PIPE_TEX_FACE_POS_X;
+         sc = -rz;
+         tc = -ry;
+         ma = arx;
+      }
+      else {
+         face = PIPE_TEX_FACE_NEG_X;
+         sc = rz;
+         tc = -ry;
+         ma = arx;
+      }
+   }
+   else if (ary >= arx && ary >= arz) {
+      if (ry >= 0.0F) {
+         face = PIPE_TEX_FACE_POS_Y;
+         sc = rx;
+         tc = rz;
+         ma = ary;
+      }
+      else {
+         face = PIPE_TEX_FACE_NEG_Y;
+         sc = rx;
+         tc = -rz;
+         ma = ary;
+      }
+   }
+   else {
+      /* here arz is strictly the largest component, so ma > 0 */
+      if (rz > 0.0F) {
+         face = PIPE_TEX_FACE_POS_Z;
+         sc = rx;
+         tc = -ry;
+         ma = arz;
+      }
+      else {
+         face = PIPE_TEX_FACE_NEG_Z;
+         sc = -rx;
+         tc = -ry;
+         ma = arz;
+      }
+   }
+
+   if (ma == 0.0F) {
+      /* degenerate (0,0,0) direction: avoid 0/0, sample the face center */
+      *newS = 0.5F;
+      *newT = 0.5F;
+      return face;
+   }
+
+   *newS = (sc / ma + 1.0F) * 0.5F;
+   *newT = (tc / ma + 1.0F) * 0.5F;
+
+   return face;
+}
+
+
+
+/**
+ * Sample a cube map for four pixels.  (s, t, r) hold the direction vector
+ * components of each pixel.
+ * \param colors  returned colors in SOA format (rrrr, gggg, bbbb, aaaa)
+ */
+void
+sample_texture_cube(vector float s, vector float t, vector float r,
+                    uint unit, vector float colors[4])
+{
+   uint p, faces[4], level = 0;
+   float newS[4], newT[4];
+
+   /* Compute cube faces referenced by the four sets of texcoords.
+    * XXX we should SIMD-ize this.
+    */
+   for (p = 0; p < 4; p++) {
+      float rx = spu_extract(s, p);
+      float ry = spu_extract(t, p);
+      float rz = spu_extract(r, p);
+      faces[p] = choose_cube_face(rx, ry, rz, &newS[p], &newT[p]);
+   }
+
+   if (faces[0] == faces[1] &&
+       faces[0] == faces[2] &&
+       faces[0] == faces[3]) {
+      /* GOOD!  All four texcoords refer to the same cube face */
+      s = (vector float) {newS[0], newS[1], newS[2], newS[3]};
+      t = (vector float) {newT[0], newT[1], newT[2], newT[3]};
+      spu.sample_texture_2d[unit](s, t, unit, level, faces[0], colors);
+   }
+   else {
+      /* BAD!  The four texcoords refer to different faces */
+      for (p = 0; p < 4; p++) {
+         vector float c[4];
+
+         /* sample pixel p's face with its coordinate splatted across all
+          * four lanes, then keep only lane p of the result
+          */
+         spu.sample_texture_2d[unit](spu_splats(newS[p]), spu_splats(newT[p]),
+                                     unit, level, faces[p], c);
+
+         float red = spu_extract(c[0], p);
+         float green = spu_extract(c[1], p);
+         float blue = spu_extract(c[2], p);
+         float alpha = spu_extract(c[3], p);
+
+         colors[0] = spu_insert(red,   colors[0], p);
+         colors[1] = spu_insert(green, colors[1], p);
+         colors[2] = spu_insert(blue,  colors[2], p);
+         colors[3] = spu_insert(alpha, colors[3], p);
+      }
+   }
+}
diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h
new file mode 100644
index 0000000000..7b75b007b5
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_texture.h
@@ -0,0 +1,67 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * SPU-side texture sampling entry points.  The samplers take texcoords for
+ * four pixels at once and return colors in SOA format
+ * (colors[0..3] = rrrr, gggg, bbbb, aaaa).
+ */
+
+#ifndef SPU_TEXTURE_H
+#define SPU_TEXTURE_H
+
+
+#include "pipe/p_compiler.h"
+
+
+/* NOTE(review): presumably discards cached texel data; the definition is
+ * not next to the samplers -- confirm before relying on this.
+ */
+extern void
+invalidate_tex_cache(void);
+
+
+/** Nearest (point) sampling of one mipmap level / face. */
+extern void
+sample_texture_2d_nearest(vector float s, vector float t,
+                          uint unit, uint level, uint face,
+                          vector float colors[4]);
+
+
+/** Bilinear sampling of one mipmap level / face, float weights. */
+extern void
+sample_texture_2d_bilinear(vector float s, vector float t,
+                           uint unit, uint level, uint face,
+                           vector float colors[4]);
+
+/** Bilinear sampling of one mipmap level / face, integer weights. */
+extern void
+sample_texture_2d_bilinear_int(vector float s, vector float t,
+                               uint unit, uint level, uint face,
+                               vector float colors[4]);
+
+
+/** Sampling with level-of-detail selection; the 'level' argument is ignored. */
+extern void
+sample_texture_2d_lod(vector float s, vector float t,
+                      uint unit, uint level, uint face,
+                      vector float colors[4]);
+
+
+/** Cube map sampling; (s, t, r) are the direction vector components. */
+extern void
+sample_texture_cube(vector float s, vector float t, vector float r,
+                    uint unit, vector float colors[4]);
+
+
+#endif /* SPU_TEXTURE_H */
diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c
new file mode 100644
index 0000000000..6905015a48
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_tile.c
@@ -0,0 +1,126 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+
+#include "spu_tile.h"
+#include "spu_main.h"
+
+
+/**
+ * Start a DMA transfer that reads the tile at tile coordinates (tx, ty)
+ * from main memory into local store.
+ * \param tile  destination tile in local store (must be 16-byte aligned)
+ * \param tag   tag passed to mfc_get() for this transfer
+ * \param zBuf  non-zero to read the depth buffer, zero for the color buffer
+ */
+void
+get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf)
+{
+   const uint tile_index = ty * spu.fb.width_tiles + tx;
+   const uint tile_bytes = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4);
+   const ubyte *surface = zBuf ? spu.fb.depth_start : spu.fb.color_start;
+   const ubyte *tile_src = surface + tile_index * tile_bytes;
+
+   ASSERT(tx < spu.fb.width_tiles);
+   ASSERT(ty < spu.fb.height_tiles);
+   ASSERT_ALIGN16(tile);
+
+   mfc_get(tile->ui,                 /* dest in local memory */
+           (unsigned int) tile_src,  /* src in main memory */
+           tile_bytes,
+           tag,
+           0,   /* tid */
+           0    /* rid */);
+}
+
+
+/**
+ * Start a DMA transfer that writes a tile from local store to the tile at
+ * tile coordinates (tx, ty) in main memory.
+ * \param tile  source tile in local store (must be 16-byte aligned)
+ * \param tag   tag passed to mfc_put() for this transfer
+ * \param zBuf  non-zero to write the depth buffer, zero for the color buffer
+ */
+void
+put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf)
+{
+   const uint tile_index = ty * spu.fb.width_tiles + tx;
+   const uint tile_bytes = TILE_SIZE * TILE_SIZE * (zBuf ? spu.fb.zsize : 4);
+   ubyte *surface = zBuf ? spu.fb.depth_start : spu.fb.color_start;
+   ubyte *tile_dst = surface + tile_index * tile_bytes;
+
+   ASSERT(tx < spu.fb.width_tiles);
+   ASSERT(ty < spu.fb.height_tiles);
+   ASSERT_ALIGN16(tile);
+
+   mfc_put((void *) tile->ui,        /* src in local memory */
+           (unsigned int) tile_dst,  /* dst in main memory */
+           tile_bytes,
+           tag,
+           0,   /* tid */
+           0    /* rid */);
+}
+
+
+/**
+ * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled
+ * tiles back to the main framebuffer.
+ */
+/*
+ * \param surfaceIndex  0 selects the color buffer, any other value the
+ *                      depth buffer.
+ * Each SPU handles the tiles i = id, id + num_spus, ...; the puts are
+ * issued with TAG_SURFACE_CLEAR and are not waited on here.
+ */
+void
+really_clear_tiles(uint surfaceIndex)
+{
+   const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
+   uint i;
+
+   if (surfaceIndex == 0) {
+      clear_c_tile(&spu.ctile);
+
+      for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
+         uint tx = i % spu.fb.width_tiles;
+         uint ty = i / spu.fb.width_tiles;
+         if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) {
+            put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
+         }
+      }
+   }
+   else {
+      clear_z_tile(&spu.ztile);
+
+      for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
+         uint tx = i % spu.fb.width_tiles;
+         uint ty = i / spu.fb.width_tiles;
+         if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR) {
+            /* write the Z tile that was just cleared (was &spu.ctile) */
+            put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
+         }
+      }
+   }
+
+#if 0
+   wait_on_mask(1 << TAG_SURFACE_CLEAR);
+#endif
+}
diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h
new file mode 100644
index 0000000000..7bfb52be8f
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_tile.h
@@ -0,0 +1,75 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef SPU_TILE_H
+#define SPU_TILE_H
+
+
+#include <libmisc.h>
+#include <spu_mfcio.h>
+#include "spu_main.h"
+#include "cell/common.h"
+
+
+
+/** Read a color or Z tile from main memory into local store. */
+extern void
+get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf);
+
+/** Write a color or Z tile from local store to main memory. */
+extern void
+put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf);
+
+/** Write solid-filled tiles for every tile still marked as cleared. */
+extern void
+really_clear_tiles(uint surfaceIndex);
+
+
+/** Fill a color tile with the framebuffer's color clear value. */
+static INLINE void
+clear_c_tile(tile_t *ctile)
+{
+   memset32((uint*) ctile->ui,
+            spu.fb.color_clear_value,
+            TILE_SIZE * TILE_SIZE);
+}
+
+
+/**
+ * Fill a Z tile with the framebuffer's depth clear value, using 16-bit
+ * stores when the Z size is 2 bytes and 32-bit stores otherwise.
+ */
+static INLINE void
+clear_z_tile(tile_t *ztile)
+{
+   if (spu.fb.zsize != 2) {
+      /* 32-bit Z; a zero size means there is no depth buffer to clear */
+      ASSERT(spu.fb.zsize != 0);
+      memset32((uint*) ztile->ui,
+               spu.fb.depth_clear_value,
+               TILE_SIZE * TILE_SIZE);
+      return;
+   }
+
+   /* 16-bit Z */
+   memset16((ushort*) ztile->us,
+            spu.fb.depth_clear_value,
+            TILE_SIZE * TILE_SIZE);
+}
+
+
+#endif /* SPU_TILE_H */
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
new file mode 100644
index 0000000000..4caf7d6b61
--- /dev/null
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -0,0 +1,798 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Triangle rendering within a tile. 
+ */
+
+#include <transpose_matrix4x4.h>
+#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+#include "util/u_math.h"
+#include "spu_colorpack.h"
+#include "spu_main.h"
+#include "spu_texture.h"
+#include "spu_tile.h"
+#include "spu_tri.h"
+
+
+/** Masks are uint[4] vectors with each element being 0 or 0xffffffff */
+typedef vector unsigned int mask_t;
+
+
+
+/**
+ * Simplified types taken from other parts of Gallium
+ */
+struct vertex_header {
+   vector float data[1];
+};
+
+
+
+/* XXX fix this */
+#undef CEILF
+#define CEILF(X) ((float) (int) ((X) + 0.99999))
+
+
+/* Index of each pixel within a 2x2 quad, and the matching bit masks */
+#define QUAD_TOP_LEFT     0
+#define QUAD_TOP_RIGHT    1
+#define QUAD_BOTTOM_LEFT  2
+#define QUAD_BOTTOM_RIGHT 3
+#define MASK_TOP_LEFT     (1 << QUAD_TOP_LEFT)
+#define MASK_TOP_RIGHT    (1 << QUAD_TOP_RIGHT)
+#define MASK_BOTTOM_LEFT  (1 << QUAD_BOTTOM_LEFT)
+#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT)
+#define MASK_ALL          0xf
+
+
+#define DEBUG_VERTS 0
+
+/**
+ * Triangle edge info
+ */
+struct edge {
+   float dx;	/**< X(v1) - X(v0), used only during setup */
+   float dy;	/**< Y(v1) - Y(v0), used only during setup */
+   float dxdy;	/**< dx/dy */
+   float sx, sy;	/**< first sample point coord */
+   int lines;	/**< number of lines on this edge */
+};
+
+
+/** Plane equation for one attribute: value(x, y) = a0 + x * dadx + y * dady */
+struct interp_coef
+{
+   vector float a0;
+   vector float dadx;
+   vector float dady;
+};
+
+
+/**
+ * Triangle setup info (derived from draw_stage).
+ * Also used for line drawing (taking some liberties).
+ */
+struct setup_stage {
+
+   /* Vertices are just an array of floats making up each attribute in
+    * turn.  Currently fixed at 4 floats, but should change in time.
+    * Codegen will help cope with this.
+    */
+   const struct vertex_header *vmax;
+   const struct vertex_header *vmid;
+   const struct vertex_header *vmin;
+   const struct vertex_header *vprovoke;
+
+   struct edge ebot;
+   struct edge etop;
+   struct edge emaj;
+
+   float oneOverArea;  /* XXX maybe make into vector? */
+
+   uint facing;
+
+   uint tx, ty;  /**< position of current tile (x, y) */
+
+   int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy;
+
+   struct interp_coef coef[PIPE_MAX_SHADER_INPUTS];
+
+   struct {
+      int left[2];   /**< [0] = row0, [1] = row1 */
+      int right[2];
+      int y;
+      unsigned y_flags;
+      unsigned mask;     /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */
+   } span;
+};
+
+
+static struct setup_stage setup;
+
+
+/**
+ * Evaluate attribute coefficients (plane equations) to compute
+ * attribute values for the four fragments in a quad.
+ * Eg: four colors will be computed (in AoS format).
+ * NOTE(review): for INTERP_POS and INTERP_NONE nothing is written to
+ * 'result', so callers must not rely on its contents in those modes.
+ */
+static INLINE void
+eval_coeff(uint slot, float x, float y, vector float w, vector float result[4])
+{
+   switch (spu.vertex_info.attrib[slot].interp_mode) {
+   case INTERP_CONSTANT:
+      result[QUAD_TOP_LEFT] =
+      result[QUAD_TOP_RIGHT] =
+      result[QUAD_BOTTOM_LEFT] =
+      result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0;
+      break;
+   case INTERP_LINEAR:
+      {
+         vector float dadx = setup.coef[slot].dadx;
+         vector float dady = setup.coef[slot].dady;
+         vector float topLeft =
+            spu_add(setup.coef[slot].a0,
+                    spu_add(spu_mul(spu_splats(x), dadx),
+                            spu_mul(spu_splats(y), dady)));
+
+         result[QUAD_TOP_LEFT] = topLeft;
+         result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx);
+         result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady);
+         result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady);
+      }
+      break;
+   case INTERP_PERSPECTIVE:
+      {
+         vector float dadx = setup.coef[slot].dadx;
+         vector float dady = setup.coef[slot].dady;
+         vector float topLeft =
+            spu_add(setup.coef[slot].a0,
+                    spu_add(spu_mul(spu_splats(x), dadx),
+                            spu_mul(spu_splats(y), dady)));
+
+         vector float wInv = spu_re(w);  /* 1.0 / w */
+
+         result[QUAD_TOP_LEFT] = spu_mul(topLeft, wInv);
+         result[QUAD_TOP_RIGHT] = spu_mul(spu_add(topLeft, dadx), wInv);
+         result[QUAD_BOTTOM_LEFT] = spu_mul(spu_add(topLeft, dady), wInv);
+         result[QUAD_BOTTOM_RIGHT] = spu_mul(spu_add(spu_add(topLeft, dadx), dady), wInv);
+      }
+      break;
+   case INTERP_POS:
+   case INTERP_NONE:
+      break;
+   default:
+      ASSERT(0);
+   }
+}
+
+
+/**
+ * As above, but return 4 vectors in SOA format.
+ * XXX this will all be re-written someday.
+ */
+static INLINE void
+eval_coeff_soa(uint slot, float x, float y, vector float w, vector float result[4])
+{
+   eval_coeff(slot, x, y, w, result);
+   _transpose_matrix4x4(result, result);
+}
+
+
+/** Evaluate coefficients to get Z for four pixels in a quad */
+static INLINE vector float
+eval_z(float x, float y)
+{
+   const uint slot = 0;
+   const float dzdx = spu_extract(setup.coef[slot].dadx, 2);
+   const float dzdy = spu_extract(setup.coef[slot].dady, 2);
+   const float topLeft = spu_extract(setup.coef[slot].a0, 2) + x * dzdx + y * dzdy;
+   const vector float topLeftv = spu_splats(topLeft);
+   /* per-pixel offsets in quad order: TL, TR, BL, BR */
+   const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy };
+   return spu_add(topLeftv, derivs);
+}
+
+
+/** Evaluate coefficients to get W for four pixels in a quad */
+static INLINE vector float
+eval_w(float x, float y)
+{
+   const uint slot = 0;
+   const float dwdx = spu_extract(setup.coef[slot].dadx, 3);
+   const float dwdy = spu_extract(setup.coef[slot].dady, 3);
+   const float topLeft = spu_extract(setup.coef[slot].a0, 3) + x * dwdx + y * dwdy;
+   const vector float topLeftv = spu_splats(topLeft);
+   /* per-pixel offsets in quad order: TL, TR, BL, BR */
+   const vector float derivs = (vector float) { 0.0, dwdx, dwdy, dwdx + dwdy };
+   return spu_add(topLeftv, derivs);
+}
+
+
+/**
+ * Emit a quad (pass to next stage).  No clipping is done.
+ * Note: about 1/5 to 1/7 of the time, mask is zero and this function
+ * should be skipped.  But adding the test for that slows things down
+ * overall.
+ * \param x, y  window coordinates of the quad's top-left pixel
+ * \param mask  per-pixel coverage, each element 0 or 0xffffffff
+ */
+static INLINE void
+emit_quad( int x, int y, mask_t mask)
+{
+   /* If any bits in mask are set... */
+   if (spu_extract(spu_orx(mask), 0)) {
+      /* position of the quad relative to the current tile's origin */
+      const int ix = x - setup.cliprect_minx;
+      const int iy = y - setup.cliprect_miny;
+
+      spu.cur_ctile_status = TILE_STATUS_DIRTY;
+      spu.cur_ztile_status = TILE_STATUS_DIRTY;
+
+      {
+         /*
+          * Run fragment shader, execute per-fragment ops, update fb/tile.
+          */
+         vector float inputs[4*4], outputs[2*4];
+         vector float fragZ = eval_z((float) x, (float) y);
+         vector float fragW = eval_w((float) x, (float) y);
+         vector unsigned int kill_mask;
+
+         /* setup inputs */
+         /* NOTE(review): 'inputs' has room for four attributes and slot
+          * i+1 is read for every i < num_attribs; this presumably relies
+          * on num_attribs being small enough -- confirm the bound.
+          */
+#if 0
+         eval_coeff_soa(1, (float) x, (float) y, fragW, inputs);
+#else
+         uint i;
+         for (i = 0; i < spu.vertex_info.num_attribs; i++) {
+            eval_coeff_soa(i+1, (float) x, (float) y, fragW, inputs + i * 4);
+         }
+#endif
+         ASSERT(spu.fragment_program);
+         ASSERT(spu.fragment_ops);
+
+         /* Execute the current fragment program */
+         kill_mask = spu.fragment_program(inputs, outputs, spu.constants);
+
+         /* drop the pixels the program killed */
+         mask = spu_andc(mask, kill_mask);
+
+         /* Execute per-fragment/quad operations, including:
+          * alpha test, z test, stencil test, blend and framebuffer writing.
+          */
+         spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile,
+                          fragZ,
+                          outputs[0*4+0],
+                          outputs[0*4+1],
+                          outputs[0*4+2],
+                          outputs[0*4+3],
+                          mask,
+                          setup.facing);
+      }
+   }
+}
+
+
+/**
+ * Given an X or Y coordinate, return the block/quad coordinate that it
+ * belongs to.
+ */
+static INLINE int
+block(int x)
+{
+   return x & ~1;
+}
+
+
+/**
+ * Compute mask which indicates which pixels in the 2x2 quad are actually inside
+ * the triangle's bounds.
+ * The mask is a uint4 vector and each element will be 0 or 0xffffffff.
+ * \param x  X coordinate of the quad's left column
+ */
+static INLINE mask_t
+calculate_mask(int x)
+{
+   /* This is a little tricky.
+    * Use & instead of && to avoid branches.
+    * Use negation to convert true/false to ~0/0 values.
+    */
+   /* start from a defined value: spu_insert() reads its vector operand */
+   mask_t mask = {0, 0, 0, 0};
+   mask = spu_insert(-((x   >= setup.span.left[0]) & (x   < setup.span.right[0])), mask, 0);
+   mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1);
+   mask = spu_insert(-((x   >= setup.span.left[1]) & (x   < setup.span.right[1])), mask, 2);
+   mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3);
+   return mask;
+}
+
+
+/**
+ * Render a horizontal span of quads
+ * Uses and then resets the accumulated state in setup.span.
+ */
+static void
+flush_spans(void)
+{
+   int minleft, maxright;
+   int x;
+
+   switch (setup.span.y_flags) {
+   case 0x3:
+      /* both odd and even lines written (both quad rows) */
+      minleft = MIN2(setup.span.left[0], setup.span.left[1]);
+      maxright = MAX2(setup.span.right[0], setup.span.right[1]);
+      break;
+
+   case 0x1:
+      /* only even line written (quad top row) */
+      minleft = setup.span.left[0];
+      maxright = setup.span.right[0];
+      break;
+
+   case 0x2:
+      /* only odd line written (quad bottom row) */
+      minleft = setup.span.left[1];
+      maxright = setup.span.right[1];
+      break;
+
+   default:
+      return;
+   }
+
+   /* OK, we're very likely to need the tile data now.
+    * clear or finish waiting if needed.
+    */
+   if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
+      /* wait for mfc_get() to complete */
+      //printf("SPU: %u: waiting for ctile\n", spu.init.id);
+      wait_on_mask(1 << TAG_READ_TILE_COLOR);
+      spu.cur_ctile_status = TILE_STATUS_CLEAN;
+   }
+   else if (spu.cur_ctile_status == TILE_STATUS_CLEAR) {
+      //printf("SPU %u: clearing C tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
+      clear_c_tile(&spu.ctile);
+      spu.cur_ctile_status = TILE_STATUS_DIRTY;
+   }
+   ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
+
+   if (spu.read_depth) {
+      if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
+         /* wait for mfc_get() to complete */
+         //printf("SPU: %u: waiting for ztile\n", spu.init.id);
+         wait_on_mask(1 << TAG_READ_TILE_Z);
+         spu.cur_ztile_status = TILE_STATUS_CLEAN;
+      }
+      else if (spu.cur_ztile_status == TILE_STATUS_CLEAR) {
+         //printf("SPU %u: clearing Z tile %u, %u\n", spu.init.id, setup.tx, setup.ty);
+         clear_z_tile(&spu.ztile);
+         spu.cur_ztile_status = TILE_STATUS_DIRTY;
+      }
+      ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED);
+   }
+
+   /* XXX this loop could be moved into the above switch cases and
+    * calculate_mask() could be simplified a bit...
+    */
+   for (x = block(minleft); x <= block(maxright); x += 2) {
+      emit_quad( x, setup.span.y, calculate_mask( x ));
+   }
+
+   /* reset the span state for the next pair of rows */
+   setup.span.y = 0;
+   setup.span.y_flags = 0;
+   setup.span.right[0] = 0;
+   setup.span.right[1] = 0;
+}
+
+
+#if DEBUG_VERTS
+/** Debug helper: print all attributes of a vertex to stderr. */
+static void
+print_vertex(const struct vertex_header *v)
+{
+   uint i;
+   fprintf(stderr, " Vertex: (%p)\n", v);
+   for (i = 0; i < spu.vertex_info.num_attribs; i++) {
+      fprintf(stderr, " %d: %f %f %f %f\n", i,
+              spu_extract(v->data[i], 0),
+              spu_extract(v->data[i], 1),
+              spu_extract(v->data[i], 2),
+              spu_extract(v->data[i], 3));
+   }
+}
+#endif
+
+
+/**
+ * Sort vertices from top to bottom.
+ * Compute area and determine front vs. back facing.
+ * Do coarse clip test against tile bounds + * \return FALSE if tri is totally outside tile, TRUE otherwise + */ +static boolean +setup_sort_vertices(const struct vertex_header *v0, + const struct vertex_header *v1, + const struct vertex_header *v2) +{ + float area, sign; + +#if DEBUG_VERTS + if (spu.init.id==0) { + fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id); + print_vertex(v0); + print_vertex(v1); + print_vertex(v2); + } +#endif + + /* determine bottom to top order of vertices */ + { + float y0 = spu_extract(v0->data[0], 1); + float y1 = spu_extract(v1->data[0], 1); + float y2 = spu_extract(v2->data[0], 1); + if (y0 <= y1) { + if (y1 <= y2) { + /* y0<=y1<=y2 */ + setup.vmin = v0; + setup.vmid = v1; + setup.vmax = v2; + sign = -1.0f; + } + else if (y2 <= y0) { + /* y2<=y0<=y1 */ + setup.vmin = v2; + setup.vmid = v0; + setup.vmax = v1; + sign = -1.0f; + } + else { + /* y0<=y2<=y1 */ + setup.vmin = v0; + setup.vmid = v2; + setup.vmax = v1; + sign = 1.0f; + } + } + else { + if (y0 <= y2) { + /* y1<=y0<=y2 */ + setup.vmin = v1; + setup.vmid = v0; + setup.vmax = v2; + sign = 1.0f; + } + else if (y2 <= y1) { + /* y2<=y1<=y0 */ + setup.vmin = v2; + setup.vmid = v1; + setup.vmax = v0; + sign = 1.0f; + } + else { + /* y1<=y2<=y0 */ + setup.vmin = v1; + setup.vmid = v2; + setup.vmax = v0; + sign = -1.0f; + } + } + } + + /* Check if triangle is completely outside the tile bounds */ + if (spu_extract(setup.vmin->data[0], 1) > setup.cliprect_maxy) + return FALSE; + if (spu_extract(setup.vmax->data[0], 1) < setup.cliprect_miny) + return FALSE; + if (spu_extract(setup.vmin->data[0], 0) < setup.cliprect_minx && + spu_extract(setup.vmid->data[0], 0) < setup.cliprect_minx && + spu_extract(setup.vmax->data[0], 0) < setup.cliprect_minx) + return FALSE; + if (spu_extract(setup.vmin->data[0], 0) > setup.cliprect_maxx && + spu_extract(setup.vmid->data[0], 0) > setup.cliprect_maxx && + spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx) + return FALSE; + + setup.ebot.dx = 
spu_extract(setup.vmid->data[0], 0) - spu_extract(setup.vmin->data[0], 0); + setup.ebot.dy = spu_extract(setup.vmid->data[0], 1) - spu_extract(setup.vmin->data[0], 1); + setup.emaj.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmin->data[0], 0); + setup.emaj.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmin->data[0], 1); + setup.etop.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmid->data[0], 0); + setup.etop.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmid->data[0], 1); + + /* + * Compute triangle's area. Use 1/area to compute partial + * derivatives of attributes later. + */ + area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy; + + setup.oneOverArea = 1.0f / area; + + /* The product of area * sign indicates front/back orientation (0/1) */ + setup.facing = (area * sign > 0.0f) + ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW); + + setup.vprovoke = v2; + + return TRUE; +} + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + * The value value comes from vertex->data[slot]. + * The result will be put into setup.coef[slot].a0. + * \param slot which attribute slot + */ +static INLINE void +const_coeff4(uint slot) +{ + setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0}; + setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0}; + setup.coef[slot].a0 = setup.vprovoke->data[slot]; +} + + +/** + * As above, but interp setup all four vector components. 
+ */ +static INLINE void +tri_linear_coeff4(uint slot) +{ + const vector float vmin_d = setup.vmin->data[slot]; + const vector float vmid_d = setup.vmid->data[slot]; + const vector float vmax_d = setup.vmax->data[slot]; + const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); + const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); + + vector float botda = vmid_d - vmin_d; + vector float majda = vmax_d - vmin_d; + + vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), + spu_mul(botda, spu_splats(setup.emaj.dy))); + vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), + spu_mul(majda, spu_splats(setup.ebot.dx))); + + setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea)); + setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea)); + + vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx); + vector float tempy = spu_mul(setup.coef[slot].dady, yyyy); + + setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy)); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. 
+ */ +static void +tri_persp_coeff4(uint slot) +{ + const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); + const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); + + const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3)); + const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3)); + const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3)); + + vector float vmin_d = setup.vmin->data[slot]; + vector float vmid_d = setup.vmid->data[slot]; + vector float vmax_d = setup.vmax->data[slot]; + + vmin_d = spu_mul(vmin_d, vmin_w); + vmid_d = spu_mul(vmid_d, vmid_w); + vmax_d = spu_mul(vmax_d, vmax_w); + + vector float botda = vmid_d - vmin_d; + vector float majda = vmax_d - vmin_d; + + vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), + spu_mul(botda, spu_splats(setup.emaj.dy))); + vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), + spu_mul(majda, spu_splats(setup.ebot.dx))); + + setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea)); + setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea)); + + vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx); + vector float tempy = spu_mul(setup.coef[slot].dady, yyyy); + + setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy)); +} + + + +/** + * Compute the setup.coef[] array dadx, dady, a0 values. + * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized. 
+ */ +static void +setup_tri_coefficients(void) +{ + uint i; + + for (i = 0; i < spu.vertex_info.num_attribs; i++) { + switch (spu.vertex_info.attrib[i].interp_mode) { + case INTERP_NONE: + break; + case INTERP_CONSTANT: + const_coeff4(i); + break; + case INTERP_POS: + /* fall-through */ + case INTERP_LINEAR: + tri_linear_coeff4(i); + break; + case INTERP_PERSPECTIVE: + tri_persp_coeff4(i); + break; + default: + ASSERT(0); + } + } +} + + +static void +setup_tri_edges(void) +{ + float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f; + float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f; + + float vmin_y = spu_extract(setup.vmin->data[0], 1) - 0.5f; + float vmid_y = spu_extract(setup.vmid->data[0], 1) - 0.5f; + float vmax_y = spu_extract(setup.vmax->data[0], 1) - 0.5f; + + setup.emaj.sy = CEILF(vmin_y); + setup.emaj.lines = (int) CEILF(vmax_y - setup.emaj.sy); + setup.emaj.dxdy = setup.emaj.dx / setup.emaj.dy; + setup.emaj.sx = vmin_x + (setup.emaj.sy - vmin_y) * setup.emaj.dxdy; + + setup.etop.sy = CEILF(vmid_y); + setup.etop.lines = (int) CEILF(vmax_y - setup.etop.sy); + setup.etop.dxdy = setup.etop.dx / setup.etop.dy; + setup.etop.sx = vmid_x + (setup.etop.sy - vmid_y) * setup.etop.dxdy; + + setup.ebot.sy = CEILF(vmin_y); + setup.ebot.lines = (int) CEILF(vmid_y - setup.ebot.sy); + setup.ebot.dxdy = setup.ebot.dx / setup.ebot.dy; + setup.ebot.sx = vmin_x + (setup.ebot.sy - vmin_y) * setup.ebot.dxdy; +} + + +/** + * Render the upper or lower half of a triangle. + * Scissoring/cliprect is applied here too. 
+ */ +static void +subtriangle(struct edge *eleft, struct edge *eright, unsigned lines) +{ + const int minx = setup.cliprect_minx; + const int maxx = setup.cliprect_maxx; + const int miny = setup.cliprect_miny; + const int maxy = setup.cliprect_maxy; + int y, start_y, finish_y; + int sy = (int)eleft->sy; + + ASSERT((int)eleft->sy == (int) eright->sy); + + /* clip top/bottom */ + start_y = sy; + finish_y = sy + lines; + + if (start_y < miny) + start_y = miny; + + if (finish_y > maxy) + finish_y = maxy; + + start_y -= sy; + finish_y -= sy; + + /* + _mesa_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); + */ + + for (y = start_y; y < finish_y; y++) { + + /* avoid accumulating adds as floats don't have the precision to + * accurately iterate large triangle edges that way. luckily we + * can just multiply these days. + * + * this is all drowned out by the attribute interpolation anyway. + */ + int left = (int)(eleft->sx + y * eleft->dxdy); + int right = (int)(eright->sx + y * eright->dxdy); + + /* clip left/right */ + if (left < minx) + left = minx; + if (right > maxx) + right = maxx; + + if (left < right) { + int _y = sy + y; + if (block(_y) != setup.span.y) { + flush_spans(); + setup.span.y = block(_y); + } + + setup.span.left[_y&1] = left; + setup.span.right[_y&1] = right; + setup.span.y_flags |= 1<<(_y&1); + } + } + + + /* save the values so that emaj can be restarted: + */ + eleft->sx += lines * eleft->dxdy; + eright->sx += lines * eright->dxdy; + eleft->sy += lines; + eright->sy += lines; +} + + +/** + * Draw triangle into tile at (tx, ty) (tile coords) + * The tile data should have already been fetched. 
+ */ +boolean +tri_draw(const float *v0, const float *v1, const float *v2, + uint tx, uint ty) +{ + setup.tx = tx; + setup.ty = ty; + + /* set clipping bounds to tile bounds */ + setup.cliprect_minx = tx * TILE_SIZE; + setup.cliprect_miny = ty * TILE_SIZE; + setup.cliprect_maxx = (tx + 1) * TILE_SIZE; + setup.cliprect_maxy = (ty + 1) * TILE_SIZE; + + if (!setup_sort_vertices((struct vertex_header *) v0, + (struct vertex_header *) v1, + (struct vertex_header *) v2)) { + return FALSE; /* totally clipped */ + } + + setup_tri_coefficients(); + setup_tri_edges(); + + setup.span.y = 0; + setup.span.y_flags = 0; + setup.span.right[0] = 0; + setup.span.right[1] = 0; + + if (setup.oneOverArea < 0.0) { + /* emaj on left */ + subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines ); + subtriangle( &setup.emaj, &setup.etop, setup.etop.lines ); + } + else { + /* emaj on right */ + subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines ); + subtriangle( &setup.etop, &setup.emaj, setup.etop.lines ); + } + + flush_spans(); + + return TRUE; +} diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h new file mode 100644 index 0000000000..aa694dd7c9 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_tri.h @@ -0,0 +1,37 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef SPU_TRI_H +#define SPU_TRI_H + + +extern boolean +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty); + + +#endif /* SPU_TRI_H */ diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c new file mode 100644 index 0000000000..b8a0d4a265 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -0,0 +1,167 @@ + +#include "cell/common.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_debug.h" +#include "tgsi/tgsi_parse.h" +//#include "tgsi_build.h" +#include "tgsi/tgsi_util.h" + +unsigned +tgsi_util_get_src_register_swizzle( + const struct tgsi_src_register *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->SwizzleX; + case 1: + return reg->SwizzleY; + case 2: + return reg->SwizzleZ; + case 3: + return reg->SwizzleW; + default: + ASSERT( 0 ); + } + return 0; +} + +unsigned +tgsi_util_get_src_register_extswizzle( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->ExtSwizzleX; + case 1: + return reg->ExtSwizzleY; + case 2: + return reg->ExtSwizzleZ; + case 3: + return reg->ExtSwizzleW; + default: + ASSERT( 0 ); + } + return 0; +} + +unsigned +tgsi_util_get_full_src_register_extswizzle( + const struct tgsi_full_src_register *reg, + unsigned component ) +{ + unsigned swizzle; + + /* + * First, calculate the extended swizzle for a given channel. This will give + * us either a channel index into the simple swizzle or a constant 1 or 0. 
+ */ + swizzle = tgsi_util_get_src_register_extswizzle( + ®->SrcRegisterExtSwz, + component ); + + ASSERT (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); + ASSERT (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); + ASSERT (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); + ASSERT (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); + ASSERT (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); + ASSERT (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); + + /* + * Second, calculate the simple swizzle for the unswizzled channel index. + * Leave the constants intact, they are not affected by the simple swizzle. + */ + if( swizzle <= TGSI_SWIZZLE_W ) { + swizzle = tgsi_util_get_src_register_swizzle( + ®->SrcRegister, + component ); + } + + return swizzle; +} + +unsigned +tgsi_util_get_src_register_extnegate( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->NegateX; + case 1: + return reg->NegateY; + case 2: + return reg->NegateZ; + case 3: + return reg->NegateW; + default: + ASSERT( 0 ); + } + return 0; +} + +void +tgsi_util_set_src_register_extnegate( + struct tgsi_src_register_ext_swz *reg, + unsigned negate, + unsigned component ) +{ + switch( component ) { + case 0: + reg->NegateX = negate; + break; + case 1: + reg->NegateY = negate; + break; + case 2: + reg->NegateZ = negate; + break; + case 3: + reg->NegateW = negate; + break; + default: + ASSERT( 0 ); + } +} + +unsigned +tgsi_util_get_full_src_register_sign_mode( + const struct tgsi_full_src_register *reg, + unsigned component ) +{ + unsigned sign_mode; + + if( reg->SrcRegisterExtMod.Absolute ) { + /* Consider only the post-abs negation. */ + + if( reg->SrcRegisterExtMod.Negate ) { + sign_mode = TGSI_UTIL_SIGN_SET; + } + else { + sign_mode = TGSI_UTIL_SIGN_CLEAR; + } + } + else { + /* Accumulate the three negations. 
*/ + + unsigned negate; + + negate = reg->SrcRegister.Negate; + if( tgsi_util_get_src_register_extnegate( ®->SrcRegisterExtSwz, component ) ) { + negate = !negate; + } + if( reg->SrcRegisterExtMod.Negate ) { + negate = !negate; + } + + if( negate ) { + sign_mode = TGSI_UTIL_SIGN_TOGGLE; + } + else { + sign_mode = TGSI_UTIL_SIGN_KEEP; + } + } + + return sign_mode; +} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c new file mode 100644 index 0000000000..03375d84a5 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -0,0 +1,145 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Ian Romanick <idr@us.ibm.com> + */ + +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "spu_exec.h" +#include "spu_vertex_shader.h" +#include "spu_main.h" +#include "spu_dcache.h" + +typedef void (*spu_fetch_func)(qword *out, const qword *in, + const qword *shuffle_data); + + +static const qword fetch_shuffle_data[5] ALIGN16_ATTRIB = { + /* Shuffle used by CVT_64_FLOAT + */ + { + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + }, + + /* Shuffle used by CVT_8_USCALED and CVT_8_SSCALED + */ + { + 0x00, 0x80, 0x80, 0x80, 0x01, 0x80, 0x80, 0x80, + 0x02, 0x80, 0x80, 0x80, 0x03, 0x80, 0x80, 0x80, + }, + + /* Shuffle used by CVT_16_USCALED and CVT_16_SSCALED + */ + { + 0x00, 0x01, 0x80, 0x80, 0x02, 0x03, 0x80, 0x80, + 0x04, 0x05, 0x80, 0x80, 0x06, 0x07, 0x80, 0x80, + }, + + /* High value shuffle used by trans4x4. + */ + { + 0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17 + }, + + /* Low value shuffle used by trans4x4. + */ + { + 0x08, 0x09, 0x0A, 0x0B, 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F + } +}; + + +/** + * Fetch vertex attributes for 'count' vertices. 
+ */ +static void generic_vertex_fetch(struct spu_vs_context *draw, + struct spu_exec_machine *machine, + const unsigned *elts, + unsigned count) +{ + unsigned nr_attrs = draw->vertex_fetch.nr_attrs; + unsigned attr; + + ASSERT(count <= 4); + +#if DRAW_DBG + printf("SPU: %s count = %u, nr_attrs = %u\n", + __FUNCTION__, count, nr_attrs); +#endif + + /* loop over vertex attributes (vertex shader inputs) + */ + for (attr = 0; attr < nr_attrs; attr++) { + const unsigned pitch = draw->vertex_fetch.pitch[attr]; + const uint64_t src = draw->vertex_fetch.src_ptr[attr]; + const spu_fetch_func fetch = (spu_fetch_func) + (draw->vertex_fetch.code + draw->vertex_fetch.code_offset[attr]); + unsigned i; + unsigned idx; + const unsigned bytes_per_entry = draw->vertex_fetch.size[attr]; + const unsigned quads_per_entry = (bytes_per_entry + 15) / 16; + qword in[2 * 4] ALIGN16_ATTRIB; + + + /* Fetch four attributes for four vertices. + */ + idx = 0; + for (i = 0; i < count; i++) { + const uint64_t addr = src + (elts[i] * pitch); + +#if DRAW_DBG + printf("SPU: fetching = 0x%llx\n", addr); +#endif + + spu_dcache_fetch_unaligned(& in[idx], addr, bytes_per_entry); + idx += quads_per_entry; + } + + /* Be nice and zero out any missing vertices. + */ + (void) memset(& in[idx], 0, (8 - idx) * sizeof(qword)); + + + /* Convert all 4 vertices to vectors of float. + */ + (*fetch)(&machine->Inputs[attr].xyzw[0].q, in, fetch_shuffle_data); + } +} + + +void spu_update_vertex_fetch( struct spu_vs_context *draw ) +{ + draw->vertex_fetch.fetch_func = generic_vertex_fetch; +} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c new file mode 100644 index 0000000000..fbe5b34d39 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -0,0 +1,244 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + * Ian Romanick <idr@us.ibm.com> + */ + +#include <spu_mfcio.h> + +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "draw/draw_private.h" +#include "draw/draw_context.h" +#include "cell/common.h" +#include "spu_vertex_shader.h" +#include "spu_exec.h" +#include "spu_main.h" + + +#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) + + +#define CLIP_RIGHT_BIT 0x01 +#define CLIP_LEFT_BIT 0x02 +#define CLIP_TOP_BIT 0x04 +#define CLIP_BOTTOM_BIT 0x08 +#define CLIP_FAR_BIT 0x10 +#define CLIP_NEAR_BIT 0x20 + + +static INLINE float +dot4(const float *a, const float *b) +{ + return (a[0]*b[0] + + a[1]*b[1] + + a[2]*b[2] + + a[3]*b[3]); +} + +static INLINE unsigned +compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) +{ + unsigned mask = 0; + unsigned i; + + /* Do the hardwired planes first: + */ + if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; + if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; + if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; + if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; + if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; + if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; + + /* Followed by any remaining ones: + */ + for (i = 6; i < nr; i++) { + if (dot4(clip, plane[i]) < 0) + mask |= (1<<i); + } + + return mask; +} + + +/** + * Transform vertices with the current vertex program/shader + * Up to four vertices can be shaded at a time. 
+ * \param vbuffer the input vertex data + * \param elts indexes of four input vertices + * \param count number of vertices to shade [1..4] + * \param vOut array of pointers to four output vertices + */ +static void +run_vertex_program(struct spu_vs_context *draw, + unsigned elts[4], unsigned count, + const uint64_t *vOut) +{ + struct spu_exec_machine *machine = &draw->machine; + unsigned int j; + + ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS); + ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS); + const float *scale = draw->viewport.scale; + const float *trans = draw->viewport.translate; + + ASSERT(count <= 4); + + machine->Processor = TGSI_PROCESSOR_VERTEX; + + ASSERT_ALIGN16(draw->constants); + machine->Consts = (float (*)[4]) draw->constants; + + machine->Inputs = ALIGN16_ASSIGN(inputs); + machine->Outputs = ALIGN16_ASSIGN(outputs); + + spu_vertex_fetch( draw, machine, elts, count ); + + /* run shader */ + spu_exec_machine_run( machine ); + + + /* store machine results */ + for (j = 0; j < count; j++) { + unsigned slot; + float x, y, z, w; + unsigned char buffer[sizeof(struct vertex_header) + + MAX_VERTEX_SIZE] ALIGN16_ATTRIB; + struct vertex_header *const tmpOut = + (struct vertex_header *) buffer; + const unsigned vert_size = ROUNDUP16(sizeof(struct vertex_header) + + (sizeof(float) * 4 + * draw->num_vs_outputs)); + + mfc_get(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); + wait_on_mask(1 << TAG_VERTEX_BUFFER); + + + /* Handle attr[0] (position) specially: + * + * XXX: Computing the clipmask should be done in the vertex + * program as a set of DP4 instructions appended to the + * user-provided code. 
+ */ + x = tmpOut->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = tmpOut->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = tmpOut->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = tmpOut->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + + tmpOut->clipmask = compute_clipmask(tmpOut->clip, draw->plane, + draw->nr_planes); + tmpOut->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + tmpOut->data[0][0] = x * scale[0] + trans[0]; + tmpOut->data[0][1] = y * scale[1] + trans[1]; + tmpOut->data[0][2] = z * scale[2] + trans[2]; + tmpOut->data[0][3] = w; + + /* Remaining attributes are packed into sequential post-transform + * vertex attrib slots. + */ + for (slot = 1; slot < draw->num_vs_outputs; slot++) { + tmpOut->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + tmpOut->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + tmpOut->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + tmpOut->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + + mfc_put(tmpOut, vOut[j], vert_size, TAG_VERTEX_BUFFER, 0, 0); + } /* loop over vertices */ +} + + +unsigned char immediates[(sizeof(float) * 4 * TGSI_EXEC_NUM_IMMEDIATES) + 32] + ALIGN16_ATTRIB; + + +void +spu_bind_vertex_shader(struct spu_vs_context *draw, + struct cell_shader_info *vs) +{ + const unsigned immediate_addr = vs->immediates; + const unsigned immediate_size = + ROUNDUP16((sizeof(float) * 4 * vs->num_immediates) + + (immediate_addr & 0x0f)); + + + mfc_get(immediates, immediate_addr & ~0x0f, immediate_size, + TAG_VERTEX_BUFFER, 0, 0); + + draw->machine.Instructions = (struct tgsi_full_instruction *) + vs->instructions; + draw->machine.NumInstructions = vs->num_instructions; + + draw->machine.Declarations = (struct tgsi_full_declaration *) + vs->declarations; + draw->machine.NumDeclarations = vs->num_declarations; + + draw->num_vs_outputs = vs->num_outputs; + + /* specify the shader to interpret/execute */ + spu_exec_machine_init(&draw->machine, 
+ PIPE_MAX_SAMPLERS, + NULL /*samplers*/, + PIPE_SHADER_VERTEX); + + wait_on_mask(1 << TAG_VERTEX_BUFFER); + + (void) memcpy(& draw->machine.Imms, &immediates[immediate_addr & 0x0f], + sizeof(float) * 4 * vs->num_immediates); +} + + +void +spu_execute_vertex_shader(struct spu_vs_context *draw, + const struct cell_command_vs *vs) +{ + unsigned i; + + (void) memcpy(draw->plane, vs->plane, sizeof(float) * 4 * vs->nr_planes); + draw->nr_planes = vs->nr_planes; + draw->vertex_fetch.nr_attrs = vs->nr_attrs; + + for (i = 0; i < vs->num_elts; i += 4) { + const unsigned batch_size = MIN2(vs->num_elts - i, 4); + + run_vertex_program(draw, & vs->elts[i], batch_size, &vs->vOut[i]); + } +} diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h new file mode 100644 index 0000000000..4c74f5e74d --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.h @@ -0,0 +1,66 @@ +#ifndef SPU_VERTEX_SHADER_H +#define SPU_VERTEX_SHADER_H + +#include "cell/common.h" +#include "pipe/p_format.h" +#include "spu_exec.h" + +struct spu_vs_context; + +typedef void (*spu_full_fetch_func)( struct spu_vs_context *draw, + struct spu_exec_machine *machine, + const unsigned *elts, + unsigned count ); + +struct spu_vs_context { + struct pipe_viewport_state viewport; + + struct { + uint64_t src_ptr[PIPE_MAX_ATTRIBS]; + unsigned pitch[PIPE_MAX_ATTRIBS]; + unsigned size[PIPE_MAX_ATTRIBS]; + unsigned code_offset[PIPE_MAX_ATTRIBS]; + unsigned nr_attrs; + boolean dirty; + + spu_full_fetch_func fetch_func; + void *code; + } vertex_fetch; + + /* Clip derived state: + */ + float plane[12][4]; + unsigned nr_planes; + + struct spu_exec_machine machine; + const float (*constants)[4]; + + unsigned num_vs_outputs; +}; + +extern void spu_update_vertex_fetch(struct spu_vs_context *draw); + +static INLINE void spu_vertex_fetch(struct spu_vs_context *draw, + struct spu_exec_machine *machine, + const unsigned *elts, + unsigned count) +{ + if 
(draw->vertex_fetch.dirty) { + spu_update_vertex_fetch(draw); + draw->vertex_fetch.dirty = 0; + } + + (*draw->vertex_fetch.fetch_func)(draw, machine, elts, count); +} + +struct cell_command_vs; + +extern void +spu_bind_vertex_shader(struct spu_vs_context *draw, + struct cell_shader_info *vs); + +extern void +spu_execute_vertex_shader(struct spu_vs_context *draw, + const struct cell_command_vs *vs); + +#endif /* SPU_VERTEX_SHADER_H */ diff --git a/src/gallium/drivers/failover/Makefile b/src/gallium/drivers/failover/Makefile new file mode 100644 index 0000000000..f08b8df07a --- /dev/null +++ b/src/gallium/drivers/failover/Makefile @@ -0,0 +1,14 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = failover + +C_SOURCES = \ + fo_state.c \ + fo_state_emit.c \ + fo_context.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/failover/SConscript b/src/gallium/drivers/failover/SConscript new file mode 100644 index 0000000000..f8e9b1b491 --- /dev/null +++ b/src/gallium/drivers/failover/SConscript @@ -0,0 +1,13 @@ +Import('*') + +env = env.Clone() + +failover = env.ConvenienceLibrary( + target = 'failover', + source = [ + 'fo_state.c', + 'fo_state_emit.c', + 'fo_context.c', + ]) + +Export('failover') diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c new file mode 100644 index 0000000000..10c4ffc209 --- /dev/null +++ b/src/gallium/drivers/failover/fo_context.c @@ -0,0 +1,159 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/**
 * Destroy a failover context.
 *
 * Only the failover wrapper itself is released here; this function does
 * not call into the wrapped hardware or software contexts.
 *
 * \param pipe  the failover context, as returned by failover_create()
 */
static void failover_destroy( struct pipe_context *pipe )
{
   struct failover_context *failover = failover_context( pipe );

   /* The context is allocated with CALLOC_STRUCT() in failover_create(),
    * so release it with the matching FREE() from util/u_memory.h rather
    * than libc free(): the allocator macros are not guaranteed to map
    * onto malloc/free on every platform.
    */
   FREE( failover );
}
Unlikely to be much performance impact to + * this: + */ + failover->sw->flush( failover->sw, ~0, NULL ); + } + + return TRUE; +} + + +static boolean failover_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + return failover_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + +struct pipe_context *failover_create( struct pipe_context *hw, + struct pipe_context *sw ) +{ + struct failover_context *failover = CALLOC_STRUCT(failover_context); + if (failover == NULL) + return NULL; + + failover->hw = hw; + failover->sw = sw; + failover->pipe.winsys = hw->winsys; + failover->pipe.screen = hw->screen; + failover->pipe.destroy = failover_destroy; +#if 0 + failover->pipe.is_format_supported = hw->is_format_supported; + failover->pipe.get_name = hw->get_name; + failover->pipe.get_vendor = hw->get_vendor; + failover->pipe.get_param = hw->get_param; + failover->pipe.get_paramf = hw->get_paramf; +#endif + + failover->pipe.draw_arrays = failover_draw_arrays; + failover->pipe.draw_elements = failover_draw_elements; + failover->pipe.clear = hw->clear; + + /* No software occlusion fallback (or other optional functionality) + * at this point - if the hardware doesn't support it, don't + * advertise it to the application. 
+ */ + failover->pipe.begin_query = hw->begin_query; + failover->pipe.end_query = hw->end_query; + + failover_init_state_functions( failover ); + + failover->pipe.surface_copy = hw->surface_copy; + failover->pipe.surface_fill = hw->surface_fill; + +#if 0 + failover->pipe.texture_create = hw->texture_create; + failover->pipe.texture_release = hw->texture_release; + failover->pipe.get_tex_surface = hw->get_tex_surface; + failover->pipe.texture_update = hw->texture_update; +#endif + + failover->pipe.flush = hw->flush; + + failover->dirty = 0; + + return &failover->pipe; +} + diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h new file mode 100644 index 0000000000..c6409fe1e1 --- /dev/null +++ b/src/gallium/drivers/failover/fo_context.h @@ -0,0 +1,118 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef FO_CONTEXT_H +#define FO_CONTEXT_H + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + + + +#define FO_NEW_VIEWPORT 0x1 +#define FO_NEW_RASTERIZER 0x2 +#define FO_NEW_FRAGMENT_SHADER 0x4 +#define FO_NEW_BLEND 0x8 +#define FO_NEW_CLIP 0x10 +#define FO_NEW_SCISSOR 0x20 +#define FO_NEW_STIPPLE 0x40 +#define FO_NEW_FRAMEBUFFER 0x80 +#define FO_NEW_ALPHA_TEST 0x100 +#define FO_NEW_DEPTH_STENCIL 0x200 +#define FO_NEW_SAMPLER 0x400 +#define FO_NEW_TEXTURE 0x800 +#define FO_NEW_VERTEX 0x2000 +#define FO_NEW_VERTEX_SHADER 0x4000 +#define FO_NEW_BLEND_COLOR 0x8000 +#define FO_NEW_CLEAR_COLOR 0x10000 +#define FO_NEW_VERTEX_BUFFER 0x20000 +#define FO_NEW_VERTEX_ELEMENT 0x40000 + + + +#define FO_HW 0 +#define FO_SW 1 + +struct fo_state { + void *sw_state; + void *hw_state; +}; +struct failover_context { + struct pipe_context pipe; /**< base class */ + + + /* The most recent drawing state as set by the driver: + */ + const struct fo_state *blend; + const struct fo_state *sampler[PIPE_MAX_SAMPLERS]; + const struct fo_state *depth_stencil; + const struct fo_state *rasterizer; + const struct fo_state *fragment_shader; + const struct fo_state *vertex_shader; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element 
vertex_elements[PIPE_MAX_ATTRIBS]; + + uint num_vertex_buffers; + uint num_vertex_elements; + + void *sw_sampler_state[PIPE_MAX_SAMPLERS]; + void *hw_sampler_state[PIPE_MAX_SAMPLERS]; + + unsigned dirty; + + unsigned num_samplers; + unsigned num_textures; + + unsigned mode; + struct pipe_context *hw; + struct pipe_context *sw; +}; + + + +void failover_init_state_functions( struct failover_context *failover ); +void failover_state_emit( struct failover_context *failover ); + +static INLINE struct failover_context * +failover_context( struct pipe_context *pipe ) +{ + return (struct failover_context *)pipe; +} + + +#endif /* FO_CONTEXT_H */ diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c new file mode 100644 index 0000000000..6a79706632 --- /dev/null +++ b/src/gallium/drivers/failover/fo_state.c @@ -0,0 +1,483 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "pipe/p_inlines.h" + +#include "fo_context.h" + + +/* This looks like a lot of work at the moment - we're keeping a + * duplicate copy of the state up-to-date. + * + * This can change in two ways: + * - With constant state objects we would only need to save a pointer, + * not the whole object. + * - By adding a callback in the state tracker to re-emit state. The + * state tracker knows the current state already and can re-emit it + * without additional complexity. + * + * This works as a proof-of-concept, but a final version will have + * lower overheads. + */ + + + +static void * +failover_create_blend_state( struct pipe_context *pipe, + const struct pipe_blend_state *blend ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_blend_state(failover->sw, blend); + state->hw_state = failover->hw->create_blend_state(failover->hw, blend); + + return state; +} + +static void +failover_bind_blend_state( struct pipe_context *pipe, + void *blend ) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state *)blend; + failover->blend = state; + failover->dirty |= FO_NEW_BLEND; + failover->sw->bind_blend_state( failover->sw, state->sw_state ); + failover->hw->bind_blend_state( failover->hw, state->hw_state ); +} + +static void +failover_delete_blend_state( struct pipe_context *pipe, + void *blend ) +{ + struct fo_state *state = (struct fo_state*)blend; + struct failover_context 
*failover = failover_context(pipe); + + failover->sw->delete_blend_state(failover->sw, state->sw_state); + failover->hw->delete_blend_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + +static void +failover_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->blend_color = *blend_color; + failover->dirty |= FO_NEW_BLEND_COLOR; + failover->sw->set_blend_color( failover->sw, blend_color ); + failover->hw->set_blend_color( failover->hw, blend_color ); +} + +static void +failover_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->clip = *clip; + failover->dirty |= FO_NEW_CLIP; + failover->sw->set_clip_state( failover->sw, clip ); + failover->hw->set_clip_state( failover->hw, clip ); +} + + +static void * +failover_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_depth_stencil_alpha_state(failover->sw, templ); + state->hw_state = failover->hw->create_depth_stencil_alpha_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state *)depth_stencil; + failover->depth_stencil = state; + failover->dirty |= FO_NEW_DEPTH_STENCIL; + failover->sw->bind_depth_stencil_alpha_state(failover->sw, state->sw_state); + failover->hw->bind_depth_stencil_alpha_state(failover->hw, state->hw_state); +} + +static void +failover_delete_depth_stencil_state(struct pipe_context *pipe, + void *ds) +{ + 
struct fo_state *state = (struct fo_state*)ds; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_depth_stencil_alpha_state(failover->sw, state->sw_state); + failover->hw->delete_depth_stencil_alpha_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + +static void +failover_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *framebuffer) +{ + struct failover_context *failover = failover_context(pipe); + + failover->framebuffer = *framebuffer; + failover->dirty |= FO_NEW_FRAMEBUFFER; + failover->sw->set_framebuffer_state( failover->sw, framebuffer ); + failover->hw->set_framebuffer_state( failover->hw, framebuffer ); +} + + +static void * +failover_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_fs_state(failover->sw, templ); + state->hw_state = failover->hw->create_fs_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state*)fs; + failover->fragment_shader = state; + failover->dirty |= FO_NEW_FRAGMENT_SHADER; + failover->sw->bind_fs_state(failover->sw, state->sw_state); + failover->hw->bind_fs_state(failover->hw, state->hw_state); +} + +static void +failover_delete_fs_state(struct pipe_context *pipe, + void *fs) +{ + struct fo_state *state = (struct fo_state*)fs; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_fs_state(failover->sw, state->sw_state); + failover->hw->delete_fs_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + +static void * +failover_create_vs_state(struct 
pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_vs_state(failover->sw, templ); + state->hw_state = failover->hw->create_vs_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_vs_state(struct pipe_context *pipe, + void *vs) +{ + struct failover_context *failover = failover_context(pipe); + + struct fo_state *state = (struct fo_state*)vs; + failover->vertex_shader = state; + failover->dirty |= FO_NEW_VERTEX_SHADER; + failover->sw->bind_vs_state(failover->sw, state->sw_state); + failover->hw->bind_vs_state(failover->hw, state->hw_state); +} + +static void +failover_delete_vs_state(struct pipe_context *pipe, + void *vs) +{ + struct fo_state *state = (struct fo_state*)vs; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_vs_state(failover->sw, state->sw_state); + failover->hw->delete_vs_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + +static void +failover_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->poly_stipple = *stipple; + failover->dirty |= FO_NEW_STIPPLE; + failover->sw->set_polygon_stipple( failover->sw, stipple ); + failover->hw->set_polygon_stipple( failover->hw, stipple ); +} + + +static void * +failover_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_rasterizer_state(failover->sw, templ); + state->hw_state = failover->hw->create_rasterizer_state(failover->hw, templ); + + return state; +} + +static void 
+failover_bind_rasterizer_state(struct pipe_context *pipe, + void *raster) +{ + struct failover_context *failover = failover_context(pipe); + + struct fo_state *state = (struct fo_state*)raster; + failover->rasterizer = state; + failover->dirty |= FO_NEW_RASTERIZER; + failover->sw->bind_rasterizer_state(failover->sw, state->sw_state); + failover->hw->bind_rasterizer_state(failover->hw, state->hw_state); +} + +static void +failover_delete_rasterizer_state(struct pipe_context *pipe, + void *raster) +{ + struct fo_state *state = (struct fo_state*)raster; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_rasterizer_state(failover->sw, state->sw_state); + failover->hw->delete_rasterizer_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + + +static void +failover_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->scissor = *scissor; + failover->dirty |= FO_NEW_SCISSOR; + failover->sw->set_scissor_state( failover->sw, scissor ); + failover->hw->set_scissor_state( failover->hw, scissor ); +} + + +static void * +failover_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *templ) +{ + struct fo_state *state = malloc(sizeof(struct fo_state)); + struct failover_context *failover = failover_context(pipe); + + state->sw_state = failover->sw->create_sampler_state(failover->sw, templ); + state->hw_state = failover->hw->create_sampler_state(failover->hw, templ); + + return state; +} + +static void +failover_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct failover_context *failover = failover_context(pipe); + struct fo_state *state = (struct fo_state*)sampler; + uint i; + assert(num <= PIPE_MAX_SAMPLERS); + /* Check for no-op */ + if (num == failover->num_samplers && + !memcmp(failover->sampler, sampler, 
num * sizeof(void *))) + return; + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + failover->sw_sampler_state[i] = i < num ? state[i].sw_state : NULL; + failover->hw_sampler_state[i] = i < num ? state[i].hw_state : NULL; + } + failover->dirty |= FO_NEW_SAMPLER; + failover->num_samplers = num; + failover->sw->bind_sampler_states(failover->sw, num, + failover->sw_sampler_state); + failover->hw->bind_sampler_states(failover->hw, num, + failover->hw_sampler_state); +} + +static void +failover_delete_sampler_state(struct pipe_context *pipe, void *sampler) +{ + struct fo_state *state = (struct fo_state*)sampler; + struct failover_context *failover = failover_context(pipe); + + failover->sw->delete_sampler_state(failover->sw, state->sw_state); + failover->hw->delete_sampler_state(failover->hw, state->hw_state); + state->sw_state = 0; + state->hw_state = 0; + free(state); +} + + +static void +failover_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ + struct failover_context *failover = failover_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == failover->num_textures && + !memcmp(failover->texture, texture, num * sizeof(struct pipe_texture *))) + return; + for (i = 0; i < num; i++) + pipe_texture_reference((struct pipe_texture **) &failover->texture[i], + texture[i]); + for (i = num; i < failover->num_textures; i++) + pipe_texture_reference((struct pipe_texture **) &failover->texture[i], + NULL); + failover->dirty |= FO_NEW_TEXTURE; + failover->num_textures = num; + failover->sw->set_sampler_textures( failover->sw, num, texture ); + failover->hw->set_sampler_textures( failover->hw, num, texture ); +} + + +static void +failover_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct failover_context *failover = failover_context(pipe); + + failover->viewport = *viewport; + failover->dirty |= FO_NEW_VIEWPORT; + 
failover->sw->set_viewport_state( failover->sw, viewport ); + failover->hw->set_viewport_state( failover->hw, viewport ); +} + + +static void +failover_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *vertex_buffers) +{ + struct failover_context *failover = failover_context(pipe); + + memcpy(failover->vertex_buffers, vertex_buffers, + count * sizeof(vertex_buffers[0])); + failover->dirty |= FO_NEW_VERTEX_BUFFER; + failover->num_vertex_buffers = count; + failover->sw->set_vertex_buffers( failover->sw, count, vertex_buffers ); + failover->hw->set_vertex_buffers( failover->hw, count, vertex_buffers ); +} + + +static void +failover_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *vertex_elements) +{ + struct failover_context *failover = failover_context(pipe); + + memcpy(failover->vertex_elements, vertex_elements, + count * sizeof(vertex_elements[0])); + + failover->dirty |= FO_NEW_VERTEX_ELEMENT; + failover->num_vertex_elements = count; + failover->sw->set_vertex_elements( failover->sw, count, vertex_elements ); + failover->hw->set_vertex_elements( failover->hw, count, vertex_elements ); +} + +void +failover_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct failover_context *failover = failover_context(pipe); + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + failover->sw->set_constant_buffer(failover->sw, shader, index, buf); + failover->hw->set_constant_buffer(failover->hw, shader, index, buf); +} + + +void +failover_init_state_functions( struct failover_context *failover ) +{ + failover->pipe.create_blend_state = failover_create_blend_state; + failover->pipe.bind_blend_state = failover_bind_blend_state; + failover->pipe.delete_blend_state = failover_delete_blend_state; + failover->pipe.create_sampler_state = failover_create_sampler_state; + 
failover->pipe.bind_sampler_states = failover_bind_sampler_states; + failover->pipe.delete_sampler_state = failover_delete_sampler_state; + failover->pipe.create_depth_stencil_alpha_state = failover_create_depth_stencil_state; + failover->pipe.bind_depth_stencil_alpha_state = failover_bind_depth_stencil_state; + failover->pipe.delete_depth_stencil_alpha_state = failover_delete_depth_stencil_state; + failover->pipe.create_rasterizer_state = failover_create_rasterizer_state; + failover->pipe.bind_rasterizer_state = failover_bind_rasterizer_state; + failover->pipe.delete_rasterizer_state = failover_delete_rasterizer_state; + failover->pipe.create_fs_state = failover_create_fs_state; + failover->pipe.bind_fs_state = failover_bind_fs_state; + failover->pipe.delete_fs_state = failover_delete_fs_state; + failover->pipe.create_vs_state = failover_create_vs_state; + failover->pipe.bind_vs_state = failover_bind_vs_state; + failover->pipe.delete_vs_state = failover_delete_vs_state; + + failover->pipe.set_blend_color = failover_set_blend_color; + failover->pipe.set_clip_state = failover_set_clip_state; + failover->pipe.set_framebuffer_state = failover_set_framebuffer_state; + failover->pipe.set_polygon_stipple = failover_set_polygon_stipple; + failover->pipe.set_scissor_state = failover_set_scissor_state; + failover->pipe.set_sampler_textures = failover_set_sampler_textures; + failover->pipe.set_viewport_state = failover_set_viewport_state; + failover->pipe.set_vertex_buffers = failover_set_vertex_buffers; + failover->pipe.set_vertex_elements = failover_set_vertex_elements; + failover->pipe.set_constant_buffer = failover_set_constant_buffer; +} diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c new file mode 100644 index 0000000000..bd4fce9d20 --- /dev/null +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -0,0 +1,117 @@ +/************************************************************************** + * + * Copyright 2007 
Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "fo_context.h" + +/* This looks like a lot of work at the moment - we're keeping a + * duplicate copy of the state up-to-date. + * + * This can change in two ways: + * - With constant state objects we would only need to save a pointer, + * not the whole object. + * - By adding a callback in the state tracker to re-emit state. The + * state tracker knows the current state already and can re-emit it + * without additional complexity. + * + * This works as a proof-of-concept, but a final version will have + * lower overheads. + */ + + +/* Bring the software pipe uptodate with current state. 
+ * + * With constant state objects we would probably just send all state + * to both rasterizers all the time??? + */ +void +failover_state_emit( struct failover_context *failover ) +{ + if (failover->dirty & FO_NEW_BLEND) + failover->sw->bind_blend_state( failover->sw, + failover->blend->sw_state ); + + if (failover->dirty & FO_NEW_BLEND_COLOR) + failover->sw->set_blend_color( failover->sw, &failover->blend_color ); + + if (failover->dirty & FO_NEW_CLIP) + failover->sw->set_clip_state( failover->sw, &failover->clip ); + + if (failover->dirty & FO_NEW_DEPTH_STENCIL) + failover->sw->bind_depth_stencil_alpha_state( failover->sw, + failover->depth_stencil->sw_state ); + + if (failover->dirty & FO_NEW_FRAMEBUFFER) + failover->sw->set_framebuffer_state( failover->sw, &failover->framebuffer ); + + if (failover->dirty & FO_NEW_FRAGMENT_SHADER) + failover->sw->bind_fs_state( failover->sw, + failover->fragment_shader->sw_state ); + + if (failover->dirty & FO_NEW_VERTEX_SHADER) + failover->sw->bind_vs_state( failover->sw, + failover->vertex_shader->sw_state ); + + if (failover->dirty & FO_NEW_STIPPLE) + failover->sw->set_polygon_stipple( failover->sw, &failover->poly_stipple ); + + if (failover->dirty & FO_NEW_RASTERIZER) + failover->sw->bind_rasterizer_state( failover->sw, + failover->rasterizer->sw_state ); + + if (failover->dirty & FO_NEW_SCISSOR) + failover->sw->set_scissor_state( failover->sw, &failover->scissor ); + + if (failover->dirty & FO_NEW_VIEWPORT) + failover->sw->set_viewport_state( failover->sw, &failover->viewport ); + + if (failover->dirty & FO_NEW_SAMPLER) { + failover->sw->bind_sampler_states( failover->sw, failover->num_samplers, + failover->sw_sampler_state ); + } + + if (failover->dirty & FO_NEW_TEXTURE) { + failover->sw->set_sampler_textures( failover->sw, failover->num_textures, + failover->texture ); + } + + if (failover->dirty & FO_NEW_VERTEX_BUFFER) { + failover->sw->set_vertex_buffers( failover->sw, + failover->num_vertex_buffers, + 
failover->vertex_buffers ); + } + + if (failover->dirty & FO_NEW_VERTEX_ELEMENT) { + failover->sw->set_vertex_elements( failover->sw, + failover->num_vertex_elements, + failover->vertex_elements ); + } + + failover->dirty = 0; +} diff --git a/src/gallium/drivers/failover/fo_winsys.h b/src/gallium/drivers/failover/fo_winsys.h new file mode 100644 index 0000000000..a8ce997a1f --- /dev/null +++ b/src/gallium/drivers/failover/fo_winsys.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef FO_WINSYS_H +#define FO_WINSYS_H + + +/* This is the interface that failover requires any window system + * hosting it to implement. 
This is the only include file in failover + * which is public. + */ + + +struct pipe_context; + + +struct pipe_context *failover_create( struct pipe_context *hw, + struct pipe_context *sw ); + + +#endif /* FO_WINSYS_H */ diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile new file mode 100644 index 0000000000..41a61a0020 --- /dev/null +++ b/src/gallium/drivers/i915simple/Makefile @@ -0,0 +1,31 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = i915simple + +C_SOURCES = \ + i915_blit.c \ + i915_clear.c \ + i915_flush.c \ + i915_context.c \ + i915_context.c \ + i915_debug.c \ + i915_debug_fp.c \ + i915_state.c \ + i915_state_immediate.c \ + i915_state_dynamic.c \ + i915_state_derived.c \ + i915_state_emit.c \ + i915_state_sampler.c \ + i915_screen.c \ + i915_prim_emit.c \ + i915_prim_vbuf.c \ + i915_texture.c \ + i915_fpc_emit.c \ + i915_fpc_translate.c \ + i915_surface.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/i915simple/SConscript b/src/gallium/drivers/i915simple/SConscript new file mode 100644 index 0000000000..2366e1247f --- /dev/null +++ b/src/gallium/drivers/i915simple/SConscript @@ -0,0 +1,29 @@ +Import('*') + +env = env.Clone() + +i915simple = env.ConvenienceLibrary( + target = 'i915simple', + source = [ + 'i915_blit.c', + 'i915_clear.c', + 'i915_context.c', + 'i915_debug.c', + 'i915_debug_fp.c', + 'i915_flush.c', + 'i915_fpc_emit.c', + 'i915_fpc_translate.c', + 'i915_prim_emit.c', + 'i915_prim_vbuf.c', + 'i915_screen.c', + 'i915_state.c', + 'i915_state_derived.c', + 'i915_state_dynamic.c', + 'i915_state_emit.c', + 'i915_state_immediate.c', + 'i915_state_sampler.c', + 'i915_surface.c', + 'i915_texture.c', + ]) + +Export('i915simple') diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915simple/i915_batch.h new file mode 100644 index 0000000000..45bf4f4028 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_batch.h @@ -0,0 
+1,116 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef I915_BATCH_H +#define I915_BATCH_H + +#include "i915_winsys.h" + +struct i915_batchbuffer +{ + struct pipe_buffer *buffer; + struct i915_winsys *winsys; + + unsigned char *map; + unsigned char *ptr; + + size_t size; + size_t actual_size; + + size_t relocs; + size_t max_relocs; +}; + +static INLINE boolean +i915_batchbuffer_check( struct i915_batchbuffer *batch, + size_t dwords, + size_t relocs ) +{ + /** TODO JB: Check relocs */ + return dwords * 4 <= batch->size - (batch->ptr - batch->map); +} + +static INLINE size_t +i915_batchbuffer_space( struct i915_batchbuffer *batch ) +{ + return batch->size - (batch->ptr - batch->map); +} + +static INLINE void +i915_batchbuffer_dword( struct i915_batchbuffer *batch, + unsigned dword ) +{ + if (i915_batchbuffer_space(batch) < 4) + return; + + *(unsigned *)batch->ptr = dword; + batch->ptr += 4; +} + +static INLINE void +i915_batchbuffer_write( struct i915_batchbuffer *batch, + void *data, + size_t size ) +{ + if (i915_batchbuffer_space(batch) < size) + return; + + memcpy(data, batch->ptr, size); + batch->ptr += size; +} + +static INLINE void +i915_batchbuffer_reloc( struct i915_batchbuffer *batch, + struct pipe_buffer *buffer, + size_t flags, + size_t offset ) +{ + batch->winsys->batch_reloc( batch->winsys, buffer, flags, offset ); +} + +static INLINE void +i915_batchbuffer_flush( struct i915_batchbuffer *batch, + struct pipe_fence_handle **fence ) +{ + batch->winsys->batch_flush( batch->winsys, fence ); +} + +#define BEGIN_BATCH( dwords, relocs ) \ + (i915_batchbuffer_check( i915->batch, dwords, relocs )) + +#define OUT_BATCH( dword ) \ + i915_batchbuffer_dword( i915->batch, dword ) + +#define OUT_RELOC( buf, flags, delta ) \ + i915_batchbuffer_reloc( i915->batch, buf, flags, delta ) + +#define FLUSH_BATCH(fence) do { \ + i915->winsys->batch_flush( i915->winsys, fence ); \ + i915->hardware_dirty = ~0; \ +} while (0) + +#endif diff 
--git a/src/gallium/drivers/i915simple/i915_blit.c b/src/gallium/drivers/i915simple/i915_blit.c new file mode 100644 index 0000000000..45fae4c999 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_blit.c @@ -0,0 +1,157 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_blit.h" +#include "i915_reg.h" +#include "i915_batch.h" +#include "i915_debug.h" + +#define FILE_DEBUG_FLAG DEBUG_BLIT + +void +i915_fill_blit(struct i915_context *i915, + unsigned cpp, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + short x, short y, + short w, short h, + unsigned color) +{ + unsigned BR13, CMD; + + switch (cpp) { + case 1: + case 2: + case 3: + BR13 = dst_pitch | (0xF0 << 16) | (1 << 24); + CMD = XY_COLOR_BLT_CMD; + break; + case 4: + BR13 = dst_pitch | (0xF0 << 16) | (1 << 24) | (1 << 25); + CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA | + XY_COLOR_BLT_WRITE_RGB); + break; + default: + return; + } + +// DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", +// __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h); + + + if (!BEGIN_BATCH(6, 1)) { + FLUSH_BATCH(NULL); + assert(BEGIN_BATCH(6, 1)); + } + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((y << 16) | x); + OUT_BATCH(((y + h) << 16) | (x + w)); + OUT_RELOC( dst_buffer, I915_BUFFER_ACCESS_WRITE, dst_offset); + OUT_BATCH(color); +} + + +void +i915_copy_blit( struct i915_context *i915, + unsigned do_flip, + unsigned cpp, + short src_pitch, + struct pipe_buffer *src_buffer, + unsigned src_offset, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + short src_x, short src_y, + short dst_x, short dst_y, + short w, short h ) +{ + unsigned CMD, BR13; + int dst_y2 = dst_y + h; + int dst_x2 = dst_x + w; + + + I915_DBG(i915, + "%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, + src_buffer, src_pitch, src_offset, src_x, src_y, + dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); + + src_pitch *= (short) cpp; + dst_pitch *= (short) cpp; + + switch (cpp) { + case 1: + case 2: + case 3: + BR13 = (((int) dst_pitch) & 0xffff) | + (0xCC << 16) | (1 << 24); + 
CMD = XY_SRC_COPY_BLT_CMD; + break; + case 4: + BR13 = + (((int) dst_pitch) & 0xffff) | + (0xCC << 16) | (1 << 24) | (1 << 25); + CMD = + (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB); + break; + default: + return; + } + + if (dst_y2 < dst_y || + dst_x2 < dst_x) { + return; + } + + /* Hardware can handle negative pitches but loses the ability to do + * proper overlapping blits in that case. We don't really have a + * need for either at this stage. + */ + assert (dst_pitch > 0 && src_pitch > 0); + + + if (!BEGIN_BATCH(8, 2)) { + FLUSH_BATCH(NULL); + assert(BEGIN_BATCH(8, 2)); + } + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((dst_y << 16) | dst_x); + OUT_BATCH((dst_y2 << 16) | dst_x2); + OUT_RELOC(dst_buffer, I915_BUFFER_ACCESS_WRITE, dst_offset); + OUT_BATCH((src_y << 16) | src_x); + OUT_BATCH(((int) src_pitch & 0xffff)); + OUT_RELOC(src_buffer, I915_BUFFER_ACCESS_READ, src_offset); +} + + diff --git a/src/gallium/drivers/i915simple/i915_blit.h b/src/gallium/drivers/i915simple/i915_blit.h new file mode 100644 index 0000000000..6e5b44e124 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_blit.h @@ -0,0 +1,55 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_BLIT_H +#define I915_BLIT_H + +#include "i915_context.h" + +extern void i915_copy_blit(struct i915_context *i915, + unsigned do_flip, + unsigned cpp, + short src_pitch, + struct pipe_buffer *src_buffer, + unsigned src_offset, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + short srcx, short srcy, + short dstx, short dsty, + short w, short h ); + +extern void i915_fill_blit(struct i915_context *i915, + unsigned cpp, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + short x, short y, + short w, short h, unsigned color); + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_clear.c b/src/gallium/drivers/i915simple/i915_clear.c new file mode 100644 index 0000000000..8a2d3ca43f --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_clear.c @@ -0,0 +1,48 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "i915_context.h" +#include "i915_state.h" + + +/** + * Clear the given surface to the specified value. + * No masking, no scissor (clear entire buffer). 
+ */ +void +i915_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); + ps->status = PIPE_SURFACE_STATUS_DEFINED; +} diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c new file mode 100644 index 0000000000..6dd3eda85d --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -0,0 +1,193 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_state.h" +#include "i915_batch.h" +#include "i915_texture.h" +#include "i915_reg.h" + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "pipe/p_screen.h" + + +static void i915_destroy( struct pipe_context *pipe ) +{ + struct i915_context *i915 = i915_context( pipe ); + + draw_destroy( i915->draw ); + + if(i915->winsys->destroy) + i915->winsys->destroy(i915->winsys); + + FREE( i915 ); +} + + +static boolean +i915_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned prim, unsigned start, unsigned count) +{ + struct i915_context *i915 = i915_context( pipe ); + struct draw_context *draw = i915->draw; + unsigned i; + + if (i915->dirty) + i915_update_derived( i915 ); + + /* + * Map vertex buffers + */ + for (i = 0; i < i915->num_vertex_buffers; i++) { + void *buf + = pipe_buffer_map(pipe->screen, + i915->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes + = pipe_buffer_map(pipe->screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer_range(draw, indexSize, + min_index, + max_index, + mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + + draw_set_mapped_constant_buffer(draw, + i915->current.constants[PIPE_SHADER_VERTEX], + ( i915->current.num_user_constants[PIPE_SHADER_VERTEX] * + 4 * sizeof(float) )); + + /* draw! 
*/ + draw_arrays(i915->draw, prim, start, count); + + /* + * unmap vertex/index buffers + */ + for (i = 0; i < i915->num_vertex_buffers; i++) { + pipe_buffer_unmap(pipe->screen, i915->vertex_buffer[i].buffer); + draw_set_mapped_vertex_buffer(draw, i, NULL); + } + if (indexBuffer) { + pipe_buffer_unmap(pipe->screen, indexBuffer); + draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); + } + + return TRUE; +} + +static boolean +i915_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count) +{ + return i915_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + prim, start, count ); +} + +static boolean i915_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + return i915_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + +struct pipe_context *i915_create_context( struct pipe_screen *screen, + struct pipe_winsys *pipe_winsys, + struct i915_winsys *i915_winsys ) +{ + struct i915_context *i915; + + i915 = CALLOC_STRUCT(i915_context); + if (i915 == NULL) + return NULL; + + i915->winsys = i915_winsys; + i915->pipe.winsys = pipe_winsys; + i915->pipe.screen = screen; + + i915->pipe.destroy = i915_destroy; + + i915->pipe.clear = i915_clear; + + + i915->pipe.draw_arrays = i915_draw_arrays; + i915->pipe.draw_elements = i915_draw_elements; + i915->pipe.draw_range_elements = i915_draw_range_elements; + + /* + * Create drawing context and plug our rendering stage into it. 
+ */ + i915->draw = draw_create(); + assert(i915->draw); + if (!debug_get_bool_option("I915_NO_VBUF", FALSE)) { + draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915)); + } + else { + draw_set_rasterize_stage(i915->draw, i915_draw_render_stage(i915)); + } + + i915_init_surface_functions(i915); + i915_init_state_functions(i915); + i915_init_flush_functions(i915); + i915_init_texture_functions(i915); + + draw_install_aaline_stage(i915->draw, &i915->pipe); + draw_install_aapoint_stage(i915->draw, &i915->pipe); + + i915->dirty = ~0; + i915->hardware_dirty = ~0; + + /* Batch stream debugging is a bit hacked up at the moment: + */ + i915->batch = i915_winsys->batch_get(i915_winsys); + i915->batch->winsys = i915_winsys; + + return &i915->pipe; +} + diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h new file mode 100644 index 0000000000..3cdabe45f9 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -0,0 +1,345 @@ + /************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_CONTEXT_H +#define I915_CONTEXT_H + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "draw/draw_vertex.h" + +#include "tgsi/tgsi_scan.h" + + +#define I915_TEX_UNITS 8 + +#define I915_DYNAMIC_MODES4 0 +#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */ +#define I915_DYNAMIC_DEPTHSCALE_1 2 +#define I915_DYNAMIC_IAB 3 +#define I915_DYNAMIC_BC_0 4 /* just the header */ +#define I915_DYNAMIC_BC_1 5 +#define I915_DYNAMIC_BFO_0 6 +#define I915_DYNAMIC_BFO_1 7 +#define I915_DYNAMIC_STP_0 8 +#define I915_DYNAMIC_STP_1 9 +#define I915_DYNAMIC_SC_ENA_0 10 +#define I915_DYNAMIC_SC_RECT_0 11 +#define I915_DYNAMIC_SC_RECT_1 12 +#define I915_DYNAMIC_SC_RECT_2 13 +#define I915_MAX_DYNAMIC 14 + + +#define I915_IMMEDIATE_S0 0 +#define I915_IMMEDIATE_S1 1 +#define I915_IMMEDIATE_S2 2 +#define I915_IMMEDIATE_S3 3 +#define I915_IMMEDIATE_S4 4 +#define I915_IMMEDIATE_S5 5 +#define I915_IMMEDIATE_S6 6 +#define I915_IMMEDIATE_S7 7 +#define I915_MAX_IMMEDIATE 8 + +/* These must mach the order of LI0_STATE_* bits, as they will be used + * to generate hardware packets: + */ +#define I915_CACHE_STATIC 0 +#define I915_CACHE_DYNAMIC 1 /* handled specially */ +#define I915_CACHE_SAMPLER 2 +#define I915_CACHE_MAP 3 +#define I915_CACHE_PROGRAM 4 +#define I915_CACHE_CONSTANTS 5 +#define I915_MAX_CACHE 6 + +#define I915_MAX_CONSTANT 32 + + +/** See 
constant_flags[] below */ +#define I915_CONSTFLAG_USER 0x1f + + +/** + * Subclass of pipe_shader_state + */ +struct i915_fragment_shader +{ + struct pipe_shader_state state; + + struct tgsi_shader_info info; + + uint *program; + uint program_len; + + /** + * constants introduced during translation. + * These are placed at the end of the constant buffer and grow toward + * the beginning (eg: slot 31, 30 29, ...) + * User-provided constants start at 0. + * This allows both types of constants to co-exist (until there's too many) + * and doesn't require regenerating/changing the fragment program to + * shuffle constants around. + */ + uint num_constants; + float constants[I915_MAX_CONSTANT][4]; + + /** + * Status of each constant + * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding + * slot of the user's constant buffer. (set by pipe->set_constant_buffer()) + * Else, the bitmask indicates which components are occupied by immediates. + */ + ubyte constant_flags[I915_MAX_CONSTANT]; +}; + + +struct i915_cache_context; + +/* Use to calculate differences between state emitted to hardware and + * current driver-calculated state. 
+ */ +struct i915_state +{ + unsigned immediate[I915_MAX_IMMEDIATE]; + unsigned dynamic[I915_MAX_DYNAMIC]; + + float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; + /** number of constants passed in through a constant buffer */ + uint num_user_constants[PIPE_SHADER_TYPES]; + + /* texture sampler state */ + unsigned sampler[I915_TEX_UNITS][3]; + unsigned sampler_enable_flags; + unsigned sampler_enable_nr; + + /* texture image buffers */ + unsigned texbuffer[I915_TEX_UNITS][2]; + + /** Describes the current hardware vertex layout */ + struct vertex_info vertex_info; + + unsigned id; /* track lost context events */ +}; + +struct i915_blend_state { + unsigned iab; + unsigned modes4; + unsigned LIS5; + unsigned LIS6; +}; + +struct i915_depth_stencil_state { + unsigned stencil_modes4; + unsigned bfo[2]; + unsigned stencil_LIS5; + unsigned depth_LIS6; +}; + +struct i915_rasterizer_state { + int light_twoside : 1; + unsigned st; + enum interp_mode color_interp; + + unsigned LIS4; + unsigned LIS7; + unsigned sc[1]; + + const struct pipe_rasterizer_state *templ; + + union { float f; unsigned u; } ds[2]; +}; + +struct i915_sampler_state { + unsigned state[3]; + const struct pipe_sampler_state *templ; + unsigned minlod; + unsigned maxlod; +}; + + +struct i915_texture { + struct pipe_texture base; + + /* Derived from the above: + */ + unsigned stride; + unsigned depth_stride; /* per-image on i945? */ + unsigned total_nblocksy; + + unsigned tiled; + + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + + /* Explicitly store the offset of each image for each cube face or + * depth value. 
Pretty much have to accept that hardware formats + * are going to be so diverse that there is no unified way to + * compute the offsets of depth/cube images within a mipmap level, + * so have to store them as a lookup table: + */ + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ + + /* The data is held here: + */ + struct pipe_buffer *buffer; +}; + +struct i915_batchbuffer; + +struct i915_context +{ + struct pipe_context pipe; + struct i915_winsys *winsys; + struct draw_context *draw; + + /* The most recent drawing state as set by the driver: + */ + const struct i915_blend_state *blend; + const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct i915_depth_stencil_state *depth_stencil; + const struct i915_rasterizer_state *rasterizer; + + struct i915_fragment_shader *fs; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct i915_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + + unsigned dirty; + + unsigned num_samplers; + unsigned num_textures; + unsigned num_vertex_elements; + unsigned num_vertex_buffers; + + struct i915_batchbuffer *batch; + + /** Vertex buffer */ + struct pipe_buffer *vbo; + size_t vbo_offset; + unsigned vbo_flushed; + + struct i915_state current; + unsigned hardware_dirty; + + unsigned debug; +}; + +/* A flag for each state_tracker state object: + */ +#define I915_NEW_VIEWPORT 0x1 +#define I915_NEW_RASTERIZER 0x2 +#define I915_NEW_FS 0x4 +#define I915_NEW_BLEND 0x8 +#define I915_NEW_CLIP 0x10 +#define I915_NEW_SCISSOR 0x20 +#define I915_NEW_STIPPLE 0x40 +#define I915_NEW_FRAMEBUFFER 0x80 +#define I915_NEW_ALPHA_TEST 0x100 +#define I915_NEW_DEPTH_STENCIL 0x200 +#define I915_NEW_SAMPLER 0x400 
+#define I915_NEW_TEXTURE 0x800 +#define I915_NEW_CONSTANTS 0x1000 +#define I915_NEW_VBO 0x2000 +#define I915_NEW_VS 0x4000 + + +/* Driver's internally generated state flags: + */ +#define I915_NEW_VERTEX_FORMAT 0x10000 + + +/* Dirty flags for hardware emit + */ +#define I915_HW_STATIC (1<<I915_CACHE_STATIC) +#define I915_HW_DYNAMIC (1<<I915_CACHE_DYNAMIC) +#define I915_HW_SAMPLER (1<<I915_CACHE_SAMPLER) +#define I915_HW_MAP (1<<I915_CACHE_MAP) +#define I915_HW_PROGRAM (1<<I915_CACHE_PROGRAM) +#define I915_HW_CONSTANTS (1<<I915_CACHE_CONSTANTS) +#define I915_HW_IMMEDIATE (1<<(I915_MAX_CACHE+0)) +#define I915_HW_INVARIENT (1<<(I915_MAX_CACHE+1)) + + +/*********************************************************************** + * i915_prim_emit.c: + */ +struct draw_stage *i915_draw_render_stage( struct i915_context *i915 ); + + +/*********************************************************************** + * i915_prim_vbuf.c: + */ +struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ); + + +/*********************************************************************** + * i915_state_emit.c: + */ +void i915_emit_hardware_state(struct i915_context *i915 ); + + + +/*********************************************************************** + * i915_clear.c: + */ +void i915_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + + +/*********************************************************************** + * i915_surface.c: + */ +void i915_init_surface_functions( struct i915_context *i915 ); + +void i915_init_state_functions( struct i915_context *i915 ); +void i915_init_flush_functions( struct i915_context *i915 ); +void i915_init_string_functions( struct i915_context *i915 ); + + + + +/*********************************************************************** + * Inline conversion functions. 
These are better-typed than the + * macros used previously: + */ +static INLINE struct i915_context * +i915_context( struct pipe_context *pipe ) +{ + return (struct i915_context *)pipe; +} + + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_debug.c b/src/gallium/drivers/i915simple/i915_debug.c new file mode 100644 index 0000000000..5e26d1b905 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_debug.c @@ -0,0 +1,895 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_debug.h" +#include "pipe/p_winsys.h" +#include "pipe/p_debug.h" + + +static void +PRINTF( + struct debug_stream *stream, + const char *fmt, + ... ) +{ + va_list args; + + va_start( args, fmt ); + debug_vprintf( fmt, args ); + va_end( args ); +} + + +static boolean debug( struct debug_stream *stream, const char *name, unsigned len ) +{ + unsigned i; + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + + if (len == 0) { + PRINTF(stream, "Error - zero length packet (0x%08x)\n", stream->ptr[0]); + assert(0); + return FALSE; + } + + if (stream->print_addresses) + PRINTF(stream, "%08x: ", stream->offset); + + + PRINTF(stream, "%s (%d dwords):\n", name, len); + for (i = 0; i < len; i++) + PRINTF(stream, "\t0x%08x\n", ptr[i]); + PRINTF(stream, "\n"); + + stream->offset += len * sizeof(unsigned); + + return TRUE; +} + + +static const char *get_prim_name( unsigned val ) +{ + switch (val & PRIM3D_MASK) { + case PRIM3D_TRILIST: return "TRILIST"; break; + case PRIM3D_TRISTRIP: return "TRISTRIP"; break; + case PRIM3D_TRISTRIP_RVRSE: return "TRISTRIP_RVRSE"; break; + case PRIM3D_TRIFAN: return "TRIFAN"; break; + case PRIM3D_POLY: return "POLY"; break; + case PRIM3D_LINELIST: return "LINELIST"; break; + case PRIM3D_LINESTRIP: return "LINESTRIP"; break; + case PRIM3D_RECTLIST: return "RECTLIST"; break; + case PRIM3D_POINTLIST: return "POINTLIST"; break; + case PRIM3D_DIB: return "DIB"; break; + case PRIM3D_CLEAR_RECT: return "CLEAR_RECT"; break; + case PRIM3D_ZONE_INIT: return "ZONE_INIT"; break; + default: return "????"; break; + } +} + +static boolean debug_prim( struct debug_stream *stream, const char *name, + boolean dump_floats, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + const char *prim = get_prim_name( ptr[0] ); + unsigned i; + + + + PRINTF(stream, 
"%s %s (%d dwords):\n", name, prim, len); + PRINTF(stream, "\t0x%08x\n", ptr[0]); + for (i = 1; i < len; i++) { + if (dump_floats) + PRINTF(stream, "\t0x%08x // %f\n", ptr[i], *(float *)&ptr[i]); + else + PRINTF(stream, "\t0x%08x\n", ptr[i]); + } + + + PRINTF(stream, "\n"); + + stream->offset += len * sizeof(unsigned); + + return TRUE; +} + + + + +static boolean debug_program( struct debug_stream *stream, const char *name, unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + + if (len == 0) { + PRINTF(stream, "Error - zero length packet (0x%08x)\n", stream->ptr[0]); + assert(0); + return FALSE; + } + + if (stream->print_addresses) + PRINTF(stream, "%08x: ", stream->offset); + + PRINTF(stream, "%s (%d dwords):\n", name, len); + i915_disassemble_program( stream, ptr, len ); + + stream->offset += len * sizeof(unsigned); + return TRUE; +} + + +static boolean debug_chain( struct debug_stream *stream, const char *name, unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned old_offset = stream->offset + len * sizeof(unsigned); + unsigned i; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + for (i = 0; i < len; i++) + PRINTF(stream, "\t0x%08x\n", ptr[i]); + + stream->offset = ptr[1] & ~0x3; + + if (stream->offset < old_offset) + PRINTF(stream, "\n... skipping backwards from 0x%x --> 0x%x ...\n\n", + old_offset, stream->offset ); + else + PRINTF(stream, "\n... 
/**
 * Print a labelled bitfield of a state dword.
 *
 * Emits the printf-style label built from fmt and the variadic
 * arguments, followed by bits hi..lo (inclusive) of dw in hex.
 *
 * \param stream  decode cursor, only forwarded to PRINTF
 * \param dw      dword the field is extracted from
 * \param hi      index of the field's most significant bit (0..31)
 * \param lo      index of the field's least significant bit
 * \param fmt     printf-style format of the label
 */
static void
BITS(
   struct debug_stream *stream,
   unsigned dw,
   unsigned hi,
   unsigned lo,
   const char *fmt,
   ... )
{
   va_list args;
   /* Mask that keeps bits hi..0.  The all-ones constant must have the
    * width of `unsigned': with ~0UL and a 64-bit unsigned long more than
    * 32 one-bits survive the shift, the mask truncates to all ones and
    * every bit above `hi' leaks into the printed value.
    */
   unsigned himask = ~0U >> (31 - (hi));

   PRINTF(stream, "\t\t ");

   va_start( args, fmt );
   debug_vprintf( fmt, args );
   va_end( args );

   PRINTF(stream, ": 0x%x\n", ((dw) & himask) >> (lo));
}
) +{ + if (((dw) >> (bit)) & 1) { + va_list args; + + PRINTF(stream, "\t\t "); + + va_start( args, fmt ); + debug_vprintf( fmt, args ); + va_end( args ); + + PRINTF(stream, "\n"); + } +} + +static boolean debug_load_immediate( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned bits = (ptr[0] >> 4) & 0xff; + unsigned j = 0; + + PRINTF(stream, "%s (%d dwords, flags: %x):\n", name, len, bits); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + if (bits & (1<<0)) { + PRINTF(stream, "\t LIS0: 0x%08x\n", ptr[j]); + PRINTF(stream, "\t vb address: 0x%08x\n", (ptr[j] & ~0x3)); + BITS(stream, ptr[j], 0, 0, "vb invalidate disable"); + j++; + } + if (bits & (1<<1)) { + PRINTF(stream, "\t LIS1: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 29, 24, "vb dword width"); + BITS(stream, ptr[j], 21, 16, "vb dword pitch"); + BITS(stream, ptr[j], 15, 0, "vb max index"); + j++; + } + if (bits & (1<<2)) { + int i; + PRINTF(stream, "\t LIS2: 0x%08x\n", ptr[j]); + for (i = 0; i < 8; i++) { + unsigned tc = (ptr[j] >> (i * 4)) & 0xf; + if (tc != 0xf) + BITS(stream, tc, 3, 0, "tex coord %d", i); + } + j++; + } + if (bits & (1<<3)) { + PRINTF(stream, "\t LIS3: 0x%08x\n", ptr[j]); + j++; + } + if (bits & (1<<4)) { + PRINTF(stream, "\t LIS4: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 23, "point width"); + BITS(stream, ptr[j], 22, 19, "line width"); + FLAG(stream, ptr[j], 18, "alpha flatshade"); + FLAG(stream, ptr[j], 17, "fog flatshade"); + FLAG(stream, ptr[j], 16, "spec flatshade"); + FLAG(stream, ptr[j], 15, "rgb flatshade"); + BITS(stream, ptr[j], 14, 13, "cull mode"); + FLAG(stream, ptr[j], 12, "vfmt: point width"); + FLAG(stream, ptr[j], 11, "vfmt: specular/fog"); + FLAG(stream, ptr[j], 10, "vfmt: rgba"); + FLAG(stream, ptr[j], 9, "vfmt: depth offset"); + BITS(stream, ptr[j], 8, 6, "vfmt: position (2==xyzw)"); + FLAG(stream, ptr[j], 5, "force dflt diffuse"); + FLAG(stream, ptr[j], 4, "force dflt 
specular"); + FLAG(stream, ptr[j], 3, "local depth offset enable"); + FLAG(stream, ptr[j], 2, "vfmt: fp32 fog coord"); + FLAG(stream, ptr[j], 1, "sprite point"); + FLAG(stream, ptr[j], 0, "antialiasing"); + j++; + } + if (bits & (1<<5)) { + PRINTF(stream, "\t LIS5: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 28, "rgba write disables"); + FLAG(stream, ptr[j], 27, "force dflt point width"); + FLAG(stream, ptr[j], 26, "last pixel enable"); + FLAG(stream, ptr[j], 25, "global z offset enable"); + FLAG(stream, ptr[j], 24, "fog enable"); + BITS(stream, ptr[j], 23, 16, "stencil ref"); + BITS(stream, ptr[j], 15, 13, "stencil test"); + BITS(stream, ptr[j], 12, 10, "stencil fail op"); + BITS(stream, ptr[j], 9, 7, "stencil pass z fail op"); + BITS(stream, ptr[j], 6, 4, "stencil pass z pass op"); + FLAG(stream, ptr[j], 3, "stencil write enable"); + FLAG(stream, ptr[j], 2, "stencil test enable"); + FLAG(stream, ptr[j], 1, "color dither enable"); + FLAG(stream, ptr[j], 0, "logiop enable"); + j++; + } + if (bits & (1<<6)) { + PRINTF(stream, "\t LIS6: 0x%08x\n", ptr[j]); + FLAG(stream, ptr[j], 31, "alpha test enable"); + BITS(stream, ptr[j], 30, 28, "alpha func"); + BITS(stream, ptr[j], 27, 20, "alpha ref"); + FLAG(stream, ptr[j], 19, "depth test enable"); + BITS(stream, ptr[j], 18, 16, "depth func"); + FLAG(stream, ptr[j], 15, "blend enable"); + BITS(stream, ptr[j], 14, 12, "blend func"); + BITS(stream, ptr[j], 11, 8, "blend src factor"); + BITS(stream, ptr[j], 7, 4, "blend dst factor"); + FLAG(stream, ptr[j], 3, "depth write enable"); + FLAG(stream, ptr[j], 2, "color write enable"); + BITS(stream, ptr[j], 1, 0, "provoking vertex"); + j++; + } + + + PRINTF(stream, "\n"); + + assert(j == len); + + stream->offset += len * sizeof(unsigned); + + return TRUE; +} + + + +static boolean debug_load_indirect( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned bits = (ptr[0] >> 8) & 0x3f; + unsigned 
/**
 * Print blitter dword BR13: the raw value followed by its fields
 * (clipping enable flag, colour depth, raster op and destination pitch).
 *
 * Called by debug_copy_blit() and debug_color_blit() for the dword that
 * follows the packet header.
 *
 * \param stream  decode cursor, only forwarded to the print helpers
 * \param val     the BR13 dword
 */
static void BR13( struct debug_stream *stream,
                  unsigned val )
{
   PRINTF(stream, "\t0x%08x\n", val);
   FLAG(stream, val, 30, "clipping enable");
   BITS(stream, val, 25, 24, "color depth (3==32bpp)");
   BITS(stream, val, 23, 16, "raster op");
   BITS(stream, val, 15, 0, "dest pitch");
}
unsigned val ) +{ + PRINTF(stream, "\t0x%08x -- dest address\n", val); +} + +static void BR26( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + BITS(stream, val, 31, 16, "src y1"); + BITS(stream, val, 15, 0, "src x1"); +} + +static void BR11( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x\n", val); + BITS(stream, val, 15, 0, "src pitch"); +} + +static void BR12( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x -- src address\n", val); +} + +static void BR16( struct debug_stream *stream, + unsigned val ) +{ + PRINTF(stream, "\t0x%08x -- color\n", val); +} + +static boolean debug_copy_blit( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + BR13(stream, ptr[j++]); + BR22(stream, ptr[j++]); + BR23(stream, ptr[j++]); + BR09(stream, ptr[j++]); + BR26(stream, ptr[j++]); + BR11(stream, ptr[j++]); + BR12(stream, ptr[j++]); + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_color_blit( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + BR13(stream, ptr[j++]); + BR22(stream, ptr[j++]); + BR23(stream, ptr[j++]); + BR09(stream, ptr[j++]); + BR16(stream, ptr[j++]); + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_modes4( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 21, 18, "logicop 
func"); + FLAG(stream, ptr[j], 17, "stencil test mask modify-enable"); + FLAG(stream, ptr[j], 16, "stencil write mask modify-enable"); + BITS(stream, ptr[j], 15, 8, "stencil test mask"); + BITS(stream, ptr[j], 7, 0, "stencil write mask"); + j++; + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_map_state( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 15, 0, "map mask"); + j++; + } + + while (j < len) { + { + PRINTF(stream, "\t TMn.0: 0x%08x\n", ptr[j]); + PRINTF(stream, "\t map address: 0x%08x\n", (ptr[j] & ~0x3)); + FLAG(stream, ptr[j], 1, "vertical line stride"); + FLAG(stream, ptr[j], 0, "vertical line stride offset"); + j++; + } + + { + PRINTF(stream, "\t TMn.1: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 21, "height"); + BITS(stream, ptr[j], 20, 10, "width"); + BITS(stream, ptr[j], 9, 7, "surface format"); + BITS(stream, ptr[j], 6, 3, "texel format"); + FLAG(stream, ptr[j], 2, "use fence regs"); + FLAG(stream, ptr[j], 1, "tiled surface"); + FLAG(stream, ptr[j], 0, "tile walk ymajor"); + j++; + } + { + PRINTF(stream, "\t TMn.2: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 21, "dword pitch"); + BITS(stream, ptr[j], 20, 15, "cube face enables"); + BITS(stream, ptr[j], 14, 9, "max lod"); + FLAG(stream, ptr[j], 8, "mip layout right"); + BITS(stream, ptr[j], 7, 0, "depth"); + j++; + } + } + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_sampler_state( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", 
ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 15, 0, "sampler mask"); + j++; + } + + while (j < len) { + { + PRINTF(stream, "\t TSn.0: 0x%08x\n", ptr[j]); + FLAG(stream, ptr[j], 31, "reverse gamma"); + FLAG(stream, ptr[j], 30, "planar to packed"); + FLAG(stream, ptr[j], 29, "yuv->rgb"); + BITS(stream, ptr[j], 28, 27, "chromakey index"); + BITS(stream, ptr[j], 26, 22, "base mip level"); + BITS(stream, ptr[j], 21, 20, "mip mode filter"); + BITS(stream, ptr[j], 19, 17, "mag mode filter"); + BITS(stream, ptr[j], 16, 14, "min mode filter"); + BITS(stream, ptr[j], 13, 5, "lod bias (s4.4)"); + FLAG(stream, ptr[j], 4, "shadow enable"); + FLAG(stream, ptr[j], 3, "max-aniso-4"); + BITS(stream, ptr[j], 2, 0, "shadow func"); + j++; + } + + { + PRINTF(stream, "\t TSn.1: 0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 31, 24, "min lod"); + MBZ( ptr[j], 23, 18 ); + FLAG(stream, ptr[j], 17, "kill pixel enable"); + FLAG(stream, ptr[j], 16, "keyed tex filter mode"); + FLAG(stream, ptr[j], 15, "chromakey enable"); + BITS(stream, ptr[j], 14, 12, "tcx wrap mode"); + BITS(stream, ptr[j], 11, 9, "tcy wrap mode"); + BITS(stream, ptr[j], 8, 6, "tcz wrap mode"); + FLAG(stream, ptr[j], 5, "normalized coords"); + BITS(stream, ptr[j], 4, 1, "map (surface) index"); + FLAG(stream, ptr[j], 0, "EAST deinterlacer enable"); + j++; + } + { + PRINTF(stream, "\t TSn.2: 0x%08x (default color)\n", ptr[j]); + j++; + } + } + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_dest_vars( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + FLAG(stream, ptr[j], 31, "early classic ztest"); + FLAG(stream, ptr[j], 30, "opengl tex default color"); + FLAG(stream, ptr[j], 29, "bypass iz"); + FLAG(stream, 
ptr[j], 28, "lod preclamp"); + BITS(stream, ptr[j], 27, 26, "dither pattern"); + FLAG(stream, ptr[j], 25, "linear gamma blend"); + FLAG(stream, ptr[j], 24, "debug dither"); + BITS(stream, ptr[j], 23, 20, "dstorg x"); + BITS(stream, ptr[j], 19, 16, "dstorg y"); + MBZ (ptr[j], 15, 15 ); + BITS(stream, ptr[j], 14, 12, "422 write select"); + BITS(stream, ptr[j], 11, 8, "cbuf format"); + BITS(stream, ptr[j], 3, 2, "zbuf format"); + FLAG(stream, ptr[j], 1, "vert line stride"); + FLAG(stream, ptr[j], 1, "vert line stride offset"); + j++; + } + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean debug_buf_info( struct debug_stream *stream, + const char *name, + unsigned len ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + int j = 0; + + PRINTF(stream, "%s (%d dwords):\n", name, len); + PRINTF(stream, "\t0x%08x\n", ptr[j++]); + + { + PRINTF(stream, "\t0x%08x\n", ptr[j]); + BITS(stream, ptr[j], 28, 28, "aux buffer id"); + BITS(stream, ptr[j], 27, 24, "buffer id (7=depth, 3=back)"); + FLAG(stream, ptr[j], 23, "use fence regs"); + FLAG(stream, ptr[j], 22, "tiled surface"); + FLAG(stream, ptr[j], 21, "tile walk ymajor"); + MBZ (ptr[j], 20, 14); + BITS(stream, ptr[j], 13, 2, "dword pitch"); + MBZ (ptr[j], 2, 0); + j++; + } + + PRINTF(stream, "\t0x%08x -- buffer base address\n", ptr[j++]); + + stream->offset += len * sizeof(unsigned); + assert(j == len); + return TRUE; +} + +static boolean i915_debug_packet( struct debug_stream *stream ) +{ + unsigned *ptr = (unsigned *)(stream->ptr + stream->offset); + unsigned cmd = *ptr; + + switch (((cmd >> 29) & 0x7)) { + case 0x0: + switch ((cmd >> 23) & 0x3f) { + case 0x0: + return debug(stream, "MI_NOOP", 1); + case 0x3: + return debug(stream, "MI_WAIT_FOR_EVENT", 1); + case 0x4: + return debug(stream, "MI_FLUSH", 1); + case 0xA: + debug(stream, "MI_BATCH_BUFFER_END", 1); + return FALSE; + case 0x22: + return debug(stream, "MI_LOAD_REGISTER_IMM", 3); + case 0x31: + return 
debug_chain(stream, "MI_BATCH_BUFFER_START", 2); + default: + (void)debug(stream, "UNKNOWN 0x0 case!", 1); + assert(0); + break; + } + break; + case 0x1: + (void) debug(stream, "UNKNOWN 0x1 case!", 1); + assert(0); + break; + case 0x2: + switch ((cmd >> 22) & 0xff) { + case 0x50: + return debug_color_blit(stream, "XY_COLOR_BLT", (cmd & 0xff) + 2); + case 0x53: + return debug_copy_blit(stream, "XY_SRC_COPY_BLT", (cmd & 0xff) + 2); + default: + return debug(stream, "blit command", (cmd & 0xff) + 2); + } + break; + case 0x3: + switch ((cmd >> 24) & 0x1f) { + case 0x6: + return debug(stream, "3DSTATE_ANTI_ALIASING", 1); + case 0x7: + return debug(stream, "3DSTATE_RASTERIZATION_RULES", 1); + case 0x8: + return debug(stream, "3DSTATE_BACKFACE_STENCIL_OPS", 2); + case 0x9: + return debug(stream, "3DSTATE_BACKFACE_STENCIL_MASKS", 1); + case 0xb: + return debug(stream, "3DSTATE_INDEPENDENT_ALPHA_BLEND", 1); + case 0xc: + return debug(stream, "3DSTATE_MODES5", 1); + case 0xd: + return debug_modes4(stream, "3DSTATE_MODES4", 1); + case 0x15: + return debug(stream, "3DSTATE_FOG_COLOR", 1); + case 0x16: + return debug(stream, "3DSTATE_COORD_SET_BINDINGS", 1); + case 0x1c: + /* 3DState16NP */ + switch((cmd >> 19) & 0x1f) { + case 0x10: + return debug(stream, "3DSTATE_SCISSOR_ENABLE", 1); + case 0x11: + return debug(stream, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE", 1); + default: + (void) debug(stream, "UNKNOWN 0x1c case!", 1); + assert(0); + break; + } + break; + case 0x1d: + /* 3DStateMW */ + switch ((cmd >> 16) & 0xff) { + case 0x0: + return debug_map_state(stream, "3DSTATE_MAP_STATE", (cmd & 0x1f) + 2); + case 0x1: + return debug_sampler_state(stream, "3DSTATE_SAMPLER_STATE", (cmd & 0x1f) + 2); + case 0x4: + return debug_load_immediate(stream, "3DSTATE_LOAD_STATE_IMMEDIATE", (cmd & 0xf) + 2); + case 0x5: + return debug_program(stream, "3DSTATE_PIXEL_SHADER_PROGRAM", (cmd & 0x1ff) + 2); + case 0x6: + return debug(stream, "3DSTATE_PIXEL_SHADER_CONSTANTS", (cmd & 0xff) + 2); + case 
0x7: + return debug_load_indirect(stream, "3DSTATE_LOAD_INDIRECT", (cmd & 0xff) + 2); + case 0x80: + return debug(stream, "3DSTATE_DRAWING_RECTANGLE", (cmd & 0xffff) + 2); + case 0x81: + return debug(stream, "3DSTATE_SCISSOR_RECTANGLE", (cmd & 0xffff) + 2); + case 0x83: + return debug(stream, "3DSTATE_SPAN_STIPPLE", (cmd & 0xffff) + 2); + case 0x85: + return debug_dest_vars(stream, "3DSTATE_DEST_BUFFER_VARS", (cmd & 0xffff) + 2); + case 0x88: + return debug(stream, "3DSTATE_CONSTANT_BLEND_COLOR", (cmd & 0xffff) + 2); + case 0x89: + return debug(stream, "3DSTATE_FOG_MODE", (cmd & 0xffff) + 2); + case 0x8e: + return debug_buf_info(stream, "3DSTATE_BUFFER_INFO", (cmd & 0xffff) + 2); + case 0x97: + return debug(stream, "3DSTATE_DEPTH_OFFSET_SCALE", (cmd & 0xffff) + 2); + case 0x98: + return debug(stream, "3DSTATE_DEFAULT_Z", (cmd & 0xffff) + 2); + case 0x99: + return debug(stream, "3DSTATE_DEFAULT_DIFFUSE", (cmd & 0xffff) + 2); + case 0x9a: + return debug(stream, "3DSTATE_DEFAULT_SPECULAR", (cmd & 0xffff) + 2); + case 0x9c: + return debug(stream, "3DSTATE_CLEAR_PARAMETERS", (cmd & 0xffff) + 2); + default: + assert(0); + return 0; + } + break; + case 0x1e: + if (cmd & (1 << 23)) + return debug(stream, "???", (cmd & 0xffff) + 1); + else + return debug(stream, "", 1); + break; + case 0x1f: + if ((cmd & (1 << 23)) == 0) + return debug_prim(stream, "3DPRIM (inline)", 1, (cmd & 0x1ffff) + 2); + else if (cmd & (1 << 17)) + { + if ((cmd & 0xffff) == 0) + return debug_variable_length_prim(stream); + else + return debug_prim(stream, "3DPRIM (indexed)", 0, (((cmd & 0xffff) + 1) / 2) + 1); + } + else + return debug_prim(stream, "3DPRIM (indirect sequential)", 0, 2); + break; + default: + return debug(stream, "", 0); + } + default: + assert(0); + return 0; + } + + assert(0); + return 0; +} + + + +void +i915_dump_batchbuffer( struct i915_context *i915 ) +{ + struct debug_stream stream; + /* TODO fix me */ + unsigned *start = 0;/*i915->batch_start;*/ + unsigned *end = 
0;/*i915->winsys->batch_start( i915->winsys, 0, 0 );*/ + unsigned long bytes = (unsigned long) (end - start) * 4; + boolean done = FALSE; + + stream.offset = 0; + stream.ptr = (char *)start; + stream.print_addresses = 0; + stream.winsys = i915->pipe.winsys; + + if (!start || !end) { + debug_printf( "\n\nBATCH: ???\n"); + return; + } + + debug_printf( "\n\nBATCH: (%d)\n", bytes / 4); + + while (!done && + stream.offset < bytes) + { + if (!i915_debug_packet( &stream )) + break; + + assert(stream.offset <= bytes && + stream.offset >= 0); + } + + debug_printf( "END-BATCH\n\n\n"); +} + + diff --git a/src/gallium/drivers/i915simple/i915_debug.h b/src/gallium/drivers/i915simple/i915_debug.h new file mode 100644 index 0000000000..afb63edabf --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_debug.h @@ -0,0 +1,115 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
/**
 * Cursor used while decoding a batch buffer.
 *
 * The packet currently being decoded lives at ptr + offset; the decode
 * helpers advance offset by the packet's size in bytes.
 */
struct debug_stream
{
   unsigned offset;		/* current gtt offset; added to ptr to find the next packet */
   char *ptr;		/* pointer to gtt offset zero */
   char *end;		/* NOTE(review): presumably the end of the buffer - the
			 * previous comment was a copy of the one on ptr;
			 * confirm against the users of this field */
   unsigned print_addresses;	/* non-zero: prefix dumped packets with their offset */
   struct pipe_winsys *winsys;
};
) +{ + (void) i915; + (void) fmt; +} + +#endif + + +void i915_dump_batchbuffer( struct i915_context *i915 ); + + + +void i915_debug_init( struct i915_context *i915 ); + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_debug_fp.c b/src/gallium/drivers/i915simple/i915_debug_fp.c new file mode 100644 index 0000000000..48be3e1472 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_debug_fp.c @@ -0,0 +1,363 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "i915_reg.h" +#include "i915_debug.h" +#include "pipe/p_winsys.h" +#include "util/u_memory.h" + + +static void +PRINTF( + struct debug_stream *stream, + const char *fmt, + ... ) +{ + va_list args; + + va_start( args, fmt ); + debug_vprintf( fmt, args ); + va_end( args ); +} + + +static const char *opcodes[0x20] = { + "NOP", + "ADD", + "MOV", + "MUL", + "MAD", + "DP2ADD", + "DP3", + "DP4", + "FRC", + "RCP", + "RSQ", + "EXP", + "LOG", + "CMP", + "MIN", + "MAX", + "FLR", + "MOD", + "TRC", + "SGE", + "SLT", + "TEXLD", + "TEXLDP", + "TEXLDB", + "TEXKILL", + "DCL", + "0x1a", + "0x1b", + "0x1c", + "0x1d", + "0x1e", + "0x1f", +}; + + +static const int args[0x20] = { + 0, /* 0 nop */ + 2, /* 1 add */ + 1, /* 2 mov */ + 2, /* 3 m ul */ + 3, /* 4 mad */ + 3, /* 5 dp2add */ + 2, /* 6 dp3 */ + 2, /* 7 dp4 */ + 1, /* 8 frc */ + 1, /* 9 rcp */ + 1, /* a rsq */ + 1, /* b exp */ + 1, /* c log */ + 3, /* d cmp */ + 2, /* e min */ + 2, /* f max */ + 1, /* 10 flr */ + 1, /* 11 mod */ + 1, /* 12 trc */ + 2, /* 13 sge */ + 2, /* 14 slt */ + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, +}; + + +static const char *regname[0x8] = { + "R", + "T", + "CONST", + "S", + "OC", + "OD", + "U", + "UNKNOWN", +}; + +static void +print_reg_type_nr(struct debug_stream *stream, unsigned type, unsigned nr) +{ + switch (type) { + case REG_TYPE_T: + switch (nr) { + case T_DIFFUSE: + PRINTF(stream, "T_DIFFUSE"); + return; + case T_SPECULAR: + PRINTF(stream, "T_SPECULAR"); + return; + case T_FOG_W: + PRINTF(stream, "T_FOG_W"); + return; + default: + PRINTF(stream, "T_TEX%d", nr); + return; + } + case REG_TYPE_OC: + if (nr == 0) { + PRINTF(stream, "oC"); + return; + } + break; + case REG_TYPE_OD: + if (nr == 0) { + PRINTF(stream, "oD"); + return; + } + break; + default: + break; + } + + PRINTF(stream, "%s[%d]", regname[type], nr); +} + +#define REG_SWIZZLE_MASK 0x7777 +#define REG_NEGATE_MASK 0x8888 + 
+#define REG_SWIZZLE_XYZW ((SRC_X << A2_SRC2_CHANNEL_X_SHIFT) | \ + (SRC_Y << A2_SRC2_CHANNEL_Y_SHIFT) | \ + (SRC_Z << A2_SRC2_CHANNEL_Z_SHIFT) | \ + (SRC_W << A2_SRC2_CHANNEL_W_SHIFT)) + + +static void +print_reg_neg_swizzle(struct debug_stream *stream, unsigned reg) +{ + int i; + + if ((reg & REG_SWIZZLE_MASK) == REG_SWIZZLE_XYZW && + (reg & REG_NEGATE_MASK) == 0) + return; + + PRINTF(stream, "."); + + for (i = 3; i >= 0; i--) { + if (reg & (1 << ((i * 4) + 3))) + PRINTF(stream, "-"); + + switch ((reg >> (i * 4)) & 0x7) { + case 0: + PRINTF(stream, "x"); + break; + case 1: + PRINTF(stream, "y"); + break; + case 2: + PRINTF(stream, "z"); + break; + case 3: + PRINTF(stream, "w"); + break; + case 4: + PRINTF(stream, "0"); + break; + case 5: + PRINTF(stream, "1"); + break; + default: + PRINTF(stream, "?"); + break; + } + } +} + + +static void +print_src_reg(struct debug_stream *stream, unsigned dword) +{ + unsigned nr = (dword >> A2_SRC2_NR_SHIFT) & REG_NR_MASK; + unsigned type = (dword >> A2_SRC2_TYPE_SHIFT) & REG_TYPE_MASK; + print_reg_type_nr(stream, type, nr); + print_reg_neg_swizzle(stream, dword); +} + + +static void +print_dest_reg(struct debug_stream *stream, unsigned dword) +{ + unsigned nr = (dword >> A0_DEST_NR_SHIFT) & REG_NR_MASK; + unsigned type = (dword >> A0_DEST_TYPE_SHIFT) & REG_TYPE_MASK; + print_reg_type_nr(stream, type, nr); + if ((dword & A0_DEST_CHANNEL_ALL) == A0_DEST_CHANNEL_ALL) + return; + PRINTF(stream, "."); + if (dword & A0_DEST_CHANNEL_X) + PRINTF(stream, "x"); + if (dword & A0_DEST_CHANNEL_Y) + PRINTF(stream, "y"); + if (dword & A0_DEST_CHANNEL_Z) + PRINTF(stream, "z"); + if (dword & A0_DEST_CHANNEL_W) + PRINTF(stream, "w"); +} + + +#define GET_SRC0_REG(r0, r1) ((r0<<14)|(r1>>A1_SRC0_CHANNEL_W_SHIFT)) +#define GET_SRC1_REG(r0, r1) ((r0<<8)|(r1>>A2_SRC1_CHANNEL_W_SHIFT)) +#define GET_SRC2_REG(r) (r) + + +static void +print_arith_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + if (opcode != A0_NOP) { + 
print_dest_reg(stream, program[0]); + if (program[0] & A0_DEST_SATURATE) + PRINTF(stream, " = SATURATE "); + else + PRINTF(stream, " = "); + } + + PRINTF(stream, "%s ", opcodes[opcode]); + + print_src_reg(stream, GET_SRC0_REG(program[0], program[1])); + if (args[opcode] == 1) { + PRINTF(stream, "\n"); + return; + } + + PRINTF(stream, ", "); + print_src_reg(stream, GET_SRC1_REG(program[1], program[2])); + if (args[opcode] == 2) { + PRINTF(stream, "\n"); + return; + } + + PRINTF(stream, ", "); + print_src_reg(stream, GET_SRC2_REG(program[2])); + PRINTF(stream, "\n"); + return; +} + + +static void +print_tex_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + print_dest_reg(stream, program[0] | A0_DEST_CHANNEL_ALL); + PRINTF(stream, " = "); + + PRINTF(stream, "%s ", opcodes[opcode]); + + PRINTF(stream, "S[%d],", program[0] & T0_SAMPLER_NR_MASK); + + print_reg_type_nr(stream, + (program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & + REG_TYPE_MASK, + (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); + PRINTF(stream, "\n"); +} + +static void +print_texkil_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + PRINTF(stream, "TEXKIL "); + + print_reg_type_nr(stream, + (program[1] >> T1_ADDRESS_REG_TYPE_SHIFT) & + REG_TYPE_MASK, + (program[1] >> T1_ADDRESS_REG_NR_SHIFT) & REG_NR_MASK); + PRINTF(stream, "\n"); +} + +static void +print_dcl_op(struct debug_stream *stream, + unsigned opcode, const unsigned * program) +{ + PRINTF(stream, "%s ", opcodes[opcode]); + print_dest_reg(stream, + program[0] | A0_DEST_CHANNEL_ALL); + PRINTF(stream, "\n"); +} + + +void +i915_disassemble_program(struct debug_stream *stream, + const unsigned * program, unsigned sz) +{ + unsigned i; + + PRINTF(stream, "\t\tBEGIN\n"); + + assert((program[0] & 0x1ff) + 2 == sz); + + program++; + for (i = 1; i < sz; i += 3, program += 3) { + unsigned opcode = program[0] & (0x1f << 24); + + PRINTF(stream, "\t\t"); + + if ((int) opcode >= A0_NOP && opcode <= 
A0_SLT) + print_arith_op(stream, opcode >> 24, program); + else if (opcode >= T0_TEXLD && opcode < T0_TEXKILL) + print_tex_op(stream, opcode >> 24, program); + else if (opcode == T0_TEXKILL) + print_texkil_op(stream, opcode >> 24, program); + else if (opcode == D0_DCL) + print_dcl_op(stream, opcode >> 24, program); + else + PRINTF(stream, "Unknown opcode 0x%x\n", opcode); + } + + PRINTF(stream, "\t\tEND\n\n"); +} + + diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c new file mode 100644 index 0000000000..472e0ab774 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_flush.c @@ -0,0 +1,78 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_defines.h" +#include "draw/draw_context.h" +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_batch.h" + + +static void i915_flush( struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence ) +{ + struct i915_context *i915 = i915_context(pipe); + + draw_flush(i915->draw); + + /* Do we need to emit an MI_FLUSH command to flush the hardware + * caches? + */ + if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) { + unsigned flush = MI_FLUSH; + + if (!(flags & PIPE_FLUSH_RENDER_CACHE)) + flush |= INHIBIT_FLUSH_RENDER_CACHE; + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) + flush |= FLUSH_MAP_CACHE; + + if (!BEGIN_BATCH(1, 0)) { + FLUSH_BATCH(NULL); + assert(BEGIN_BATCH(1, 0)); + } + OUT_BATCH( flush ); + } + + /* If there are no flags, just flush pending commands to hardware: + */ + FLUSH_BATCH(fence); + i915->vbo_flushed = 1; +} + + + +void i915_init_flush_functions( struct i915_context *i915 ) +{ + i915->pipe.flush = i915_flush; +} diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915simple/i915_fpc.h new file mode 100644 index 0000000000..2f0f99d046 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_fpc.h @@ -0,0 +1,207 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef I915_FPC_H +#define I915_FPC_H + + +#include "i915_context.h" +#include "i915_reg.h" + + + +#define I915_PROGRAM_SIZE 192 + + + +/** + * Program translation state + */ +struct i915_fp_compile { + struct i915_fragment_shader *shader; /* the shader we're compiling */ + + boolean used_constants[I915_MAX_CONSTANT]; + + /** maps TGSI immediate index to constant slot */ + uint num_immediates; + uint immediates_map[I915_MAX_CONSTANT]; + float immediates[I915_MAX_CONSTANT][4]; + + boolean first_instruction; + + uint declarations[I915_PROGRAM_SIZE]; + uint program[I915_PROGRAM_SIZE]; + + uint *csr; /**< Cursor, points into program. */ + + uint *decl; /**< Cursor, points into declarations. 
*/ + + uint decl_s; /**< flags for which s regs need to be decl'd */ + uint decl_t; /**< flags for which t regs need to be decl'd */ + + uint temp_flag; /**< Tracks temporary regs which are in use */ + uint utemp_flag; /**< Tracks TYPE_U temporary regs which are in use */ + + uint nr_tex_indirect; + uint nr_tex_insn; + uint nr_alu_insn; + uint nr_decl_insn; + + boolean error; /**< Set if i915_program_error() is called */ + uint wpos_tex; + uint NumNativeInstructions; + uint NumNativeAluInstructions; + uint NumNativeTexInstructions; + uint NumNativeTexIndirections; +}; + + +/* Having zero and one in here makes the definition of swizzle a lot + * easier. + */ +#define UREG_TYPE_SHIFT 29 +#define UREG_NR_SHIFT 24 +#define UREG_CHANNEL_X_NEGATE_SHIFT 23 +#define UREG_CHANNEL_X_SHIFT 20 +#define UREG_CHANNEL_Y_NEGATE_SHIFT 19 +#define UREG_CHANNEL_Y_SHIFT 16 +#define UREG_CHANNEL_Z_NEGATE_SHIFT 15 +#define UREG_CHANNEL_Z_SHIFT 12 +#define UREG_CHANNEL_W_NEGATE_SHIFT 11 +#define UREG_CHANNEL_W_SHIFT 8 +#define UREG_CHANNEL_ZERO_NEGATE_MBZ 5 +#define UREG_CHANNEL_ZERO_SHIFT 4 +#define UREG_CHANNEL_ONE_NEGATE_MBZ 1 +#define UREG_CHANNEL_ONE_SHIFT 0 + +#define UREG_BAD 0xffffffff /* not a valid ureg */ + +#define X SRC_X +#define Y SRC_Y +#define Z SRC_Z +#define W SRC_W +#define ZERO SRC_ZERO +#define ONE SRC_ONE + +/* Construct a ureg: + */ +#define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) | \ + ((nr) << UREG_NR_SHIFT) | \ + (X << UREG_CHANNEL_X_SHIFT) | \ + (Y << UREG_CHANNEL_Y_SHIFT) | \ + (Z << UREG_CHANNEL_Z_SHIFT) | \ + (W << UREG_CHANNEL_W_SHIFT) | \ + (ZERO << UREG_CHANNEL_ZERO_SHIFT) | \ + (ONE << UREG_CHANNEL_ONE_SHIFT)) + +/* GET_CHANNEL_SRC shifts reg left by 4 * channel and keeps bits 20..23, + * i.e. channel index k moves the 4-bit field found at bit (20 - 4 * k) + * into the X channel position. CHANNEL_SRC shifts such a value right by + * 4 * channel, moving it from the X position to that channel's position. + * Arguments are parenthesized so expressions can be passed safely. + */ +#define GET_CHANNEL_SRC( reg, channel ) (((reg)<<((channel)*4)) & (0xf<<20)) +#define CHANNEL_SRC( src, channel ) ((src)>>((channel)*4)) + +/* Extract the register type / register number field of a ureg. */ +#define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)&REG_TYPE_MASK) +#define GET_UREG_NR(reg) (((reg)>>UREG_NR_SHIFT)&REG_NR_MASK) + + + +#define UREG_XYZW_CHANNEL_MASK 0x00ffff00 + +/* One neat thing about the UREG 
representation: + */ +static INLINE int +swizzle(int reg, uint x, uint y, uint z, uint w) +{ + assert(x <= SRC_ONE); + assert(y <= SRC_ONE); + assert(z <= SRC_ONE); + assert(w <= SRC_ONE); /* every selector must be a field index no larger than SRC_ONE */ + return ((reg & ~UREG_XYZW_CHANNEL_MASK) | /* bits outside the XYZW channel fields are kept */ + CHANNEL_SRC(GET_CHANNEL_SRC(reg, x), 0) | /* field picked by x goes to the X slot */ + CHANNEL_SRC(GET_CHANNEL_SRC(reg, y), 1) | /* field picked by y goes to the Y slot */ + CHANNEL_SRC(GET_CHANNEL_SRC(reg, z), 2) | /* field picked by z goes to the Z slot */ + CHANNEL_SRC(GET_CHANNEL_SRC(reg, w), 3)); /* field picked by w goes to the W slot */ +} + + + +/*********************************************************************** + * Public interface for the compiler + */ +extern void +i915_translate_fragment_program( struct i915_context *i915, + struct i915_fragment_shader *fs); + + + +extern uint i915_get_temp(struct i915_fp_compile *p); +extern uint i915_get_utemp(struct i915_fp_compile *p); +extern void i915_release_utemps(struct i915_fp_compile *p); + + +extern uint i915_emit_texld(struct i915_fp_compile *p, + uint dest, + uint destmask, + uint sampler, uint coord, uint op); + +extern uint i915_emit_arith(struct i915_fp_compile *p, + uint op, + uint dest, + uint mask, + uint saturate, + uint src0, uint src1, uint src2); + +extern uint i915_emit_decl(struct i915_fp_compile *p, + uint type, uint nr, uint d0_flags); + + +extern uint i915_emit_const1f(struct i915_fp_compile *p, float c0); + +extern uint i915_emit_const2f(struct i915_fp_compile *p, + float c0, float c1); + +extern uint i915_emit_const4fv(struct i915_fp_compile *p, + const float * c); + +extern uint i915_emit_const4f(struct i915_fp_compile *p, + float c0, float c1, + float c2, float c3); + + +/*====================================================================== + * i915_fpc_debug.c + * + * NOTE(review): the definition of i915_disassemble_program() added + * elsewhere in this patch takes (struct debug_stream *stream, + * const unsigned *program, unsigned sz); this two-argument prototype + * looks stale - confirm before relying on it. + */ +extern void i915_disassemble_program(const uint * program, uint sz); + + +/*====================================================================== + * i915_fpc_translate.c + */ + +extern void +i915_program_error(struct i915_fp_compile *p, const char *msg, ...); + + +#endif diff --git a/src/gallium/drivers/i915simple/i915_fpc_emit.c 
b/src/gallium/drivers/i915simple/i915_fpc_emit.c new file mode 100644 index 0000000000..b054ce41d3 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_fpc_emit.c @@ -0,0 +1,375 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" +#include "util/u_math.h" + + +#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) +#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT) +#define A1_SRC0( reg ) (((reg)&UREG_MASK)<<UREG_A1_SRC0_SHIFT_RIGHT) +#define A1_SRC1( reg ) (((reg)&UREG_MASK)>>UREG_A1_SRC1_SHIFT_LEFT) +#define A2_SRC1( reg ) (((reg)&UREG_MASK)<<UREG_A2_SRC1_SHIFT_RIGHT) +#define A2_SRC2( reg ) (((reg)&UREG_MASK)>>UREG_A2_SRC2_SHIFT_LEFT) + +/* These are special, and don't have swizzle/negate bits. + */ +#define T0_SAMPLER( reg ) (GET_UREG_NR(reg)<<T0_SAMPLER_NR_SHIFT) +#define T1_ADDRESS_REG( reg ) ((GET_UREG_NR(reg)<<T1_ADDRESS_REG_NR_SHIFT) | \ + (GET_UREG_TYPE(reg)<<T1_ADDRESS_REG_TYPE_SHIFT)) + + +/* Macros for translating UREG's into the various register fields used + * by the I915 programmable unit. 
+ */ +#define UREG_A0_DEST_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_DEST_TYPE_SHIFT) +#define UREG_A0_SRC0_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_SRC0_TYPE_SHIFT) +#define UREG_A1_SRC0_SHIFT_RIGHT (A1_SRC0_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT) +#define UREG_A1_SRC1_SHIFT_LEFT (UREG_TYPE_SHIFT - A1_SRC1_TYPE_SHIFT) +#define UREG_A2_SRC1_SHIFT_RIGHT (A2_SRC1_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT) +#define UREG_A2_SRC2_SHIFT_LEFT (UREG_TYPE_SHIFT - A2_SRC2_TYPE_SHIFT) + +#define UREG_MASK 0xffffff00 +#define UREG_TYPE_NR_MASK ((REG_TYPE_MASK << UREG_TYPE_SHIFT) | \ + (REG_NR_MASK << UREG_NR_SHIFT)) + + +uint +i915_get_temp(struct i915_fp_compile *p) +{ + int bit = ffs(~p->temp_flag); + if (!bit) { + i915_program_error(p, "i915_get_temp: out of temporaries\n"); + return 0; + } + + p->temp_flag |= 1 << (bit - 1); + return bit - 1; +} + + +static void +i915_release_temp(struct i915_fp_compile *p, int reg) +{ + p->temp_flag &= ~(1 << reg); +} + + +/** + * Get unpreserved temporary, a temp whose value is not preserved between + * PS program phases. 
+ */ +uint +i915_get_utemp(struct i915_fp_compile * p) +{ + int bit = ffs(~p->utemp_flag); /* lowest clear bit of utemp_flag, 1-based; 0 when none is clear */ + if (!bit) { + i915_program_error(p, "i915_get_utemp: out of temporaries\n"); + return 0; + } + + p->utemp_flag |= 1 << (bit - 1); + return UREG(REG_TYPE_U, (bit - 1)); +} +/* Reset utemp_flag to ~0x7: bits 0..2 become clear (free) and every higher bit is set, so i915_get_utemp() can only hand out indices 0..2. */ +void +i915_release_utemps(struct i915_fp_compile *p) +{ + p->utemp_flag = ~0x7; +} + +/** + * Declare a T or S register the first time it is requested. + * decl_t / decl_s record which register numbers were already declared; + * for a new one three dwords (D0_DCL | dest | d0_flags, D1_MBZ, D2_MBZ) + * are appended at p->decl and nr_decl_insn is incremented. Other register + * types emit nothing. Always returns UREG(type, nr). + */ +uint +i915_emit_decl(struct i915_fp_compile *p, + uint type, uint nr, uint d0_flags) +{ + uint reg = UREG(type, nr); + + if (type == REG_TYPE_T) { + if (p->decl_t & (1 << nr)) + return reg; + + p->decl_t |= (1 << nr); + } + else if (type == REG_TYPE_S) { + if (p->decl_s & (1 << nr)) + return reg; + + p->decl_s |= (1 << nr); + } + else + return reg; + + *(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags); + *(p->decl++) = D1_MBZ; + *(p->decl++) = D2_MBZ; + + p->nr_decl_insn++; + return reg; +} +/** + * Append one arithmetic instruction (three dwords) at p->csr. + * dest is reduced to its plain type/nr form and must not be a constant + * register. When more than one source is a constant register, sources + * whose register number differs from the first constant's are first MOVed + * into utemps. Returns the reduced dest. + */ +uint +i915_emit_arith(struct i915_fp_compile * p, + uint op, + uint dest, + uint mask, + uint saturate, uint src0, uint src1, uint src2) +{ + uint c[3]; + uint nr_const = 0; + + assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); + dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)); + assert(dest); + + if (GET_UREG_TYPE(src0) == REG_TYPE_CONST) + c[nr_const++] = 0; + if (GET_UREG_TYPE(src1) == REG_TYPE_CONST) + c[nr_const++] = 1; + if (GET_UREG_TYPE(src2) == REG_TYPE_CONST) + c[nr_const++] = 2; + + /* Recursively call this function to MOV additional const values + * into temporary registers. Use utemp registers for this - + * currently shouldn't be possible to run out, but keep an eye on + * this. 
+ */ + if (nr_const > 1) { + uint s[3], first, i, old_utemp_flag; + + s[0] = src0; + s[1] = src1; + s[2] = src2; + old_utemp_flag = p->utemp_flag; + + first = GET_UREG_NR(s[c[0]]); + for (i = 1; i < nr_const; i++) { + if (GET_UREG_NR(s[c[i]]) != first) { + uint tmp = i915_get_utemp(p); + + i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, + s[c[i]], 0, 0); + s[c[i]] = tmp; + } + } + + src0 = s[0]; + src1 = s[1]; + src2 = s[2]; + p->utemp_flag = old_utemp_flag; /* restore */ + } + + *(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0)); + *(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1)); + *(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2)); + + p->nr_alu_insn++; + return dest; +} + + +/** + * Emit a texture load or texkill instruction. + * \param dest the dest i915 register + * \param destmask the dest register writemask + * \param sampler the i915 sampler register + * \param coord the i915 source texcoord operand + * \param opcode the instruction opcode + */ +uint i915_emit_texld( struct i915_fp_compile *p, + uint dest, + uint destmask, + uint sampler, + uint coord, + uint opcode ) +{ + const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord)); + int temp = -1; + + if (coord != k) { + /* texcoord is swizzled or negated. Need to allocate a new temporary + * register (a utemp / unpreserved temp) won't do. + */ + uint tempReg; + + temp = i915_get_temp(p); /* get temp reg index */ + tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */ + + i915_emit_arith( p, A0_MOV, + tempReg, A0_DEST_CHANNEL_ALL, /* dest reg, writemask */ + 0, /* saturate */ + coord, 0, 0 ); /* src0, src1, src2 */ + + /* new src texcoord is tempReg */ + coord = tempReg; + } + + /* Don't worry about saturate as we only support + */ + if (destmask != A0_DEST_CHANNEL_ALL) { + /* if not writing to XYZW... 
*/ + uint tmp = i915_get_utemp(p); + i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode ); + i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 ); + /* XXX release utemp here? */ + } + else { + assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST); + /* Reduce dest to its plain type/nr form outside of assert(), the + * same way i915_emit_arith() does, so that builds with NDEBUG + * behave exactly like debug builds. + */ + dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)); + assert(dest); + + /* is the sampler coord a texcoord input reg? */ + if (GET_UREG_TYPE(coord) != REG_TYPE_T) { + p->nr_tex_indirect++; + } + + *(p->csr++) = (opcode | + T0_DEST( dest ) | + T0_SAMPLER( sampler )); + + *(p->csr++) = T1_ADDRESS_REG( coord ); + *(p->csr++) = T2_MBZ; + + p->nr_tex_insn++; + } + + if (temp >= 0) + i915_release_temp(p, temp); + + return dest; +} + + +/** + * Return a ureg that reads the scalar c0. + * 0.0 and 1.0 are expressed as all-ZERO / all-ONE swizzles of R0. Any + * other value is stored in (or matched against) one channel of a constant + * slot whose flags are not I915_CONSTFLAG_USER; the result then reads that + * channel in X, ZERO in Y and Z, and ONE in W. + * Reports a program error and returns 0 when no channel is available. + */ +uint +i915_emit_const1f(struct i915_fp_compile * p, float c0) +{ + struct i915_fragment_shader *ifs = p->shader; + unsigned reg, idx; + + if (c0 == 0.0) + return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO); + if (c0 == 1.0) + return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE); + + for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { + if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER) + continue; + for (idx = 0; idx < 4; idx++) { + if (!(ifs->constant_flags[reg] & (1 << idx)) || + ifs->constants[reg][idx] == c0) { + ifs->constants[reg][idx] = c0; + ifs->constant_flags[reg] |= 1 << idx; + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; + return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE); + } + } + } + + i915_program_error(p, "i915_emit_const1f: out of constants\n"); + return 0; +} + +/** + * Return a ureg whose X and Y channels read c0 and c1. + * When either value is 0.0 or 1.0 it is expressed with a ZERO / ONE + * swizzle and the other value is obtained through i915_emit_const1f(). + */ +uint +i915_emit_const2f(struct i915_fp_compile * p, float c0, float c1) +{ + struct i915_fragment_shader *ifs = p->shader; + unsigned reg, idx; + + if (c0 == 0.0) + return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W); + if (c0 == 1.0) + return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W); + + if (c1 == 0.0) + return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W); + if (c1 == 1.0) + return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W); + + for (reg = 0; 
reg < I915_MAX_CONSTANT; reg++) { + if (ifs->constant_flags[reg] == 0xf || + ifs->constant_flags[reg] == I915_CONSTFLAG_USER) + continue; + for (idx = 0; idx < 3; idx++) { + if (!(ifs->constant_flags[reg] & (3 << idx))) { + ifs->constants[reg][idx + 0] = c0; + ifs->constants[reg][idx + 1] = c1; + ifs->constant_flags[reg] |= 3 << idx; + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; + return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE); + } + } + } + + i915_program_error(p, "i915_emit_const2f: out of constants\n"); + return 0; +} + + + +uint +i915_emit_const4f(struct i915_fp_compile * p, + float c0, float c1, float c2, float c3) +{ + struct i915_fragment_shader *ifs = p->shader; + unsigned reg; + + for (reg = 0; reg < I915_MAX_CONSTANT; reg++) { + if (ifs->constant_flags[reg] == 0xf && + ifs->constants[reg][0] == c0 && + ifs->constants[reg][1] == c1 && + ifs->constants[reg][2] == c2 && + ifs->constants[reg][3] == c3) { + return UREG(REG_TYPE_CONST, reg); + } + else if (ifs->constant_flags[reg] == 0) { + + ifs->constants[reg][0] = c0; + ifs->constants[reg][1] = c1; + ifs->constants[reg][2] = c2; + ifs->constants[reg][3] = c3; + ifs->constant_flags[reg] = 0xf; + if (reg + 1 > ifs->num_constants) + ifs->num_constants = reg + 1; + return UREG(REG_TYPE_CONST, reg); + } + } + + i915_program_error(p, "i915_emit_const4f: out of constants\n"); + return 0; +} + + +uint +i915_emit_const4fv(struct i915_fp_compile * p, const float * c) +{ + return i915_emit_const4f(p, c[0], c[1], c[2], c[3]); +} diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c new file mode 100644 index 0000000000..34b4a846c1 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -0,0 +1,1190 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include <stdarg.h> + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_fpc.h" + +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_string.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" + +#include "draw/draw_vertex.h" + + +/** + * Simple pass-through fragment shader to use when we don't have + * a real shader (or it fails to compile for some reason). 
+ */ +static unsigned passthrough[] = +{ + _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1), + + /* declare input color: + */ + (D0_DCL | + (REG_TYPE_T << D0_TYPE_SHIFT) | + (T_DIFFUSE << D0_NR_SHIFT) | + D0_CHANNEL_ALL), + 0, + 0, + + /* move to output color: + */ + (A0_MOV | + (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | + A0_DEST_CHANNEL_ALL | + (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) | + (T_DIFFUSE << A0_SRC0_NR_SHIFT)), + 0x01230000, /* .xyzw */ + 0 +}; + + +/* 1, -1/3!, 1/5!, -1/7! */ +static const float sin_constants[4] = { 1.0, + -1.0f / (3 * 2 * 1), + 1.0f / (5 * 4 * 3 * 2 * 1), + -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1) +}; + +/* 1, -1/2!, 1/4!, -1/6! */ +static const float cos_constants[4] = { 1.0, + -1.0f / (2 * 1), + 1.0f / (4 * 3 * 2 * 1), + -1.0f / (6 * 5 * 4 * 3 * 2 * 1) +}; + + + +/** + * component-wise negation of ureg + */ +static INLINE int +negate(int reg, int x, int y, int z, int w) +{ + /* Another neat thing about the UREG representation */ + return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | + ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | + ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | + ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); +} + + +/** + * In the event of a translation failure, we'll generate a simple color + * pass-through program. + */ +static void +i915_use_passthrough_shader(struct i915_fragment_shader *fs) +{ + fs->program = (uint *) MALLOC(sizeof(passthrough)); + if (fs->program) { + memcpy(fs->program, passthrough, sizeof(passthrough)); + fs->program_len = Elements(passthrough); + } + fs->num_constants = 0; +} + + +/** + * Report a translation error. + * The printf-style message is formatted into a local 1024-byte buffer and + * printed through debug_printf() after an "i915_program_error: " prefix; + * p->error is then set to 1. + */ +void +i915_program_error(struct i915_fp_compile *p, const char *msg, ...) +{ + va_list args; + char buffer[1024]; + + debug_printf("i915_program_error: "); + va_start( args, msg ); + util_vsnprintf( buffer, sizeof(buffer), msg, args ); + va_end( args ); + /* buffer holds already-formatted text; never use it as a format string */ + debug_printf("%s", buffer); + debug_printf("\n"); + + p->error = 1; +} + + + +/** + * Construct a ureg for the given source register. 
Will emit + * constants, apply swizzling and negation as needed. + * Returns 0 after calling i915_program_error() for an unsupported register + * file, input semantic or out-of-range temporary. + */ +static uint +src_vector(struct i915_fp_compile *p, + const struct tgsi_full_src_register *source) +{ + uint index = source->SrcRegister.Index; + uint src, sem_name, sem_ind; + + switch (source->SrcRegister.File) { + case TGSI_FILE_TEMPORARY: + if (source->SrcRegister.Index >= I915_MAX_TEMPORARY) { + i915_program_error(p, "Exceeded max temporary reg"); + return 0; + } + src = UREG(REG_TYPE_R, index); + break; + case TGSI_FILE_INPUT: + /* XXX: Packing COL1, FOGC into a single attribute works for + * texenv programs, but will fail for real fragment programs + * that use these attributes and expect them to be a full 4 + * components wide. Could use a texcoord to pass these + * attributes if necessary, but that won't work in the general + * case. + * + * We also use a texture coordinate to pass wpos when possible. + */ + + sem_name = p->shader->info.input_semantic_name[index]; + sem_ind = p->shader->info.input_semantic_index[index]; + + switch (sem_name) { + case TGSI_SEMANTIC_POSITION: + debug_printf("SKIP SEM POS\n"); + /* + assert(p->wpos_tex != -1); + src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL); + */ + /* Position input is not wired up yet. Use a well-formed + * placeholder ureg so the swizzle/negate code below does not + * read an uninitialized variable. + */ + src = UREG(REG_TYPE_R, 0); + break; + case TGSI_SEMANTIC_COLOR: + if (sem_ind == 0) { + src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); + } + else { + /* secondary color */ + assert(sem_ind == 1); + src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); + src = swizzle(src, X, Y, Z, ONE); + } + break; + case TGSI_SEMANTIC_FOG: + src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); + src = swizzle(src, W, W, W, W); + break; + case TGSI_SEMANTIC_GENERIC: + /* usually a texcoord */ + src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + sem_ind, D0_CHANNEL_ALL); + break; + default: + i915_program_error(p, "Bad source->Index"); + return 0; + } + break; + + case TGSI_FILE_IMMEDIATE: + assert(index < p->num_immediates); + index = p->immediates_map[index]; + /* fall-through */ + case 
TGSI_FILE_CONSTANT: + src = UREG(REG_TYPE_CONST, index); + break; + + default: + i915_program_error(p, "Bad source->File"); + return 0; + } + + if (source->SrcRegister.Extended) { + src = swizzle(src, + source->SrcRegisterExtSwz.ExtSwizzleX, + source->SrcRegisterExtSwz.ExtSwizzleY, + source->SrcRegisterExtSwz.ExtSwizzleZ, + source->SrcRegisterExtSwz.ExtSwizzleW); + } + else { + src = swizzle(src, + source->SrcRegister.SwizzleX, + source->SrcRegister.SwizzleY, + source->SrcRegister.SwizzleZ, + source->SrcRegister.SwizzleW); + } + + + /* There's both negate-all-components and per-component negation. + * Try to handle both here. + */ + { + int nx = source->SrcRegisterExtSwz.NegateX; + int ny = source->SrcRegisterExtSwz.NegateY; + int nz = source->SrcRegisterExtSwz.NegateZ; + int nw = source->SrcRegisterExtSwz.NegateW; + if (source->SrcRegister.Negate) { + nx = !nx; + ny = !ny; + nz = !nz; + nw = !nw; + } + src = negate(src, nx, ny, nz, nw); + } + + /* no abs() or post-abs negation */ +#if 0 + /* XXX assertions disabled to allow arbfplight.c to run */ + /* XXX enable these assertions, or fix things */ + assert(!source->SrcRegisterExtMod.Absolute); + assert(!source->SrcRegisterExtMod.Negate); +#endif + return src; +} + + +/** + * Construct a ureg for a destination register. + */ +static uint +get_result_vector(struct i915_fp_compile *p, + const struct tgsi_full_dst_register *dest) +{ + switch (dest->DstRegister.File) { + case TGSI_FILE_OUTPUT: + { + uint sem_name = p->shader->info.output_semantic_name[dest->DstRegister.Index]; + switch (sem_name) { + case TGSI_SEMANTIC_POSITION: + return UREG(REG_TYPE_OD, 0); + case TGSI_SEMANTIC_COLOR: + return UREG(REG_TYPE_OC, 0); + default: + i915_program_error(p, "Bad inst->DstReg.Index/semantics"); + return 0; + } + } + case TGSI_FILE_TEMPORARY: + return UREG(REG_TYPE_R, dest->DstRegister.Index); + default: + i915_program_error(p, "Bad inst->DstReg.File"); + return 0; + } +} + + +/** + * Compute flags for saturation and writemask. 
+ */ +static uint +get_result_flags(const struct tgsi_full_instruction *inst) +{ + const uint writeMask + = inst->FullDstRegisters[0].DstRegister.WriteMask; + uint flags = 0x0; + + if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE) + flags |= A0_DEST_SATURATE; + + if (writeMask & TGSI_WRITEMASK_X) + flags |= A0_DEST_CHANNEL_X; + if (writeMask & TGSI_WRITEMASK_Y) + flags |= A0_DEST_CHANNEL_Y; + if (writeMask & TGSI_WRITEMASK_Z) + flags |= A0_DEST_CHANNEL_Z; + if (writeMask & TGSI_WRITEMASK_W) + flags |= A0_DEST_CHANNEL_W; + + return flags; +} + + +/** + * Convert TGSI_TEXTURE_x token to D0_SAMPLE_TYPE_x token. + * 1D, 2D and RECT targets all map to D0_SAMPLE_TYPE_2D. An unknown + * target reports a program error and returns 0. + */ +static uint +translate_tex_src_target(struct i915_fp_compile *p, uint tex) +{ + switch (tex) { + case TGSI_TEXTURE_1D: + return D0_SAMPLE_TYPE_2D; + case TGSI_TEXTURE_2D: + return D0_SAMPLE_TYPE_2D; + case TGSI_TEXTURE_RECT: + return D0_SAMPLE_TYPE_2D; + case TGSI_TEXTURE_3D: + return D0_SAMPLE_TYPE_VOLUME; + case TGSI_TEXTURE_CUBE: + return D0_SAMPLE_TYPE_CUBE; + default: + i915_program_error(p, "TexSrc type"); + return 0; + } +} + + +/** + * Generate texel lookup instruction. 
+ */ +static void +emit_tex(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst, + uint opcode) +{ + uint texture = inst->InstructionExtTexture.Texture; + uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + uint tex = translate_tex_src_target( p, texture ); + uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); + uint coord = src_vector( p, &inst->FullSrcRegisters[0]); + /* sampler unit comes from src register 1, the coordinate from src register 0; get_result_flags() (write mask plus saturate bit) is passed as destmask */ + i915_emit_texld( p, + get_result_vector( p, &inst->FullDstRegisters[0] ), + get_result_flags( inst ), + sampler, + coord, + opcode); +} + + +/** + * Generate a simple arithmetic instruction + * \param opcode the i915 opcode + * \param numArgs the number of input/src arguments + * + * Source operands beyond numArgs are passed to i915_emit_arith() as 0. + * get_result_flags() (write mask plus saturate bit) is passed as the mask + * argument and 0 as the saturate argument. + */ +static void +emit_simple_arith(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst, + uint opcode, uint numArgs) +{ + uint arg1, arg2, arg3; + + assert(numArgs <= 3); + + arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->FullSrcRegisters[0] ); + arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->FullSrcRegisters[1] ); + arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->FullSrcRegisters[2] ); + + i915_emit_arith( p, + opcode, + get_result_vector( p, &inst->FullDstRegisters[0]), + get_result_flags( inst ), 0, + arg1, + arg2, + arg3 ); +} + + +/** As above, but swap the first two src regs. Works on a local copy of the instruction and requires numArgs == 2. */ +static void +emit_simple_arith_swap2(struct i915_fp_compile *p, + const struct tgsi_full_instruction *inst, + uint opcode, uint numArgs) +{ + struct tgsi_full_instruction inst2; + + assert(numArgs == 2); + + /* transpose first two registers */ + inst2 = *inst; + inst2.FullSrcRegisters[0] = inst->FullSrcRegisters[1]; + inst2.FullSrcRegisters[1] = inst->FullSrcRegisters[0]; + + emit_simple_arith(p, &inst2, opcode, numArgs); +} + + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +/* + * Translate TGSI instruction to i915 instruction. + * + * Possible concerns: + * + * SIN, COS -- could use another taylor step? 
/**
 * Translate one TGSI instruction to i915 instruction(s).
 *
 * Opcodes with a direct i915 equivalent go through emit_simple_arith();
 * the others are expanded inline into short sequences that use a utility
 * temporary.  Utility temporaries are released at the end of every
 * successfully translated instruction.  An unknown opcode reports a
 * program error, sets p->error and returns without releasing them.
 *
 * Possible concerns:
 *
 * SIN, COS -- could use another taylor step?
 * LIT -- results seem a little different to sw mesa
 * LOG -- different to mesa on negative numbers, but this is conformant.
 *
 * \param p     the translation state
 * \param inst  the TGSI instruction to translate
 */
static void
i915_translate_instruction(struct i915_fp_compile *p,
                           const struct tgsi_full_instruction *inst)
{
   uint writemask;
   uint src0, src1, src2, flags;
   uint tmp = 0;

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_ABS:
      /* abs(x) = max(x, -x) */
      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
      i915_emit_arith(p,
                      A0_MAX,
                      get_result_vector(p, &inst->FullDstRegisters[0]),
                      get_result_flags(inst), 0,
                      src0, negate(src0, 1, 1, 1, 1), 0);
      break;

   case TGSI_OPCODE_ADD:
      emit_simple_arith(p, inst, A0_ADD, 2);
      break;

   case TGSI_OPCODE_CMP:
      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
      src2 = src_vector(p, &inst->FullSrcRegisters[2]);
      i915_emit_arith(p, A0_CMP,
                      get_result_vector(p, &inst->FullDstRegisters[0]),
                      get_result_flags(inst),
                      0, src0, src2, src1);   /* NOTE: order of src2, src1 */
      break;

   case TGSI_OPCODE_COS:
      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
      tmp = i915_get_utemp(p);

      /* Scale by 1/(2*PI), apply MOD, then scale back by 2*PI before
       * evaluating the polynomial.
       */
      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_X, 0,
                      src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0);

      i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);

      /* By choosing different taylor constants, could get rid of this mul:
       */
      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_X, 0,
                      tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0);

      /*
       * t0.xy = MUL x.xx11, x.x111  ; x^2, x, 1, 1
       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
       * t0 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
       * result = DP4 t0, cos_constants
       */
      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_XY, 0,
                      swizzle(tmp, X, X, ONE, ONE),
                      swizzle(tmp, X, ONE, ONE, ONE), 0);

      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_XYZ, 0,
                      swizzle(tmp, X, Y, X, ONE),
                      swizzle(tmp, X, X, ONE, ONE), 0);

      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_XYZ, 0,
                      swizzle(tmp, X, X, Z, ONE),
                      swizzle(tmp, Z, ONE, ONE, ONE), 0);

      i915_emit_arith(p,
                      A0_DP4,
                      get_result_vector(p, &inst->FullDstRegisters[0]),
                      get_result_flags(inst), 0,
                      swizzle(tmp, ONE, Z, Y, X),
                      i915_emit_const4fv(p, cos_constants), 0);
      break;

   case TGSI_OPCODE_DP3:
      emit_simple_arith(p, inst, A0_DP3, 2);
      break;

   case TGSI_OPCODE_DP4:
      emit_simple_arith(p, inst, A0_DP4, 2);
      break;

   case TGSI_OPCODE_DPH:
      /* DP4 with the first operand's w forced to 1 */
      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
      src1 = src_vector(p, &inst->FullSrcRegisters[1]);

      i915_emit_arith(p,
                      A0_DP4,
                      get_result_vector(p, &inst->FullDstRegisters[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, X, Y, Z, ONE), src1, 0);
      break;

   case TGSI_OPCODE_DST:
      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
      src1 = src_vector(p, &inst->FullSrcRegisters[1]);

      /* result[0] = 1    * 1;
       * result[1] = a[1] * b[1];
       * result[2] = a[2] * 1;
       * result[3] = 1    * b[3];
       */
      i915_emit_arith(p,
                      A0_MUL,
                      get_result_vector(p, &inst->FullDstRegisters[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, ONE, Y, Z, ONE),
                      swizzle(src1, ONE, Y, ONE, W), 0);
      break;

   case TGSI_OPCODE_END:
      /* no-op */
      break;

   case TGSI_OPCODE_EX2:
      /* scalar op: the x component is replicated to all channels */
      src0 = src_vector(p, &inst->FullSrcRegisters[0]);

      i915_emit_arith(p,
                      A0_EXP,
                      get_result_vector(p, &inst->FullDstRegisters[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, X, X, X, X), 0, 0);
      break;

   case TGSI_OPCODE_FLR:
      emit_simple_arith(p, inst, A0_FLR, 1);
      break;

   case TGSI_OPCODE_FRC:
      emit_simple_arith(p, inst, A0_FRC, 1);
      break;

   case TGSI_OPCODE_KIL:
      /* kill if src[0].x < 0 || src[0].y < 0 ... */
      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
      tmp = i915_get_utemp(p);

      i915_emit_texld(p,
                      tmp,                   /* dest reg: a dummy reg */
                      A0_DEST_CHANNEL_ALL,   /* dest writemask */
                      0,                     /* sampler */
                      src0,                  /* coord*/
                      T0_TEXKILL);           /* opcode */
      break;

   case TGSI_OPCODE_KILP:
      assert(0); /* not tested yet */
      break;

   case TGSI_OPCODE_LG2:
      /* scalar op: the x component is replicated to all channels */
      src0 = src_vector(p, &inst->FullSrcRegisters[0]);

      i915_emit_arith(p,
                      A0_LOG,
                      get_result_vector(p, &inst->FullDstRegisters[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, X, X, X, X), 0, 0);
      break;

   case TGSI_OPCODE_LIT:
      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
      tmp = i915_get_utemp(p);

      /* tmp = max( a.xyzw, a.00zw )
       * XXX: Clamp tmp.w to -128..128
       * tmp.y = log(tmp.y)
       * tmp.y = tmp.w * tmp.y
       * tmp.y = exp(tmp.y)
       * result = cmp (a.11-x1, a.1x01, a.1xy1 )
       */
      i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
                      src0, swizzle(src0, ZERO, ZERO, Z, W), 0);

      i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
                      swizzle(tmp, Y, Y, Y, Y), 0, 0);

      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
                      swizzle(tmp, ZERO, Y, ZERO, ZERO),
                      swizzle(tmp, ZERO, W, ZERO, ZERO), 0);

      i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
                      swizzle(tmp, Y, Y, Y, Y), 0, 0);

      i915_emit_arith(p, A0_CMP,
                      get_result_vector(p, &inst->FullDstRegisters[0]),
                      get_result_flags(inst), 0,
                      negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
                      swizzle(tmp, ONE, X, ZERO, ONE),
                      swizzle(tmp, ONE, X, Y, ONE));

      break;

   case TGSI_OPCODE_LRP:
      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
      src2 = src_vector(p, &inst->FullSrcRegisters[2]);
      flags = get_result_flags(inst);
      tmp = i915_get_utemp(p);

      /* b*a + c*(1-a)
       *
       * b*a + c - ca
       *
       * tmp = b*a + c,
       * result = (-c)*a + tmp
       */
      i915_emit_arith(p, A0_MAD, tmp,
                      flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);

      i915_emit_arith(p, A0_MAD,
                      get_result_vector(p, &inst->FullDstRegisters[0]),
                      flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
      break;

   case TGSI_OPCODE_MAD:
      emit_simple_arith(p, inst, A0_MAD, 3);
      break;

   case TGSI_OPCODE_MAX:
      emit_simple_arith(p, inst, A0_MAX, 2);
      break;

   case TGSI_OPCODE_MIN:
      /* min(a, b) = -max(-a, -b) */
      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
      tmp = i915_get_utemp(p);
      flags = get_result_flags(inst);

      i915_emit_arith(p,
                      A0_MAX,
                      tmp, flags & A0_DEST_CHANNEL_ALL, 0,
                      negate(src0, 1, 1, 1, 1),
                      negate(src1, 1, 1, 1, 1), 0);

      i915_emit_arith(p,
                      A0_MOV,
                      get_result_vector(p, &inst->FullDstRegisters[0]),
                      flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
      break;

   case TGSI_OPCODE_MOV:
   case TGSI_OPCODE_SWZ:
      emit_simple_arith(p, inst, A0_MOV, 1);
      break;

   case TGSI_OPCODE_MUL:
      emit_simple_arith(p, inst, A0_MUL, 2);
      break;

   case TGSI_OPCODE_POW:
      /* pow(a, b) expanded as EXP(LOG(a.x) * b) via tmp.x */
      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
      tmp = i915_get_utemp(p);
      flags = get_result_flags(inst);

      /* XXX: masking on intermediate values, here and elsewhere.
       */
      i915_emit_arith(p,
                      A0_LOG,
                      tmp, A0_DEST_CHANNEL_X, 0,
                      swizzle(src0, X, X, X, X), 0, 0);

      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);

      i915_emit_arith(p,
                      A0_EXP,
                      get_result_vector(p, &inst->FullDstRegisters[0]),
                      flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
      break;

   case TGSI_OPCODE_RET:
      /* XXX: no-op? */
      break;

   case TGSI_OPCODE_RCP:
      src0 = src_vector(p, &inst->FullSrcRegisters[0]);

      i915_emit_arith(p,
                      A0_RCP,
                      get_result_vector(p, &inst->FullDstRegisters[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, X, X, X, X), 0, 0);
      break;

   case TGSI_OPCODE_RSQ:
      src0 = src_vector(p, &inst->FullSrcRegisters[0]);

      i915_emit_arith(p,
                      A0_RSQ,
                      get_result_vector(p, &inst->FullDstRegisters[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, X, X, X, X), 0, 0);
      break;

   case TGSI_OPCODE_SCS:
      /* Sine goes to the Y channel and cosine to the X channel, each
       * only if enabled in the write mask.  Unlike SIN/COS above, no
       * MOD-based range reduction is emitted here.
       */
      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
      tmp = i915_get_utemp(p);

      /*
       * t0.xy = MUL x.xx11, x.x111  ; x^2, x, 1, 1
       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
       * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
       * scs.x = DP4 t1, sin_constants
       * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
       * scs.y = DP4 t1, cos_constants
       */
      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_XY, 0,
                      swizzle(src0, X, X, ONE, ONE),
                      swizzle(src0, X, ONE, ONE, ONE), 0);

      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_ALL, 0,
                      swizzle(tmp, X, Y, X, Y),
                      swizzle(tmp, X, X, ONE, ONE), 0);

      writemask = inst->FullDstRegisters[0].DstRegister.WriteMask;

      if (writemask & TGSI_WRITEMASK_Y) {
         uint tmp1;

         /* tmp is still needed for the cosine below when X is written
          * too, so the sine powers go to a second temporary then.
          */
         if (writemask & TGSI_WRITEMASK_X)
            tmp1 = i915_get_utemp(p);
         else
            tmp1 = tmp;

         i915_emit_arith(p,
                         A0_MUL,
                         tmp1, A0_DEST_CHANNEL_ALL, 0,
                         swizzle(tmp, X, Y, Y, W),
                         swizzle(tmp, X, Z, ONE, ONE), 0);

         i915_emit_arith(p,
                         A0_DP4,
                         get_result_vector(p, &inst->FullDstRegisters[0]),
                         A0_DEST_CHANNEL_Y, 0,
                         swizzle(tmp1, W, Z, Y, X),
                         i915_emit_const4fv(p, sin_constants), 0);
      }

      if (writemask & TGSI_WRITEMASK_X) {
         i915_emit_arith(p,
                         A0_MUL,
                         tmp, A0_DEST_CHANNEL_XYZ, 0,
                         swizzle(tmp, X, X, Z, ONE),
                         swizzle(tmp, Z, ONE, ONE, ONE), 0);

         i915_emit_arith(p,
                         A0_DP4,
                         get_result_vector(p, &inst->FullDstRegisters[0]),
                         A0_DEST_CHANNEL_X, 0,
                         swizzle(tmp, ONE, Z, Y, X),
                         i915_emit_const4fv(p, cos_constants), 0);
      }
      break;

   case TGSI_OPCODE_SGE:
      emit_simple_arith(p, inst, A0_SGE, 2);
      break;

   case TGSI_OPCODE_SLE:
      /* like SGE, but swap reg0, reg1 */
      emit_simple_arith_swap2(p, inst, A0_SGE, 2);
      break;

   case TGSI_OPCODE_SIN:
      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
      tmp = i915_get_utemp(p);

      /* Same scale / MOD / rescale sequence as COS above. */
      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_X, 0,
                      src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0);

      i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);

      /* By choosing different taylor constants, could get rid of this mul:
       */
      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_X, 0,
                      tmp, i915_emit_const1f(p, (float) (M_PI * 2.0)), 0);

      /*
       * t0.xy = MUL x.xx11, x.x111  ; x^2, x, 1, 1
       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
       * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
       * result = DP4 t1.wzyx, sin_constants
       */
      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_XY, 0,
                      swizzle(tmp, X, X, ONE, ONE),
                      swizzle(tmp, X, ONE, ONE, ONE), 0);

      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_ALL, 0,
                      swizzle(tmp, X, Y, X, Y),
                      swizzle(tmp, X, X, ONE, ONE), 0);

      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_ALL, 0,
                      swizzle(tmp, X, Y, Y, W),
                      swizzle(tmp, X, Z, ONE, ONE), 0);

      i915_emit_arith(p,
                      A0_DP4,
                      get_result_vector(p, &inst->FullDstRegisters[0]),
                      get_result_flags(inst), 0,
                      swizzle(tmp, W, Z, Y, X),
                      i915_emit_const4fv(p, sin_constants), 0);
      break;

   case TGSI_OPCODE_SLT:
      emit_simple_arith(p, inst, A0_SLT, 2);
      break;

   case TGSI_OPCODE_SGT:
      /* like SLT, but swap reg0, reg1 */
      emit_simple_arith_swap2(p, inst, A0_SLT, 2);
      break;

   case TGSI_OPCODE_SUB:
      /* a - b = a + (-b) */
      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
      src1 = src_vector(p, &inst->FullSrcRegisters[1]);

      i915_emit_arith(p,
                      A0_ADD,
                      get_result_vector(p, &inst->FullDstRegisters[0]),
                      get_result_flags(inst), 0,
                      src0, negate(src1, 1, 1, 1, 1), 0);
      break;

   case TGSI_OPCODE_TEX:
      emit_tex(p, inst, T0_TEXLD);
      break;

   case TGSI_OPCODE_TXB:
      emit_tex(p, inst, T0_TEXLDB);
      break;

   case TGSI_OPCODE_TXP:
      emit_tex(p, inst, T0_TEXLDP);
      break;

   case TGSI_OPCODE_XPD:
      /* Cross product:
       *      result.x = src0.y * src1.z - src0.z * src1.y;
       *      result.y = src0.z * src1.x - src0.x * src1.z;
       *      result.z = src0.x * src1.y - src0.y * src1.x;
       *      result.w = undef;
       */
      src0 = src_vector(p, &inst->FullSrcRegisters[0]);
      src1 = src_vector(p, &inst->FullSrcRegisters[1]);
      tmp = i915_get_utemp(p);

      /* tmp holds the subtracted products; the MAD adds the other
       * products to -tmp (w is left un-negated).
       */
      i915_emit_arith(p,
                      A0_MUL,
                      tmp, A0_DEST_CHANNEL_ALL, 0,
                      swizzle(src0, Z, X, Y, ONE),
                      swizzle(src1, Y, Z, X, ONE), 0);

      i915_emit_arith(p,
                      A0_MAD,
                      get_result_vector(p, &inst->FullDstRegisters[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, Y, Z, X, ONE),
                      swizzle(src1, Z, X, Y, ONE),
                      negate(tmp, 1, 1, 1, 0));
      break;

   default:
      /* Unsupported opcode: flag the program as bad.  Note that this
       * path returns without releasing utility temporaries.
       */
      i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode);
      p->error = 1;
      return;
   }

   i915_release_utemps(p);
}
+ * \param p the translation state + * \param tokens the TGSI token array + */ +static void +i915_translate_instructions(struct i915_fp_compile *p, + const struct tgsi_token *tokens) +{ + struct i915_fragment_shader *ifs = p->shader; + struct tgsi_parse_context parse; + + tgsi_parse_init( &parse, tokens ); + + while( !tgsi_parse_end_of_tokens( &parse ) ) { + + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullToken.FullDeclaration.Declaration.File + == TGSI_FILE_CONSTANT) { + uint i; + for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; + i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; + i++) { + assert(ifs->constant_flags[i] == 0x0); + ifs->constant_flags[i] = I915_CONSTFLAG_USER; + ifs->num_constants = MAX2(ifs->num_constants, i + 1); + } + } + else if (parse.FullToken.FullDeclaration.Declaration.File + == TGSI_FILE_TEMPORARY) { + uint i; + for (i = parse.FullToken.FullDeclaration.DeclarationRange.First; + i <= parse.FullToken.FullDeclaration.DeclarationRange.Last; + i++) { + assert(i < I915_MAX_TEMPORARY); + /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ + p->temp_flag |= (1 << i); /* mark temp as used */ + } + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm + = &parse.FullToken.FullImmediate; + const uint pos = p->num_immediates++; + uint j; + for (j = 0; j < imm->Immediate.Size; j++) { + p->immediates[pos][j] = imm->u.ImmediateFloat32[j].Float; + } + } + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (p->first_instruction) { + /* resolve location of immediates */ + uint i, j; + for (i = 0; i < p->num_immediates; i++) { + /* find constant slot for this immediate */ + for (j = 0; j < I915_MAX_CONSTANT; j++) { + if (ifs->constant_flags[j] == 0x0) { + memcpy(ifs->constants[j], + p->immediates[i], + 4 * sizeof(float)); + /*printf("immediate %d maps to const %d\n", i, j);*/ + ifs->constant_flags[j] = 0xf; /* 
all four comps used */ + p->immediates_map[i] = j; + ifs->num_constants = MAX2(ifs->num_constants, j + 1); + break; + } + } + } + + p->first_instruction = FALSE; + } + + i915_translate_instruction(p, &parse.FullToken.FullInstruction); + break; + + default: + assert( 0 ); + } + + } /* while */ + + tgsi_parse_free (&parse); +} + + +static struct i915_fp_compile * +i915_init_compile(struct i915_context *i915, + struct i915_fragment_shader *ifs) +{ + struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); + + p->shader = ifs; + + /* Put new constants at end of const buffer, growing downward. + * The problem is we don't know how many user-defined constants might + * be specified with pipe->set_constant_buffer(). + * Should pre-scan the user's program to determine the highest-numbered + * constant referenced. + */ + ifs->num_constants = 0; + memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); + + p->first_instruction = TRUE; + + p->nr_tex_indirect = 1; /* correct? */ + p->nr_tex_insn = 0; + p->nr_alu_insn = 0; + p->nr_decl_insn = 0; + + p->csr = p->program; + p->decl = p->declarations; + p->decl_s = 0; + p->decl_t = 0; + p->temp_flag = ~0x0 << I915_MAX_TEMPORARY; + p->utemp_flag = ~0x7; + + p->wpos_tex = -1; + + /* initialize the first program word */ + *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; + + return p; +} + + +/* Copy compile results to the fragment program struct and destroy the + * compilation context. 
+ */ +static void +i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) +{ + struct i915_fragment_shader *ifs = p->shader; + unsigned long program_size = (unsigned long) (p->csr - p->program); + unsigned long decl_size = (unsigned long) (p->decl - p->declarations); + + if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) + i915_program_error(p, "Exceeded max nr indirect texture lookups"); + + if (p->nr_tex_insn > I915_MAX_TEX_INSN) + i915_program_error(p, "Exceeded max TEX instructions"); + + if (p->nr_alu_insn > I915_MAX_ALU_INSN) + i915_program_error(p, "Exceeded max ALU instructions"); + + if (p->nr_decl_insn > I915_MAX_DECL_INSN) + i915_program_error(p, "Exceeded max DECL instructions"); + + if (p->error) { + p->NumNativeInstructions = 0; + p->NumNativeAluInstructions = 0; + p->NumNativeTexInstructions = 0; + p->NumNativeTexIndirections = 0; + + i915_use_passthrough_shader(ifs); + } + else { + p->NumNativeInstructions + = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn; + p->NumNativeAluInstructions = p->nr_alu_insn; + p->NumNativeTexInstructions = p->nr_tex_insn; + p->NumNativeTexIndirections = p->nr_tex_indirect; + + /* patch in the program length */ + p->declarations[0] |= program_size + decl_size - 2; + + /* Copy compilation results to fragment program struct: + */ + assert(!ifs->program); + ifs->program + = (uint *) MALLOC((program_size + decl_size) * sizeof(uint)); + if (ifs->program) { + ifs->program_len = program_size + decl_size; + + memcpy(ifs->program, + p->declarations, + decl_size * sizeof(uint)); + + memcpy(ifs->program + decl_size, + p->program, + program_size * sizeof(uint)); + } + } + + /* Release the compilation struct: + */ + FREE(p); +} + + +/** + * Find an unused texture coordinate slot to use for fragment WPOS. + * Update p->fp->wpos_tex with the result (-1 if no used texcoord slot is found). 
+ */ +static void +i915_find_wpos_space(struct i915_fp_compile *p) +{ +#if 0 + const uint inputs + = p->shader->inputs_read | (1 << TGSI_ATTRIB_POS); /*XXX hack*/ + uint i; + + p->wpos_tex = -1; + + if (inputs & (1 << TGSI_ATTRIB_POS)) { + for (i = 0; i < I915_TEX_UNITS; i++) { + if ((inputs & (1 << (TGSI_ATTRIB_TEX0 + i))) == 0) { + p->wpos_tex = i; + return; + } + } + + i915_program_error(p, "No free texcoord for wpos value"); + } +#else + if (p->shader->info.input_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + /* frag shader using the fragment position input */ +#if 0 + assert(0); +#endif + } +#endif +} + + + + +/** + * Rather than trying to intercept and jiggle depth writes during + * emit, just move the value into its correct position at the end of + * the program: + */ +static void +i915_fixup_depth_write(struct i915_fp_compile *p) +{ + /* XXX assuming pos/depth is always in output[0] */ + if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { + const uint depth = UREG(REG_TYPE_OD, 0); + + i915_emit_arith(p, + A0_MOV, /* opcode */ + depth, /* dest reg */ + A0_DEST_CHANNEL_W, /* write mask */ + 0, /* saturate? 
*/ + swizzle(depth, X, Y, Z, Z), /* src0 */ + 0, 0 /* src1, src2 */); + } +} + + +void +i915_translate_fragment_program( struct i915_context *i915, + struct i915_fragment_shader *fs) +{ + struct i915_fp_compile *p = i915_init_compile(i915, fs); + const struct tgsi_token *tokens = fs->state.tokens; + + i915_find_wpos_space(p); + +#if 0 + tgsi_dump(tokens, 0); +#endif + + i915_translate_instructions(p, tokens); + i915_fixup_depth_write(p); + + i915_fini_compile(i915, p); +} diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c new file mode 100644 index 0000000000..8f1f58b2dd --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -0,0 +1,220 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
/**
 * Primitive emit to hardware.  No support for vertex buffers or any
 * nice fast paths.
 *
 * The draw_stage member must stay first: setup_stage() converts a
 * draw_stage pointer back to this struct with a plain cast.
 */
struct setup_stage {
   struct draw_stage stage; /**< This must be first (base class) */

   struct i915_context *i915;   /**< context this stage emits into */
};



/**
 * Basically a cast wrapper: recover the setup_stage from its embedded
 * base-class pointer.
 */
static INLINE struct setup_stage *setup_stage( struct draw_stage *stage )
{
   return (struct setup_stage *)stage;
}
+ */ +static INLINE void +emit_hw_vertex( struct i915_context *i915, + const struct vertex_header *vertex) +{ + const struct vertex_info *vinfo = &i915->current.vertex_info; + uint i; + uint count = 0; /* for debug/sanity */ + + assert(!i915->dirty); + + for (i = 0; i < vinfo->num_attribs; i++) { + const uint j = vinfo->attrib[i].src_index; + const float *attrib = vertex->data[j]; + switch (vinfo->attrib[i].emit) { + case EMIT_1F: + OUT_BATCH( fui(attrib[0]) ); + count++; + break; + case EMIT_2F: + OUT_BATCH( fui(attrib[0]) ); + OUT_BATCH( fui(attrib[1]) ); + count += 2; + break; + case EMIT_3F: + OUT_BATCH( fui(attrib[0]) ); + OUT_BATCH( fui(attrib[1]) ); + OUT_BATCH( fui(attrib[2]) ); + count += 3; + break; + case EMIT_4F: + OUT_BATCH( fui(attrib[0]) ); + OUT_BATCH( fui(attrib[1]) ); + OUT_BATCH( fui(attrib[2]) ); + OUT_BATCH( fui(attrib[3]) ); + count += 4; + break; + case EMIT_4UB: + OUT_BATCH( pack_ub4(float_to_ubyte( attrib[2] ), + float_to_ubyte( attrib[1] ), + float_to_ubyte( attrib[0] ), + float_to_ubyte( attrib[3] )) ); + count += 1; + break; + default: + assert(0); + } + } + assert(count == vinfo->size); +} + + + +static INLINE void +emit_prim( struct draw_stage *stage, + struct prim_header *prim, + unsigned hwprim, + unsigned nr ) +{ + struct i915_context *i915 = setup_stage(stage)->i915; + unsigned vertex_size; + unsigned i; + + if (i915->dirty) + i915_update_derived( i915 ); + + if (i915->hardware_dirty) + i915_emit_hardware_state( i915 ); + + /* need to do this after validation! */ + vertex_size = i915->current.vertex_info.size * 4; /* in bytes */ + assert(vertex_size >= 12); /* never smaller than 12 bytes */ + + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { + FLUSH_BATCH(NULL); + + /* Make sure state is re-emitted after a flush: + */ + i915_update_derived( i915 ); + i915_emit_hardware_state( i915 ); + + if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { + assert(0); + return; + } + } + + /* Emit each triangle as a single primitive. 
I told you this was + * simple. + */ + OUT_BATCH(_3DPRIMITIVE | + hwprim | + ((4 + vertex_size * nr)/4 - 2)); + + for (i = 0; i < nr; i++) + emit_hw_vertex(i915, prim->v[i]); +} + + +static void +setup_tri( struct draw_stage *stage, struct prim_header *prim ) +{ + emit_prim( stage, prim, PRIM3D_TRILIST, 3 ); +} + + +static void +setup_line(struct draw_stage *stage, struct prim_header *prim) +{ + emit_prim( stage, prim, PRIM3D_LINELIST, 2 ); +} + + +static void +setup_point(struct draw_stage *stage, struct prim_header *prim) +{ + emit_prim( stage, prim, PRIM3D_POINTLIST, 1 ); +} + + +static void setup_flush( struct draw_stage *stage, unsigned flags ) +{ +} + +static void reset_stipple_counter( struct draw_stage *stage ) +{ +} + +static void render_destroy( struct draw_stage *stage ) +{ + FREE( stage ); +} + + +/** + * Create a new primitive setup/render stage. This gets plugged into + * the 'draw' module's pipeline. + */ +struct draw_stage *i915_draw_render_stage( struct i915_context *i915 ) +{ + struct setup_stage *setup = CALLOC_STRUCT(setup_stage); + + setup->i915 = i915; + setup->stage.draw = i915->draw; + setup->stage.point = setup_point; + setup->stage.line = setup_line; + setup->stage.tri = setup_tri; + setup->stage.flush = setup_flush; + setup->stage.reset_stipple_counter = reset_stipple_counter; + setup->stage.destroy = render_destroy; + + return &setup->stage; +} diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c new file mode 100644 index 0000000000..4fda1ab64f --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -0,0 +1,547 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Build post-transformation, post-clipping vertex buffers and element + * lists by hooking into the end of the primitive pipeline and + * manipulating the vertex_id field in the vertex headers. + * + * XXX: work in progress + * + * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "pipe/p_debug.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_winsys.h" +#include "i915_batch.h" +#include "i915_state.h" + + +/** + * Primitive renderer for i915. 
+ */ +struct i915_vbuf_render { + struct vbuf_render base; + + struct i915_context *i915; + + /** Vertex size in bytes */ + unsigned vertex_size; + + /** Software primitive */ + unsigned prim; + + /** Hardware primitive */ + unsigned hwprim; + + /** Genereate a vertex list */ + unsigned fallback; + + /* Stuff for the vbo */ + struct pipe_buffer *vbo; + size_t vbo_size; + size_t vbo_offset; + void *vbo_ptr; + size_t vbo_alloc_size; +}; + + +/** + * Basically a cast wrapper. + */ +static INLINE struct i915_vbuf_render * +i915_vbuf_render( struct vbuf_render *render ) +{ + assert(render); + return (struct i915_vbuf_render *)render; +} + + +static const struct vertex_info * +i915_vbuf_render_get_vertex_info( struct vbuf_render *render ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + + if (i915->dirty) { + /* make sure we have up to date vertex layout */ + i915_update_derived( i915 ); + } + + return &i915->current.vertex_info; +} + + +static void * +i915_vbuf_render_allocate_vertices( struct vbuf_render *render, + ushort vertex_size, + ushort nr_vertices ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + struct pipe_screen *screen = i915->pipe.screen; + size_t size = (size_t)vertex_size * (size_t)nr_vertices; + + /* FIXME: handle failure */ + assert(!i915->vbo); + + if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) { + } else { + i915->vbo_flushed = 0; + pipe_buffer_reference(screen, &i915_render->vbo, NULL); + } + + if (!i915_render->vbo) { + i915_render->vbo_size = MAX2(size, i915_render->vbo_alloc_size); + i915_render->vbo_offset = 0; + i915_render->vbo = pipe_buffer_create(screen, + 64, + I915_BUFFER_USAGE_LIT_VERTEX, + i915_render->vbo_size); + i915_render->vbo_ptr = pipe_buffer_map(screen, + i915_render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + pipe_buffer_unmap(screen, i915_render->vbo); + 
} + + i915->vbo = i915_render->vbo; + i915->vbo_offset = i915_render->vbo_offset; + i915->dirty |= I915_NEW_VBO; + + return (unsigned char *)i915_render->vbo_ptr + i915->vbo_offset; +} + + +static boolean +i915_vbuf_render_set_primitive( struct vbuf_render *render, + unsigned prim ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + i915_render->prim = prim; + + switch(prim) { + case PIPE_PRIM_POINTS: + i915_render->hwprim = PRIM3D_POINTLIST; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_LINES: + i915_render->hwprim = PRIM3D_LINELIST; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_LINE_LOOP: + i915_render->hwprim = PRIM3D_LINELIST; + i915_render->fallback = PIPE_PRIM_LINE_LOOP; + return TRUE; + case PIPE_PRIM_LINE_STRIP: + i915_render->hwprim = PRIM3D_LINESTRIP; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_TRIANGLES: + i915_render->hwprim = PRIM3D_TRILIST; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_TRIANGLE_STRIP: + i915_render->hwprim = PRIM3D_TRISTRIP; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_TRIANGLE_FAN: + i915_render->hwprim = PRIM3D_TRIFAN; + i915_render->fallback = 0; + return TRUE; + case PIPE_PRIM_QUADS: + i915_render->hwprim = PRIM3D_TRILIST; + i915_render->fallback = PIPE_PRIM_QUADS; + return TRUE; + case PIPE_PRIM_QUAD_STRIP: + i915_render->hwprim = PRIM3D_TRILIST; + i915_render->fallback = PIPE_PRIM_QUAD_STRIP; + return TRUE; + case PIPE_PRIM_POLYGON: + i915_render->hwprim = PRIM3D_POLY; + i915_render->fallback = 0; + return TRUE; + default: + assert((int)"Error unkown primtive type" & 0); + /* Actually, can handle a lot more just fine... Fixme. 
+ */ + return FALSE; + } +} + + + +/** + * Used for fallbacks in draw_arrays + */ +static void +draw_arrays_generate_indices( struct vbuf_render *render, + unsigned start, uint nr, + unsigned type ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned i; + unsigned end = start + nr; + switch(type) { + case 0: + for (i = start; i+1 < end; i += 2) + OUT_BATCH( (i+0) | (i+1) << 16 ); + if (i < end) + OUT_BATCH( i ); + break; + case PIPE_PRIM_LINE_LOOP: + if (nr >= 2) { + for (i = start + 1; i < end; i++) + OUT_BATCH( (i-0) | (i+0) << 16 ); + OUT_BATCH( (i-0) | ( start) << 16 ); + } + break; + case PIPE_PRIM_QUADS: + for (i = start; i + 3 < end; i += 4) { + OUT_BATCH( (i+0) | (i+1) << 16 ); + OUT_BATCH( (i+3) | (i+1) << 16 ); + OUT_BATCH( (i+2) | (i+3) << 16 ); + } + break; + case PIPE_PRIM_QUAD_STRIP: + for (i = start; i + 3 < end; i += 2) { + OUT_BATCH( (i+0) | (i+1) << 16 ); + OUT_BATCH( (i+3) | (i+2) << 16 ); + OUT_BATCH( (i+0) | (i+3) << 16 ); + } + break; + default: + assert(0); + } +} + +static unsigned +draw_arrays_calc_nr_indices( uint nr, unsigned type ) +{ + switch (type) { + case 0: + return nr; + case PIPE_PRIM_LINE_LOOP: + if (nr >= 2) + return nr * 2; + else + return 0; + case PIPE_PRIM_QUADS: + return (nr / 4) * 6; + case PIPE_PRIM_QUAD_STRIP: + return ((nr - 2) / 2) * 6; + default: + assert(0); + return 0; + } +} + +static void +draw_arrays_fallback( struct vbuf_render *render, + unsigned start, + uint nr ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned nr_indices; + + if (i915->dirty) + i915_update_derived( i915 ); + + if (i915->hardware_dirty) + i915_emit_hardware_state( i915 ); + + nr_indices = draw_arrays_calc_nr_indices( nr, i915_render->fallback ); + if (!nr_indices) + return; + + if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { + FLUSH_BATCH(NULL); + + /* Make sure state is re-emitted 
after a flush: + */ + i915_update_derived( i915 ); + i915_emit_hardware_state( i915 ); + i915->vbo_flushed = 1; + + if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { + assert(0); + goto out; + } + } + OUT_BATCH( _3DPRIMITIVE | + PRIM_INDIRECT | + i915_render->hwprim | + PRIM_INDIRECT_ELTS | + nr_indices ); + + draw_arrays_generate_indices( render, start, nr, i915_render->fallback ); + +out: + return; +} + +static void +i915_vbuf_render_draw_arrays( struct vbuf_render *render, + unsigned start, + uint nr ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + + if (i915_render->fallback) { + draw_arrays_fallback( render, start, nr ); + return; + } + + /* JB: TODO submit direct cmds */ + draw_arrays_fallback( render, start, nr ); +} + +/** + * Used for normal and fallback emitting of indices + * If type is zero normal operation assumed. + */ +static void +draw_generate_indices( struct vbuf_render *render, + const ushort *indices, + uint nr_indices, + unsigned type ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned i; + + switch(type) { + case 0: + for (i = 0; i + 1 < nr_indices; i += 2) { + OUT_BATCH( indices[i] | indices[i+1] << 16 ); + } + if (i < nr_indices) { + OUT_BATCH( indices[i] ); + } + break; + case PIPE_PRIM_LINE_LOOP: + if (nr_indices >= 2) { + for (i = 1; i < nr_indices; i++) + OUT_BATCH( indices[i-1] | indices[i] << 16 ); + OUT_BATCH( indices[i-1] | indices[0] << 16 ); + } + break; + case PIPE_PRIM_QUADS: + for (i = 0; i + 3 < nr_indices; i += 4) { + OUT_BATCH( indices[i+0] | indices[i+1] << 16 ); + OUT_BATCH( indices[i+3] | indices[i+1] << 16 ); + OUT_BATCH( indices[i+2] | indices[i+3] << 16 ); + } + break; + case PIPE_PRIM_QUAD_STRIP: + for (i = 0; i + 3 < nr_indices; i += 2) { + OUT_BATCH( indices[i+0] | indices[i+1] << 16 ); + OUT_BATCH( indices[i+3] | indices[i+2] << 16 ); + OUT_BATCH( indices[i+0] | indices[i+3] << 16 ); + } + break; + default: + 
assert(0); + break; + } +} + +static unsigned +draw_calc_nr_indices( uint nr_indices, unsigned type ) +{ + switch (type) { + case 0: + return nr_indices; + case PIPE_PRIM_LINE_LOOP: + if (nr_indices >= 2) + return nr_indices * 2; + else + return 0; + case PIPE_PRIM_QUADS: + return (nr_indices / 4) * 6; + case PIPE_PRIM_QUAD_STRIP: + return ((nr_indices - 2) / 2) * 6; + default: + assert(0); + return 0; + } +} + +static void +i915_vbuf_render_draw( struct vbuf_render *render, + const ushort *indices, + uint nr_indices) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + unsigned save_nr_indices; + + save_nr_indices = nr_indices; + + nr_indices = draw_calc_nr_indices( nr_indices, i915_render->fallback ); + if (!nr_indices) + return; + + if (i915->dirty) + i915_update_derived( i915 ); + + if (i915->hardware_dirty) + i915_emit_hardware_state( i915 ); + + if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { + FLUSH_BATCH(NULL); + + /* Make sure state is re-emitted after a flush: + */ + i915_update_derived( i915 ); + i915_emit_hardware_state( i915 ); + i915->vbo_flushed = 1; + + if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { + assert(0); + goto out; + } + } + + OUT_BATCH( _3DPRIMITIVE | + PRIM_INDIRECT | + i915_render->hwprim | + PRIM_INDIRECT_ELTS | + nr_indices ); + draw_generate_indices( render, + indices, + save_nr_indices, + i915_render->fallback ); + +out: + return; +} + + +static void +i915_vbuf_render_release_vertices( struct vbuf_render *render, + void *vertices, + unsigned vertex_size, + unsigned vertices_used ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + size_t size = (size_t)vertex_size * (size_t)vertices_used; + + assert(i915->vbo); + + i915_render->vbo_offset += size; + i915->vbo = NULL; + i915->dirty |= I915_NEW_VBO; +} + + +static void +i915_vbuf_render_destroy( struct vbuf_render *render ) +{ + struct 
i915_vbuf_render *i915_render = i915_vbuf_render(render); + FREE(i915_render); +} + + +/** + * Create a new primitive render. + */ +static struct vbuf_render * +i915_vbuf_render_create( struct i915_context *i915 ) +{ + struct i915_vbuf_render *i915_render = CALLOC_STRUCT(i915_vbuf_render); + struct pipe_screen *screen = i915->pipe.screen; + + i915_render->i915 = i915; + + i915_render->base.max_vertex_buffer_bytes = 128*1024; + + /* NOTE: it must be such that state and vertices indices fit in a single + * batch buffer. + */ + i915_render->base.max_indices = 16*1024; + + i915_render->base.get_vertex_info = i915_vbuf_render_get_vertex_info; + i915_render->base.allocate_vertices = i915_vbuf_render_allocate_vertices; + i915_render->base.set_primitive = i915_vbuf_render_set_primitive; + i915_render->base.draw = i915_vbuf_render_draw; + i915_render->base.draw_arrays = i915_vbuf_render_draw_arrays; + i915_render->base.release_vertices = i915_vbuf_render_release_vertices; + i915_render->base.destroy = i915_vbuf_render_destroy; + + i915_render->vbo_alloc_size = 128 * 4096; + i915_render->vbo_size = i915_render->vbo_alloc_size; + i915_render->vbo_offset = 0; + i915_render->vbo = pipe_buffer_create(screen, + 64, + I915_BUFFER_USAGE_LIT_VERTEX, + i915_render->vbo_size); + i915_render->vbo_ptr = pipe_buffer_map(screen, + i915_render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + pipe_buffer_unmap(screen, i915_render->vbo); + + return &i915_render->base; +} + + +/** + * Create a new primitive vbuf/render stage. 
+ */ +struct draw_stage *i915_draw_vbuf_stage( struct i915_context *i915 ) +{ + struct vbuf_render *render; + struct draw_stage *stage; + + render = i915_vbuf_render_create(i915); + if(!render) + return NULL; + + stage = draw_vbuf_stage( i915->draw, render ); + if(!stage) { + render->destroy(render); + return NULL; + } + /** TODO JB: this shouldn't be here */ + draw_set_render(i915->draw, render); + + return stage; +} diff --git a/src/gallium/drivers/i915simple/i915_reg.h b/src/gallium/drivers/i915simple/i915_reg.h new file mode 100644 index 0000000000..04620fec68 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_reg.h @@ -0,0 +1,978 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/**************************************************************************
 *
 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/*
 * Command opcodes and bit-field encodings for the i915 driver.
 *
 * Conventions used below: FOO_SHIFT is the bit position of a field,
 * FOO_MASK the field's bits in place, and function-like macros place a
 * value into its field.  The "pNNN" remarks are page references carried
 * over from the original file (presumably to the hardware programming
 * manual).
 *
 * Constants that occupy bit 31 are written with unsigned literals so that
 * the shift does not overflow a 32-bit int.
 */

#ifndef I915_REG_H
#define I915_REG_H


/* Replace the bits selected by mask in var with value (value must already
 * be shifted into place).  var is evaluated twice.
 */
#define I915_SET_FIELD( var, mask, value ) \
   ((var) &= ~(mask), (var) |= (value))

#define CMD_3D                          (0x3<<29)

#define PRIM3D_INLINE                   (CMD_3D | (0x1f<<24))
#define PRIM3D_TRILIST                  (0x0<<18)
#define PRIM3D_TRISTRIP                 (0x1<<18)
#define PRIM3D_TRISTRIP_RVRSE           (0x2<<18)
#define PRIM3D_TRIFAN                   (0x3<<18)
#define PRIM3D_POLY                     (0x4<<18)
#define PRIM3D_LINELIST                 (0x5<<18)
#define PRIM3D_LINESTRIP                (0x6<<18)
#define PRIM3D_RECTLIST                 (0x7<<18)
#define PRIM3D_POINTLIST                (0x8<<18)
#define PRIM3D_DIB                      (0x9<<18)
#define PRIM3D_CLEAR_RECT               (0xa<<18)
#define PRIM3D_ZONE_INIT                (0xd<<18)
#define PRIM3D_MASK                     (0x1f<<18)

/* p137 */
#define _3DSTATE_AA_CMD                 (CMD_3D | (0x06<<24))
#define AA_LINE_ECAAR_WIDTH_ENABLE      (1<<16)
#define AA_LINE_ECAAR_WIDTH_0_5         0
#define AA_LINE_ECAAR_WIDTH_1_0         (1<<14)
#define AA_LINE_ECAAR_WIDTH_2_0         (2<<14)
#define AA_LINE_ECAAR_WIDTH_4_0         (3<<14)
#define AA_LINE_REGION_WIDTH_ENABLE     (1<<8)
#define AA_LINE_REGION_WIDTH_0_5        0
#define AA_LINE_REGION_WIDTH_1_0        (1<<6)
#define AA_LINE_REGION_WIDTH_2_0        (2<<6)
#define AA_LINE_REGION_WIDTH_4_0        (3<<6)

/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/
#define _3DSTATE_BACKFACE_STENCIL_OPS   (CMD_3D | (0x8<<24))
#define BFO_ENABLE_STENCIL_REF          (1<<23)
#define BFO_STENCIL_REF_SHIFT           15
#define BFO_STENCIL_REF_MASK            (0xff<<15)
#define BFO_ENABLE_STENCIL_FUNCS        (1<<14)
#define BFO_STENCIL_TEST_SHIFT          11
#define BFO_STENCIL_TEST_MASK           (0x7<<11)
#define BFO_STENCIL_FAIL_SHIFT          8
#define BFO_STENCIL_FAIL_MASK           (0x7<<8)
#define BFO_STENCIL_PASS_Z_FAIL_SHIFT   5
#define BFO_STENCIL_PASS_Z_FAIL_MASK    (0x7<<5)
#define BFO_STENCIL_PASS_Z_PASS_SHIFT   2
#define BFO_STENCIL_PASS_Z_PASS_MASK    (0x7<<2)
#define BFO_ENABLE_STENCIL_TWO_SIDE     (1<<1)
#define BFO_STENCIL_TWO_SIDE            (1<<0)


/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */
#define _3DSTATE_BACKFACE_STENCIL_MASKS (CMD_3D | (0x9<<24))
#define BFM_ENABLE_STENCIL_TEST_MASK    (1<<17)
#define BFM_ENABLE_STENCIL_WRITE_MASK   (1<<16)
#define BFM_STENCIL_TEST_MASK_SHIFT     8
#define BFM_STENCIL_TEST_MASK_MASK      (0xff<<8)
#define BFM_STENCIL_WRITE_MASK_SHIFT    0
#define BFM_STENCIL_WRITE_MASK_MASK     (0xff<<0)



/* 3DSTATE_BIN_CONTROL p141 */

/* p143 */
#define _3DSTATE_BUF_INFO_CMD           (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1)
/* Dword 1 */
#define BUF_3D_ID_COLOR_BACK            (0x3<<24)
#define BUF_3D_ID_DEPTH                 (0x7<<24)
#define BUF_3D_USE_FENCE                (1<<23)
#define BUF_3D_TILED_SURFACE            (1<<22)
#define BUF_3D_TILE_WALK_X              0
#define BUF_3D_TILE_WALK_Y              (1<<21)
#define BUF_3D_PITCH(x)                 (((x)/4)<<2)
/* Dword 2 */
#define BUF_3D_ADDR(x)                  ((x) & ~0x3)


/* 3DSTATE_CHROMA_KEY */

/* 3DSTATE_CLEAR_PARAMETERS, p150 */
#define _3DSTATE_CLEAR_PARAMETERS       (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5)
/* Dword 1 */
#define CLEARPARAM_CLEAR_RECT           (1 << 16)
#define CLEARPARAM_ZONE_INIT            (0 << 16)
#define CLEARPARAM_WRITE_COLOR          (1 << 2)
#define CLEARPARAM_WRITE_DEPTH          (1 << 1)
#define CLEARPARAM_WRITE_STENCIL        (1 << 0)

/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */
#define _3DSTATE_CONST_BLEND_COLOR_CMD  (CMD_3D | (0x1d<<24) | (0x88<<16))



/* 3DSTATE_COORD_SET_BINDINGS, p154 */
#define _3DSTATE_COORD_SET_BINDINGS     (CMD_3D | (0x16<<24))
/* Three bits per iunit, holding eunit. */
#define CSB_TCB(iunit, eunit)           ((eunit)<<((iunit)*3))

/* p156 */
#define _3DSTATE_DFLT_DIFFUSE_CMD       (CMD_3D | (0x1d<<24) | (0x99<<16))

/* p157 */
#define _3DSTATE_DFLT_SPEC_CMD          (CMD_3D | (0x1d<<24) | (0x9a<<16))

/* p158 */
#define _3DSTATE_DFLT_Z_CMD             (CMD_3D | (0x1d<<24) | (0x98<<16))


/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */
#define _3DSTATE_DEPTH_OFFSET_SCALE     (CMD_3D | (0x1d<<24) | (0x97<<16))
/* scale in dword 1 */


/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */
#define _3DSTATE_DEPTH_SUBRECT_DISABLE  (CMD_3D | (0x1c<<24) | (0x11<<19) | 0x2)

/* p161 */
#define _3DSTATE_DST_BUF_VARS_CMD       (CMD_3D | (0x1d<<24) | (0x85<<16))
/* Dword 1 */
#define TEX_DEFAULT_COLOR_OGL           (0<<30)
#define TEX_DEFAULT_COLOR_D3D           (1<<30)
#define ZR_EARLY_DEPTH                  (1<<29)
#define LOD_PRECLAMP_OGL                (1<<28)
#define LOD_PRECLAMP_D3D                (0<<28)
#define DITHER_FULL_ALWAYS              (0<<26)
#define DITHER_FULL_ON_FB_BLEND         (1<<26)
#define DITHER_CLAMPED_ALWAYS           (2<<26)
#define LINEAR_GAMMA_BLEND_32BPP        (1<<25)
#define DEBUG_DISABLE_ENH_DITHER        (1<<24)
#define DSTORG_HORT_BIAS(x)             ((x)<<20)
#define DSTORG_VERT_BIAS(x)             ((x)<<16)
#define COLOR_4_2_2_CHNL_WRT_ALL        0
#define COLOR_4_2_2_CHNL_WRT_Y          (1<<12)
#define COLOR_4_2_2_CHNL_WRT_CR         (2<<12)
#define COLOR_4_2_2_CHNL_WRT_CB         (3<<12)
#define COLOR_4_2_2_CHNL_WRT_CRCB       (4<<12)
#define COLOR_BUF_8BIT                  0
#define COLOR_BUF_RGB555                (1<<8)
#define COLOR_BUF_RGB565                (2<<8)
#define COLOR_BUF_ARGB8888              (3<<8)
#define DEPTH_FRMT_16_FIXED             0
#define DEPTH_FRMT_16_FLOAT             (1<<2)
#define DEPTH_FRMT_24_FIXED_8_OTHER     (2<<2)
#define VERT_LINE_STRIDE_1              (1<<1)
#define VERT_LINE_STRIDE_0              (0<<1)
#define VERT_LINE_STRIDE_OFS_1          1
#define VERT_LINE_STRIDE_OFS_0          0

/* p166 */
#define _3DSTATE_DRAW_RECT_CMD          (CMD_3D|(0x1d<<24)|(0x80<<16)|3)
/* Dword 1 */
#define DRAW_RECT_DIS_DEPTH_OFS         (1<<30)
#define DRAW_DITHER_OFS_X(x)            ((x)<<26)
#define DRAW_DITHER_OFS_Y(x)            ((x)<<24)
/* Dword 2 */
#define DRAW_YMIN(x)                    ((x)<<16)
#define DRAW_XMIN(x)                    (x)
/* Dword 3 */
#define DRAW_YMAX(x)                    ((x)<<16)
#define DRAW_XMAX(x)                    (x)
/* Dword 4 */
#define DRAW_YORG(x)                    ((x)<<16)
#define DRAW_XORG(x)                    (x)


/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */

/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */


/* _3DSTATE_FOG_COLOR, p173 */
#define _3DSTATE_FOG_COLOR_CMD          (CMD_3D|(0x15<<24))
#define FOG_COLOR_RED(x)                ((x)<<16)
#define FOG_COLOR_GREEN(x)              ((x)<<8)
#define FOG_COLOR_BLUE(x)               (x)

/* _3DSTATE_FOG_MODE, p174 */
#define _3DSTATE_FOG_MODE_CMD           (CMD_3D|(0x1d<<24)|(0x89<<16)|2)
/* Dword 1 */
#define FMC1_FOGFUNC_MODIFY_ENABLE      (1u<<31)
#define FMC1_FOGFUNC_VERTEX             (0<<28)
#define FMC1_FOGFUNC_PIXEL_EXP          (1<<28)
#define FMC1_FOGFUNC_PIXEL_EXP2         (2<<28)
#define FMC1_FOGFUNC_PIXEL_LINEAR       (3<<28)
#define FMC1_FOGFUNC_MASK               (3<<28)
#define FMC1_FOGINDEX_MODIFY_ENABLE     (1<<27)
#define FMC1_FOGINDEX_Z                 (0<<25)
#define FMC1_FOGINDEX_W                 (1<<25)
#define FMC1_C1_C2_MODIFY_ENABLE        (1<<24)
#define FMC1_DENSITY_MODIFY_ENABLE      (1<<23)
#define FMC1_C1_ONE                     (1<<13)
#define FMC1_C1_MASK                    (0xffff<<4)
/* Dword 2 */
#define FMC2_C2_ONE                     (1<<16)
/* Dword 3 */
#define FMC3_D_ONE                      (1<<16)



/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */
#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24))
#define IAB_MODIFY_ENABLE               (1<<23)
#define IAB_ENABLE                      (1<<22)
#define IAB_MODIFY_FUNC                 (1<<21)
#define IAB_FUNC_SHIFT                  16
#define IAB_MODIFY_SRC_FACTOR           (1<<11)
#define IAB_SRC_FACTOR_SHIFT            6
#define IAB_SRC_FACTOR_MASK             (BLENDFACT_MASK<<6)
#define IAB_MODIFY_DST_FACTOR           (1<<5)
#define IAB_DST_FACTOR_SHIFT            0
#define IAB_DST_FACTOR_MASK             (BLENDFACT_MASK<<0)


#define BLENDFUNC_ADD                   0x0
#define BLENDFUNC_SUBTRACT              0x1
#define BLENDFUNC_REVERSE_SUBTRACT      0x2
#define BLENDFUNC_MIN                   0x3
#define BLENDFUNC_MAX                   0x4
#define BLENDFUNC_MASK                  0x7

/* 3DSTATE_LOAD_INDIRECT, p180 */

#define _3DSTATE_LOAD_INDIRECT          (CMD_3D|(0x1d<<24)|(0x7<<16))
#define LI0_STATE_STATIC_INDIRECT       (0x01<<8)
#define LI0_STATE_DYNAMIC_INDIRECT      (0x02<<8)
#define LI0_STATE_SAMPLER               (0x04<<8)
#define LI0_STATE_MAP                   (0x08<<8)
#define LI0_STATE_PROGRAM               (0x10<<8)
#define LI0_STATE_CONSTANTS             (0x20<<8)

#define SIS0_BUFFER_ADDRESS(x)          ((x)&~0x3)
#define SIS0_FORCE_LOAD                 (1<<1)
#define SIS0_BUFFER_VALID               (1<<0)
#define SIS1_BUFFER_LENGTH(x)           ((x)&0xff)

#define DIS0_BUFFER_ADDRESS(x)          ((x)&~0x3)
#define DIS0_BUFFER_RESET               (1<<1)
#define DIS0_BUFFER_VALID               (1<<0)

#define SSB0_BUFFER_ADDRESS(x)          ((x)&~0x3)
#define SSB0_FORCE_LOAD                 (1<<1)
#define SSB0_BUFFER_VALID               (1<<0)
#define SSB1_BUFFER_LENGTH(x)           ((x)&0xff)

#define MSB0_BUFFER_ADDRESS(x)          ((x)&~0x3)
#define MSB0_FORCE_LOAD                 (1<<1)
#define MSB0_BUFFER_VALID               (1<<0)
#define MSB1_BUFFER_LENGTH(x)           ((x)&0xff)

#define PSP0_BUFFER_ADDRESS(x)          ((x)&~0x3)
#define PSP0_FORCE_LOAD                 (1<<1)
#define PSP0_BUFFER_VALID               (1<<0)
#define PSP1_BUFFER_LENGTH(x)           ((x)&0xff)

#define PSC0_BUFFER_ADDRESS(x)          ((x)&~0x3)
#define PSC0_FORCE_LOAD                 (1<<1)
#define PSC0_BUFFER_VALID               (1<<0)
#define PSC1_BUFFER_LENGTH(x)           ((x)&0xff)





/* _3DSTATE_RASTERIZATION_RULES */
#define _3DSTATE_RASTER_RULES_CMD       (CMD_3D|(0x07<<24))
#define ENABLE_POINT_RASTER_RULE        (1<<15)
#define OGL_POINT_RASTER_RULE           (1<<13)
#define ENABLE_TEXKILL_3D_4D            (1<<10)
#define TEXKILL_3D                      (0<<9)
#define TEXKILL_4D                      (1<<9)
#define ENABLE_LINE_STRIP_PROVOKE_VRTX  (1<<8)
#define ENABLE_TRI_FAN_PROVOKE_VRTX     (1<<5)
#define LINE_STRIP_PROVOKE_VRTX(x)      ((x)<<6)
#define TRI_FAN_PROVOKE_VRTX(x)         ((x)<<3)

/* _3DSTATE_SCISSOR_ENABLE, p256 */
#define _3DSTATE_SCISSOR_ENABLE_CMD     (CMD_3D|(0x1c<<24)|(0x10<<19))
#define ENABLE_SCISSOR_RECT             ((1<<1) | 1)
#define DISABLE_SCISSOR_RECT            (1<<1)

/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */
#define _3DSTATE_SCISSOR_RECT_0_CMD     (CMD_3D|(0x1d<<24)|(0x81<<16)|1)
/* Dword 1 */
#define SCISSOR_RECT_0_YMIN(x)          ((x)<<16)
#define SCISSOR_RECT_0_XMIN(x)          (x)
/* Dword 2 */
#define SCISSOR_RECT_0_YMAX(x)          ((x)<<16)
#define SCISSOR_RECT_0_XMAX(x)          (x)

/* p189 */
#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 ((0x3<<29)|(0x1d<<24)|(0x04<<16))
/* Load-enable bit for immediate state dword S<n>. */
#define I1_LOAD_S(n)                    (1<<(4+(n)))

#define S0_VB_OFFSET_MASK               0xffffffc
#define S0_AUTO_CACHE_INV_DISABLE       (1<<0)

#define S1_VERTEX_WIDTH_SHIFT           24
#define S1_VERTEX_WIDTH_MASK            (0x3f<<24)
#define S1_VERTEX_PITCH_SHIFT           16
#define S1_VERTEX_PITCH_MASK            (0x3f<<16)

#define TEXCOORDFMT_2D                  0x0
#define TEXCOORDFMT_3D                  0x1
#define TEXCOORDFMT_4D                  0x2
#define TEXCOORDFMT_1D                  0x3
#define TEXCOORDFMT_2D_16               0x4
#define TEXCOORDFMT_4D_16               0x5
#define TEXCOORDFMT_NOT_PRESENT         0xf
#define S2_TEXCOORD_FMT0_MASK           0xf
#define S2_TEXCOORD_FMT1_SHIFT          4
/* Four bits per texture coordinate set. */
#define S2_TEXCOORD_FMT(unit, type)     ((type)<<((unit)*4))
#define S2_TEXCOORD_NONE                (~0)

/* S3 not interesting */

#define S4_POINT_WIDTH_SHIFT            23
#define S4_POINT_WIDTH_MASK             (0x1ffu<<23)
#define S4_LINE_WIDTH_SHIFT             19
#define S4_LINE_WIDTH_ONE               (0x2<<19)
#define S4_LINE_WIDTH_MASK              (0xf<<19)
#define S4_FLATSHADE_ALPHA              (1<<18)
#define S4_FLATSHADE_FOG                (1<<17)
#define S4_FLATSHADE_SPECULAR           (1<<16)
#define S4_FLATSHADE_COLOR              (1<<15)
#define S4_CULLMODE_BOTH                (0<<13)
#define S4_CULLMODE_NONE                (1<<13)
#define S4_CULLMODE_CW                  (2<<13)
#define S4_CULLMODE_CCW                 (3<<13)
#define S4_CULLMODE_MASK                (3<<13)
#define S4_VFMT_POINT_WIDTH             (1<<12)
#define S4_VFMT_SPEC_FOG                (1<<11)
#define S4_VFMT_COLOR                   (1<<10)
#define S4_VFMT_DEPTH_OFFSET            (1<<9)
#define S4_VFMT_XYZ                     (1<<6)
#define S4_VFMT_XYZW                    (2<<6)
#define S4_VFMT_XY                      (3<<6)
#define S4_VFMT_XYW                     (4<<6)
#define S4_VFMT_XYZW_MASK               (7<<6)
#define S4_FORCE_DEFAULT_DIFFUSE        (1<<5)
#define S4_FORCE_DEFAULT_SPECULAR       (1<<4)
#define S4_LOCAL_DEPTH_OFFSET_ENABLE    (1<<3)
#define S4_VFMT_FOG_PARAM               (1<<2)
#define S4_SPRITE_POINT_ENABLE          (1<<1)
#define S4_LINE_ANTIALIAS_ENABLE        (1<<0)

#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH   | \
                      S4_VFMT_SPEC_FOG      | \
                      S4_VFMT_COLOR         | \
                      S4_VFMT_DEPTH_OFFSET  | \
                      S4_VFMT_XYZW_MASK     | \
                      S4_VFMT_FOG_PARAM)


#define S5_WRITEDISABLE_ALPHA           (1u<<31)
#define S5_WRITEDISABLE_RED             (1<<30)
#define S5_WRITEDISABLE_GREEN           (1<<29)
#define S5_WRITEDISABLE_BLUE            (1<<28)
#define S5_WRITEDISABLE_MASK            (0xfu<<28)
#define S5_FORCE_DEFAULT_POINT_SIZE     (1<<27)
#define S5_LAST_PIXEL_ENABLE            (1<<26)
#define S5_GLOBAL_DEPTH_OFFSET_ENABLE   (1<<25)
#define S5_FOG_ENABLE                   (1<<24)
#define S5_STENCIL_REF_SHIFT            16
#define S5_STENCIL_REF_MASK             (0xff<<16)
#define S5_STENCIL_TEST_FUNC_SHIFT      13
#define S5_STENCIL_TEST_FUNC_MASK       (0x7<<13)
#define S5_STENCIL_FAIL_SHIFT           10
#define S5_STENCIL_FAIL_MASK            (0x7<<10)
#define S5_STENCIL_PASS_Z_FAIL_SHIFT    7
#define S5_STENCIL_PASS_Z_FAIL_MASK     (0x7<<7)
#define S5_STENCIL_PASS_Z_PASS_SHIFT    4
#define S5_STENCIL_PASS_Z_PASS_MASK     (0x7<<4)
#define S5_STENCIL_WRITE_ENABLE         (1<<3)
#define S5_STENCIL_TEST_ENABLE          (1<<2)
#define S5_COLOR_DITHER_ENABLE          (1<<1)
#define S5_LOGICOP_ENABLE               (1<<0)


#define S6_ALPHA_TEST_ENABLE            (1u<<31)
#define S6_ALPHA_TEST_FUNC_SHIFT        28
#define S6_ALPHA_TEST_FUNC_MASK         (0x7<<28)
#define S6_ALPHA_REF_SHIFT              20
#define S6_ALPHA_REF_MASK               (0xff<<20)
#define S6_DEPTH_TEST_ENABLE            (1<<19)
#define S6_DEPTH_TEST_FUNC_SHIFT        16
#define S6_DEPTH_TEST_FUNC_MASK         (0x7<<16)
#define S6_CBUF_BLEND_ENABLE            (1<<15)
#define S6_CBUF_BLEND_FUNC_SHIFT        12
#define S6_CBUF_BLEND_FUNC_MASK         (0x7<<12)
#define S6_CBUF_SRC_BLEND_FACT_SHIFT    8
#define S6_CBUF_SRC_BLEND_FACT_MASK     (0xf<<8)
#define S6_CBUF_DST_BLEND_FACT_SHIFT    4
#define S6_CBUF_DST_BLEND_FACT_MASK     (0xf<<4)
#define S6_DEPTH_WRITE_ENABLE           (1<<3)
#define S6_COLOR_WRITE_ENABLE           (1<<2)
#define S6_TRISTRIP_PV_SHIFT            0
#define S6_TRISTRIP_PV_MASK             (0x3<<0)

#define S7_DEPTH_OFFSET_CONST_MASK      (~0)



#define DST_BLND_FACT(f)                ((f)<<S6_CBUF_DST_BLEND_FACT_SHIFT)
#define SRC_BLND_FACT(f)                ((f)<<S6_CBUF_SRC_BLEND_FACT_SHIFT)
#define DST_ABLND_FACT(f)               ((f)<<IAB_DST_FACTOR_SHIFT)
#define SRC_ABLND_FACT(f)               ((f)<<IAB_SRC_FACTOR_SHIFT)




/* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */

/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */
#define _3DSTATE_MAP_PALETTE_LOAD_32    (CMD_3D|(0x1d<<24)|(0x8f<<16))
/* subsequent dwords up to length (max 16) are ARGB8888 color values */

/* _3DSTATE_MODES_4, p218 */
#define _3DSTATE_MODES_4_CMD            (CMD_3D|(0x0d<<24))
#define ENABLE_LOGIC_OP_FUNC            (1<<23)
#define LOGIC_OP_FUNC(x)                ((x)<<18)
#define LOGICOP_MASK                    (0xf<<18)
#define MODE4_ENABLE_STENCIL_TEST_MASK  ((1<<17)|(0xff00))
#define ENABLE_STENCIL_TEST_MASK        (1<<17)
#define STENCIL_TEST_MASK(x)            (((x)&0xff)<<8)
#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff))
#define ENABLE_STENCIL_WRITE_MASK       (1<<16)
#define STENCIL_WRITE_MASK(x)           ((x)&0xff)

/* _3DSTATE_MODES_5, p220 */
#define _3DSTATE_MODES_5_CMD            (CMD_3D|(0x0c<<24))
#define PIPELINE_FLUSH_RENDER_CACHE     (1<<18)
#define PIPELINE_FLUSH_TEXTURE_CACHE    (1<<16)


/* p221 */
#define _3DSTATE_PIXEL_SHADER_CONSTANTS (CMD_3D|(0x1d<<24)|(0x6<<16))
#define PS1_REG(n)                      (1<<(n))
#define PS2_CONST_X(n)                  (n)
#define PS3_CONST_Y(n)                  (n)
#define PS4_CONST_Z(n)                  (n)
#define PS5_CONST_W(n)                  (n)

/* p222 */


#define I915_MAX_TEX_INDIRECT           4
#define I915_MAX_TEX_INSN               32
#define I915_MAX_ALU_INSN               64
#define I915_MAX_DECL_INSN              27
#define I915_MAX_TEMPORARY              16


/* Each instruction is 3 dwords long, though most don't require all
 * this space.  Maximum of 123 instructions.  Smaller maxes per insn
 * type.
 */
#define _3DSTATE_PIXEL_SHADER_PROGRAM   (CMD_3D|(0x1d<<24)|(0x5<<16))

#define REG_TYPE_R                      0 /* temporary regs, no need to
                                           * dcl, must be written before
                                           * read -- Preserved between
                                           * phases.
                                           */
#define REG_TYPE_T                      1 /* Interpolated values, must be
                                           * dcl'ed before use.
                                           *
                                           * 0..7: texture coord,
                                           * 8: diffuse spec,
                                           * 9: specular color,
                                           * 10: fog parameter in w.
                                           */
#define REG_TYPE_CONST                  2 /* Restriction: only one const
                                           * can be referenced per
                                           * instruction, though it may be
                                           * selected for multiple inputs.
                                           * Constants not initialized
                                           * default to zero.
                                           */
#define REG_TYPE_S                      3 /* sampler */
#define REG_TYPE_OC                     4 /* output color (rgba) */
#define REG_TYPE_OD                     5 /* output depth (w), xyz are
                                           * temporaries.  If not written,
                                           * interpolated depth is used?
                                           */
#define REG_TYPE_U                      6 /* unpreserved temporaries */
#define REG_TYPE_MASK                   0x7
#define REG_NR_MASK                     0xf


/* REG_TYPE_T:
 */
#define T_TEX0                          0
#define T_TEX1                          1
#define T_TEX2                          2
#define T_TEX3                          3
#define T_TEX4                          4
#define T_TEX5                          5
#define T_TEX6                          6
#define T_TEX7                          7
#define T_DIFFUSE                       8
#define T_SPECULAR                      9
#define T_FOG_W                         10 /* interpolated fog is in W coord */

/* Arithmetic instructions */

/* .replicate_swizzle == selection and replication of a particular
 * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww
 */
#define A0_NOP    (0x0<<24)  /* no operation */
#define A0_ADD    (0x1<<24)  /* dst = src0 + src1 */
#define A0_MOV    (0x2<<24)  /* dst = src0 */
#define A0_MUL    (0x3<<24)  /* dst = src0 * src1 */
#define A0_MAD    (0x4<<24)  /* dst = src0 * src1 + src2 */
#define A0_DP2ADD (0x5<<24)  /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */
#define A0_DP3    (0x6<<24)  /* dst.xyzw = src0.xyz dot src1.xyz */
#define A0_DP4    (0x7<<24)  /* dst.xyzw = src0.xyzw dot src1.xyzw */
#define A0_FRC    (0x8<<24)  /* dst = src0 - floor(src0) */
#define A0_RCP    (0x9<<24)  /* dst.xyzw = 1/(src0.replicate_swizzle) */
#define A0_RSQ    (0xa<<24)  /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */
#define A0_EXP    (0xb<<24)  /* dst.xyzw = exp2(src0.replicate_swizzle) */
#define A0_LOG    (0xc<<24)  /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */
#define A0_CMP    (0xd<<24)  /* dst = (src0 >= 0.0) ? src1 : src2 */
#define A0_MIN    (0xe<<24)  /* dst = (src0 < src1) ? src0 : src1 */
#define A0_MAX    (0xf<<24)  /* dst = (src0 >= src1) ? src0 : src1 */
#define A0_FLR    (0x10<<24) /* dst = floor(src0) */
#define A0_MOD    (0x11<<24) /* dst = src0 fmod 1.0 */
#define A0_TRC    (0x12<<24) /* dst = int(src0) */
#define A0_SGE    (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */
#define A0_SLT    (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */
#define A0_DEST_SATURATE                (1<<22)
#define A0_DEST_TYPE_SHIFT              19
/* Allow: R, OC, OD, U */
#define A0_DEST_NR_SHIFT                14
/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
#define A0_DEST_CHANNEL_X               (1<<10)
#define A0_DEST_CHANNEL_Y               (2<<10)
#define A0_DEST_CHANNEL_Z               (4<<10)
#define A0_DEST_CHANNEL_W               (8<<10)
#define A0_DEST_CHANNEL_ALL             (0xf<<10)
#define A0_DEST_CHANNEL_SHIFT           10
#define A0_SRC0_TYPE_SHIFT              7
#define A0_SRC0_NR_SHIFT                2

#define A0_DEST_CHANNEL_XY              (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y)
#define A0_DEST_CHANNEL_XYZ             (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z)


#define SRC_X                           0
#define SRC_Y                           1
#define SRC_Z                           2
#define SRC_W                           3
#define SRC_ZERO                        4
#define SRC_ONE                         5

#define A1_SRC0_CHANNEL_X_NEGATE        (1u<<31)
#define A1_SRC0_CHANNEL_X_SHIFT         28
#define A1_SRC0_CHANNEL_Y_NEGATE        (1<<27)
#define A1_SRC0_CHANNEL_Y_SHIFT         24
#define A1_SRC0_CHANNEL_Z_NEGATE        (1<<23)
#define A1_SRC0_CHANNEL_Z_SHIFT         20
#define A1_SRC0_CHANNEL_W_NEGATE        (1<<19)
#define A1_SRC0_CHANNEL_W_SHIFT         16
#define A1_SRC1_TYPE_SHIFT              13
#define A1_SRC1_NR_SHIFT                8
#define A1_SRC1_CHANNEL_X_NEGATE        (1<<7)
#define A1_SRC1_CHANNEL_X_SHIFT         4
#define A1_SRC1_CHANNEL_Y_NEGATE        (1<<3)
#define A1_SRC1_CHANNEL_Y_SHIFT         0

#define A2_SRC1_CHANNEL_Z_NEGATE        (1u<<31)
#define A2_SRC1_CHANNEL_Z_SHIFT         28
#define A2_SRC1_CHANNEL_W_NEGATE        (1<<27)
#define A2_SRC1_CHANNEL_W_SHIFT         24
#define A2_SRC2_TYPE_SHIFT              21
#define A2_SRC2_NR_SHIFT                16
#define A2_SRC2_CHANNEL_X_NEGATE        (1<<15)
#define A2_SRC2_CHANNEL_X_SHIFT         12
#define A2_SRC2_CHANNEL_Y_NEGATE        (1<<11)
#define A2_SRC2_CHANNEL_Y_SHIFT         8
#define A2_SRC2_CHANNEL_Z_NEGATE        (1<<7)
#define A2_SRC2_CHANNEL_Z_SHIFT         4
#define A2_SRC2_CHANNEL_W_NEGATE        (1<<3)
#define A2_SRC2_CHANNEL_W_SHIFT         0



/* Texture instructions */
#define T0_TEXLD     (0x15<<24) /* Sample texture using predeclared
                                 * sampler and address, and output
                                 * filtered texel data to destination
                                 * register */
#define T0_TEXLDP    (0x16<<24) /* Same as texld but performs a
                                 * perspective divide of the texture
                                 * coordinate .xyz values by .w before
                                 * sampling. */
#define T0_TEXLDB    (0x17<<24) /* Same as texld but biases the
                                 * computed LOD by w.  Only S4.6 two's
                                 * comp is used.  This implies that a
                                 * float to fixed conversion is
                                 * done. */
#define T0_TEXKILL   (0x18<<24) /* Does not perform a sampling
                                 * operation.  Simply kills the pixel
                                 * if any channel of the address
                                 * register is < 0.0. */
#define T0_DEST_TYPE_SHIFT              19
/* Allow: R, OC, OD, U */
/* Note: U (unpreserved) regs do not retain their values between
 * phases (cannot be used for feedback)
 *
 * Note: oC and OD registers can only be used as the destination of a
 * texture instruction once per phase (this is an implementation
 * restriction).
 */
#define T0_DEST_NR_SHIFT                14
/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */
#define T0_SAMPLER_NR_SHIFT             0 /* This field ignored for TEXKILL */
#define T0_SAMPLER_NR_MASK              (0xf<<0)

#define T1_ADDRESS_REG_TYPE_SHIFT       24 /* Reg to use as texture coord */
/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */
#define T1_ADDRESS_REG_NR_SHIFT         17
#define T2_MBZ                          0

/* Declaration instructions */
#define D0_DCL       (0x19<<24) /* Declare a t (interpolated attrib)
                                 * register or an s (sampler)
                                 * register. */
#define D0_SAMPLE_TYPE_SHIFT            22
#define D0_SAMPLE_TYPE_2D               (0x0<<22)
#define D0_SAMPLE_TYPE_CUBE             (0x1<<22)
#define D0_SAMPLE_TYPE_VOLUME           (0x2<<22)
#define D0_SAMPLE_TYPE_MASK             (0x3<<22)

#define D0_TYPE_SHIFT                   19
/* Allow: T, S */
#define D0_NR_SHIFT                     14
/* Allow T: 0..10, S: 0..15 */
#define D0_CHANNEL_X                    (1<<10)
#define D0_CHANNEL_Y                    (2<<10)
#define D0_CHANNEL_Z                    (4<<10)
#define D0_CHANNEL_W                    (8<<10)
#define D0_CHANNEL_ALL                  (0xf<<10)
#define D0_CHANNEL_NONE                 (0<<10)

#define D0_CHANNEL_XY                   (D0_CHANNEL_X|D0_CHANNEL_Y)
#define D0_CHANNEL_XYZ                  (D0_CHANNEL_XY|D0_CHANNEL_Z)

/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse
 * or specular declarations.
 *
 * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw)
 *
 * Must be zero for S (sampler) dcls
 */
#define D1_MBZ                          0
#define D2_MBZ                          0



/* p207 */
#define _3DSTATE_MAP_STATE              (CMD_3D|(0x1d<<24)|(0x0<<16))

#define MS1_MAPMASK_SHIFT               0
#define MS1_MAPMASK_MASK                (0x8fff<<0)

#define MS2_UNTRUSTED_SURFACE           (1u<<31)
#define MS2_ADDRESS_MASK                0xfffffffc
/* NOTE(review): the next two names are defined as the same bit -- confirm
 * against the hardware documentation.
 */
#define MS2_VERTICAL_LINE_STRIDE        (1<<1)
#define MS2_VERTICAL_OFFSET             (1<<1)

#define MS3_HEIGHT_SHIFT                21
#define MS3_WIDTH_SHIFT                 10
#define MS3_PALETTE_SELECT              (1<<9)
#define MS3_MAPSURF_FORMAT_SHIFT        7
#define MS3_MAPSURF_FORMAT_MASK         (0x7<<7)
#define MAPSURF_8BIT                    (1<<7)
#define MAPSURF_16BIT                   (2<<7)
#define MAPSURF_32BIT                   (3<<7)
#define MAPSURF_422                     (5<<7)
#define MAPSURF_COMPRESSED              (6<<7)
#define MAPSURF_4BIT_INDEXED            (7<<7)
/* NOTE(review): this mask is three bits wide, yet MT_* values below go up
 * to (0xF<<3) -- confirm whether it should be (0xf << 3).
 */
#define MS3_MT_FORMAT_MASK              (0x7 << 3)
#define MS3_MT_FORMAT_SHIFT             3
#define MT_4BIT_IDX_ARGB8888            (7<<3) /* SURFACE_4BIT_INDEXED */
#define MT_8BIT_I8                      (0<<3) /* SURFACE_8BIT */
#define MT_8BIT_L8                      (1<<3)
#define MT_8BIT_A8                      (4<<3)
#define MT_8BIT_MONO8                   (5<<3)
#define MT_16BIT_RGB565                 (0<<3) /* SURFACE_16BIT */
#define MT_16BIT_ARGB1555               (1<<3)
#define MT_16BIT_ARGB4444               (2<<3)
#define MT_16BIT_AY88                   (3<<3)
#define MT_16BIT_88DVDU                 (5<<3)
#define MT_16BIT_BUMP_655LDVDU          (6<<3)
#define MT_16BIT_I16                    (7<<3)
#define MT_16BIT_L16                    (8<<3)
#define MT_16BIT_A16                    (9<<3)
#define MT_32BIT_ARGB8888               (0<<3) /* SURFACE_32BIT */
#define MT_32BIT_ABGR8888               (1<<3)
#define MT_32BIT_XRGB8888               (2<<3)
#define MT_32BIT_XBGR8888               (3<<3)
#define MT_32BIT_QWVU8888               (4<<3)
#define MT_32BIT_AXVU8888               (5<<3)
#define MT_32BIT_LXVU8888               (6<<3)
#define MT_32BIT_XLVU8888               (7<<3)
#define MT_32BIT_ARGB2101010            (8<<3)
#define MT_32BIT_ABGR2101010            (9<<3)
#define MT_32BIT_AWVU2101010            (0xA<<3)
#define MT_32BIT_GR1616                 (0xB<<3)
#define MT_32BIT_VU1616                 (0xC<<3)
#define MT_32BIT_xI824                  (0xD<<3)
#define MT_32BIT_xA824                  (0xE<<3)
#define MT_32BIT_xL824                  (0xF<<3)
#define MT_422_YCRCB_SWAPY              (0<<3) /* SURFACE_422 */
#define MT_422_YCRCB_NORMAL             (1<<3)
#define MT_422_YCRCB_SWAPUV             (2<<3)
#define MT_422_YCRCB_SWAPUVY            (3<<3)
#define MT_COMPRESS_DXT1                (0<<3) /* SURFACE_COMPRESSED */
#define MT_COMPRESS_DXT2_3              (1<<3)
#define MT_COMPRESS_DXT4_5              (2<<3)
#define MT_COMPRESS_FXT1                (3<<3)
#define MT_COMPRESS_DXT1_RGB            (4<<3)
#define MS3_USE_FENCE_REGS              (1<<2)
#define MS3_TILED_SURFACE               (1<<1)
#define MS3_TILE_WALK                   (1<<0)

#define MS4_PITCH_SHIFT                 21
#define MS4_CUBE_FACE_ENA_NEGX          (1<<20)
#define MS4_CUBE_FACE_ENA_POSX          (1<<19)
#define MS4_CUBE_FACE_ENA_NEGY          (1<<18)
#define MS4_CUBE_FACE_ENA_POSY          (1<<17)
#define MS4_CUBE_FACE_ENA_NEGZ          (1<<16)
#define MS4_CUBE_FACE_ENA_POSZ          (1<<15)
#define MS4_CUBE_FACE_ENA_MASK          (0x3f<<15)
#define MS4_MAX_LOD_SHIFT               9
#define MS4_MAX_LOD_MASK                (0x3f<<9)
#define MS4_MIP_LAYOUT_LEGACY           (0<<8)
#define MS4_MIP_LAYOUT_BELOW_LPT        (0<<8)
#define MS4_MIP_LAYOUT_RIGHT_LPT        (1<<8)
#define MS4_VOLUME_DEPTH_SHIFT          0
#define MS4_VOLUME_DEPTH_MASK           (0xff<<0)

/* p244 */
#define _3DSTATE_SAMPLER_STATE          (CMD_3D|(0x1d<<24)|(0x1<<16))

#define SS1_MAPMASK_SHIFT               0
#define SS1_MAPMASK_MASK                (0x8fff<<0)

#define SS2_REVERSE_GAMMA_ENABLE        (1u<<31)
#define SS2_PACKED_TO_PLANAR_ENABLE     (1<<30)
#define SS2_COLORSPACE_CONVERSION       (1<<29)
#define SS2_CHROMAKEY_SHIFT             27
#define SS2_BASE_MIP_LEVEL_SHIFT        22
#define SS2_BASE_MIP_LEVEL_MASK         (0x1f<<22)
#define SS2_MIP_FILTER_SHIFT            20
#define SS2_MIP_FILTER_MASK             (0x3<<20)
#define MIPFILTER_NONE                  0
#define MIPFILTER_NEAREST               1
#define MIPFILTER_LINEAR                3
#define SS2_MAG_FILTER_SHIFT            17
#define SS2_MAG_FILTER_MASK             (0x7<<17)
#define FILTER_NEAREST                  0
#define FILTER_LINEAR                   1
#define FILTER_ANISOTROPIC              2
#define FILTER_4X4_1                    3
#define FILTER_4X4_2                    4
#define FILTER_4X4_FLAT                 5
#define FILTER_6X5_MONO                 6 /* XXX - check */
#define SS2_MIN_FILTER_SHIFT            14
#define SS2_MIN_FILTER_MASK             (0x7<<14)
#define SS2_LOD_BIAS_SHIFT              5
#define SS2_LOD_BIAS_ONE                (0x10<<5)
#define SS2_LOD_BIAS_MASK               (0x1ff<<5)
/* Shadow requires:
 *  MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format
 *  FILTER_4X4_x  MIN and MAG filters
 */
#define SS2_SHADOW_ENABLE               (1<<4)
#define SS2_MAX_ANISO_MASK              (1<<3)
#define SS2_MAX_ANISO_2                 (0<<3)
#define SS2_MAX_ANISO_4                 (1<<3)
#define SS2_SHADOW_FUNC_SHIFT           0
#define SS2_SHADOW_FUNC_MASK            (0x7<<0)
/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */

#define SS3_MIN_LOD_SHIFT               24
#define SS3_MIN_LOD_ONE                 (0x10<<24)
#define SS3_MIN_LOD_MASK                (0xffu<<24)
#define SS3_KILL_PIXEL_ENABLE           (1<<17)
#define SS3_TCX_ADDR_MODE_SHIFT         12
#define SS3_TCX_ADDR_MODE_MASK          (0x7<<12)
#define TEXCOORDMODE_WRAP               0
#define TEXCOORDMODE_MIRROR             1
#define TEXCOORDMODE_CLAMP_EDGE         2
#define TEXCOORDMODE_CUBE               3
#define TEXCOORDMODE_CLAMP_BORDER       4
#define TEXCOORDMODE_MIRROR_ONCE        5
#define SS3_TCY_ADDR_MODE_SHIFT         9
#define SS3_TCY_ADDR_MODE_MASK          (0x7<<9)
#define SS3_TCZ_ADDR_MODE_SHIFT         6
#define SS3_TCZ_ADDR_MODE_MASK          (0x7<<6)
#define SS3_NORMALIZED_COORDS           (1<<5)
#define SS3_TEXTUREMAP_INDEX_SHIFT      1
#define SS3_TEXTUREMAP_INDEX_MASK       (0xf<<1)
#define SS3_DEINTERLACER_ENABLE         (1<<0)

#define SS4_BORDER_COLOR_MASK           (~0)

/* 3DSTATE_SPAN_STIPPLE, p258
 */
#define _3DSTATE_STIPPLE                ((0x3<<29)|(0x1d<<24)|(0x83<<16))
#define ST1_ENABLE                      (1<<16)
#define ST1_MASK                        (0xffff)

#define _3DSTATE_DEFAULT_Z              ((0x3<<29)|(0x1d<<24)|(0x98<<16))
#define _3DSTATE_DEFAULT_DIFFUSE        ((0x3<<29)|(0x1d<<24)|(0x99<<16))
#define _3DSTATE_DEFAULT_SPECULAR       ((0x3<<29)|(0x1d<<24)|(0x9a<<16))


#define MI_FLUSH                        ((0<<29)|(4<<23))
#define FLUSH_MAP_CACHE                 (1<<0)
#define INHIBIT_FLUSH_RENDER_CACHE      (1<<2)


/* CMD_3D and the PRIM3D_* primitive types are defined once, near the top
 * of this file.
 */
#define _3DPRIMITIVE                    ((0x3<<29)|(0x1f<<24))
#define PRIM_INDIRECT                   (1<<23)
#define PRIM_INLINE                     (0<<23)
#define PRIM_INDIRECT_SEQUENTIAL        (0<<17)
#define PRIM_INDIRECT_ELTS              (1<<17)

/* Colour packing helpers; r, g, b, a are 8-bit channel values. */
#define I915PACKCOLOR4444(r,g,b,a) \
  ((((a) & 0xf0) << 8) | (((r) & 0xf0) << 4) | ((g) & 0xf0) | ((b) >> 4))

#define I915PACKCOLOR1555(r,g,b,a) \
  ((((r) & 0xf8) << 7) | (((g) & 0xf8) << 2) | (((b) & 0xf8) >> 3) | \
   ((a) ? 0x8000 : 0))

#define I915PACKCOLOR565(r,g,b) \
  ((((r) & 0xf8) << 8) | (((g) & 0xfc) << 3) | (((b) & 0xf8) >> 3))

/* The alpha channel is shifted as unsigned so that values >= 0x80 do not
 * overflow a signed int.
 */
#define I915PACKCOLOR8888(r,g,b,a) \
  (((unsigned)(a) << 24) | ((r) << 16) | ((g) << 8) | (b))




#define BR00_BITBLT_CLIENT              0x40000000
#define BR00_OP_COLOR_BLT               0x10000000
#define BR00_OP_SRC_COPY_BLT            0x10C00000
#define BR13_SOLID_PATTERN              0x80000000

#define XY_COLOR_BLT_CMD                ((2<<29)|(0x50<<22)|0x4)
#define XY_COLOR_BLT_WRITE_ALPHA        (1<<21)
#define XY_COLOR_BLT_WRITE_RGB          (1<<20)

#define XY_SRC_COPY_BLT_CMD             ((2<<29)|(0x53<<22)|6)
#define XY_SRC_COPY_BLT_WRITE_ALPHA     (1<<21)
#define XY_SRC_COPY_BLT_WRITE_RGB       (1<<20)

#define MI_WAIT_FOR_EVENT               ((0x3<<23))
#define MI_WAIT_FOR_PLANE_B_FLIP        (1<<6)
#define MI_WAIT_FOR_PLANE_A_FLIP        (1<<2)

#define MI_BATCH_BUFFER                 (0x30<<23)
#define MI_BATCH_BUFFER_START           (0x31<<23)
#define MI_BATCH_BUFFER_END             (0xa<<23)



#define COMPAREFUNC_ALWAYS              0
#define COMPAREFUNC_NEVER               0x1
#define COMPAREFUNC_LESS                0x2
#define COMPAREFUNC_EQUAL               0x3
#define COMPAREFUNC_LEQUAL              0x4
#define COMPAREFUNC_GREATER             0x5
#define COMPAREFUNC_NOTEQUAL            0x6
#define COMPAREFUNC_GEQUAL              0x7

#define STENCILOP_KEEP                  0
#define STENCILOP_ZERO                  0x1
#define STENCILOP_REPLACE               0x2
#define STENCILOP_INCRSAT               0x3
#define STENCILOP_DECRSAT               0x4
#define STENCILOP_INCR                  0x5
#define STENCILOP_DECR                  0x6
#define STENCILOP_INVERT                0x7

#define LOGICOP_CLEAR                   0
#define LOGICOP_NOR                     0x1
#define LOGICOP_AND_INV                 0x2
#define LOGICOP_COPY_INV                0x3
#define LOGICOP_AND_RVRSE               0x4
#define LOGICOP_INV                     0x5
#define LOGICOP_XOR                     0x6
#define LOGICOP_NAND                    0x7
#define LOGICOP_AND                     0x8
#define LOGICOP_EQUIV                   0x9
#define LOGICOP_NOOP                    0xa
#define LOGICOP_OR_INV                  0xb
#define LOGICOP_COPY                    0xc
#define LOGICOP_OR_RVRSE                0xd
#define LOGICOP_OR                      0xe
#define LOGICOP_SET                     0xf

#define BLENDFACT_ZERO                  0x01
#define BLENDFACT_ONE                   0x02
#define BLENDFACT_SRC_COLR              0x03
#define BLENDFACT_INV_SRC_COLR          0x04
#define BLENDFACT_SRC_ALPHA             0x05
#define BLENDFACT_INV_SRC_ALPHA         0x06
#define BLENDFACT_DST_ALPHA             0x07
#define BLENDFACT_INV_DST_ALPHA         0x08
#define BLENDFACT_DST_COLR              0x09
#define BLENDFACT_INV_DST_COLR          0x0a
#define BLENDFACT_SRC_ALPHA_SATURATE    0x0b
#define BLENDFACT_CONST_COLOR           0x0c
#define BLENDFACT_INV_CONST_COLOR       0x0d
#define BLENDFACT_CONST_ALPHA           0x0e
#define BLENDFACT_INV_CONST_ALPHA       0x0f
#define BLENDFACT_MASK                  0x0f

#define PCI_CHIP_I915_G                 0x2582
#define PCI_CHIP_I915_GM                0x2592
#define PCI_CHIP_I945_G                 0x2772
#define PCI_CHIP_I945_GM                0x27A2
#define PCI_CHIP_I945_GME               0x27AE
#define PCI_CHIP_G33_G                  0x29C2
#define PCI_CHIP_Q35_G                  0x29B2
#define PCI_CHIP_Q33_G                  0x29D2


#endif
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" +#include "util/u_string.h" + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_screen.h" +#include "i915_texture.h" + + +static const char * +i915_get_vendor( struct pipe_screen *pscreen ) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char * +i915_get_name( struct pipe_screen *pscreen ) +{ + static char buffer[128]; + const char *chipset; + + switch (i915_screen(pscreen)->pci_id) { + case PCI_CHIP_I915_G: + chipset = "915G"; + break; + case PCI_CHIP_I915_GM: + chipset = "915GM"; + break; + case PCI_CHIP_I945_G: + chipset = "945G"; + break; + case PCI_CHIP_I945_GM: + chipset = "945GM"; + break; + case PCI_CHIP_I945_GME: + chipset = "945GME"; + break; + case PCI_CHIP_G33_G: + chipset = "G33"; + break; + case PCI_CHIP_Q35_G: + chipset = "Q35"; + break; + case PCI_CHIP_Q33_G: + chipset = "Q33"; + break; + default: + chipset = "unknown"; + break; + } + + util_snprintf(buffer, sizeof(buffer), "i915 (chipset: %s)", chipset); + return buffer; +} + + +static int +i915_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 11; /* max 1024x1024 */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 11; /* max 1024x1024 */ + default: + return 0; + } +} + + +static float +i915_get_paramf(struct pipe_screen 
*screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 7.5; + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 4.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + + default: + return 0; + } +} + + +static boolean +i915_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) +{ + static const enum pipe_format tex_supported[] = { + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_L8_UNORM, + PIPE_FORMAT_A8_UNORM, + PIPE_FORMAT_I8_UNORM, + PIPE_FORMAT_A8L8_UNORM, + PIPE_FORMAT_YCBCR, + PIPE_FORMAT_YCBCR_REV, + PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_NONE /* list terminator */ + }; + static const enum pipe_format surface_supported[] = { + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_S8Z24_UNORM, + /*PIPE_FORMAT_R16G16B16A16_SNORM,*/ + PIPE_FORMAT_NONE /* list terminator */ + }; + const enum pipe_format *list; + uint i; + + if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) + list = surface_supported; + else + list = tex_supported; + + for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) { + if (list[i] == format) + return TRUE; + } + + return FALSE; +} + + +static void +i915_destroy_screen( struct pipe_screen *screen ) +{ + struct pipe_winsys *winsys = screen->winsys; + + if(winsys->destroy) + winsys->destroy(winsys); + + FREE(screen); +} + + +static void * +i915_surface_map( struct pipe_screen *screen, + struct pipe_surface *surface, + unsigned flags ) +{ + char *map = pipe_buffer_map( screen, surface->buffer, flags ); + if (map == NULL) + return NULL; + + if (surface->texture && + (flags & PIPE_BUFFER_USAGE_CPU_WRITE)) + { + /* Do something to notify contexts of a texture change. 
+ */ + /* i915_screen(screen)->timestamp++; */ + } + + return map + surface->offset; +} + +static void +i915_surface_unmap(struct pipe_screen *screen, + struct pipe_surface *surface) +{ + pipe_buffer_unmap( screen, surface->buffer ); +} + + + +/** + * Create a new i915_screen object + */ +struct pipe_screen * +i915_create_screen(struct pipe_winsys *winsys, uint pci_id) +{ + struct i915_screen *i915screen = CALLOC_STRUCT(i915_screen); + + if (!i915screen) + return NULL; + + switch (pci_id) { + case PCI_CHIP_I915_G: + case PCI_CHIP_I915_GM: + i915screen->is_i945 = FALSE; + break; + + case PCI_CHIP_I945_G: + case PCI_CHIP_I945_GM: + case PCI_CHIP_I945_GME: + case PCI_CHIP_G33_G: + case PCI_CHIP_Q33_G: + case PCI_CHIP_Q35_G: + i915screen->is_i945 = TRUE; + break; + + default: + debug_printf("%s: unknown pci id 0x%x, cannot create screen\n", + __FUNCTION__, pci_id); + return NULL; + } + + i915screen->pci_id = pci_id; + + i915screen->screen.winsys = winsys; + + i915screen->screen.destroy = i915_destroy_screen; + + i915screen->screen.get_name = i915_get_name; + i915screen->screen.get_vendor = i915_get_vendor; + i915screen->screen.get_param = i915_get_param; + i915screen->screen.get_paramf = i915_get_paramf; + i915screen->screen.is_format_supported = i915_is_format_supported; + i915screen->screen.surface_map = i915_surface_map; + i915screen->screen.surface_unmap = i915_surface_unmap; + + i915_init_screen_texture_functions(&i915screen->screen); + + return &i915screen->screen; +} diff --git a/src/gallium/drivers/i915simple/i915_screen.h b/src/gallium/drivers/i915simple/i915_screen.h new file mode 100644 index 0000000000..73b0ff05ce --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_screen.h @@ -0,0 +1,69 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef I915_SCREEN_H +#define I915_SCREEN_H + + +#include "pipe/p_screen.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Subclass of pipe_screen + */ +struct i915_screen +{ + struct pipe_screen screen; + + boolean is_i945; + uint pci_id; +}; + + +/** cast wrapper */ +static INLINE struct i915_screen * +i915_screen(struct pipe_screen *pscreen) +{ + return (struct i915_screen *) pscreen; +} + + +extern struct pipe_screen * +i915_create_screen(struct pipe_winsys *winsys, uint pci_id); + + +#ifdef __cplusplus +} +#endif + +#endif /* I915_SCREEN_H */ diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c new file mode 100644 index 0000000000..d2487d8277 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -0,0 +1,788 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "draw/draw_context.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_parse.h" + +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_state.h" +#include "i915_state_inlines.h" +#include "i915_fpc.h" + +/* The i915 (and related graphics cores) do not support GL_CLAMP. The + * Intel drivers for "other operating systems" implement GL_CLAMP as + * GL_CLAMP_TO_EDGE, so the same is done here. + */ +static unsigned +translate_wrap_mode(unsigned wrap) +{ + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + return TEXCOORDMODE_WRAP; + case PIPE_TEX_WRAP_CLAMP: + return TEXCOORDMODE_CLAMP_EDGE; /* not quite correct */ + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return TEXCOORDMODE_CLAMP_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return TEXCOORDMODE_CLAMP_BORDER; +// case PIPE_TEX_WRAP_MIRRORED_REPEAT: +// return TEXCOORDMODE_MIRROR; + default: + return TEXCOORDMODE_WRAP; + } +} + +static unsigned translate_img_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: + return FILTER_NEAREST; + case PIPE_TEX_FILTER_LINEAR: + return FILTER_LINEAR; + case PIPE_TEX_FILTER_ANISO: + return FILTER_ANISOTROPIC; + default: + assert(0); + return FILTER_NEAREST; + } +} + +static unsigned translate_mip_filter( unsigned filter ) +{ + switch (filter) { + case PIPE_TEX_MIPFILTER_NONE: + return MIPFILTER_NONE; + case PIPE_TEX_MIPFILTER_NEAREST: + return MIPFILTER_NEAREST; + case PIPE_TEX_MIPFILTER_LINEAR: + return MIPFILTER_LINEAR; + default: + 
assert(0); + return MIPFILTER_NONE; + } +} + + +/* None of this state is actually used for anything yet. + */ +static void * +i915_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + struct i915_blend_state *cso_data = CALLOC_STRUCT( i915_blend_state ); + + { + unsigned eqRGB = blend->rgb_func; + unsigned srcRGB = blend->rgb_src_factor; + unsigned dstRGB = blend->rgb_dst_factor; + + unsigned eqA = blend->alpha_func; + unsigned srcA = blend->alpha_src_factor; + unsigned dstA = blend->alpha_dst_factor; + + /* Special handling for MIN/MAX filter modes handled at + * state_tracker level. + */ + + if (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB) { + + cso_data->iab = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | + IAB_MODIFY_ENABLE | + IAB_ENABLE | + IAB_MODIFY_FUNC | + IAB_MODIFY_SRC_FACTOR | + IAB_MODIFY_DST_FACTOR | + SRC_ABLND_FACT(i915_translate_blend_factor(srcA)) | + DST_ABLND_FACT(i915_translate_blend_factor(dstA)) | + (i915_translate_blend_func(eqA) << IAB_FUNC_SHIFT)); + } + else { + cso_data->iab = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | + IAB_MODIFY_ENABLE | + 0); + } + } + + cso_data->modes4 |= (_3DSTATE_MODES_4_CMD | + ENABLE_LOGIC_OP_FUNC | + LOGIC_OP_FUNC(i915_translate_logic_op(blend->logicop_func))); + + if (blend->logicop_enable) + cso_data->LIS5 |= S5_LOGICOP_ENABLE; + + if (blend->dither) + cso_data->LIS5 |= S5_COLOR_DITHER_ENABLE; + + if ((blend->colormask & PIPE_MASK_R) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_RED; + + if ((blend->colormask & PIPE_MASK_G) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_GREEN; + + if ((blend->colormask & PIPE_MASK_B) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_BLUE; + + if ((blend->colormask & PIPE_MASK_A) == 0) + cso_data->LIS5 |= S5_WRITEDISABLE_ALPHA; + + if (blend->blend_enable) { + unsigned funcRGB = blend->rgb_func; + unsigned srcRGB = blend->rgb_src_factor; + unsigned dstRGB = blend->rgb_dst_factor; + + cso_data->LIS6 |= (S6_CBUF_BLEND_ENABLE | + 
SRC_BLND_FACT(i915_translate_blend_factor(srcRGB)) | + DST_BLND_FACT(i915_translate_blend_factor(dstRGB)) | + (i915_translate_blend_func(funcRGB) << S6_CBUF_BLEND_FUNC_SHIFT)); + } + + return cso_data; +} + +static void i915_bind_blend_state(struct pipe_context *pipe, + void *blend) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + i915->blend = (struct i915_blend_state*)blend; + + i915->dirty |= I915_NEW_BLEND; +} + + +static void i915_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + FREE(blend); +} + +static void i915_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + i915->blend_color = *blend_color; + + i915->dirty |= I915_NEW_BLEND; +} + +static void * +i915_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + struct i915_sampler_state *cso = CALLOC_STRUCT( i915_sampler_state ); + const unsigned ws = sampler->wrap_s; + const unsigned wt = sampler->wrap_t; + const unsigned wr = sampler->wrap_r; + unsigned minFilt, magFilt; + unsigned mipFilt; + + cso->templ = sampler; + + mipFilt = translate_mip_filter(sampler->min_mip_filter); + minFilt = translate_img_filter( sampler->min_img_filter ); + magFilt = translate_img_filter( sampler->mag_img_filter ); + + if (sampler->max_anisotropy > 2.0) { + cso->state[0] |= SS2_MAX_ANISO_4; + } + + { + int b = (int) (sampler->lod_bias * 16.0); + b = CLAMP(b, -256, 255); + cso->state[0] |= ((b << SS2_LOD_BIAS_SHIFT) & SS2_LOD_BIAS_MASK); + } + + /* Shadow: + */ + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) + { + cso->state[0] |= (SS2_SHADOW_ENABLE | + i915_translate_compare_func(sampler->compare_func)); + + minFilt = FILTER_4X4_FLAT; + magFilt = FILTER_4X4_FLAT; + } + + cso->state[0] |= ((minFilt << SS2_MIN_FILTER_SHIFT) | + (mipFilt << SS2_MIP_FILTER_SHIFT) | + (magFilt << SS2_MAG_FILTER_SHIFT)); + + 
cso->state[1] |= + ((translate_wrap_mode(ws) << SS3_TCX_ADDR_MODE_SHIFT) | + (translate_wrap_mode(wt) << SS3_TCY_ADDR_MODE_SHIFT) | + (translate_wrap_mode(wr) << SS3_TCZ_ADDR_MODE_SHIFT)); + + if (sampler->normalized_coords) + cso->state[1] |= SS3_NORMALIZED_COORDS; + + { + int minlod = (int) (16.0 * sampler->min_lod); + int maxlod = (int) (16.0 * sampler->max_lod); + minlod = CLAMP(minlod, 0, 16 * 11); + maxlod = CLAMP(maxlod, 0, 16 * 11); + + if (minlod > maxlod) + maxlod = minlod; + + cso->minlod = minlod; + cso->maxlod = maxlod; + } + + { + ubyte r = float_to_ubyte(sampler->border_color[0]); + ubyte g = float_to_ubyte(sampler->border_color[1]); + ubyte b = float_to_ubyte(sampler->border_color[2]); + ubyte a = float_to_ubyte(sampler->border_color[3]); + cso->state[2] = I915PACKCOLOR8888(r, g, b, a); + } + return cso; +} + +static void i915_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct i915_context *i915 = i915_context(pipe); + unsigned i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == i915->num_samplers && + !memcmp(i915->sampler, sampler, num * sizeof(void *))) + return; + + draw_flush(i915->draw); + + for (i = 0; i < num; ++i) + i915->sampler[i] = sampler[i]; + for (i = num; i < PIPE_MAX_SAMPLERS; ++i) + i915->sampler[i] = NULL; + + i915->num_samplers = num; + + i915->dirty |= I915_NEW_SAMPLER; +} + +static void i915_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE(sampler); +} + + +/** XXX move someday? Or consolidate all these simple state setters + * into one file. 
+ */ + +static void * +i915_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + struct i915_depth_stencil_state *cso = CALLOC_STRUCT( i915_depth_stencil_state ); + + { + int testmask = depth_stencil->stencil[0].value_mask & 0xff; + int writemask = depth_stencil->stencil[0].write_mask & 0xff; + + cso->stencil_modes4 |= (_3DSTATE_MODES_4_CMD | + ENABLE_STENCIL_TEST_MASK | + STENCIL_TEST_MASK(testmask) | + ENABLE_STENCIL_WRITE_MASK | + STENCIL_WRITE_MASK(writemask)); + } + + if (depth_stencil->stencil[0].enabled) { + int test = i915_translate_compare_func(depth_stencil->stencil[0].func); + int fop = i915_translate_stencil_op(depth_stencil->stencil[0].fail_op); + int dfop = i915_translate_stencil_op(depth_stencil->stencil[0].zfail_op); + int dpop = i915_translate_stencil_op(depth_stencil->stencil[0].zpass_op); + int ref = depth_stencil->stencil[0].ref_value & 0xff; + + cso->stencil_LIS5 |= (S5_STENCIL_TEST_ENABLE | + S5_STENCIL_WRITE_ENABLE | + (ref << S5_STENCIL_REF_SHIFT) | + (test << S5_STENCIL_TEST_FUNC_SHIFT) | + (fop << S5_STENCIL_FAIL_SHIFT) | + (dfop << S5_STENCIL_PASS_Z_FAIL_SHIFT) | + (dpop << S5_STENCIL_PASS_Z_PASS_SHIFT)); + } + + if (depth_stencil->stencil[1].enabled) { + int test = i915_translate_compare_func(depth_stencil->stencil[1].func); + int fop = i915_translate_stencil_op(depth_stencil->stencil[1].fail_op); + int dfop = i915_translate_stencil_op(depth_stencil->stencil[1].zfail_op); + int dpop = i915_translate_stencil_op(depth_stencil->stencil[1].zpass_op); + int ref = depth_stencil->stencil[1].ref_value & 0xff; + int tmask = depth_stencil->stencil[1].value_mask & 0xff; + int wmask = depth_stencil->stencil[1].write_mask & 0xff; + + cso->bfo[0] = (_3DSTATE_BACKFACE_STENCIL_OPS | + BFO_ENABLE_STENCIL_FUNCS | + BFO_ENABLE_STENCIL_TWO_SIDE | + BFO_ENABLE_STENCIL_REF | + BFO_STENCIL_TWO_SIDE | + (ref << BFO_STENCIL_REF_SHIFT) | + (test << BFO_STENCIL_TEST_SHIFT) | + (fop << 
BFO_STENCIL_FAIL_SHIFT) | + (dfop << BFO_STENCIL_PASS_Z_FAIL_SHIFT) | + (dpop << BFO_STENCIL_PASS_Z_PASS_SHIFT)); + + cso->bfo[1] = (_3DSTATE_BACKFACE_STENCIL_MASKS | + BFM_ENABLE_STENCIL_TEST_MASK | + BFM_ENABLE_STENCIL_WRITE_MASK | + (tmask << BFM_STENCIL_TEST_MASK_SHIFT) | + (wmask << BFM_STENCIL_WRITE_MASK_SHIFT)); + } + else { + /* This actually disables two-side stencil: The bit set is a + * modify-enable bit to indicate we are changing the two-side + * setting. Then there is a symbolic zero to show that we are + * setting the flag to zero/off. + */ + cso->bfo[0] = (_3DSTATE_BACKFACE_STENCIL_OPS | + BFO_ENABLE_STENCIL_TWO_SIDE | + 0); + cso->bfo[1] = 0; + } + + if (depth_stencil->depth.enabled) { + int func = i915_translate_compare_func(depth_stencil->depth.func); + + cso->depth_LIS6 |= (S6_DEPTH_TEST_ENABLE | + (func << S6_DEPTH_TEST_FUNC_SHIFT)); + + if (depth_stencil->depth.writemask) + cso->depth_LIS6 |= S6_DEPTH_WRITE_ENABLE; + } + + if (depth_stencil->alpha.enabled) { + int test = i915_translate_compare_func(depth_stencil->alpha.func); + ubyte refByte = float_to_ubyte(depth_stencil->alpha.ref); + + cso->depth_LIS6 |= (S6_ALPHA_TEST_ENABLE | + (test << S6_ALPHA_TEST_FUNC_SHIFT) | + (((unsigned) refByte) << S6_ALPHA_REF_SHIFT)); + } + + return cso; +} + +static void i915_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + i915->depth_stencil = (const struct i915_depth_stencil_state *)depth_stencil; + + i915->dirty |= I915_NEW_DEPTH_STENCIL; +} + +static void i915_delete_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + FREE(depth_stencil); +} + + +static void i915_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + memcpy( &i915->scissor, scissor, sizeof(*scissor) ); + i915->dirty |= I915_NEW_SCISSOR; +} + 
+ +static void i915_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ +} + + + +static void * +i915_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct i915_context *i915 = i915_context(pipe); + struct i915_fragment_shader *ifs = CALLOC_STRUCT(i915_fragment_shader); + if (!ifs) + return NULL; + + ifs->state.tokens = tgsi_dup_tokens(templ->tokens); + + tgsi_scan_shader(templ->tokens, &ifs->info); + + /* The shader's compiled to i915 instructions here */ + i915_translate_fragment_program(i915, ifs); + + return ifs; +} + +static void +i915_bind_fs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + i915->fs = (struct i915_fragment_shader*) shader; + + i915->dirty |= I915_NEW_FS; +} + +static +void i915_delete_fs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_fragment_shader *ifs = (struct i915_fragment_shader *) shader; + + if (ifs->program) + FREE(ifs->program); + ifs->program_len = 0; + + FREE((struct tgsi_token *)ifs->state.tokens); + + FREE(ifs); +} + + +static void * +i915_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct i915_context *i915 = i915_context(pipe); + + /* just pass-through to draw module */ + return draw_create_vertex_shader(i915->draw, templ); +} + +static void i915_bind_vs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_context *i915 = i915_context(pipe); + + /* just pass-through to draw module */ + draw_bind_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); + + i915->dirty |= I915_NEW_VS; +} + +static void i915_delete_vs_state(struct pipe_context *pipe, void *shader) +{ + struct i915_context *i915 = i915_context(pipe); + + /* just pass-through to draw module */ + draw_delete_vertex_shader(i915->draw, (struct draw_vertex_shader *) shader); +} + +static void i915_set_constant_buffer(struct 
pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct i915_context *i915 = i915_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + draw_flush(i915->draw); + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + /* Make a copy of shader constants. + * During fragment program translation we may add additional + * constants to the array. + * + * We want to consider the situation where some user constants + * (ex: a material color) may change frequently but the shader program + * stays the same. In that case we should only be updating the first + * N constants, leaving any extras from shader translation alone. + */ + if (buf) { + void *mapped; + if (buf->size && + (mapped = ws->buffer_map(ws, buf->buffer, + PIPE_BUFFER_USAGE_CPU_READ))) { + memcpy(i915->current.constants[shader], mapped, buf->size); + ws->buffer_unmap(ws, buf->buffer); + i915->current.num_user_constants[shader] + = buf->size / (4 * sizeof(float)); + } + else { + i915->current.num_user_constants[shader] = 0; + } + } + + i915->dirty |= I915_NEW_CONSTANTS; +} + + +static void i915_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ + struct i915_context *i915 = i915_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == i915->num_textures && + !memcmp(i915->texture, texture, num * sizeof(struct pipe_texture *))) + return; + + /* Fixes wrong texture in texobj with VBUF */ + draw_flush(i915->draw); + + for (i = 0; i < num; i++) + pipe_texture_reference((struct pipe_texture **) &i915->texture[i], + texture[i]); + + for (i = num; i < i915->num_textures; i++) + pipe_texture_reference((struct pipe_texture **) &i915->texture[i], + NULL); + + i915->num_textures = num; + + i915->dirty |= I915_NEW_TEXTURE; +} + + + +static void i915_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct i915_context *i915 = 
i915_context(pipe); + draw_flush(i915->draw); + + i915->framebuffer = *fb; /* struct copy */ + + i915->dirty |= I915_NEW_FRAMEBUFFER; +} + + + +static void i915_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct i915_context *i915 = i915_context(pipe); + draw_flush(i915->draw); + + draw_set_clip_state(i915->draw, clip); + + i915->dirty |= I915_NEW_CLIP; +} + + + +/* Called when driver state tracker notices changes to the viewport + * matrix: + */ +static void i915_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->viewport = *viewport; /* struct copy */ + + /* pass the viewport info to the draw module */ + draw_set_viewport_state(i915->draw, &i915->viewport); + + i915->dirty |= I915_NEW_VIEWPORT; +} + + +static void * +i915_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rasterizer) +{ + struct i915_rasterizer_state *cso = CALLOC_STRUCT( i915_rasterizer_state ); + + cso->templ = rasterizer; + cso->color_interp = rasterizer->flatshade ? 
INTERP_CONSTANT : INTERP_LINEAR; + cso->light_twoside = rasterizer->light_twoside; + cso->ds[0].u = _3DSTATE_DEPTH_OFFSET_SCALE; + cso->ds[1].f = rasterizer->offset_scale; + if (rasterizer->poly_stipple_enable) { + cso->st |= ST1_ENABLE; + } + + if (rasterizer->scissor) + cso->sc[0] = _3DSTATE_SCISSOR_ENABLE_CMD | ENABLE_SCISSOR_RECT; + else + cso->sc[0] = _3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT; + + switch (rasterizer->cull_mode) { + case PIPE_WINDING_NONE: + cso->LIS4 |= S4_CULLMODE_NONE; + break; + case PIPE_WINDING_CW: + cso->LIS4 |= S4_CULLMODE_CW; + break; + case PIPE_WINDING_CCW: + cso->LIS4 |= S4_CULLMODE_CCW; + break; + case PIPE_WINDING_BOTH: + cso->LIS4 |= S4_CULLMODE_BOTH; + break; + } + + { + int line_width = CLAMP((int)(rasterizer->line_width * 2), 1, 0xf); + + cso->LIS4 |= line_width << S4_LINE_WIDTH_SHIFT; + + if (rasterizer->line_smooth) + cso->LIS4 |= S4_LINE_ANTIALIAS_ENABLE; + } + + { + int point_size = CLAMP((int) rasterizer->point_size, 1, 0xff); + + cso->LIS4 |= point_size << S4_POINT_WIDTH_SHIFT; + } + + if (rasterizer->flatshade) { + cso->LIS4 |= (S4_FLATSHADE_ALPHA | + S4_FLATSHADE_COLOR | + S4_FLATSHADE_SPECULAR); + } + + cso->LIS7 = fui( rasterizer->offset_units ); + + + return cso; +} + +static void i915_bind_rasterizer_state( struct pipe_context *pipe, + void *raster ) +{ + struct i915_context *i915 = i915_context(pipe); + + i915->rasterizer = (struct i915_rasterizer_state *)raster; + + /* pass-through to draw module */ + draw_set_rasterizer_state(i915->draw, + (i915->rasterizer ? 
i915->rasterizer->templ : NULL)); + + i915->dirty |= I915_NEW_RASTERIZER; +} + +static void i915_delete_rasterizer_state(struct pipe_context *pipe, + void *raster) +{ + FREE(raster); +} + +static void i915_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct i915_context *i915 = i915_context(pipe); + /* Because we change state before the draw_set_vertex_buffers call + * we need a flush here, just to be sure. + */ + draw_flush(i915->draw); + + memcpy(i915->vertex_buffer, buffers, count * sizeof(buffers[0])); + i915->num_vertex_buffers = count; + + /* pass-through to draw module */ + draw_set_vertex_buffers(i915->draw, count, buffers); +} + +static void i915_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements) +{ + struct i915_context *i915 = i915_context(pipe); + /* Because we change state before the draw_set_vertex_buffers call + * we need a flush here, just to be sure. 
+ */ + draw_flush(i915->draw); + + i915->num_vertex_elements = count; + /* pass-through to draw module */ + draw_set_vertex_elements(i915->draw, count, elements); +} + + +static void i915_set_edgeflags(struct pipe_context *pipe, + const unsigned *bitfield) +{ + /* TODO do something here */ +} + +void +i915_init_state_functions( struct i915_context *i915 ) +{ + i915->pipe.set_edgeflags = i915_set_edgeflags; + i915->pipe.create_blend_state = i915_create_blend_state; + i915->pipe.bind_blend_state = i915_bind_blend_state; + i915->pipe.delete_blend_state = i915_delete_blend_state; + + i915->pipe.create_sampler_state = i915_create_sampler_state; + i915->pipe.bind_sampler_states = i915_bind_sampler_states; + i915->pipe.delete_sampler_state = i915_delete_sampler_state; + + i915->pipe.create_depth_stencil_alpha_state = i915_create_depth_stencil_state; + i915->pipe.bind_depth_stencil_alpha_state = i915_bind_depth_stencil_state; + i915->pipe.delete_depth_stencil_alpha_state = i915_delete_depth_stencil_state; + + i915->pipe.create_rasterizer_state = i915_create_rasterizer_state; + i915->pipe.bind_rasterizer_state = i915_bind_rasterizer_state; + i915->pipe.delete_rasterizer_state = i915_delete_rasterizer_state; + i915->pipe.create_fs_state = i915_create_fs_state; + i915->pipe.bind_fs_state = i915_bind_fs_state; + i915->pipe.delete_fs_state = i915_delete_fs_state; + i915->pipe.create_vs_state = i915_create_vs_state; + i915->pipe.bind_vs_state = i915_bind_vs_state; + i915->pipe.delete_vs_state = i915_delete_vs_state; + + i915->pipe.set_blend_color = i915_set_blend_color; + i915->pipe.set_clip_state = i915_set_clip_state; + i915->pipe.set_constant_buffer = i915_set_constant_buffer; + i915->pipe.set_framebuffer_state = i915_set_framebuffer_state; + + i915->pipe.set_polygon_stipple = i915_set_polygon_stipple; + i915->pipe.set_scissor_state = i915_set_scissor_state; + i915->pipe.set_sampler_textures = i915_set_sampler_textures; + i915->pipe.set_viewport_state = 
i915_set_viewport_state; + i915->pipe.set_vertex_buffers = i915_set_vertex_buffers; + i915->pipe.set_vertex_elements = i915_set_vertex_elements; +} diff --git a/src/gallium/drivers/i915simple/i915_state.h b/src/gallium/drivers/i915simple/i915_state.h new file mode 100644 index 0000000000..86c6b0027d --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state.h @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef I915_STATE_H +#define I915_STATE_H + +struct i915_context; + + +struct i915_tracked_state { + unsigned dirty; + void (*update)( struct i915_context * ); +}; + +void i915_update_immediate( struct i915_context *i915 ); +void i915_update_dynamic( struct i915_context *i915 ); +void i915_update_derived( struct i915_context *i915 ); +void i915_update_samplers( struct i915_context *i915 ); +void i915_update_textures(struct i915_context *i915); + +void i915_emit_hardware_state( struct i915_context *i915 ); + +#endif diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c new file mode 100644 index 0000000000..178d4e8781 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -0,0 +1,183 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "i915_context.h" +#include "i915_state.h" +#include "i915_reg.h" +#include "i915_fpc.h" + + + +/** + * Determine the hardware vertex layout. + * Depends on vertex/fragment shader state. + */ +static void calculate_vertex_layout( struct i915_context *i915 ) +{ + const struct i915_fragment_shader *fs = i915->fs; + const enum interp_mode colorInterp = i915->rasterizer->color_interp; + struct vertex_info vinfo; + boolean texCoords[8], colors[2], fog, needW; + uint i; + int src; + + memset(texCoords, 0, sizeof(texCoords)); + colors[0] = colors[1] = fog = needW = FALSE; + memset(&vinfo, 0, sizeof(vinfo)); + + /* Determine which fragment program inputs are needed. Setup HW vertex + * layout below, in the HW-specific attribute order. 
+ */ + for (i = 0; i < fs->info.num_inputs; i++) { + switch (fs->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + break; + case TGSI_SEMANTIC_COLOR: + assert(fs->info.input_semantic_index[i] < 2); + colors[fs->info.input_semantic_index[i]] = TRUE; + break; + case TGSI_SEMANTIC_GENERIC: + /* usually a texcoord */ + { + const uint unit = fs->info.input_semantic_index[i]; + assert(unit < 8); + texCoords[unit] = TRUE; + needW = TRUE; + } + break; + case TGSI_SEMANTIC_FOG: + fog = TRUE; + break; + default: + assert(0); + } + } + + + /* pos */ + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_POSITION, 0); + if (needW) { + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src); + vinfo.hwfmt[0] |= S4_VFMT_XYZW; + vinfo.attrib[0].emit = EMIT_4F; + } + else { + draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src); + vinfo.hwfmt[0] |= S4_VFMT_XYZ; + vinfo.attrib[0].emit = EMIT_3F; + } + + /* hardware point size */ + /* XXX todo */ + + /* primary color */ + if (colors[0]) { + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 0); + draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); + vinfo.hwfmt[0] |= S4_VFMT_COLOR; + } + + /* secondary color */ + if (colors[1]) { + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_COLOR, 1); + draw_emit_vertex_attr(&vinfo, EMIT_4UB, colorInterp, src); + vinfo.hwfmt[0] |= S4_VFMT_SPEC_FOG; + } + + /* fog coord, not fog blend factor */ + if (fog) { + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_FOG, 0); + draw_emit_vertex_attr(&vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); + vinfo.hwfmt[0] |= S4_VFMT_FOG_PARAM; + } + + /* texcoords */ + for (i = 0; i < 8; i++) { + uint hwtc; + if (texCoords[i]) { + hwtc = TEXCOORDFMT_4D; + src = draw_find_vs_output(i915->draw, TGSI_SEMANTIC_GENERIC, i); + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + } + else { + hwtc = TEXCOORDFMT_NOT_PRESENT; + } + vinfo.hwfmt[1] |= hwtc << (i * 4); + } + + draw_compute_vertex_size(&vinfo); + + if 
(memcmp(&i915->current.vertex_info, &vinfo, sizeof(vinfo))) { + /* Need to set this flag so that the LIS2/4 registers get set. + * It also means the i915_update_immediate() function must be called + * after this one, in i915_update_derived(). + */ + i915->dirty |= I915_NEW_VERTEX_FORMAT; + + memcpy(&i915->current.vertex_info, &vinfo, sizeof(vinfo)); + } +} + + + + +/* Hopefully this will remain quite simple, otherwise need to pull in + * something like the state tracker mechanism. + */ +void i915_update_derived( struct i915_context *i915 ) +{ + if (i915->dirty & (I915_NEW_RASTERIZER | I915_NEW_FS | I915_NEW_VS)) + calculate_vertex_layout( i915 ); + + if (i915->dirty & (I915_NEW_SAMPLER | I915_NEW_TEXTURE)) + i915_update_samplers(i915); + + if (i915->dirty & I915_NEW_TEXTURE) + i915_update_textures(i915); + + if (i915->dirty) + i915_update_immediate( i915 ); + + if (i915->dirty) + i915_update_dynamic( i915 ); + + if (i915->dirty & I915_NEW_FS) { + i915->hardware_dirty |= I915_HW_PROGRAM; /* XXX right? */ + } + + /* HW emit currently references framebuffer state directly: + */ + if (i915->dirty & I915_NEW_FRAMEBUFFER) + i915->hardware_dirty |= I915_HW_STATIC; + + i915->dirty = 0; +} diff --git a/src/gallium/drivers/i915simple/i915_state_dynamic.c b/src/gallium/drivers/i915simple/i915_state_dynamic.c new file mode 100644 index 0000000000..86126a5a15 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_dynamic.c @@ -0,0 +1,310 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915_batch.h" +#include "i915_state_inlines.h" +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_state.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" + +#define FILE_DEBUG_FLAG DEBUG_STATE + +/* State that we have chosen to store in the DYNAMIC segment of the + * i915 indirect state mechanism. + * + * Can't cache these in the way we do the static state, as there is no + * start/size in the command packet, instead an 'end' value that gets + * incremented. + * + * Additionally, there seems to be a requirement to re-issue the full + * (active) state every time a 4kb boundary is crossed. 
+ */ + +static INLINE void set_dynamic_indirect( struct i915_context *i915, + unsigned offset, + const unsigned *src, + unsigned dwords ) +{ + unsigned i; + + for (i = 0; i < dwords; i++) + i915->current.dynamic[offset + i] = src[i]; + + i915->hardware_dirty |= I915_HW_DYNAMIC; +} + + +/*********************************************************************** + * Modes4: stencil masks and logicop + */ +static void upload_MODES4( struct i915_context *i915 ) +{ + unsigned modes4 = 0; + + /* I915_NEW_STENCIL */ + modes4 |= i915->depth_stencil->stencil_modes4; + /* I915_NEW_BLEND */ + modes4 |= i915->blend->modes4; + + /* Always, so that we know when state is in-active: + */ + set_dynamic_indirect( i915, + I915_DYNAMIC_MODES4, + &modes4, + 1 ); +} + +const struct i915_tracked_state i915_upload_MODES4 = { + I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL, + upload_MODES4 +}; + + + + +/*********************************************************************** + */ + +static void upload_BFO( struct i915_context *i915 ) +{ + set_dynamic_indirect( i915, + I915_DYNAMIC_BFO_0, + &(i915->depth_stencil->bfo[0]), + 2 ); +} + +const struct i915_tracked_state i915_upload_BFO = { + I915_NEW_DEPTH_STENCIL, + upload_BFO +}; + + +/*********************************************************************** + */ + + +static void upload_BLENDCOLOR( struct i915_context *i915 ) +{ + unsigned bc[2]; + + memset( bc, 0, sizeof(bc) ); + + /* I915_NEW_BLEND {_COLOR} + */ + { + const float *color = i915->blend_color.color; + + bc[0] = _3DSTATE_CONST_BLEND_COLOR_CMD; + bc[1] = pack_ui32_float4( color[0], + color[1], + color[2], + color[3] ); + } + + set_dynamic_indirect( i915, + I915_DYNAMIC_BC_0, + bc, + 2 ); +} + +const struct i915_tracked_state i915_upload_BLENDCOLOR = { + I915_NEW_BLEND, + upload_BLENDCOLOR +}; + +/*********************************************************************** + */ + + +static void upload_IAB( struct i915_context *i915 ) +{ + unsigned iab = i915->blend->iab; + + + 
set_dynamic_indirect( i915, + I915_DYNAMIC_IAB, + &iab, + 1 ); +} + +const struct i915_tracked_state i915_upload_IAB = { + I915_NEW_BLEND, + upload_IAB +}; + + +/*********************************************************************** + */ + + + +static void upload_DEPTHSCALE( struct i915_context *i915 ) +{ + set_dynamic_indirect( i915, + I915_DYNAMIC_DEPTHSCALE_0, + &(i915->rasterizer->ds[0].u), + 2 ); +} + +const struct i915_tracked_state i915_upload_DEPTHSCALE = { + I915_NEW_RASTERIZER, + upload_DEPTHSCALE +}; + + + +/*********************************************************************** + * Polygon stipple + * + * The i915 supports a 4x4 stipple natively, GL wants 32x32. + * Fortunately stipple is usually a repeating pattern. + * + * XXX: does stipple pattern need to be adjusted according to + * the window position? + * + * XXX: possibly need workaround for conform paths test. + */ + +static void upload_STIPPLE( struct i915_context *i915 ) +{ + unsigned st[2]; + + st[0] = _3DSTATE_STIPPLE; + st[1] = 0; + + /* I915_NEW_RASTERIZER + */ + st[1] |= i915->rasterizer->st; + + + /* I915_NEW_STIPPLE + */ + { + const ubyte *mask = (const ubyte *)i915->poly_stipple.stipple; + ubyte p[4]; + + p[0] = mask[12] & 0xf; + p[1] = mask[8] & 0xf; + p[2] = mask[4] & 0xf; + p[3] = mask[0] & 0xf; + + /* Not sure what to do about fallbacks, so for now just dont: + */ + st[1] |= ((p[0] << 0) | + (p[1] << 4) | + (p[2] << 8) | + (p[3] << 12)); + } + + + set_dynamic_indirect( i915, + I915_DYNAMIC_STP_0, + &st[0], + 2 ); +} + + +const struct i915_tracked_state i915_upload_STIPPLE = { + I915_NEW_RASTERIZER | I915_NEW_STIPPLE, + upload_STIPPLE +}; + + + +/*********************************************************************** + * Scissor. 
+ */ +static void upload_SCISSOR_ENABLE( struct i915_context *i915 ) +{ + set_dynamic_indirect( i915, + I915_DYNAMIC_SC_ENA_0, + &(i915->rasterizer->sc[0]), + 1 ); +} + +const struct i915_tracked_state i915_upload_SCISSOR_ENABLE = { + I915_NEW_RASTERIZER, + upload_SCISSOR_ENABLE +}; + + + +static void upload_SCISSOR_RECT( struct i915_context *i915 ) +{ + unsigned x1 = i915->scissor.minx; + unsigned y1 = i915->scissor.miny; + unsigned x2 = i915->scissor.maxx; + unsigned y2 = i915->scissor.maxy; + unsigned sc[3]; + + sc[0] = _3DSTATE_SCISSOR_RECT_0_CMD; + sc[1] = (y1 << 16) | (x1 & 0xffff); + sc[2] = (y2 << 16) | (x2 & 0xffff); + + set_dynamic_indirect( i915, + I915_DYNAMIC_SC_RECT_0, + &sc[0], + 3 ); +} + + +const struct i915_tracked_state i915_upload_SCISSOR_RECT = { + I915_NEW_SCISSOR, + upload_SCISSOR_RECT +}; + + + + + + +static const struct i915_tracked_state *atoms[] = { + &i915_upload_MODES4, + &i915_upload_BFO, + &i915_upload_BLENDCOLOR, + &i915_upload_IAB, + &i915_upload_DEPTHSCALE, + &i915_upload_STIPPLE, + &i915_upload_SCISSOR_ENABLE, + &i915_upload_SCISSOR_RECT +}; + +/* These will be dynamic indirect state commands, but for now just end + * up on the batch buffer with everything else. + */ +void i915_update_dynamic( struct i915_context *i915 ) +{ + int i; + + for (i = 0; i < Elements(atoms); i++) + if (i915->dirty & atoms[i]->dirty) + atoms[i]->update( i915 ); +} + diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c new file mode 100644 index 0000000000..9bd6f92323 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -0,0 +1,402 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "i915_reg.h" +#include "i915_context.h" +#include "i915_winsys.h" +#include "i915_batch.h" +#include "i915_reg.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" + +static unsigned translate_format( enum pipe_format format ) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + return COLOR_BUF_ARGB8888; + case PIPE_FORMAT_R5G6B5_UNORM: + return COLOR_BUF_RGB565; + default: + assert(0); + return 0; + } +} + +static unsigned translate_depth_format( enum pipe_format zformat ) +{ + switch (zformat) { + case PIPE_FORMAT_S8Z24_UNORM: + return DEPTH_FRMT_24_FIXED_8_OTHER; + case PIPE_FORMAT_Z16_UNORM: + return DEPTH_FRMT_16_FIXED; + default: + assert(0); + return 0; + } +} + + +/** + * Examine framebuffer state to determine width, height. 
+ */ +static boolean +framebuffer_size(const struct pipe_framebuffer_state *fb, + uint *width, uint *height) +{ + if (fb->cbufs[0]) { + *width = fb->cbufs[0]->width; + *height = fb->cbufs[0]->height; + return TRUE; + } + else if (fb->zsbuf) { + *width = fb->zsbuf->width; + *height = fb->zsbuf->height; + return TRUE; + } + else { + *width = *height = 0; + return FALSE; + } +} + + +/* Push the state into the sarea and/or texture memory. + */ +void +i915_emit_hardware_state(struct i915_context *i915 ) +{ + /* XXX: there must be an easier way */ + const unsigned dwords = ( 14 + + 7 + + I915_MAX_DYNAMIC + + 8 + + 2 + I915_TEX_UNITS*3 + + 2 + I915_TEX_UNITS*3 + + 2 + I915_MAX_CONSTANT*4 + +#if 0 + i915->current.program_len + +#else + i915->fs->program_len + +#endif + 6 + ) * 3/2; /* plus 50% margin */ + const unsigned relocs = ( I915_TEX_UNITS + + 3 + ) * 3/2; /* plus 50% margin */ + +#if 0 + debug_printf("i915_emit_hardware_state: %d dwords, %d relocs\n", dwords, relocs); +#endif + + if(!BEGIN_BATCH(dwords, relocs)) { + FLUSH_BATCH(NULL); + assert(BEGIN_BATCH(dwords, relocs)); + } + + /* 14 dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_INVARIENT) + { + OUT_BATCH(_3DSTATE_AA_CMD | + AA_LINE_ECAAR_WIDTH_ENABLE | + AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); + + OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_Z_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | + CSB_TCB(0, 0) | + CSB_TCB(1, 1) | + CSB_TCB(2, 2) | + CSB_TCB(3, 3) | + CSB_TCB(4, 4) | + CSB_TCB(5, 5) | + CSB_TCB(6, 6) | + CSB_TCB(7, 7)); + + OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | + ENABLE_TEXKILL_3D_4D | + TEXKILL_4D); + + /* Need to initialize this to zero. 
+ */ + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0)); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); + + /* disable indirect state for now + */ + OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); + OUT_BATCH(0); + } + + /* 7 dwords, 1 relocs */ + if (i915->hardware_dirty & I915_HW_IMMEDIATE) + { + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(0) | + I1_LOAD_S(1) | + I1_LOAD_S(2) | + I1_LOAD_S(4) | + I1_LOAD_S(5) | + I1_LOAD_S(6) | + (5)); + + if(i915->vbo) + OUT_RELOC(i915->vbo, + I915_BUFFER_ACCESS_READ, + i915->current.immediate[I915_IMMEDIATE_S0]); + else + /* FIXME: we should not do this */ + OUT_BATCH(0); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S1]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S2]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S4]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]); + OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]); + } + + /* I915_MAX_DYNAMIC dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_DYNAMIC) + { + int i; + for (i = 0; i < I915_MAX_DYNAMIC; i++) { + OUT_BATCH(i915->current.dynamic[i]); + } + } + + /* 8 dwords, 2 relocs */ + if (i915->hardware_dirty & I915_HW_STATIC) + { + struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; + struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; + + if (cbuf_surface) { + unsigned cpitch = cbuf_surface->stride; + unsigned ctile = BUF_3D_USE_FENCE; + if (cbuf_surface->texture && + ((struct i915_texture*)(cbuf_surface->texture))->tiled) { + ctile = BUF_3D_TILED_SURFACE; + } + + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + + OUT_BATCH(BUF_3D_ID_COLOR_BACK | + BUF_3D_PITCH(cpitch) | /* pitch in bytes */ + ctile); + + OUT_RELOC(cbuf_surface->buffer, + I915_BUFFER_ACCESS_WRITE, + cbuf_surface->offset); + } + + /* What happens if no zbuf?? 
+ */ + if (depth_surface) { + unsigned zpitch = depth_surface->stride; + unsigned ztile = BUF_3D_USE_FENCE; + if (depth_surface->texture && + ((struct i915_texture*)(depth_surface->texture))->tiled) { + ztile = BUF_3D_TILED_SURFACE; + } + + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + + OUT_BATCH(BUF_3D_ID_DEPTH | + BUF_3D_PITCH(zpitch) | /* pitch in bytes */ + ztile); + + OUT_RELOC(depth_surface->buffer, + I915_BUFFER_ACCESS_WRITE, + depth_surface->offset); + } + + { + unsigned cformat, zformat = 0; + + if (cbuf_surface) + cformat = cbuf_surface->format; + else + cformat = PIPE_FORMAT_A8R8G8B8_UNORM; /* arbitrary */ + cformat = translate_format(cformat); + + if (depth_surface) + zformat = translate_depth_format( i915->framebuffer.zsbuf->format ); + + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(DSTORG_HORT_BIAS(0x8) | /* .5 */ + DSTORG_VERT_BIAS(0x8) | /* .5 */ + LOD_PRECLAMP_OGL | + TEX_DEFAULT_COLOR_OGL | + cformat | + zformat ); + } + } + +#if 01 + /* texture images */ + /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */ + if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER)) + { + const uint nr = i915->current.sampler_enable_nr; + if (nr) { + const uint enabled = i915->current.sampler_enable_flags; + uint unit; + uint count = 0; + OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); + OUT_BATCH(enabled); + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + if (enabled & (1 << unit)) { + struct pipe_buffer *buf = + i915->texture[unit]->buffer; + uint offset = 0; + assert(buf); + + count++; + + OUT_RELOC(buf, + I915_BUFFER_ACCESS_READ, + offset); + OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ + OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ + } + } + assert(count == nr); + } + } +#endif + +#if 01 + /* samplers */ + /* 2 + I915_TEX_UNITS*3 dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_SAMPLER) + { + if (i915->current.sampler_enable_nr) { + int i; + + OUT_BATCH( _3DSTATE_SAMPLER_STATE | + (3 * i915->current.sampler_enable_nr) ); + + 
OUT_BATCH( i915->current.sampler_enable_flags ); + + for (i = 0; i < I915_TEX_UNITS; i++) { + if (i915->current.sampler_enable_flags & (1<<i)) { + OUT_BATCH( i915->current.sampler[i][0] ); + OUT_BATCH( i915->current.sampler[i][1] ); + OUT_BATCH( i915->current.sampler[i][2] ); + } + } + } + } +#endif + + /* constants */ + /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_PROGRAM) + { + /* Collate the user-defined constants with the fragment shader's + * immediates according to the constant_flags[] array. + */ + const uint nr = i915->fs->num_constants; + if (nr) { + uint i; + + OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); + OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) ); + + for (i = 0; i < nr; i++) { + const uint *c; + if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { + /* grab user-defined constant */ + c = (uint *) i915->current.constants[PIPE_SHADER_FRAGMENT][i]; + } + else { + /* emit program constant */ + c = (uint *) i915->fs->constants[i]; + } +#if 0 /* debug */ + { + float *f = (float *) c; + printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], + (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER + ? 
"user" : "immediate")); + } +#endif + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + } + } + } + + /* Fragment program */ + /* i915->current.program_len dwords, 0 relocs */ + if (i915->hardware_dirty & I915_HW_PROGRAM) + { + uint i; + /* we should always have, at least, a pass-through program */ + assert(i915->fs->program_len > 0); + for (i = 0; i < i915->fs->program_len; i++) { + OUT_BATCH(i915->fs->program[i]); + } + } + + /* drawing surface size */ + /* 6 dwords, 0 relocs */ + { + uint w, h; + boolean k = framebuffer_size(&i915->framebuffer, &w, &h); + (void)k; + assert(k); + + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(((w - 1) & 0xffff) | ((h - 1) << 16)); + OUT_BATCH(0); + OUT_BATCH(0); + } + + + i915->hardware_dirty = 0; +} diff --git a/src/gallium/drivers/i915simple/i915_state_immediate.c b/src/gallium/drivers/i915simple/i915_state_immediate.c new file mode 100644 index 0000000000..8c16bb4e27 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_immediate.c @@ -0,0 +1,225 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "i915_state_inlines.h" +#include "i915_context.h" +#include "i915_state.h" +#include "i915_reg.h" +#include "util/u_memory.h" + + +/* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet. + * Would like to opportunistically recombine all these fragments into + * a single packet containing only what has changed, but for now emit + * as multiple packets. + */ + + + + +/*********************************************************************** + * S0,S1: Vertex buffer state. + */ +static void upload_S0S1(struct i915_context *i915) +{ + unsigned LIS0, LIS1; + + /* INTEL_NEW_VBO */ + /* TODO: re-use vertex buffers here? */ + LIS0 = i915->vbo_offset; + + /* INTEL_NEW_VERTEX_SIZE -- do this where the vertex size is calculated! 
+ */ + { + unsigned vertex_size = i915->current.vertex_info.size; + + LIS1 = ((vertex_size << 24) | + (vertex_size << 16)); + } + + /* INTEL_NEW_VBO */ + /* TODO: use a vertex generation number to track vbo changes */ + if (1 || + i915->current.immediate[I915_IMMEDIATE_S0] != LIS0 || + i915->current.immediate[I915_IMMEDIATE_S1] != LIS1) + { + i915->current.immediate[I915_IMMEDIATE_S0] = LIS0; + i915->current.immediate[I915_IMMEDIATE_S1] = LIS1; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S0S1 = { + I915_NEW_VBO | I915_NEW_VERTEX_FORMAT, + upload_S0S1 +}; + + + + +/*********************************************************************** + * S4: Vertex format, rasterization state + */ +static void upload_S2S4(struct i915_context *i915) +{ + unsigned LIS2, LIS4; + + /* I915_NEW_VERTEX_FORMAT */ + { + LIS2 = i915->current.vertex_info.hwfmt[1]; + LIS4 = i915->current.vertex_info.hwfmt[0]; + /* + debug_printf("LIS2: 0x%x LIS4: 0x%x\n", LIS2, LIS4); + */ + assert(LIS4); /* should never be zero? 
*/ + } + + LIS4 |= i915->rasterizer->LIS4; + + if (LIS2 != i915->current.immediate[I915_IMMEDIATE_S2] || + LIS4 != i915->current.immediate[I915_IMMEDIATE_S4]) { + + i915->current.immediate[I915_IMMEDIATE_S2] = LIS2; + i915->current.immediate[I915_IMMEDIATE_S4] = LIS4; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + + +const struct i915_tracked_state i915_upload_S2S4 = { + I915_NEW_RASTERIZER | I915_NEW_VERTEX_FORMAT, + upload_S2S4 +}; + + + +/*********************************************************************** + * + */ +static void upload_S5( struct i915_context *i915 ) +{ + unsigned LIS5 = 0; + + LIS5 |= i915->depth_stencil->stencil_LIS5; + + LIS5 |= i915->blend->LIS5; + +#if 0 + /* I915_NEW_RASTERIZER */ + if (i915->state.Polygon->OffsetFill) { + LIS5 |= S5_GLOBAL_DEPTH_OFFSET_ENABLE; + } +#endif + + + if (LIS5 != i915->current.immediate[I915_IMMEDIATE_S5]) { + i915->current.immediate[I915_IMMEDIATE_S5] = LIS5; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S5 = { + (I915_NEW_DEPTH_STENCIL | I915_NEW_BLEND | I915_NEW_RASTERIZER), + upload_S5 +}; + + +/*********************************************************************** + */ +static void upload_S6( struct i915_context *i915 ) +{ + unsigned LIS6 = (2 << S6_TRISTRIP_PV_SHIFT); + + /* I915_NEW_FRAMEBUFFER + */ + if (i915->framebuffer.cbufs[0]) + LIS6 |= S6_COLOR_WRITE_ENABLE; + + /* I915_NEW_BLEND + */ + LIS6 |= i915->blend->LIS6; + + /* I915_NEW_DEPTH + */ + LIS6 |= i915->depth_stencil->depth_LIS6; + + if (LIS6 != i915->current.immediate[I915_IMMEDIATE_S6]) { + i915->current.immediate[I915_IMMEDIATE_S6] = LIS6; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S6 = { + I915_NEW_BLEND | I915_NEW_DEPTH_STENCIL | I915_NEW_FRAMEBUFFER, + upload_S6 +}; + + +/*********************************************************************** + */ +static void upload_S7( struct i915_context *i915 ) +{ + unsigned LIS7; + + 
/* I915_NEW_RASTERIZER + */ + LIS7 = i915->rasterizer->LIS7; + + if (LIS7 != i915->current.immediate[I915_IMMEDIATE_S7]) { + i915->current.immediate[I915_IMMEDIATE_S7] = LIS7; + i915->hardware_dirty |= I915_HW_IMMEDIATE; + } +} + +const struct i915_tracked_state i915_upload_S7 = { + I915_NEW_RASTERIZER, + upload_S7 +}; + + +static const struct i915_tracked_state *atoms[] = { + &i915_upload_S0S1, + &i915_upload_S2S4, + &i915_upload_S5, + &i915_upload_S6, + &i915_upload_S7 +}; + +/* + */ +void i915_update_immediate( struct i915_context *i915 ) +{ + int i; + + for (i = 0; i < Elements(atoms); i++) + if (i915->dirty & atoms[i]->dirty) + atoms[i]->update( i915 ); +} diff --git a/src/gallium/drivers/i915simple/i915_state_inlines.h b/src/gallium/drivers/i915simple/i915_state_inlines.h new file mode 100644 index 0000000000..378de8f9c4 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_inlines.h @@ -0,0 +1,230 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_STATE_INLINES_H +#define I915_STATE_INLINES_H + +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" +#include "i915_reg.h" + + +static INLINE unsigned +i915_translate_compare_func(unsigned func) +{ + switch (func) { + case PIPE_FUNC_NEVER: + return COMPAREFUNC_NEVER; + case PIPE_FUNC_LESS: + return COMPAREFUNC_LESS; + case PIPE_FUNC_LEQUAL: + return COMPAREFUNC_LEQUAL; + case PIPE_FUNC_GREATER: + return COMPAREFUNC_GREATER; + case PIPE_FUNC_GEQUAL: + return COMPAREFUNC_GEQUAL; + case PIPE_FUNC_NOTEQUAL: + return COMPAREFUNC_NOTEQUAL; + case PIPE_FUNC_EQUAL: + return COMPAREFUNC_EQUAL; + case PIPE_FUNC_ALWAYS: + return COMPAREFUNC_ALWAYS; + default: + return COMPAREFUNC_ALWAYS; + } +} + +static INLINE unsigned +i915_translate_stencil_op(unsigned op) +{ + switch (op) { + case PIPE_STENCIL_OP_KEEP: + return STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: + return STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: + return STENCILOP_INCRSAT; + case PIPE_STENCIL_OP_DECR: + return STENCILOP_DECRSAT; + case PIPE_STENCIL_OP_INCR_WRAP: + return STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR_WRAP: + return STENCILOP_DECR; + case PIPE_STENCIL_OP_INVERT: + return STENCILOP_INVERT; + default: + return STENCILOP_ZERO; + } +} + +static INLINE unsigned +i915_translate_blend_factor(unsigned factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: + return BLENDFACT_ZERO; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return BLENDFACT_SRC_ALPHA; + case PIPE_BLENDFACTOR_ONE: + return BLENDFACT_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return BLENDFACT_SRC_COLR; + 
case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return BLENDFACT_INV_SRC_COLR; + case PIPE_BLENDFACTOR_DST_COLOR: + return BLENDFACT_DST_COLR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return BLENDFACT_INV_DST_COLR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return BLENDFACT_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return BLENDFACT_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return BLENDFACT_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return BLENDFACT_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return BLENDFACT_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return BLENDFACT_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return BLENDFACT_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return BLENDFACT_INV_CONST_ALPHA; + default: + return BLENDFACT_ZERO; + } +} + +static INLINE unsigned +i915_translate_blend_func(unsigned mode) +{ + switch (mode) { + case PIPE_BLEND_ADD: + return BLENDFUNC_ADD; + case PIPE_BLEND_MIN: + return BLENDFUNC_MIN; + case PIPE_BLEND_MAX: + return BLENDFUNC_MAX; + case PIPE_BLEND_SUBTRACT: + return BLENDFUNC_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: + return BLENDFUNC_REVERSE_SUBTRACT; + default: + return 0; + } +} + + +static INLINE unsigned +i915_translate_logic_op(unsigned opcode) +{ + switch (opcode) { + case PIPE_LOGICOP_CLEAR: + return LOGICOP_CLEAR; + case PIPE_LOGICOP_AND: + return LOGICOP_AND; + case PIPE_LOGICOP_AND_REVERSE: + return LOGICOP_AND_RVRSE; + case PIPE_LOGICOP_COPY: + return LOGICOP_COPY; + case PIPE_LOGICOP_COPY_INVERTED: + return LOGICOP_COPY_INV; + case PIPE_LOGICOP_AND_INVERTED: + return LOGICOP_AND_INV; + case PIPE_LOGICOP_NOOP: + return LOGICOP_NOOP; + case PIPE_LOGICOP_XOR: + return LOGICOP_XOR; + case PIPE_LOGICOP_OR: + return LOGICOP_OR; + case PIPE_LOGICOP_OR_INVERTED: + return LOGICOP_OR_INV; + case PIPE_LOGICOP_NOR: + return LOGICOP_NOR; + case PIPE_LOGICOP_EQUIV: + return LOGICOP_EQUIV; + case PIPE_LOGICOP_INVERT: + return LOGICOP_INV; + 
case PIPE_LOGICOP_OR_REVERSE: + return LOGICOP_OR_RVRSE; + case PIPE_LOGICOP_NAND: + return LOGICOP_NAND; + case PIPE_LOGICOP_SET: + return LOGICOP_SET; + default: + return LOGICOP_SET; + } +} + + + +static INLINE boolean i915_validate_vertices( unsigned hw_prim, unsigned nr ) +{ + boolean ok; + + switch (hw_prim) { + case PRIM3D_POINTLIST: + ok = (nr >= 1); + assert(ok); + break; + case PRIM3D_LINELIST: + ok = (nr >= 2) && (nr % 2) == 0; + assert(ok); + break; + case PRIM3D_LINESTRIP: + ok = (nr >= 2); + assert(ok); + break; + case PRIM3D_TRILIST: + ok = (nr >= 3) && (nr % 3) == 0; + assert(ok); + break; + case PRIM3D_TRISTRIP: + ok = (nr >= 3); + assert(ok); + break; + case PRIM3D_TRIFAN: + ok = (nr >= 3); + assert(ok); + break; + case PRIM3D_POLY: + ok = (nr >= 3); + assert(ok); + break; + default: + assert(0); + ok = 0; + break; + } + + return ok; +} + +#endif diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c new file mode 100644 index 0000000000..c09c10601b --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -0,0 +1,299 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "util/u_memory.h" + +#include "i915_state_inlines.h" +#include "i915_context.h" +#include "i915_reg.h" +#include "i915_state.h" + + +/* + * A note about min_lod & max_lod. + * + * There is a circular dependancy between the sampler state + * and the map state to be submitted to hw. + * + * Two condition must be meet: + * min_lod =< max_lod == true + * max_lod =< last_level == true + * + * + * This is all fine and dandy if it where for the fact that max_lod + * is set on the map state instead of the sampler state. That is + * the max_lod we submit on map is: + * max_lod = MIN2(last_level, max_lod); + * + * So we need to update the map state when we change samplers and + * we need to be change the sampler state when map state is changed. + * The first part is done by calling i915_update_texture in + * i915_update_samplers and the second part is done else where in + * code tracking the state changes. + */ + +static void +i915_update_texture(struct i915_context *i915, + uint unit, + const struct i915_texture *tex, + const struct i915_sampler_state *sampler, + uint state[6]); +/** + * Compute i915 texture sampling state. + * + * Recalculate all state from scratch. Perhaps not the most + * efficient, but this has gotten complex enough that we need + * something which is understandable and reliable. 
+ * \param state returns the 3 words of compute state + */ +static void update_sampler(struct i915_context *i915, + uint unit, + const struct i915_sampler_state *sampler, + const struct i915_texture *tex, + unsigned state[3] ) +{ + const struct pipe_texture *pt = &tex->base; + unsigned minlod, lastlod; + + /* Need to do this after updating the maps, which call the + * intel_finalize_mipmap_tree and hence can update firstLevel: + */ + state[0] = sampler->state[0]; + state[1] = sampler->state[1]; + state[2] = sampler->state[2]; + + if (pt->format == PIPE_FORMAT_YCBCR || + pt->format == PIPE_FORMAT_YCBCR_REV) + state[0] |= SS2_COLORSPACE_CONVERSION; + + /* 3D textures don't seem to respect the border color. + * Fallback if there's ever a danger that they might refer to + * it. + * + * Effectively this means fallback on 3D clamp or + * clamp_to_border. + * + * XXX: Check if this is true on i945. + * XXX: Check if this bug got fixed in release silicon. + */ +#if 0 + { + const unsigned ws = sampler->templ->wrap_s; + const unsigned wt = sampler->templ->wrap_t; + const unsigned wr = sampler->templ->wrap_r; + if (pt->target == PIPE_TEXTURE_3D && + (sampler->templ->min_img_filter != PIPE_TEX_FILTER_NEAREST || + sampler->templ->mag_img_filter != PIPE_TEX_FILTER_NEAREST) && + (ws == PIPE_TEX_WRAP_CLAMP || + wt == PIPE_TEX_WRAP_CLAMP || + wr == PIPE_TEX_WRAP_CLAMP || + ws == PIPE_TEX_WRAP_CLAMP_TO_BORDER || + wt == PIPE_TEX_WRAP_CLAMP_TO_BORDER || + wr == PIPE_TEX_WRAP_CLAMP_TO_BORDER)) { + if (i915->strict_conformance) { + assert(0); + /* sampler->fallback = true; */ + /* TODO */ + } + } + } +#endif + + /* See note at the top of file */ + minlod = sampler->minlod; + lastlod = pt->last_level << 4; + + if (lastlod < minlod) { + minlod = lastlod; + } + + state[1] |= (sampler->minlod << SS3_MIN_LOD_SHIFT); + state[1] |= (unit << SS3_TEXTUREMAP_INDEX_SHIFT); +} + + +void i915_update_samplers( struct i915_context *i915 ) +{ + uint unit; + + i915->current.sampler_enable_nr = 0; + 
i915->current.sampler_enable_flags = 0x0; + + for (unit = 0; unit < i915->num_textures && unit < i915->num_samplers; + unit++) { + /* determine unit enable/disable by looking for a bound texture */ + /* could also examine the fragment program? */ + if (i915->texture[unit]) { + update_sampler( i915, + unit, + i915->sampler[unit], /* sampler state */ + i915->texture[unit], /* texture */ + i915->current.sampler[unit] /* the result */ + ); + i915_update_texture( i915, + unit, + i915->texture[unit], /* texture */ + i915->sampler[unit], /* sampler state */ + i915->current.texbuffer[unit] ); + + i915->current.sampler_enable_nr++; + i915->current.sampler_enable_flags |= (1 << unit); + } + } + + i915->hardware_dirty |= I915_HW_SAMPLER | I915_HW_MAP; +} + + +static uint +translate_texture_format(enum pipe_format pipeFormat) +{ + switch (pipeFormat) { + case PIPE_FORMAT_L8_UNORM: + return MAPSURF_8BIT | MT_8BIT_L8; + case PIPE_FORMAT_I8_UNORM: + return MAPSURF_8BIT | MT_8BIT_I8; + case PIPE_FORMAT_A8_UNORM: + return MAPSURF_8BIT | MT_8BIT_A8; + case PIPE_FORMAT_A8L8_UNORM: + return MAPSURF_16BIT | MT_16BIT_AY88; + case PIPE_FORMAT_R5G6B5_UNORM: + return MAPSURF_16BIT | MT_16BIT_RGB565; + case PIPE_FORMAT_A1R5G5B5_UNORM: + return MAPSURF_16BIT | MT_16BIT_ARGB1555; + case PIPE_FORMAT_A4R4G4B4_UNORM: + return MAPSURF_16BIT | MT_16BIT_ARGB4444; + case PIPE_FORMAT_A8R8G8B8_UNORM: + return MAPSURF_32BIT | MT_32BIT_ARGB8888; + case PIPE_FORMAT_YCBCR_REV: + return (MAPSURF_422 | MT_422_YCRCB_NORMAL); + case PIPE_FORMAT_YCBCR: + return (MAPSURF_422 | MT_422_YCRCB_SWAPY); +#if 0 + case PIPE_FORMAT_RGB_FXT1: + case PIPE_FORMAT_RGBA_FXT1: + return (MAPSURF_COMPRESSED | MT_COMPRESS_FXT1); +#endif + case PIPE_FORMAT_Z16_UNORM: + return (MAPSURF_16BIT | MT_16BIT_L16); +#if 0 + case PIPE_FORMAT_RGBA_DXT1: + case PIPE_FORMAT_RGB_DXT1: + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT1); + case PIPE_FORMAT_RGBA_DXT3: + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT2_3); + case 
PIPE_FORMAT_RGBA_DXT5: + return (MAPSURF_COMPRESSED | MT_COMPRESS_DXT4_5); +#endif + case PIPE_FORMAT_S8Z24_UNORM: + return (MAPSURF_32BIT | MT_32BIT_xI824); + default: + debug_printf("i915: translate_texture_format() bad image format %x\n", + pipeFormat); + assert(0); + return 0; + } +} + + +static void +i915_update_texture(struct i915_context *i915, + uint unit, + const struct i915_texture *tex, + const struct i915_sampler_state *sampler, + uint state[6]) +{ + const struct pipe_texture *pt = &tex->base; + uint format, pitch; + const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + const uint num_levels = pt->last_level; + unsigned max_lod = num_levels * 4; + unsigned tiled = MS3_USE_FENCE_REGS; + + assert(tex); + assert(width); + assert(height); + assert(depth); + + format = translate_texture_format(pt->format); + pitch = tex->stride; + + assert(format); + assert(pitch); + + if (tex->tiled) { + assert(!((pitch - 1) & pitch)); + tiled = MS3_TILED_SURFACE; + } + + /* MS3 state */ + state[0] = + (((height - 1) << MS3_HEIGHT_SHIFT) + | ((width - 1) << MS3_WIDTH_SHIFT) + | format + | tiled); + + /* + * XXX When min_filter != mag_filter and there's just one mipmap level, + * set max_lod = 1 to make sure i915 chooses between min/mag filtering. + */ + + /* See note at the top of file */ + if (max_lod > (sampler->maxlod >> 2)) + max_lod = sampler->maxlod >> 2; + + /* MS4 state */ + state[1] = + ((((pitch / 4) - 1) << MS4_PITCH_SHIFT) + | MS4_CUBE_FACE_ENA_MASK + | ((max_lod) << MS4_MAX_LOD_SHIFT) + | ((depth - 1) << MS4_VOLUME_DEPTH_SHIFT)); +} + + +void +i915_update_textures(struct i915_context *i915) +{ + uint unit; + + for (unit = 0; unit < i915->num_textures && unit < i915->num_samplers; + unit++) { + /* determine unit enable/disable by looking for a bound texture */ + /* could also examine the fragment program? 
*/ + if (i915->texture[unit]) { + i915_update_texture( i915, + unit, + i915->texture[unit], /* texture */ + i915->sampler[unit], /* sampler state */ + i915->current.texbuffer[unit] ); + } + } + + i915->hardware_dirty |= I915_HW_MAP; +} diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c new file mode 100644 index 0000000000..62f1926644 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -0,0 +1,123 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "i915_context.h" +#include "i915_blit.h" +#include "i915_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "util/u_tile.h" +#include "util/u_rect.h" + + +/* Assumes all values are within bounds -- no checking at this level - + * do it higher up if required. + */ +static void +i915_surface_copy(struct pipe_context *pipe, + boolean do_flip, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, unsigned width, unsigned height) +{ + assert( dst != src ); + assert( dst->block.size == src->block.size ); + assert( dst->block.width == src->block.height ); + assert( dst->block.height == src->block.height ); + + if (0) { + void *dst_map = pipe->screen->surface_map( pipe->screen, + dst, + PIPE_BUFFER_USAGE_CPU_WRITE ); + + const void *src_map = pipe->screen->surface_map( pipe->screen, + src, + PIPE_BUFFER_USAGE_CPU_READ ); + + pipe_copy_rect(dst_map, + &dst->block, + dst->stride, + dstx, dsty, + width, height, + src_map, + do_flip ? -(int) src->stride : src->stride, + srcx, do_flip ? 
height - 1 - srcy : srcy); + + pipe->screen->surface_unmap(pipe->screen, src); + pipe->screen->surface_unmap(pipe->screen, dst); + } + else { + assert(dst->block.width == 1); + assert(dst->block.height == 1); + i915_copy_blit( i915_context(pipe), + do_flip, + dst->block.size, + (short) src->stride, src->buffer, src->offset, + (short) dst->stride, dst->buffer, dst->offset, + (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height ); + } +} + + +static void +i915_surface_fill(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value) +{ + if (0) { + void *dst_map = pipe->screen->surface_map( pipe->screen, + dst, + PIPE_BUFFER_USAGE_CPU_WRITE ); + + pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); + + pipe->screen->surface_unmap(pipe->screen, dst); + } + else { + assert(dst->block.width == 1); + assert(dst->block.height == 1); + i915_fill_blit( i915_context(pipe), + dst->block.size, + (short) dst->stride, + dst->buffer, dst->offset, + (short) dstx, (short) dsty, + (short) width, (short) height, + value ); + } +} + + +void +i915_init_surface_functions(struct i915_context *i915) +{ + i915->pipe.surface_copy = i915_surface_copy; + i915->pipe.surface_fill = i915_surface_fill; +} diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c new file mode 100644 index 0000000000..bd87217063 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -0,0 +1,774 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Michel Dänzer <michel@tungstengraphics.com> + */ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "i915_context.h" +#include "i915_texture.h" +#include "i915_debug.h" +#include "i915_screen.h" + +/* + * Helper function and arrays + */ + +/** + * Initial offset for Cube map. + */ +static const int initial_offsets[6][2] = { + {0, 0}, + {0, 2}, + {1, 0}, + {1, 2}, + {1, 1}, + {1, 3} +}; + +/** + * Step offsets for Cube map. 
+ */ +static const int step_offsets[6][2] = { + {0, 2}, + {0, 2}, + {-1, 2}, + {-1, 2}, + {-1, 1}, + {-1, 1} +}; + +static unsigned minify( unsigned d ) +{ + return MAX2(1, d>>1); +} + +static unsigned +power_of_two(unsigned x) +{ + unsigned value = 1; + while (value < x) + value = value << 1; + return value; +} + +static unsigned +round_up(unsigned n, unsigned multiple) +{ + return (n + multiple - 1) & ~(multiple - 1); +} + + +/* + * More advanced helper funcs + */ + + +static void +i915_miptree_set_level_info(struct i915_texture *tex, + unsigned level, + unsigned nr_images, + unsigned w, unsigned h, unsigned d) +{ + struct pipe_texture *pt = &tex->base; + + assert(level < PIPE_MAX_TEXTURE_LEVELS); + + pt->width[level] = w; + pt->height[level] = h; + pt->depth[level] = d; + + pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h); + + tex->nr_images[level] = nr_images; + + /* + DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + level, w, h, d, x, y, tex->level_offset[level]); + */ + + /* Not sure when this would happen, but anyway: + */ + if (tex->image_offset[level]) { + FREE(tex->image_offset[level]); + tex->image_offset[level] = NULL; + } + + assert(nr_images); + assert(!tex->image_offset[level]); + + tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned)); + tex->image_offset[level][0] = 0; +} + +static void +i915_miptree_set_image_offset(struct i915_texture *tex, + unsigned level, unsigned img, unsigned x, unsigned y) +{ + if (img == 0 && level == 0) + assert(x == 0 && y == 0); + + assert(img < tex->nr_images[level]); + + tex->image_offset[level][img] = y * tex->stride + x * tex->base.block.size; + + /* + printf("%s level %d img %d pos %d,%d image_offset %x\n", + __FUNCTION__, level, img, x, y, tex->image_offset[level][img]); + */ +} + + +/* + * Layout functions + */ + + +/** + * Special case to deal with display targets. 
+ */ +static boolean +i915_displaytarget_layout(struct i915_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + + if (pt->last_level > 0 || pt->block.size != 4) + return 0; + + i915_miptree_set_level_info( tex, 0, 1, + tex->base.width[0], + tex->base.height[0], + 1 ); + i915_miptree_set_image_offset( tex, 0, 0, 0, 0 ); + + if (tex->base.width[0] >= 128) { + tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); + tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); + tex->tiled = 1; + } else { + tex->stride = round_up(tex->base.nblocksx[0] * pt->block.size, 64); + tex->total_nblocksy = tex->base.nblocksy[0]; + } + + /* + printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, + tex->base.width[0], tex->base.height[0], pt->block.size, + tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); + */ + + return 1; +} + +static void +i945_miptree_layout_2d( struct i915_texture *tex ) +{ + struct pipe_texture *pt = &tex->base; + const int align_x = 2, align_y = 4; + unsigned level; + unsigned x = 0; + unsigned y = 0; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; + +#if 0 /* used for tiled display targets */ + if (pt->last_level == 0 && pt->block.size == 4) + if (i915_displaytarget_layout(tex)) + return; +#endif + + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + + /* May need to adjust pitch to accomodate the placement of + * the 2nd mipmap level. This occurs when the alignment + * constraints of mipmap placement push the right edge of the + * 2nd mipmap level out past the width of its parent. 
+ */ + if (pt->last_level > 0) { + unsigned mip1_nblocksx + = align(pf_get_nblocksx(&pt->block, minify(width)), align_x) + + pf_get_nblocksx(&pt->block, minify(minify(width))); + + if (mip1_nblocksx > nblocksx) + tex->stride = mip1_nblocksx * pt->block.size; + } + + /* Pitch must be a whole number of dwords + */ + tex->stride = align(tex->stride, 64); + tex->total_nblocksy = 0; + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 1, width, height, 1); + i915_miptree_set_image_offset(tex, level, 0, x, y); + + nblocksy = align(nblocksy, align_y); + + /* Because the images are packed better, the final offset + * might not be the maximal one: + */ + tex->total_nblocksy = MAX2(tex->total_nblocksy, y + nblocksy); + + /* Layout_below: step right after second mipmap level. + */ + if (level == 1) { + x += align(nblocksx, align_x); + } + else { + y += nblocksy; + } + + width = minify(width); + height = minify(height); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); + } +} + +static void +i945_miptree_layout_cube(struct i915_texture *tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned level; + + const unsigned nblocks = pt->nblocksx[0]; + unsigned face; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + + /* + printf("%s %i, %i\n", __FUNCTION__, pt->width[0], pt->height[0]); + */ + + assert(width == height); /* cubemap images are square */ + + /* + * XXX Should only be used for compressed formats. But lets + * keep this code active just in case. + * + * Depending on the size of the largest images, pitch can be + * determined either by the old-style packing of cubemap faces, + * or the final row of 4x4, 2x2 and 1x1 faces below this. 
+ */ + if (nblocks > 32) + tex->stride = round_up(nblocks * pt->block.size * 2, 4); + else + tex->stride = 14 * 8 * pt->block.size; + + tex->total_nblocksy = nblocks * 4; + + /* Set all the levels to effectively occupy the whole rectangular region. + */ + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 6, width, height, 1); + width /= 2; + height /= 2; + } + + for (face = 0; face < 6; face++) { + unsigned x = initial_offsets[face][0] * nblocks; + unsigned y = initial_offsets[face][1] * nblocks; + unsigned d = nblocks; + +#if 0 /* Fix and enable this code for compressed formats */ + if (nblocks == 4 && face >= 4) { + y = tex->total_height - 4; + x = (face - 4) * 8; + } + else if (nblocks < 4 && (face > 0)) { + y = tex->total_height - 4; + x = face * 8; + } +#endif + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_image_offset(tex, level, face, x, y); + + d >>= 1; + +#if 0 /* Fix and enable this code for compressed formats */ + switch (d) { + case 4: + switch (face) { + case PIPE_TEX_FACE_POS_X: + case PIPE_TEX_FACE_NEG_X: + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; + break; + case PIPE_TEX_FACE_POS_Y: + case PIPE_TEX_FACE_NEG_Y: + y += 12; + x -= 8; + break; + case PIPE_TEX_FACE_POS_Z: + case PIPE_TEX_FACE_NEG_Z: + y = tex->total_height - 4; + x = (face - 4) * 8; + break; + } + case 2: + y = tex->total_height - 4; + x = 16 + face * 8; + break; + + case 1: + x += 48; + break; + default: +#endif + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; +#if 0 + break; + } +#endif + } + } +} + +static boolean +i915_miptree_layout(struct i915_texture * tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned level; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: { + const unsigned nblocks = pt->nblocksx[0]; + unsigned face; + unsigned width = pt->width[0], height = pt->height[0]; + + assert(width == height); /* cubemap images are square */ + + /* double pitch 
for cube layouts */ + tex->stride = round_up(nblocks * pt->block.size * 2, 4); + tex->total_nblocksy = nblocks * 4; + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 6, + width, height, + 1); + width /= 2; + height /= 2; + } + + for (face = 0; face < 6; face++) { + unsigned x = initial_offsets[face][0] * nblocks; + unsigned y = initial_offsets[face][1] * nblocks; + unsigned d = nblocks; + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_image_offset(tex, level, face, x, y); + d >>= 1; + x += step_offsets[face][0] * d; + y += step_offsets[face][1] * d; + } + } + break; + } + case PIPE_TEXTURE_3D:{ + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; + unsigned stack_nblocksy = 0; + + /* Calculate the size of a single slice. + */ + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + + /* XXX: hardware expects/requires 9 levels at minimum. + */ + for (level = 0; level <= MAX2(8, pt->last_level); + level++) { + i915_miptree_set_level_info(tex, level, depth, + width, height, depth); + + + stack_nblocksy += MAX2(2, nblocksy); + + width = minify(width); + height = minify(height); + depth = minify(depth); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); + } + + /* Fixup depth image_offsets: + */ + depth = pt->depth[0]; + for (level = 0; level <= pt->last_level; level++) { + unsigned i; + for (i = 0; i < depth; i++) + i915_miptree_set_image_offset(tex, level, i, + 0, i * stack_nblocksy); + + depth = minify(depth); + } + + + /* Multiply slice size by texture depth for total size. It's + * remarkable how wasteful of memory the i915 texture layouts + * are. They are largely fixed in the i945. 
+ */ + tex->total_nblocksy = stack_nblocksy * pt->depth[0]; + break; + } + + default:{ + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; + + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + tex->total_nblocksy = 0; + + for (level = 0; level <= pt->last_level; level++) { + i915_miptree_set_level_info(tex, level, 1, + width, height, 1); + i915_miptree_set_image_offset(tex, level, 0, + 0, tex->total_nblocksy); + + nblocksy = round_up(MAX2(2, nblocksy), 2); + + tex->total_nblocksy += nblocksy; + + width = minify(width); + height = minify(height); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); + } + break; + } + } + /* + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + tex->pitch, + tex->total_nblocksy, pt->block.size, tex->stride * tex->total_nblocksy); + */ + + return TRUE; +} + + +static boolean +i945_miptree_layout(struct i915_texture * tex) +{ + struct pipe_texture *pt = &tex->base; + unsigned level; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + i945_miptree_layout_cube(tex); + break; + case PIPE_TEXTURE_3D:{ + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; + unsigned pack_x_pitch, pack_x_nr; + unsigned pack_y_pitch; + + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + tex->total_nblocksy = 0; + + pack_y_pitch = MAX2(pt->nblocksy[0], 2); + pack_x_pitch = tex->stride / pt->block.size; + pack_x_nr = 1; + + for (level = 0; level <= pt->last_level; level++) { + unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? 
depth : 6; + int x = 0; + int y = 0; + unsigned q, j; + + i915_miptree_set_level_info(tex, level, nr_images, + width, height, depth); + + for (q = 0; q < nr_images;) { + for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { + i915_miptree_set_image_offset(tex, level, q, x, y + tex->total_nblocksy); + x += pack_x_pitch; + } + + x = 0; + y += pack_y_pitch; + } + + + tex->total_nblocksy += y; + + if (pack_x_pitch > 4) { + pack_x_pitch >>= 1; + pack_x_nr <<= 1; + assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride); + } + + if (pack_y_pitch > 2) { + pack_y_pitch >>= 1; + } + + width = minify(width); + height = minify(height); + depth = minify(depth); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); + } + break; + } + + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_2D: +// case PIPE_TEXTURE_RECTANGLE: + i945_miptree_layout_2d(tex); + break; + default: + assert(0); + return FALSE; + } + + /* + DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, + tex->pitch, + tex->total_nblocksy, pt->block.size, tex->stride * tex->total_nblocksy); + */ + + return TRUE; +} + + +static struct pipe_texture * +i915_texture_create(struct pipe_screen *screen, + const struct pipe_texture *templat) +{ + struct i915_screen *i915screen = i915_screen(screen); + struct pipe_winsys *ws = screen->winsys; + struct i915_texture *tex = CALLOC_STRUCT(i915_texture); + size_t tex_size; + + if (!tex) + return NULL; + + tex->base = *templat; + tex->base.refcount = 1; + tex->base.screen = screen; + + tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]); + tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]); + + if (i915screen->is_i945) { + if (!i945_miptree_layout(tex)) + goto fail; + } else { + if (!i915_miptree_layout(tex)) + goto fail; + } + + tex_size = tex->stride * tex->total_nblocksy; + + tex->buffer = ws->buffer_create(ws, 64, + PIPE_BUFFER_USAGE_PIXEL, + tex_size); + + if (!tex->buffer) + goto 
fail; + +#if 0 + void *ptr = ws->buffer_map(ws, tex->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE); + memset(ptr, 0x80, tex_size); + ws->buffer_unmap(ws, tex->buffer); +#endif + + return &tex->base; + +fail: + FREE(tex); + return NULL; +} + + +static void +i915_texture_release(struct pipe_screen *screen, + struct pipe_texture **pt) +{ + if (!*pt) + return; + + /* + DBG("%s %p refcount will be %d\n", + __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); + */ + if (--(*pt)->refcount <= 0) { + struct i915_texture *tex = (struct i915_texture *)*pt; + uint i; + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); + */ + + pipe_buffer_reference(screen, &tex->buffer, NULL); + + for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) + if (tex->image_offset[i]) + FREE(tex->image_offset[i]); + + FREE(tex); + } + *pt = NULL; +} + +static struct pipe_surface * +i915_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct i915_texture *tex = (struct i915_texture *)pt; + struct pipe_winsys *ws = screen->winsys; + struct pipe_surface *ps; + unsigned offset; /* in bytes */ + + if (pt->target == PIPE_TEXTURE_CUBE) { + offset = tex->image_offset[level][face]; + } + else if (pt->target == PIPE_TEXTURE_3D) { + offset = tex->image_offset[level][zslice]; + } + else { + offset = tex->image_offset[level][0]; + assert(face == 0); + assert(zslice == 0); + } + + ps = CALLOC_STRUCT(pipe_surface); + if (ps) { + ps->refcount = 1; + ps->winsys = ws; + pipe_texture_reference(&ps->texture, pt); + pipe_buffer_reference(screen, &ps->buffer, tex->buffer); + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = tex->stride; + ps->offset = offset; + ps->usage = flags; + ps->status = PIPE_SURFACE_STATUS_DEFINED; + } + return ps; +} + +static struct pipe_texture * 
+i915_texture_blanket(struct pipe_screen * screen, + const struct pipe_texture *base, + const unsigned *stride, + struct pipe_buffer *buffer) +{ + struct i915_texture *tex; + assert(screen); + + /* Only supports one type */ + if (base->target != PIPE_TEXTURE_2D || + base->last_level != 0 || + base->depth[0] != 1) { + return NULL; + } + + tex = CALLOC_STRUCT(i915_texture); + if (!tex) + return NULL; + + tex->base = *base; + tex->base.refcount = 1; + tex->base.screen = screen; + + tex->stride = stride[0]; + + i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); + i915_miptree_set_image_offset(tex, 0, 0, 0, 0); + + pipe_buffer_reference(screen, &tex->buffer, buffer); + + return &tex->base; +} + +void +i915_init_texture_functions(struct i915_context *i915) +{ +// i915->pipe.texture_update = i915_texture_update; +} + +static void +i915_tex_surface_release(struct pipe_screen *screen, + struct pipe_surface **surface) +{ + struct pipe_surface *surf = *surface; + + if (--surf->refcount == 0) { + + /* This really should not be possible, but it's actually + * happening quite a bit... Will fix. 
+ */ + if (surf->status == PIPE_SURFACE_STATUS_CLEAR) { + debug_printf("XXX destroying a surface with pending clears...\n"); + assert(0); + } + + pipe_texture_reference(&surf->texture, NULL); + pipe_buffer_reference(screen, &surf->buffer, NULL); + FREE(surf); + } + + *surface = NULL; +} + +void +i915_init_screen_texture_functions(struct pipe_screen *screen) +{ + screen->texture_create = i915_texture_create; + screen->texture_release = i915_texture_release; + screen->get_tex_surface = i915_get_tex_surface; + screen->texture_blanket = i915_texture_blanket; + screen->tex_surface_release = i915_tex_surface_release; +} diff --git a/src/gallium/drivers/i915simple/i915_texture.h b/src/gallium/drivers/i915simple/i915_texture.h new file mode 100644 index 0000000000..7225016a9f --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_texture.h @@ -0,0 +1,43 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef I915_TEXTURE_H +#define I915_TEXTURE_H + +struct i915_context; +struct pipe_screen; + + +extern void +i915_init_texture_functions(struct i915_context *i915); + + +extern void +i915_init_screen_texture_functions(struct pipe_screen *screen); + + +#endif /* I915_TEXTURE_H */ diff --git a/src/gallium/drivers/i915simple/i915_winsys.h b/src/gallium/drivers/i915simple/i915_winsys.h new file mode 100644 index 0000000000..81904c2a74 --- /dev/null +++ b/src/gallium/drivers/i915simple/i915_winsys.h @@ -0,0 +1,121 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * This is the interface that i915simple requires any window system + * hosting it to implement. This is the only include file in i915simple + * which is public. + * + */ + +#ifndef I915_WINSYS_H +#define I915_WINSYS_H + + +#include "pipe/p_defines.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Pipe drivers are (meant to be!) independent of both GL and the + * window system. The window system provides a buffer manager and a + * set of additional hooks for things like command buffer submission, + * etc. + * + * There clearly has to be some agreement between the window system + * driver and the hardware driver about the format of command buffers, + * etc. + */ + +struct i915_batchbuffer; +struct pipe_buffer; +struct pipe_fence_handle; +struct pipe_winsys; +struct pipe_screen; + + +/** + * Additional winsys interface for i915simple. + * + * It is an over-simple batchbuffer mechanism. Will want to improve the + * performance of this, perhaps based on the cmdstream stuff. It + * would be pretty impossible to implement swz on top of this + * interface. + * + * Will also need additions/changes to implement static/dynamic + * indirect state. + */ +struct i915_winsys { + + void (*destroy)( struct i915_winsys *sws ); + + /** + * Get the current batch buffer from the winsys. + */ + struct i915_batchbuffer *(*batch_get)( struct i915_winsys *sws ); + + /** + * Emit a relocation to a buffer. + * + * Used not only when the buffer addresses are not pinned, but also to + * ensure refered buffers will not be destroyed until the current batch + * buffer execution is finished. 
+ * + * The access flags is a combination of I915_BUFFER_ACCESS_WRITE and + * I915_BUFFER_ACCESS_READ macros. + */ + void (*batch_reloc)( struct i915_winsys *sws, + struct pipe_buffer *buf, + unsigned access_flags, + unsigned delta ); + + /** + * Flush the batch. + */ + void (*batch_flush)( struct i915_winsys *sws, + struct pipe_fence_handle **fence ); +}; + +#define I915_BUFFER_ACCESS_WRITE 0x1 +#define I915_BUFFER_ACCESS_READ 0x2 + +#define I915_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0) + + +struct pipe_context *i915_create_context( struct pipe_screen *, + struct pipe_winsys *, + struct i915_winsys * ); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile new file mode 100644 index 0000000000..e97146e57c --- /dev/null +++ b/src/gallium/drivers/i965simple/Makefile @@ -0,0 +1,54 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = i965simple + +C_SOURCES = \ + brw_blit.c \ + brw_flush.c \ + brw_screen.c \ + brw_surface.c \ + brw_cc.c \ + brw_clip.c \ + brw_clip_line.c \ + brw_clip_point.c \ + brw_clip_state.c \ + brw_clip_tri.c \ + brw_clip_util.c \ + brw_context.c \ + brw_curbe.c \ + brw_draw.c \ + brw_draw_upload.c \ + brw_eu.c \ + brw_eu_debug.c \ + brw_eu_emit.c \ + brw_eu_util.c \ + brw_gs.c \ + brw_gs_emit.c \ + brw_gs_state.c \ + brw_misc_state.c \ + brw_sf.c \ + brw_sf_emit.c \ + brw_sf_state.c \ + brw_state.c \ + brw_state_batch.c \ + brw_state_cache.c \ + brw_state_pool.c \ + brw_state_upload.c \ + brw_tex_layout.c \ + brw_urb.c \ + brw_util.c \ + brw_vs.c \ + brw_vs_emit.c \ + brw_vs_state.c \ + brw_wm.c \ + brw_wm_iz.c \ + brw_wm_decl.c \ + brw_wm_glsl.c \ + brw_wm_sampler_state.c \ + brw_wm_state.c \ + brw_wm_surface_state.c + +include ../../Makefile.template + +symlinks: diff --git a/src/gallium/drivers/i965simple/SConscript b/src/gallium/drivers/i965simple/SConscript new file mode 100644 index 0000000000..43fc2a4005 --- /dev/null +++ 
b/src/gallium/drivers/i965simple/SConscript @@ -0,0 +1,54 @@ +Import('*') + +env = env.Clone() + +i965simple = env.ConvenienceLibrary( + target = 'i965simple', + source = [ + 'brw_blit.c', + 'brw_cc.c', + 'brw_clip.c', + 'brw_clip_line.c', + 'brw_clip_point.c', + 'brw_clip_state.c', + 'brw_clip_tri.c', + 'brw_clip_util.c', + 'brw_context.c', + 'brw_curbe.c', + 'brw_draw.c', + 'brw_draw_upload.c', + 'brw_eu.c', + 'brw_eu_debug.c', + 'brw_eu_emit.c', + 'brw_eu_util.c', + 'brw_flush.c', + 'brw_gs.c', + 'brw_gs_emit.c', + 'brw_gs_state.c', + 'brw_misc_state.c', + 'brw_screen.c', + 'brw_sf.c', + 'brw_sf_emit.c', + 'brw_sf_state.c', + 'brw_state.c', + 'brw_state_batch.c', + 'brw_state_cache.c', + 'brw_state_pool.c', + 'brw_state_upload.c', + 'brw_surface.c', + 'brw_tex_layout.c', + 'brw_urb.c', + 'brw_util.c', + 'brw_vs.c', + 'brw_vs_emit.c', + 'brw_vs_state.c', + 'brw_wm.c', + 'brw_wm_decl.c', + 'brw_wm_glsl.c', + 'brw_wm_iz.c', + 'brw_wm_sampler_state.c', + 'brw_wm_state.c', + 'brw_wm_surface_state.c', + ]) + +Export('i965simple') diff --git a/src/gallium/drivers/i965simple/brw_batch.h b/src/gallium/drivers/i965simple/brw_batch.h new file mode 100644 index 0000000000..5f5932a488 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_batch.h @@ -0,0 +1,59 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef BRW_BATCH_H +#define BRW_BATCH_H + +#include "brw_winsys.h" + +#define BATCH_LOCALS + +#define INTEL_BATCH_NO_CLIPRECTS 0x1 +#define INTEL_BATCH_CLIPRECTS 0x2 + +#define BEGIN_BATCH( dwords, relocs ) \ + brw->winsys->batch_start(brw->winsys, dwords, relocs) + +#define OUT_BATCH( dword ) \ + brw->winsys->batch_dword(brw->winsys, dword) + +#define OUT_RELOC( buf, flags, delta ) \ + brw->winsys->batch_reloc(brw->winsys, buf, flags, delta) + +#define ADVANCE_BATCH() \ + brw->winsys->batch_end( brw->winsys ) + +/* XXX: this is bogus - need proper handling for out-of-memory in batchbuffer. 
+ */ +#define FLUSH_BATCH(fence) do { \ + brw->winsys->batch_flush(brw->winsys, fence); \ + brw->hardware_dirty = ~0; \ +} while (0) + +#define BRW_BATCH_STRUCT(brw, s) brw_batchbuffer_data( brw->winsys, (s), sizeof(*(s))) + +#endif diff --git a/src/gallium/drivers/i965simple/brw_blit.c b/src/gallium/drivers/i965simple/brw_blit.c new file mode 100644 index 0000000000..8494f70493 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_blit.c @@ -0,0 +1,218 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include <stdio.h> +#include <errno.h> + +#include "brw_batch.h" +#include "brw_blit.h" +#include "brw_context.h" +#include "brw_reg.h" + +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" + +#define FILE_DEBUG_FLAG DEBUG_BLIT + +void brw_fill_blit(struct brw_context *brw, + unsigned cpp, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + boolean dst_tiled, + short x, short y, + short w, short h, + unsigned color) +{ + unsigned BR13, CMD; + BATCH_LOCALS; + + dst_pitch *= cpp; + + switch(cpp) { + case 1: + case 2: + case 3: + BR13 = (0xF0 << 16) | (1<<24); + CMD = XY_COLOR_BLT_CMD; + break; + case 4: + BR13 = (0xF0 << 16) | (1<<24) | (1<<25); + CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + break; + default: + return; + } + + if (dst_tiled) { + CMD |= XY_DST_TILED; + dst_pitch /= 4; + } + + BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( CMD ); + OUT_BATCH( dst_pitch | BR13 ); + OUT_BATCH( (y << 16) | x ); + OUT_BATCH( ((y+h) << 16) | (x+w) ); + OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, dst_offset ); + OUT_BATCH( color ); + ADVANCE_BATCH(); +} + +static unsigned translate_raster_op(unsigned logicop) +{ + switch(logicop) { + case PIPE_LOGICOP_CLEAR: return 0x00; + case PIPE_LOGICOP_AND: return 0x88; + case PIPE_LOGICOP_AND_REVERSE: return 0x44; + case PIPE_LOGICOP_COPY: return 0xCC; + case PIPE_LOGICOP_AND_INVERTED: return 0x22; + case PIPE_LOGICOP_NOOP: return 0xAA; + case PIPE_LOGICOP_XOR: return 0x66; + case PIPE_LOGICOP_OR: return 0xEE; + case PIPE_LOGICOP_NOR: return 0x11; + case PIPE_LOGICOP_EQUIV: return 0x99; + case PIPE_LOGICOP_INVERT: return 0x55; + case PIPE_LOGICOP_OR_REVERSE: return 0xDD; + case PIPE_LOGICOP_COPY_INVERTED: return 0x33; + case PIPE_LOGICOP_OR_INVERTED: return 0xBB; + case PIPE_LOGICOP_NAND: return 0x77; + case PIPE_LOGICOP_SET: return 0xFF; + default: return 0; + } +} + + +/* Copy BitBlt + 
*/ +void brw_copy_blit(struct brw_context *brw, + unsigned do_flip, + unsigned cpp, + short src_pitch, + struct pipe_buffer *src_buffer, + unsigned src_offset, + boolean src_tiled, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + boolean dst_tiled, + short src_x, short src_y, + short dst_x, short dst_y, + short w, short h, + unsigned logic_op) +{ + unsigned CMD, BR13; + int dst_y2 = dst_y + h; + int dst_x2 = dst_x + w; + BATCH_LOCALS; + + + DBG("%s src:buf(%d)/%d %d,%d dst:buf(%d)/%d %d,%d sz:%dx%d op:%d\n", + __FUNCTION__, + src_buffer, src_pitch, src_x, src_y, + dst_buffer, dst_pitch, dst_x, dst_y, + w,h,logic_op); + + assert( logic_op - PIPE_LOGICOP_CLEAR >= 0 ); + assert( logic_op - PIPE_LOGICOP_CLEAR < 0x10 ); + + src_pitch *= cpp; + dst_pitch *= cpp; + + switch(cpp) { + case 1: + case 2: + case 3: + BR13 = (translate_raster_op(logic_op) << 16) | (1<<24); + CMD = XY_SRC_COPY_BLT_CMD; + break; + case 4: + BR13 = (translate_raster_op(logic_op) << 16) | (1<<24) | + (1<<25); + CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; + break; + default: + return; + } + + if (src_tiled) { + CMD |= XY_SRC_TILED; + src_pitch /= 4; + } + + if (dst_tiled) { + CMD |= XY_DST_TILED; + dst_pitch /= 4; + } + + if (dst_y2 < dst_y || + dst_x2 < dst_x) { + return; + } + + dst_pitch &= 0xffff; + src_pitch &= 0xffff; + + /* Initial y values don't seem to work with negative pitches. If + * we adjust the offsets manually (below), it seems to work fine. + * + * On the other hand, if we always adjust, the hardware doesn't + * know which blit directions to use, so overlapping copypixels get + * the wrong result. 
+ */ + if (dst_pitch > 0 && src_pitch > 0) { + BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( CMD ); + OUT_BATCH( dst_pitch | BR13 ); + OUT_BATCH( (dst_y << 16) | dst_x ); + OUT_BATCH( (dst_y2 << 16) | dst_x2 ); + OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, + dst_offset ); + OUT_BATCH( (src_y << 16) | src_x ); + OUT_BATCH( src_pitch ); + OUT_RELOC( src_buffer, BRW_BUFFER_ACCESS_READ, + src_offset ); + ADVANCE_BATCH(); + } + else { + BEGIN_BATCH(8, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH( CMD ); + OUT_BATCH( (dst_pitch & 0xffff) | BR13 ); + OUT_BATCH( (0 << 16) | dst_x ); + OUT_BATCH( (h << 16) | dst_x2 ); + OUT_RELOC( dst_buffer, BRW_BUFFER_ACCESS_WRITE, + dst_offset + dst_y * dst_pitch ); + OUT_BATCH( (src_pitch & 0xffff) ); + OUT_RELOC( src_buffer, BRW_BUFFER_ACCESS_READ, + src_offset + src_y * src_pitch ); + ADVANCE_BATCH(); + } +} + + + diff --git a/src/gallium/drivers/i965simple/brw_blit.h b/src/gallium/drivers/i965simple/brw_blit.h new file mode 100644 index 0000000000..111c5d91d3 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_blit.h @@ -0,0 +1,33 @@ +#ifndef BRW_BLIT_H +#define BRW_BLIT_H + +#include "pipe/p_compiler.h" + +struct pipe_buffer; +struct brw_context; + +void brw_fill_blit(struct brw_context *intel, + unsigned cpp, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + boolean dst_tiled, + short x, short y, + short w, short h, + unsigned color); +void brw_copy_blit(struct brw_context *intel, + unsigned do_flip, + unsigned cpp, + short src_pitch, + struct pipe_buffer *src_buffer, + unsigned src_offset, + boolean src_tiled, + short dst_pitch, + struct pipe_buffer *dst_buffer, + unsigned dst_offset, + boolean dst_tiled, + short src_x, short src_y, + short dst_x, short dst_y, + short w, short h, + unsigned logic_op); +#endif diff --git a/src/gallium/drivers/i965simple/brw_cc.c b/src/gallium/drivers/i965simple/brw_cc.c new file mode 100644 index 0000000000..79d4150383 --- /dev/null +++ 
b/src/gallium/drivers/i965simple/brw_cc.c @@ -0,0 +1,269 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "brw_util.h" + + +static int brw_translate_compare_func(int func) +{ + switch(func) { + case PIPE_FUNC_NEVER: + return BRW_COMPAREFUNCTION_NEVER; + case PIPE_FUNC_LESS: + return BRW_COMPAREFUNCTION_LESS; + case PIPE_FUNC_LEQUAL: + return BRW_COMPAREFUNCTION_LEQUAL; + case PIPE_FUNC_GREATER: + return BRW_COMPAREFUNCTION_GREATER; + case PIPE_FUNC_GEQUAL: + return BRW_COMPAREFUNCTION_GEQUAL; + case PIPE_FUNC_NOTEQUAL: + return BRW_COMPAREFUNCTION_NOTEQUAL; + case PIPE_FUNC_EQUAL: + return BRW_COMPAREFUNCTION_EQUAL; + case PIPE_FUNC_ALWAYS: + return BRW_COMPAREFUNCTION_ALWAYS; + } + + debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func); + return BRW_COMPAREFUNCTION_ALWAYS; +} + +static int brw_translate_stencil_op(int op) +{ + switch(op) { + case PIPE_STENCIL_OP_KEEP: + return BRW_STENCILOP_KEEP; + case PIPE_STENCIL_OP_ZERO: + return BRW_STENCILOP_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return BRW_STENCILOP_REPLACE; + case PIPE_STENCIL_OP_INCR: + return BRW_STENCILOP_INCRSAT; + case PIPE_STENCIL_OP_DECR: + return BRW_STENCILOP_DECRSAT; + case PIPE_STENCIL_OP_INCR_WRAP: + return BRW_STENCILOP_INCR; + case PIPE_STENCIL_OP_DECR_WRAP: + return BRW_STENCILOP_DECR; + case PIPE_STENCIL_OP_INVERT: + return BRW_STENCILOP_INVERT; + default: + return BRW_STENCILOP_ZERO; + } +} + + +static int brw_translate_logic_op(int opcode) +{ + switch(opcode) { + case PIPE_LOGICOP_CLEAR: + return BRW_LOGICOPFUNCTION_CLEAR; + case PIPE_LOGICOP_AND: + return BRW_LOGICOPFUNCTION_AND; + case PIPE_LOGICOP_AND_REVERSE: + return BRW_LOGICOPFUNCTION_AND_REVERSE; + case PIPE_LOGICOP_COPY: + return BRW_LOGICOPFUNCTION_COPY; + case PIPE_LOGICOP_COPY_INVERTED: + return BRW_LOGICOPFUNCTION_COPY_INVERTED; + 
case PIPE_LOGICOP_AND_INVERTED: + return BRW_LOGICOPFUNCTION_AND_INVERTED; + case PIPE_LOGICOP_NOOP: + return BRW_LOGICOPFUNCTION_NOOP; + case PIPE_LOGICOP_XOR: + return BRW_LOGICOPFUNCTION_XOR; + case PIPE_LOGICOP_OR: + return BRW_LOGICOPFUNCTION_OR; + case PIPE_LOGICOP_OR_INVERTED: + return BRW_LOGICOPFUNCTION_OR_INVERTED; + case PIPE_LOGICOP_NOR: + return BRW_LOGICOPFUNCTION_NOR; + case PIPE_LOGICOP_EQUIV: + return BRW_LOGICOPFUNCTION_EQUIV; + case PIPE_LOGICOP_INVERT: + return BRW_LOGICOPFUNCTION_INVERT; + case PIPE_LOGICOP_OR_REVERSE: + return BRW_LOGICOPFUNCTION_OR_REVERSE; + case PIPE_LOGICOP_NAND: + return BRW_LOGICOPFUNCTION_NAND; + case PIPE_LOGICOP_SET: + return BRW_LOGICOPFUNCTION_SET; + default: + return BRW_LOGICOPFUNCTION_SET; + } +} + + +static void upload_cc_vp( struct brw_context *brw ) +{ + struct brw_cc_viewport ccv; + + memset(&ccv, 0, sizeof(ccv)); + + ccv.min_depth = 0.0; + ccv.max_depth = 1.0; + + brw->cc.vp_gs_offset = brw_cache_data( &brw->cache[BRW_CC_VP], &ccv ); +} + +const struct brw_tracked_state brw_cc_vp = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_cc_vp +}; + + +static void upload_cc_unit( struct brw_context *brw ) +{ + struct brw_cc_unit_state cc; + + memset(&cc, 0, sizeof(cc)); + + /* BRW_NEW_DEPTH_STENCIL */ + if (brw->attribs.DepthStencil->stencil[0].enabled) { + cc.cc0.stencil_enable = brw->attribs.DepthStencil->stencil[0].enabled; + cc.cc0.stencil_func = brw_translate_compare_func(brw->attribs.DepthStencil->stencil[0].func); + cc.cc0.stencil_fail_op = brw_translate_stencil_op(brw->attribs.DepthStencil->stencil[0].fail_op); + cc.cc0.stencil_pass_depth_fail_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[0].zfail_op); + cc.cc0.stencil_pass_depth_pass_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[0].zpass_op); + cc.cc1.stencil_ref = brw->attribs.DepthStencil->stencil[0].ref_value; + cc.cc1.stencil_write_mask = 
brw->attribs.DepthStencil->stencil[0].write_mask; + cc.cc1.stencil_test_mask = brw->attribs.DepthStencil->stencil[0].value_mask; + + if (brw->attribs.DepthStencil->stencil[1].enabled) { + cc.cc0.bf_stencil_enable = brw->attribs.DepthStencil->stencil[1].enabled; + cc.cc0.bf_stencil_func = brw_translate_compare_func( + brw->attribs.DepthStencil->stencil[1].func); + cc.cc0.bf_stencil_fail_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[1].fail_op); + cc.cc0.bf_stencil_pass_depth_fail_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[1].zfail_op); + cc.cc0.bf_stencil_pass_depth_pass_op = brw_translate_stencil_op( + brw->attribs.DepthStencil->stencil[1].zpass_op); + cc.cc1.bf_stencil_ref = brw->attribs.DepthStencil->stencil[1].ref_value; + cc.cc2.bf_stencil_write_mask = brw->attribs.DepthStencil->stencil[1].write_mask; + cc.cc2.bf_stencil_test_mask = brw->attribs.DepthStencil->stencil[1].value_mask; + } + + /* Not really sure about this: + */ + if (brw->attribs.DepthStencil->stencil[0].write_mask || + brw->attribs.DepthStencil->stencil[1].write_mask) + cc.cc0.stencil_write_enable = 1; + } + + /* BRW_NEW_BLEND */ + if (brw->attribs.Blend->logicop_enable) { + cc.cc2.logicop_enable = 1; + cc.cc5.logicop_func = brw_translate_logic_op( brw->attribs.Blend->logicop_func ); + } + else if (brw->attribs.Blend->blend_enable) { + int eqRGB = brw->attribs.Blend->rgb_func; + int eqA = brw->attribs.Blend->alpha_func; + int srcRGB = brw->attribs.Blend->rgb_src_factor; + int dstRGB = brw->attribs.Blend->rgb_dst_factor; + int srcA = brw->attribs.Blend->alpha_src_factor; + int dstA = brw->attribs.Blend->alpha_dst_factor; + + if (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX) { + srcRGB = dstRGB = PIPE_BLENDFACTOR_ONE; + } + + if (eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX) { + srcA = dstA = PIPE_BLENDFACTOR_ONE; + } + + cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); + cc.cc6.src_blend_factor = 
brw_translate_blend_factor(srcRGB); + cc.cc6.blend_function = brw_translate_blend_equation( eqRGB ); + + cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); + cc.cc5.ia_blend_function = brw_translate_blend_equation( eqA ); + + cc.cc3.blend_enable = 1; + cc.cc3.ia_blend_enable = (srcA != srcRGB || + dstA != dstRGB || + eqA != eqRGB); + } + + /* BRW_NEW_ALPHATEST + */ + if (brw->attribs.DepthStencil->alpha.enabled) { + cc.cc3.alpha_test = 1; + cc.cc3.alpha_test_func = + brw_translate_compare_func(brw->attribs.DepthStencil->alpha.func); + + cc.cc7.alpha_ref.ub[0] = float_to_ubyte(brw->attribs.DepthStencil->alpha.ref); + + cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + } + + if (brw->attribs.Blend->dither) { + cc.cc5.dither_enable = 1; + cc.cc6.y_dither_offset = 0; + cc.cc6.x_dither_offset = 0; + } + + if (brw->attribs.DepthStencil->depth.enabled) { + cc.cc2.depth_test = brw->attribs.DepthStencil->depth.enabled; + cc.cc2.depth_test_function = brw_translate_compare_func(brw->attribs.DepthStencil->depth.func); + cc.cc2.depth_write_enable = brw->attribs.DepthStencil->depth.writemask; + } + + /* CACHE_NEW_CC_VP */ + cc.cc4.cc_viewport_state_offset = brw->cc.vp_gs_offset >> 5; + + if (BRW_DEBUG & DEBUG_STATS) + cc.cc5.statistics_enable = 1; + + brw->cc.state_gs_offset = brw_cache_data( &brw->cache[BRW_CC_UNIT], &cc ); +} + +const struct brw_tracked_state brw_cc_unit = { + .dirty = { + .brw = BRW_NEW_DEPTH_STENCIL | BRW_NEW_BLEND | BRW_NEW_ALPHA_TEST, + .cache = CACHE_NEW_CC_VP + }, + .update = upload_cc_unit +}; + diff --git a/src/gallium/drivers/i965simple/brw_clip.c b/src/gallium/drivers/i965simple/brw_clip.c new file mode 100644 index 0000000000..268124cc53 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip.c @@ -0,0 +1,206 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. 
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_state.h" +#include "brw_clip.h" + +#define FRONT_UNFILLED_BIT 0x1 +#define BACK_UNFILLED_BIT 0x2 + + /* Compile a clip program for *key: compute per-attribute offsets, emit the tri/line/point clip code selected by key->primitive, then upload the program together with its prog_data to the BRW_CLIP_PROG cache and store the resulting offset in brw->clip.prog_gs_offset. Any other primitive hits assert(0) and returns without uploading. */ +static void compile_clip_prog( struct brw_context *brw, + struct brw_clip_prog_key *key ) +{ + struct brw_clip_compile c; + const unsigned *program; + unsigned program_size; + unsigned delta; + unsigned i; + + memset(&c, 0, sizeof(c)); + + /* Begin the compilation: + */ + brw_init_compile(&c.func); + + c.func.single_program_flow = 1; + + c.key = *key; + + + /* Need to locate the two positions present in vertex + header.
+ * These are currently hardcoded: + */ + c.header_position_offset = ATTR_SIZE; + + /* Give every attribute present in key.attrs a consecutive ATTR_SIZE slot, starting at REG_SIZE. NOTE(review): c.offset[] is declared with PIPE_MAX_ATTRIBS entries in brw_clip.h while this loop is bounded by PIPE_MAX_SHADER_OUTPUTS - confirm the two limits agree. */ for (i = 0, delta = REG_SIZE; i < PIPE_MAX_SHADER_OUTPUTS; i++) + if (c.key.attrs & (1<<i)) { + c.offset[i] = delta; + delta += ATTR_SIZE; + } + + c.nr_attrs = brw_count_bits(c.key.attrs); + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + c.nr_bytes = c.nr_regs * REG_SIZE; + + c.prog_data.clip_mode = c.key.clip_mode; /* XXX */ + + /* For some reason the thread is spawned with only 4 channels + * unmasked. + */ + brw_set_mask_control(&c.func, BRW_MASK_DISABLE); + + + /* Would ideally have the option of producing a program which could + * do all three: + */ + switch (key->primitive) { + case PIPE_PRIM_TRIANGLES: /* the unfilled path just below is compiled out, so triangles always use brw_emit_tri_clip */ +#if 0 + if (key->do_unfilled) + brw_emit_unfilled_clip( &c ); + else +#endif + brw_emit_tri_clip( &c ); + break; + case PIPE_PRIM_LINES: + brw_emit_line_clip( &c ); + break; + case PIPE_PRIM_POINTS: + brw_emit_point_clip( &c ); + break; + default: + assert(0); + return; + } + + + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* Upload + */ + brw->clip.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_CLIP_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->clip.prog_data ); +} + + /* Query the BRW_CLIP_PROG cache for *key through brw_search_cache, handing it the addresses of brw->clip.prog_data and brw->clip.prog_gs_offset, and return its result (presumably non-zero on a hit, given how upload_clip_prog uses it - confirm). */ +static boolean search_cache( struct brw_context *brw, + struct brw_clip_prog_key *key ) +{ + return brw_search_cache(&brw->cache[BRW_CLIP_PROG], + key, sizeof(*key), + &brw->clip.prog_data, + &brw->clip.prog_gs_offset); +} + + + + +/* Build the clip program key from the current state and compile a clip program when the cache has no match for it.
+ */ /* Fill a brw_clip_prog_key from the current state and call compile_clip_prog when search_cache finds no matching program. */ +static void upload_clip_prog(struct brw_context *brw) +{ + struct brw_clip_prog_key key; + + memset(&key, 0, sizeof(key)); + + /* Populate the key: + */ + /* BRW_NEW_REDUCED_PRIMITIVE */ + key.primitive = brw->reduced_primitive; + /* CACHE_NEW_VS_PROG */ + key.attrs = brw->vs.prog_data->outputs_written; + /* BRW_NEW_RASTERIZER */ + key.do_flat_shading = (brw->attribs.Raster->flatshade); + /* BRW_NEW_CLIP */ + key.nr_userclip = brw->attribs.Clip.nr; /* XXX */ + +#if 0 + key.clip_mode = BRW_CLIPMODE_NORMAL; + + if (key.primitive == PIPE_PRIM_TRIANGLES) { + if (brw->attribs.Raster->cull_mode == PIPE_WINDING_BOTH) + key.clip_mode = BRW_CLIPMODE_REJECT_ALL; + else { + if (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL || + brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL) + key.do_unfilled = 1; + + /* Most cases the fixed function units will handle. Cases where + * one or more polygon faces are unfilled will require help: + */ + if (key.do_unfilled) { + key.clip_mode = BRW_CLIPMODE_CLIP_NON_REJECTED; + + if (brw->attribs.Raster->offset_cw || + brw->attribs.Raster->offset_ccw) { + key.offset_units = brw->attribs.Raster->offset_units; + key.offset_factor = brw->attribs.Raster->offset_scale; + } + key.fill_ccw = brw->attribs.Raster->fill_ccw; + key.fill_cw = brw->attribs.Raster->fill_cw; + key.offset_ccw = brw->attribs.Raster->offset_ccw; + key.offset_cw = brw->attribs.Raster->offset_cw; + if (brw->attribs.Raster->light_twoside && + key.fill_cw != CLIP_CULL) + key.copy_bfc_cw = 1; + } + } + } +#else + /* With the block above compiled out, every key uses ACCEPT_ALL and the unfilled/offset fields stay zero from the memset. */ key.clip_mode = BRW_CLIPMODE_ACCEPT_ALL; +#endif + + /* Compile only when no cached program matches the key. */ if (!search_cache(brw, &key)) + compile_clip_prog( brw, &key ); +} + /* Tracked-state entry pairing upload_clip_prog with the brw and cache dirty flags named in its comments. */ +const struct brw_tracked_state brw_clip_prog = { + .dirty = { + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_CLIP | + BRW_NEW_REDUCED_PRIMITIVE), + .cache = CACHE_NEW_VS_PROG + }, + .update = upload_clip_prog +}; diff --git a/src/gallium/drivers/i965simple/brw_clip.h b/src/gallium/drivers/i965simple/brw_clip.h new file mode 100644 index
0000000000..d70fc094ff --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip.h @@ -0,0 +1,170 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef BRW_CLIP_H +#define BRW_CLIP_H + + +#include "brw_context.h" +#include "brw_eu.h" + +#define MAX_VERTS (3+6+6) + +/* Note that if unfilled primitives are being emitted, we have to fix + * up polygon offset and flatshading at this point: + */ +struct brw_clip_prog_key { + unsigned attrs:32; /* set from vs.prog_data->outputs_written in upload_clip_prog */ + unsigned primitive:4; + unsigned nr_userclip:3; + unsigned do_flat_shading:1; + unsigned do_unfilled:1; + unsigned fill_cw:2; /* includes cull information */ + unsigned fill_ccw:2; /* includes cull information */ + unsigned offset_cw:1; + unsigned offset_ccw:1; + unsigned pad0:17; /* primitive through pad0 add up to 32 bits */ + + unsigned copy_bfc_cw:1; + unsigned copy_bfc_ccw:1; + unsigned clip_mode:3; + unsigned pad1:27; /* copy_bfc_cw through pad1 add up to 32 bits */ + + float offset_factor; + float offset_units; +}; + + /* Presumably the values stored in fill_cw / fill_ccw: the disabled code in upload_clip_prog compares key.fill_cw against CLIP_CULL. */ +#define CLIP_LINE 0 +#define CLIP_POINT 1 +#define CLIP_FILL 2 +#define CLIP_CULL 3 + + /* brw_clip_tri.c ANDs element 2 of R0 with this mask before comparing the result against _3DPRIM_* values. */ +#define PRIM_MASK (0x1f) + +struct brw_clip_compile { + struct brw_compile func; + struct brw_clip_prog_key key; + struct brw_clip_prog_data prog_data; + + struct { + struct brw_reg R0; + struct brw_reg vertex[MAX_VERTS]; + + struct brw_reg t; + struct brw_reg t0, t1; + struct brw_reg dp0, dp1; + + struct brw_reg dpPrev; + struct brw_reg dp; + struct brw_reg loopcount; + struct brw_reg nr_verts; + struct brw_reg planemask; + + struct brw_reg inlist; + struct brw_reg outlist; + struct brw_reg freelist; + + struct brw_reg dir; + struct brw_reg tmp0, tmp1; + struct brw_reg offset; + + struct brw_reg fixed_planes; + struct brw_reg plane_equation; + } reg; + + /* 3 different ways of expressing vertex size: + */ + unsigned nr_attrs; + unsigned nr_regs; + unsigned nr_bytes; + + /* Temporary GRF range: get_tmp() in brw_clip_tri.c hands out register number last_tmp and increments it; release_tmps() resets last_tmp to first_tmp. */ unsigned first_tmp; + unsigned last_tmp; + + boolean need_direction; + + unsigned last_mrf; + + unsigned header_position_offset; + unsigned offset[PIPE_MAX_ATTRIBS]; /* per-attribute byte offsets filled in by compile_clip_prog. NOTE(review): that loop runs to PIPE_MAX_SHADER_OUTPUTS, not PIPE_MAX_ATTRIBS - confirm the two limits agree. */ +}; + +#define ATTR_SIZE (4*4) + +/* Points are only culled, so no need
for a clip routine, however it + * works out easier to have a dummy one. + */ +void brw_emit_unfilled_clip( struct brw_clip_compile *c ); /* its call in compile_clip_prog sits inside #if 0 */ +void brw_emit_tri_clip( struct brw_clip_compile *c ); +void brw_emit_line_clip( struct brw_clip_compile *c ); +void brw_emit_point_clip( struct brw_clip_compile *c ); + +/* brw_clip_tri.c, for use by the unfilled clip routine: + */ +void brw_clip_tri_init_vertices( struct brw_clip_compile *c ); +void brw_clip_tri_flat_shade( struct brw_clip_compile *c ); +void brw_clip_tri( struct brw_clip_compile *c ); +void brw_clip_tri_emit_polygon( struct brw_clip_compile *c ); +void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, + unsigned nr_verts ); + + +/* Utils: + */ + +void brw_clip_interp_vertex( struct brw_clip_compile *c, + struct brw_indirect dest_ptr, + struct brw_indirect v0_ptr, /* from */ + struct brw_indirect v1_ptr, /* to */ + struct brw_reg t0, + boolean force_edgeflag ); + +void brw_clip_init_planes( struct brw_clip_compile *c ); + +void brw_clip_emit_vue(struct brw_clip_compile *c, + struct brw_indirect vert, + boolean allocate, + boolean eot, + unsigned header); + +void brw_clip_kill_thread(struct brw_clip_compile *c); + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ); +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ); + +void brw_clip_copy_colors( struct brw_clip_compile *c, + unsigned to, unsigned from ); + +void brw_clip_init_clipmask( struct brw_clip_compile *c ); + +#endif /* BRW_CLIP_H */ diff --git a/src/gallium/drivers/i965simple/brw_clip_line.c b/src/gallium/drivers/i965simple/brw_clip_line.c new file mode 100644 index 0000000000..75d9e5fcda --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_line.c @@ -0,0 +1,245 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver.
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + /* Assign GRF numbers for the line clipper: R0, the plane block when key.nr_userclip is set, four vertex slots of c->nr_regs registers each, the scalar temporaries, and fixed_planes when there are no user clip planes; then record first_tmp/last_tmp and the prog_data read lengths and total_grf. */ +static void brw_clip_line_alloc_regs( struct brw_clip_compile *c ) +{ + unsigned i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + if (c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec4_grf(i, 0); + i += (6 + c->key.nr_userclip + 1) / 2; + + c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; + } + else + c->prog_data.curb_read_length = 0; + + + /* Payload vertices plus space for more generated vertices: + */ + /* Four slots: clip_and_emit_line reads vertex[0..1] and writes the interpolated results to vertex[2..3]. */ for (j = 0; j < 4; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + c->reg.t = brw_vec1_grf(i, 0); + c->reg.t0 = brw_vec1_grf(i, 1); + c->reg.t1 = brw_vec1_grf(i, 2); + c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); + c->reg.plane_equation = brw_vec4_grf(i, 4); + i++; + + c->reg.dp0 = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ + c->reg.dp1 = brw_vec1_grf(i, 4); + i++; + + if (!c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec8_grf(i, 0); + i++; + } + + + c->first_tmp = i; + c->last_tmp = i; + + c->prog_data.urb_read_length = c->nr_regs; /* ?
*/ + c->prog_data.total_grf = i; +} + + + +/* Line clipping, more or less following the following algorithm: + * + * for (p=0;p<MAX_PLANES;p++) { + * if (clipmask & (1 << p)) { + * float dp0 = DOTPROD( vtx0, plane[p] ); + * float dp1 = DOTPROD( vtx1, plane[p] ); + * + * if (IS_NEGATIVE(dp1)) { + * float t = dp1 / (dp1 - dp0); + * if (t > t1) t1 = t; + * } else { + * float t = dp0 / (dp0 - dp1); + * if (t > t0) t0 = t; + * } + * + * if (t0 + t1 >= 1.0) + * return; + * } + * } + * + * interp( ctx, newvtx0, vtx0, vtx1, t0 ); + * interp( ctx, newvtx1, vtx1, vtx0, t1 ); + * + * In the emitted code the t0 + t1 test is made once, after the plane loop + * (the line is emitted only when t0 + t1 < 1.0), and the else branch only + * updates t0 when dp0 is negative. + */ +static void clip_and_emit_line( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_indirect vtx0 = brw_indirect(0, 0); + struct brw_indirect vtx1 = brw_indirect(1, 0); + struct brw_indirect newvtx0 = brw_indirect(2, 0); + struct brw_indirect newvtx1 = brw_indirect(3, 0); + struct brw_indirect plane_ptr = brw_indirect(4, 0); + struct brw_instruction *plane_loop; + struct brw_instruction *plane_active; + struct brw_instruction *is_negative; + struct brw_instruction *is_neg2; + struct brw_instruction *not_culled; + struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); + + brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0])); + brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1])); + brw_MOV(p, get_addr_reg(newvtx0), brw_address(c->reg.vertex[2])); + brw_MOV(p, get_addr_reg(newvtx1), brw_address(c->reg.vertex[3])); + brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); + + /* Note: init t0, t1 together: + */ + brw_MOV(p, vec2(c->reg.t0), brw_imm_f(0)); + + brw_clip_init_planes(c); + brw_clip_init_clipmask(c); + + /* -ve rhw workaround */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + plane_loop = brw_DO(p,
BRW_EXECUTE_1); + { + /* if (planemask & 1) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, v1_null_ud, c->reg.planemask, brw_imm_ud(1)); + + plane_active = brw_IF(p, BRW_EXECUTE_1); + { + if (c->key.nr_userclip) + brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); + else + brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); + +#if 0 + /* dp = DP4(vtx->position, plane) + */ + brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + + /* if (IS_NEGATIVE(dp1)) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); +#else + #warning "disabled" +#endif + /* NOTE(review): with the DP4 block above compiled out, no instruction visible in this function writes dp0/dp1 or sets the L condition this IF was written to test - the line clipper looks non-functional until that code is restored. */ is_negative = brw_IF(p, BRW_EXECUTE_1); + { + brw_ADD(p, c->reg.t, c->reg.dp1, negate(c->reg.dp0)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp1); + + /* if (t > t1) t1 = t, as in the algorithm above; presumably the CMP leaves the MOV predicated until predicate control is reset - confirm */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t1 ); + brw_MOV(p, c->reg.t1, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + is_negative = brw_ELSE(p, is_negative); + { + /* Coming back in. We know that both cannot be negative + * because the line would have been culled in that case.
+ */ + + /* If both are positive, do nothing */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.dp0, brw_imm_f(0.0)); + is_neg2 = brw_IF(p, BRW_EXECUTE_1); + { + brw_ADD(p, c->reg.t, c->reg.dp0, negate(c->reg.dp1)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp0); + + /* if (t > t0) t0 = t */ brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_G, c->reg.t, c->reg.t0 ); + brw_MOV(p, c->reg.t0, c->reg.t); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + brw_ENDIF(p, is_neg2); + } + brw_ENDIF(p, is_negative); + } + brw_ENDIF(p, plane_active); + + /* plane_ptr++; + */ + brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); + + /* while ((planemask >>= 1) != 0) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); + } + brw_WHILE(p, plane_loop); + + /* Emit the clipped line only when t0 + t1 < 1.0. */ brw_ADD(p, c->reg.t, c->reg.t0, c->reg.t1); + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.t, brw_imm_f(1.0)); + not_culled = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_interp_vertex(c, newvtx0, vtx0, vtx1, c->reg.t0, FALSE); + brw_clip_interp_vertex(c, newvtx1, vtx1, vtx0, c->reg.t1, FALSE); + + /* Per the prototype in brw_clip.h the arguments are (vert, allocate, eot, header): the first vertex passes allocate=1, eot=0 and the second allocate=0, eot=1. */ brw_clip_emit_vue(c, newvtx0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); + brw_clip_emit_vue(c, newvtx1, 0, 1, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); + } + brw_ENDIF(p, not_culled); + brw_clip_kill_thread(c); +} + + + /* Entry point for line clip programs: allocate registers, call brw_clip_copy_colors(c, 0, 1) when key.do_flat_shading is set, then emit the clip-and-emit sequence. */ +void brw_emit_line_clip( struct brw_clip_compile *c ) +{ + brw_clip_line_alloc_regs(c); + + if (c->key.do_flat_shading) + brw_clip_copy_colors(c, 0, 1); + + clip_and_emit_line(c); +} diff --git a/src/gallium/drivers/i965simple/brw_clip_point.c b/src/gallium/drivers/i965simple/brw_clip_point.c new file mode 100644 index 0000000000..6fce7210d1 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_point.c @@ -0,0 +1,47 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + +/* Point clipping: no clipping code is emitted. The program only runs the + * triangle register layout with nr_verts == 0 and then kills the thread. + */ +void brw_emit_point_clip( struct brw_clip_compile *c ) +{ + /* Send an empty message to kill the thread: + */ + brw_clip_tri_alloc_regs(c, 0); + brw_clip_kill_thread(c); +} diff --git a/src/gallium/drivers/i965simple/brw_clip_state.c b/src/gallium/drivers/i965simple/brw_clip_state.c new file mode 100644 index 0000000000..8e78dd51be --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_state.c @@ -0,0 +1,93 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + + /* Fill a zeroed brw_clip_unit_state from the current clip program data, CURBE offsets and URB layout plus a set of fixed values, then store it through brw_cache_data in the BRW_CLIP_UNIT cache and keep the returned offset in brw->clip.state_gs_offset. */ +static void upload_clip_unit( struct brw_context *brw ) +{ + struct brw_clip_unit_state clip; + + memset(&clip, 0, sizeof(clip)); + + /* CACHE_NEW_CLIP_PROG */ + /* total_grf rounded up to a multiple of 16, counted in blocks of 16, minus one */ clip.thread0.grf_reg_count = + align(brw->clip.prog_data->total_grf, 16) / 16 - 1; + clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6; + clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length; + clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length; + clip.clip5.clip_mode = brw->clip.prog_data->clip_mode; + + /* BRW_NEW_CURBE_OFFSETS */ + clip.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + + /* BRW_NEW_URB_FENCE */ + clip.thread4.nr_urb_entries = brw->urb.nr_clip_entries; + clip.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + clip.thread4.max_threads = 1; /* 2 threads */ + + if (BRW_DEBUG & DEBUG_STATS) + clip.thread4.stats_enable = 1; + + /* CONSTANT: the values below do not read any brw state */ + clip.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + clip.thread1.single_program_flow = 1; + clip.thread3.dispatch_grf_start_reg = 1; + clip.thread3.urb_entry_read_offset = 0; + clip.clip5.userclip_enable_flags = 0x7f; + clip.clip5.userclip_must_clip = 1; + clip.clip5.guard_band_enable = 0; + clip.clip5.viewport_z_clip_enable = 1; + clip.clip5.viewport_xy_clip_enable = 1; + clip.clip5.vertex_position_space = BRW_CLIP_NDCSPACE; + clip.clip5.api_mode = BRW_CLIP_API_OGL; + clip.clip6.clipper_viewport_state_ptr = 0; + clip.viewport_xmin = -1; + clip.viewport_xmax = 1; + clip.viewport_ymin = -1; + clip.viewport_ymax = 1; + + brw->clip.state_gs_offset = brw_cache_data( &brw->cache[BRW_CLIP_UNIT], &clip ); +} + + /* Tracked-state entry pairing upload_clip_unit with the dirty flags named in its comments. */ +const struct brw_tracked_state brw_clip_unit = { + .dirty = {
+ .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_CLIP_PROG /* covers the brw->clip.prog_data and prog_gs_offset reads */ + }, + .update = upload_clip_unit +}; diff --git a/src/gallium/drivers/i965simple/brw_clip_tri.c b/src/gallium/drivers/i965simple/brw_clip_tri.c new file mode 100644 index 0000000000..c5da7b825e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_tri.c @@ -0,0 +1,566 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + /* Return a vec4 GRF at register number c->last_tmp, advance last_tmp, and raise prog_data.total_grf when the new last_tmp exceeds it. */ +static struct brw_reg get_tmp( struct brw_clip_compile *c ) +{ + struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + /* Reset last_tmp to first_tmp so later get_tmp() calls start again from the first temporary register. */ +static void release_tmps( struct brw_clip_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + + /* Assign GRF numbers for the triangle clipper: R0, the plane block when key.nr_userclip is set, nr_verts vertex slots of c->nr_regs registers each, the scalar temporaries, the inlist/outlist/freelist registers, fixed_planes when there are no user clip planes, and dir/offset/tmp0/tmp1 when key.do_unfilled is set; then record first_tmp/last_tmp, urb_read_length and total_grf. */ +void brw_clip_tri_alloc_regs( struct brw_clip_compile *c, + unsigned nr_verts ) +{ + unsigned i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + if (c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec4_grf(i, 0); + i += (6 + c->key.nr_userclip + 1) / 2; + + c->prog_data.curb_read_length = (6 + c->key.nr_userclip + 1) / 2; + } + else + c->prog_data.curb_read_length = 0; + + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < nr_verts; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + /* Odd attribute count: emit a zero write at byte offset nr_attrs*16 + 32 of each of the first three vertex registers. NOTE(review): this loop always covers vertex[0..2], even when nr_verts < 3 (brw_emit_point_clip passes nr_verts == 0, which leaves those entries as zeroed by the memset in compile_clip_prog) - confirm this is intended. */ if (c->nr_attrs & 1) { + for (j = 0; j < 3; j++) { + unsigned delta = c->nr_attrs*16 + 32; + brw_MOV(&c->func, byte_offset(c->reg.vertex[j], delta), brw_imm_f(0)); + } + } + + c->reg.t = brw_vec1_grf(i, 0); + c->reg.loopcount = retype(brw_vec1_grf(i, 1), BRW_REGISTER_TYPE_UD); + c->reg.nr_verts = retype(brw_vec1_grf(i, 2), BRW_REGISTER_TYPE_UD); + c->reg.planemask = retype(brw_vec1_grf(i, 3), BRW_REGISTER_TYPE_UD); + c->reg.plane_equation = brw_vec4_grf(i, 4); + i++; + + c->reg.dpPrev = brw_vec1_grf(i, 0); /* fixme - dp4 will clobber r.1,2,3 */ + c->reg.dp = brw_vec1_grf(i, 4); + i++; + + c->reg.inlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + c->reg.outlist = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + c->reg.freelist =
brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, i, 0); + i++; + + if (!c->key.nr_userclip) { + c->reg.fixed_planes = brw_vec8_grf(i, 0); + i++; + } + + if (c->key.do_unfilled) { + c->reg.dir = brw_vec4_grf(i, 0); + c->reg.offset = brw_vec4_grf(i, 4); + i++; + c->reg.tmp0 = brw_vec4_grf(i, 0); + c->reg.tmp1 = brw_vec4_grf(i, 4); + i++; + } + + c->first_tmp = i; + c->last_tmp = i; + + c->prog_data.urb_read_length = c->nr_regs; /* ? */ + c->prog_data.total_grf = i; +} + + + /* Emit code that fills inlist[0..2] with the addresses of vertex[0..2] (entries 0 and 1 swapped for _3DPRIM_TRISTRIP_REVERSE), writes -1 or 1 to reg.dir when need_direction is set, zeroes the outlist register and sets nr_verts to 3. */ +void brw_clip_tri_init_vertices( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ + struct brw_instruction *is_rev; + + /* Initial list of indices for incoming vertexes: + */ + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); + + /* XXX: Is there an easier way to do this? Need to reverse every + * second tristrip element: Can ignore sometimes?
+ */ + is_rev = brw_IF(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[1]) ); + brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[0]) ); + if (c->need_direction) + brw_MOV(p, c->reg.dir, brw_imm_f(-1)); + } + is_rev = brw_ELSE(p, is_rev); + { + brw_MOV(p, get_element(c->reg.inlist, 0), brw_address(c->reg.vertex[0]) ); + brw_MOV(p, get_element(c->reg.inlist, 1), brw_address(c->reg.vertex[1]) ); + if (c->need_direction) + brw_MOV(p, c->reg.dir, brw_imm_f(1)); + } + brw_ENDIF(p, is_rev); + + brw_MOV(p, get_element(c->reg.inlist, 2), brw_address(c->reg.vertex[2]) ); + brw_MOV(p, brw_vec8_grf(c->reg.outlist.nr, 0), brw_imm_f(0)); + brw_MOV(p, c->reg.nr_verts, brw_imm_ud(3)); +} + + + /* Emit the flat-shading color copies: when the primitive type in R0 element 2 equals _3DPRIM_POLYGON, copy from vertex 0 to vertices 1 and 2; otherwise copy from vertex 2 to vertices 0 and 1 (brw_clip_copy_colors takes (to, from) per brw_clip.h). */ +void brw_clip_tri_flat_shade( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *is_poly; + struct brw_reg tmp0 = c->reg.loopcount; /* handy temporary */ + + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_POLYGON)); + + is_poly = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_copy_colors(c, 1, 0); + brw_clip_copy_colors(c, 2, 0); + } + is_poly = brw_ELSE(p, is_poly); + { + brw_clip_copy_colors(c, 0, 2); + brw_clip_copy_colors(c, 1, 2); + } + brw_ENDIF(p, is_poly); +} + + + +/* Use mesa's clipping algorithms, translated to GEN4 assembly.
+ */ +void brw_clip_tri( struct brw_clip_compile *c ) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_indirect vtx = brw_indirect(0, 0); + struct brw_indirect vtxPrev = brw_indirect(1, 0); + struct brw_indirect vtxOut = brw_indirect(2, 0); + struct brw_indirect plane_ptr = brw_indirect(3, 0); + struct brw_indirect inlist_ptr = brw_indirect(4, 0); + struct brw_indirect outlist_ptr = brw_indirect(5, 0); + struct brw_indirect freelist_ptr = brw_indirect(6, 0); + struct brw_instruction *plane_loop; + struct brw_instruction *plane_active; + struct brw_instruction *vertex_loop; + struct brw_instruction *next_test; + struct brw_instruction *prev_test; + + brw_MOV(p, get_addr_reg(vtxPrev), brw_address(c->reg.vertex[2]) ); + brw_MOV(p, get_addr_reg(plane_ptr), brw_clip_plane0_address(c)); + brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); + + brw_MOV(p, get_addr_reg(freelist_ptr), brw_address(c->reg.vertex[3]) ); + + plane_loop = brw_DO(p, BRW_EXECUTE_1); + { + /* if (planemask & 1) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, vec1(brw_null_reg()), c->reg.planemask, brw_imm_ud(1)); + + plane_active = brw_IF(p, BRW_EXECUTE_1); + { + /* vtxOut = freelist_ptr++ + */ + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(freelist_ptr) ); + brw_ADD(p, get_addr_reg(freelist_ptr), get_addr_reg(freelist_ptr), brw_imm_uw(c->nr_regs * REG_SIZE)); + + if (c->key.nr_userclip) + brw_MOV(p, c->reg.plane_equation, deref_4f(plane_ptr, 0)); + else + brw_MOV(p, c->reg.plane_equation, deref_4b(plane_ptr, 0)); + + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, c->reg.nr_verts, brw_imm_ud(0)); + + vertex_loop = brw_DO(p, BRW_EXECUTE_1); + { + /* vtx = *input_ptr; + */ + brw_MOV(p, get_addr_reg(vtx), deref_1uw(inlist_ptr, 0)); + + /* IS_NEGATIVE(prev) */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, 
c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + prev_test = brw_IF(p, BRW_EXECUTE_1); + { + /* IS_POSITIVE(next) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_GE); + brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + next_test = brw_IF(p, BRW_EXECUTE_1); + { + + /* Coming back in. + */ + brw_ADD(p, c->reg.t, c->reg.dpPrev, negate(c->reg.dp)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dpPrev); + + /* If (vtxOut == 0) vtxOut = vtxPrev + */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtxPrev) ); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_clip_interp_vertex(c, vtxOut, vtxPrev, vtx, c->reg.t, FALSE); + + /* *outlist_ptr++ = vtxOut; + * nr_verts++; + * vtxOut = 0; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); + } + brw_ENDIF(p, next_test); + + } + prev_test = brw_ELSE(p, prev_test); + { + /* *outlist_ptr++ = vtxPrev; + * nr_verts++; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxPrev)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + + /* IS_NEGATIVE(next) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + next_test = brw_IF(p, BRW_EXECUTE_1); + { + /* Going out of bounds. Avoid division by zero as we + * know dp != dpPrev from DIFFERENT_SIGNS, above. 
+ */ + brw_ADD(p, c->reg.t, c->reg.dp, negate(c->reg.dpPrev)); + brw_math_invert(p, c->reg.t, c->reg.t); + brw_MUL(p, c->reg.t, c->reg.t, c->reg.dp); + + /* If (vtxOut == 0) vtxOut = vtx + */ + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, get_addr_reg(vtxOut), brw_imm_uw(0) ); + brw_MOV(p, get_addr_reg(vtxOut), get_addr_reg(vtx) ); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_clip_interp_vertex(c, vtxOut, vtx, vtxPrev, c->reg.t, TRUE); + + /* *outlist_ptr++ = vtxOut; + * nr_verts++; + * vtxOut = 0; + */ + brw_MOV(p, deref_1uw(outlist_ptr, 0), get_addr_reg(vtxOut)); + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_uw(sizeof(short))); + brw_ADD(p, c->reg.nr_verts, c->reg.nr_verts, brw_imm_ud(1)); + brw_MOV(p, get_addr_reg(vtxOut), brw_imm_uw(0) ); + } + brw_ENDIF(p, next_test); + } + brw_ENDIF(p, prev_test); + + /* vtxPrev = vtx; + * inlist_ptr++; + */ + brw_MOV(p, get_addr_reg(vtxPrev), get_addr_reg(vtx)); + brw_ADD(p, get_addr_reg(inlist_ptr), get_addr_reg(inlist_ptr), brw_imm_uw(sizeof(short))); + + /* while (--loopcount != 0) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, vertex_loop); + + /* vtxPrev = *(outlist_ptr-1) OR: outlist[nr_verts-1] + * inlist = outlist + * inlist_ptr = &inlist[0] + * outlist_ptr = &outlist[0] + */ + brw_ADD(p, get_addr_reg(outlist_ptr), get_addr_reg(outlist_ptr), brw_imm_w(-2)); + brw_MOV(p, get_addr_reg(vtxPrev), deref_1uw(outlist_ptr, 0)); + brw_MOV(p, brw_vec8_grf(c->reg.inlist.nr, 0), brw_vec8_grf(c->reg.outlist.nr, 0)); + brw_MOV(p, get_addr_reg(inlist_ptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(outlist_ptr), brw_address(c->reg.outlist)); + } + brw_ENDIF(p, plane_active); + + /* plane_ptr++; + */ + brw_ADD(p, get_addr_reg(plane_ptr), get_addr_reg(plane_ptr), brw_clip_plane_stride(c)); + + /* nr_verts >= 3 + */ + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + 
c->reg.nr_verts, + brw_imm_ud(3)); + + /* && (planemask>>=1) != 0 + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(1)); + } + brw_WHILE(p, plane_loop); +#else + #warning "disabled" +#endif +} + + + +void brw_clip_tri_emit_polygon(struct brw_clip_compile *c) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop, *if_insn; + + /* for (loopcount = nr_verts-2; loopcount > 0; loopcount--) + */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_G); + brw_ADD(p, + c->reg.loopcount, + c->reg.nr_verts, + brw_imm_d(-2)); + + if_insn = brw_IF(p, BRW_EXECUTE_1); + { + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect vptr = brw_indirect(1, 0); + + brw_MOV(p, get_addr_reg(vptr), brw_address(c->reg.inlist)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + brw_clip_emit_vue(c, v0, 1, 0, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_START)); + + brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_TRIFAN << 2)); + + brw_ADD(p, get_addr_reg(vptr), get_addr_reg(vptr), brw_imm_uw(2)); + brw_MOV(p, get_addr_reg(v0), deref_1uw(vptr, 0)); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); + + brw_clip_emit_vue(c, v0, 0, 1, ((_3DPRIM_TRIFAN << 2) | R02_PRIM_END)); + } + brw_ENDIF(p, if_insn); +} + +static void do_clip_tri( struct brw_clip_compile *c ) +{ + brw_clip_init_planes(c); + + brw_clip_tri(c); +} + + +static void maybe_do_clip_tri( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *do_clip; + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); + do_clip = brw_IF(p, BRW_EXECUTE_1); + { + do_clip_tri(c); + } + brw_ENDIF(p, do_clip); +} + +static void brw_clip_test( struct 
brw_clip_compile *c ) +{ +#if 0 + struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + + struct brw_reg v0 = get_tmp(c); + struct brw_reg v1 = get_tmp(c); + struct brw_reg v2 = get_tmp(c); + + struct brw_indirect vt0 = brw_indirect(0, 0); + struct brw_indirect vt1 = brw_indirect(1, 0); + struct brw_indirect vt2 = brw_indirect(2, 0); + + struct brw_compile *p = &c->func; + + brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); + brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); + brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); + brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS])); + + /* test nearz, xmin, ymin plane */ + brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, 
brw_imm_ud((1<<1))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + /* test farz, xmax, ymax plane */ + brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_XOR(p, t, t1, t2); + brw_XOR(p, t1, t2, t3); + brw_OR(p, t, t, t1); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 0), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 1), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ, + get_element(t, 2), brw_imm_ud(0)); + brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + release_tmps(c); +#else + #warning "disabled" +#endif +} + + +void brw_emit_tri_clip( struct brw_clip_compile *c ) +{ + struct brw_instruction *neg_rhw; + struct brw_compile *p = &c->func; + brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); + brw_clip_tri_init_vertices(c); + brw_clip_init_clipmask(c); + + /* if -ve rhw workaround bit is set, + do cliptest */ + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2), + brw_imm_ud(1<<20)); + neg_rhw = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_test(c); + } + brw_ENDIF(p, neg_rhw); + + /* Can't push into do_clip_tri because with polygon (or quad) + * flatshading, need to apply the flatshade here because we don't + * respect the PV when converting to trifan for emit: + */ + if (c->key.do_flat_shading) + brw_clip_tri_flat_shade(c); + + 
if (c->key.clip_mode == BRW_CLIPMODE_NORMAL) + do_clip_tri(c); + else + maybe_do_clip_tri(c); + + brw_clip_tri_emit_polygon(c); + + /* Send an empty message to kill the thread: + */ + brw_clip_kill_thread(c); +} diff --git a/src/gallium/drivers/i965simple/brw_clip_unfilled.c b/src/gallium/drivers/i965simple/brw_clip_unfilled.c new file mode 100644 index 0000000000..b774a76dd6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_unfilled.c @@ -0,0 +1,477 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + +/* This is performed against the original triangles, so no indirection + * required: +BZZZT! + */ +static void compute_tri_direction( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg e = c->reg.tmp0; + struct brw_reg f = c->reg.tmp1; + struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]); + struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]); + struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); + + + /* Calculate the vectors of two edges of the triangle: + */ + brw_ADD(p, e, v0, negate(v2)); + brw_ADD(p, f, v1, negate(v2)); + + /* Take their crossproduct: + */ + brw_set_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, vec4(brw_null_reg()), brw_swizzle(e, 1,2,0,3), brw_swizzle(f,2,0,1,3)); + brw_MAC(p, vec4(e), negate(brw_swizzle(e, 2,0,1,3)), brw_swizzle(f,1,2,0,3)); + brw_set_access_mode(p, BRW_ALIGN_1); + + brw_MUL(p, c->reg.dir, c->reg.dir, vec4(e)); +} + + +static void cull_direction( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + unsigned conditional; + + assert (!(c->key.fill_ccw == CLIP_CULL && + c->key.fill_cw == CLIP_CULL)); + + if (c->key.fill_ccw == CLIP_CULL) + conditional = BRW_CONDITIONAL_GE; + else + conditional = BRW_CONDITIONAL_L; + + brw_CMP(p, + vec1(brw_null_reg()), + conditional, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, ccw); +} + + + +static void copy_bfc( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + unsigned conditional; + + /* Do we have any colors to copy? 
+ */ + if (!(c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) && + !(c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1])) + return; + + /* In some wierd degnerate cases we can end up testing the + * direction twice, once for culling and once for bfc copying. Oh + * well, that's what you get for setting wierd GL state. + */ + if (c->key.copy_bfc_ccw) + conditional = BRW_CONDITIONAL_GE; + else + conditional = BRW_CONDITIONAL_L; + + brw_CMP(p, + vec1(brw_null_reg()), + conditional, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + unsigned i; + + for (i = 0; i < 3; i++) { + if (c->offset[VERT_RESULT_COL0] && c->offset[VERT_RESULT_BFC0]) + brw_MOV(p, + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL0]), + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC0])); + + if (c->offset[VERT_RESULT_COL1] && c->offset[VERT_RESULT_BFC1]) + brw_MOV(p, + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_COL1]), + byte_offset(c->reg.vertex[i], c->offset[VERT_RESULT_BFC1])); + } + } + brw_ENDIF(p, ccw); +} + + + + +/* + float iz = 1.0 / dir.z; + float ac = dir.x * iz; + float bc = dir.y * iz; + offset = ctx->Polygon.OffsetUnits * DEPTH_SCALE; + offset += MAX2( abs(ac), abs(bc) ) * ctx->Polygon.OffsetFactor; + offset *= MRD; +*/ +static void compute_offset( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg off = c->reg.offset; + struct brw_reg dir = c->reg.dir; + + brw_math_invert(p, get_element(off, 2), get_element(dir, 2)); + brw_MUL(p, vec2(off), dir, get_element(off, 2)); + + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + brw_abs(get_element(off, 0)), + brw_abs(get_element(off, 1))); + + brw_SEL(p, vec1(off), brw_abs(get_element(off, 0)), brw_abs(get_element(off, 1))); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_MUL(p, vec1(off), off, brw_imm_f(c->key.offset_factor)); + brw_ADD(p, vec1(off), off, brw_imm_f(c->key.offset_units)); +} + + +static 
void merge_edgeflags( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *is_poly; + struct brw_reg tmp0 = get_element_ud(c->reg.tmp0, 0); + + brw_AND(p, tmp0, get_element_ud(c->reg.R0, 2), brw_imm_ud(PRIM_MASK)); + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_EQ, + tmp0, + brw_imm_ud(_3DPRIM_POLYGON)); + + /* Get away with using reg.vertex because we know that this is not + * a _3DPRIM_TRISTRIP_REVERSE: + */ + is_poly = brw_IF(p, BRW_EXECUTE_1); + { + brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); + brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<8)); + brw_MOV(p, byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_EQ); + brw_AND(p, vec1(brw_null_reg()), get_element_ud(c->reg.R0, 2), brw_imm_ud(1<<9)); + brw_MOV(p, byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_EDGE]), brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + brw_ENDIF(p, is_poly); +} + + + +static void apply_one_offset( struct brw_clip_compile *c, + struct brw_indirect vert ) +{ + struct brw_compile *p = &c->func; + struct brw_reg pos = deref_4f(vert, c->offset[VERT_RESULT_HPOS]); + struct brw_reg z = get_element(pos, 2); + + brw_ADD(p, z, z, vec1(c->reg.offset)); +} + + + +/*********************************************************************** + * Output clipped polygon as an unfilled primitive: + */ +static void emit_lines(struct brw_clip_compile *c, + boolean do_offset) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop; + struct brw_instruction *draw_edge; + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect v1 = brw_indirect(1, 0); + struct brw_indirect v0ptr = brw_indirect(2, 0); + struct brw_indirect v1ptr = brw_indirect(3, 0); + + /* Need a seperate loop for offset: + */ + if (do_offset) { + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); 
+ brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + apply_one_offset(c, v0); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_G); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); + } + + /* v1ptr = &inlist[nr_verts] + * *v1ptr = v0 + */ + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v0ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); + brw_ADD(p, get_addr_reg(v1ptr), get_addr_reg(v1ptr), retype(c->reg.nr_verts, BRW_REGISTER_TYPE_UW)); + brw_MOV(p, deref_1uw(v1ptr, 0), deref_1uw(v0ptr, 0)); + + loop = brw_DO(p, BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_MOV(p, get_addr_reg(v1), deref_1uw(v0ptr, 2)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + /* draw edge if edgeflag != 0 */ + brw_CMP(p, + vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, + deref_1f(v0, c->offset[VERT_RESULT_EDGE]), + brw_imm_f(0)); + draw_edge = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_START); + brw_clip_emit_vue(c, v1, 1, 0, (_3DPRIM_LINESTRIP << 2) | R02_PRIM_END); + } + brw_ENDIF(p, draw_edge); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); +} + + + +static void emit_points(struct brw_clip_compile *c, + boolean do_offset ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *loop; + struct brw_instruction *draw_point; + + struct brw_indirect v0 = brw_indirect(0, 0); + struct brw_indirect v0ptr = brw_indirect(2, 0); + + brw_MOV(p, c->reg.loopcount, c->reg.nr_verts); + brw_MOV(p, get_addr_reg(v0ptr), brw_address(c->reg.inlist)); + + loop = brw_DO(p, 
BRW_EXECUTE_1); + { + brw_MOV(p, get_addr_reg(v0), deref_1uw(v0ptr, 0)); + brw_ADD(p, get_addr_reg(v0ptr), get_addr_reg(v0ptr), brw_imm_uw(2)); + + /* draw if edgeflag != 0 + */ + brw_CMP(p, + vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, + deref_1f(v0, c->offset[VERT_RESULT_EDGE]), + brw_imm_f(0)); + draw_point = brw_IF(p, BRW_EXECUTE_1); + { + if (do_offset) + apply_one_offset(c, v0); + + brw_clip_emit_vue(c, v0, 1, 0, (_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END); + } + brw_ENDIF(p, draw_point); + + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + brw_ADD(p, c->reg.loopcount, c->reg.loopcount, brw_imm_d(-1)); + } + brw_WHILE(p, loop); +} + + + + + + + +static void emit_primitives( struct brw_clip_compile *c, + unsigned mode, + boolean do_offset ) +{ + switch (mode) { + case CLIP_FILL: + brw_clip_tri_emit_polygon(c); + break; + + case CLIP_LINE: + emit_lines(c, do_offset); + break; + + case CLIP_POINT: + emit_points(c, do_offset); + break; + + case CLIP_CULL: + assert(0); + break; + } +} + + + +static void emit_unfilled_primitives( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *ccw; + + /* Direction culling has already been done. 
+ */ + if (c->key.fill_ccw != c->key.fill_cw && + c->key.fill_ccw != CLIP_CULL && + c->key.fill_cw != CLIP_CULL) + { + brw_CMP(p, + vec1(brw_null_reg()), + BRW_CONDITIONAL_GE, + get_element(c->reg.dir, 2), + brw_imm_f(0)); + + ccw = brw_IF(p, BRW_EXECUTE_1); + { + emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); + } + ccw = brw_ELSE(p, ccw); + { + emit_primitives(c, c->key.fill_cw, c->key.offset_cw); + } + brw_ENDIF(p, ccw); + } + else if (c->key.fill_cw != CLIP_CULL) { + emit_primitives(c, c->key.fill_cw, c->key.offset_cw); + } + else if (c->key.fill_ccw != CLIP_CULL) { + emit_primitives(c, c->key.fill_ccw, c->key.offset_ccw); + } +} + + + + +static void check_nr_verts( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *if_insn; + + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_L, c->reg.nr_verts, brw_imm_d(3)); + if_insn = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_kill_thread(c); + } + brw_ENDIF(p, if_insn); +} + + +void brw_emit_unfilled_clip( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_instruction *do_clip; + + + c->need_direction = ((c->key.offset_ccw || c->key.offset_cw) || + (c->key.fill_ccw != c->key.fill_cw) || + c->key.fill_ccw == CLIP_CULL || + c->key.fill_cw == CLIP_CULL || + c->key.copy_bfc_cw || + c->key.copy_bfc_ccw); + + brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6); + brw_clip_tri_init_vertices(c); + + assert(c->offset[VERT_RESULT_EDGE]); + + if (c->key.fill_ccw == CLIP_CULL && + c->key.fill_cw == CLIP_CULL) { + brw_clip_kill_thread(c); + return; + } + + merge_edgeflags(c); + + /* Need to use the inlist indirection here: + */ + if (c->need_direction) + compute_tri_direction(c); + + if (c->key.fill_ccw == CLIP_CULL || + c->key.fill_cw == CLIP_CULL) + cull_direction(c); + + if (c->key.offset_ccw || + c->key.offset_cw) + compute_offset(c); + + if (c->key.copy_bfc_ccw || + c->key.copy_bfc_cw) + copy_bfc(c); + + /* Need to do this whether we clip or not: + 
*/ + if (c->key.do_flat_shading) + brw_clip_tri_flat_shade(c); + + brw_clip_init_clipmask(c); + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_NZ, c->reg.planemask, brw_imm_ud(0)); + do_clip = brw_IF(p, BRW_EXECUTE_1); + { + brw_clip_init_planes(c); + brw_clip_tri(c); + check_nr_verts(c); + } + brw_ENDIF(p, do_clip); + + emit_unfilled_primitives(c); + brw_clip_kill_thread(c); +} + + + diff --git a/src/gallium/drivers/i965simple/brw_clip_util.c b/src/gallium/drivers/i965simple/brw_clip_util.c new file mode 100644 index 0000000000..6d58ceafff --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_clip_util.c @@ -0,0 +1,351 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_clip.h" + + + + + +static struct brw_reg get_tmp( struct brw_clip_compile *c ) +{ + struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmp( struct brw_clip_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + + +static struct brw_reg make_plane_ud(unsigned x, unsigned y, unsigned z, unsigned w) +{ + return brw_imm_ud((w<<24) | (z<<16) | (y<<8) | x); +} + + +void brw_clip_init_planes( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + + if (!c->key.nr_userclip) { + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 0), make_plane_ud( 0, 0, 0xff, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 1), make_plane_ud( 0, 0, 1, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 2), make_plane_ud( 0, 0xff, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 3), make_plane_ud( 0, 1, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 4), make_plane_ud(0xff, 0, 0, 1)); + brw_MOV(p, get_element_ud(c->reg.fixed_planes, 5), make_plane_ud( 1, 0, 0, 1)); + } +} + + + +#define W 3 + +/* Project 'pos' to screen space (or back again), overwrite with results: + */ +static void brw_clip_project_position(struct brw_clip_compile *c, struct brw_reg pos ) +{ + struct brw_compile *p = &c->func; + + /* calc rhw + */ + brw_math_invert(p, get_element(pos, W), get_element(pos, W)); + + /* value.xyz *= value.rhw + */ + brw_set_access_mode(p, BRW_ALIGN_16); + brw_MUL(p, brw_writemask(pos, TGSI_WRITEMASK_XYZ), pos, brw_swizzle1(pos, W)); + brw_set_access_mode(p, BRW_ALIGN_1); +} + + +static void brw_clip_project_vertex( struct brw_clip_compile 
*c, + struct brw_indirect vert_addr ) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_reg tmp = get_tmp(c); + + /* Fixup position. Extract from the original vertex and re-project + * to screen space: + */ + brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS])); + brw_clip_project_position(c, tmp); + brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp); + + release_tmp(c, tmp); +#else + #warning "disabled" +#endif +} + + + + +/* Interpolate between two vertices and put the result into a0.0. + * Increment a0.0 accordingly. + */ +void brw_clip_interp_vertex( struct brw_clip_compile *c, + struct brw_indirect dest_ptr, + struct brw_indirect v0_ptr, /* from */ + struct brw_indirect v1_ptr, /* to */ + struct brw_reg t0, + boolean force_edgeflag) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_reg tmp = get_tmp(c); + unsigned i; + + /* Just copy the vertex header: + */ + brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1); + + /* Iterate over each attribute (could be done in pairs?) 
+ */ + for (i = 0; i < c->nr_attrs; i++) { + unsigned delta = i*16 + 32; + + if (delta == c->offset[VERT_RESULT_EDGE]) { + if (force_edgeflag) + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1)); + else + brw_MOV(p, deref_4f(dest_ptr, delta), deref_4f(v0_ptr, delta)); + } + else { + /* Interpolate: + * + * New = attr0 + t*attr1 - t*attr0 + */ + brw_MUL(p, + vec4(brw_null_reg()), + deref_4f(v1_ptr, delta), + t0); + + brw_MAC(p, + tmp, + negate(deref_4f(v0_ptr, delta)), + t0); + + brw_ADD(p, + deref_4f(dest_ptr, delta), + deref_4f(v0_ptr, delta), + tmp); + } + } + + if (i & 1) { + unsigned delta = i*16 + 32; + brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(0)); + } + + release_tmp(c, tmp); + + /* Recreate the projected (NDC) coordinate in the new vertex + * header: + */ + brw_clip_project_vertex(c, dest_ptr ); +#else + #warning "disabled" +#endif +} + + + + +#define MAX_MRF 16 + +void brw_clip_emit_vue(struct brw_clip_compile *c, + struct brw_indirect vert, + boolean allocate, + boolean eot, + unsigned header) +{ + struct brw_compile *p = &c->func; + unsigned start = c->last_mrf; + + assert(!(allocate && eot)); + + /* Cycle through mrf regs - probably futile as we have to wait for + * the allocation response anyway. Also, the order this function + * is invoked doesn't correspond to the order the instructions will + * be executed, so it won't have any effect in many cases. + */ +#if 0 + if (start + c->nr_regs + 1 >= MAX_MRF) + start = 0; + + c->last_mrf = start + c->nr_regs + 1; +#endif + + /* Copy the vertex from vertn into m1..mN+1: + */ + brw_copy_from_indirect(p, brw_message_reg(start+1), vert, c->nr_regs); + + /* Overwrite PrimType and PrimStart in the message header, for + * each vertex in turn: + */ + brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + + + /* Send each vertex as a seperate write to the urb. This + * is different to the concept in brw_sf_emit.c, where + * subsequent writes are used to build up a single urb + * entry. 
Each of these writes instantiates a seperate + * urb entry - (I think... what about 'allocate'?) + */ + brw_urb_WRITE(p, + allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + start, + c->reg.R0, + allocate, + 1, /* used */ + c->nr_regs + 1, /* msg length */ + allocate ? 1 : 0, /* response_length */ + eot, /* eot */ + 1, /* writes_complete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + + + +void brw_clip_kill_thread(struct brw_clip_compile *c) +{ + struct brw_compile *p = &c->func; + + /* Send an empty message to kill the thread and release any + * allocated urb entry: + */ + brw_urb_WRITE(p, + retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + 0, /* allocate */ + 0, /* used */ + 0, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + 0, + BRW_URB_SWIZZLE_NONE); +} + + + + +struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c ) +{ + return brw_address(c->reg.fixed_planes); +} + + +struct brw_reg brw_clip_plane_stride( struct brw_clip_compile *c ) +{ + if (c->key.nr_userclip) { + return brw_imm_uw(16); + } + else { + return brw_imm_uw(4); + } +} + + +/* If flatshading, distribute color from provoking vertex prior to + * clipping. 
+ */ +void brw_clip_copy_colors( struct brw_clip_compile *c, + unsigned to, unsigned from ) +{ +#if 0 + struct brw_compile *p = &c->func; + + if (c->offset[VERT_RESULT_COL0]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL0]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL0])); + + if (c->offset[VERT_RESULT_COL1]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_COL1]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_COL1])); + + if (c->offset[VERT_RESULT_BFC0]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC0]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC0])); + + if (c->offset[VERT_RESULT_BFC1]) + brw_MOV(p, + byte_offset(c->reg.vertex[to], c->offset[VERT_RESULT_BFC1]), + byte_offset(c->reg.vertex[from], c->offset[VERT_RESULT_BFC1])); +#else + #warning "disabled" +#endif +} + + + +void brw_clip_init_clipmask( struct brw_clip_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_reg incoming = get_element_ud(c->reg.R0, 2); + + /* Shift so that lowest outcode bit is rightmost: + */ + brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26)); + + if (c->key.nr_userclip) { + struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD); + + /* Rearrange userclip outcodes so that they come directly after + * the fixed plane bits. + */ + brw_AND(p, tmp, incoming, brw_imm_ud(0x3f<<14)); + brw_SHR(p, tmp, tmp, brw_imm_ud(8)); + brw_OR(p, c->reg.planemask, c->reg.planemask, tmp); + + release_tmp(c, tmp); + } +} + diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c new file mode 100644 index 0000000000..96920df008 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_context.c @@ -0,0 +1,114 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_draw.h" +#include "brw_vs.h" +#include "brw_tex_layout.h" +#include "brw_winsys.h" + +#include "pipe/p_winsys.h" +#include "pipe/p_context.h" +#include "util/u_memory.h" +#include "pipe/p_screen.h" + + +#ifndef BRW_DEBUG +int BRW_DEBUG = (0); +#endif + + +static void brw_destroy(struct pipe_context *pipe) +{ + struct brw_context *brw = brw_context(pipe); + + if(brw->winsys->destroy) + brw->winsys->destroy(brw->winsys); + + FREE(brw); +} + + +static void brw_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + int x, y, w, h; + /* FIXME: corny... 
*/ + + x = 0; + y = 0; + w = ps->width; + h = ps->height; + + pipe->surface_fill(pipe, ps, x, y, w, h, clearValue); +} + + +struct pipe_context *brw_create(struct pipe_screen *screen, + struct brw_winsys *brw_winsys, + unsigned pci_id) +{ + struct brw_context *brw; + + debug_printf("%s: creating brw_context with pci id 0x%x\n", + __FUNCTION__, pci_id); + + brw = CALLOC_STRUCT(brw_context); + if (brw == NULL) + return NULL; + + brw->winsys = brw_winsys; + brw->pipe.winsys = screen->winsys; + brw->pipe.screen = screen; + + brw->pipe.destroy = brw_destroy; + brw->pipe.clear = brw_clear; + + brw_init_surface_functions(brw); + brw_init_texture_functions(brw); + brw_init_state_functions(brw); + brw_init_flush_functions(brw); + brw_init_draw_functions( brw ); + + + brw_init_state( brw ); + + brw->pci_id = pci_id; + brw->dirty = ~0; + brw->hardware_dirty = ~0; + + memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind)); + + return &brw->pipe; +} + diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h new file mode 100644 index 0000000000..3079485180 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -0,0 +1,684 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keith@tungstengraphics.com>
  */


#ifndef BRWCONTEXT_INC
#define BRWCONTEXT_INC


#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"

#include "tgsi/tgsi_scan.h"

#include "brw_structs.h"
#include "brw_winsys.h"


/* Glossary:
 *
 * URB - unified return buffer. A mid-sized buffer which is
 * partitioned between the fixed function units and used for passing
 * values (vertices, primitives, constants) between them.
 *
 * CURBE - constant URB entry. A URB region (entry) used to hold
 * constant values which the fixed function units can be instructed to
 * preload into the GRF when spawning a thread.
 *
 * VUE - vertex URB entry. A URB entry holding a vertex and usually
 * a vertex header. The header contains control information and
 * things like primitive type, Begin/end flags and clip codes.
 *
 * PUE - primitive URB entry. A URB entry produced by the setup (SF)
 * unit holding rasterization and interpolation parameters.
 *
 * GRF - general register file. One of several register files
 * addressable by programmed threads. The inputs (r0, payload, curbe,
 * urb) of the thread are preloaded to this area before the thread is
 * spawned. The registers are individually 8 dwords wide and suitable
 * for general usage. Registers holding thread input values are not
 * special and may be overwritten.
 *
 * MRF - message register file. Threads communicate (and terminate)
 * by sending messages. Message parameters are placed in contiguous
 * MRF registers. All program output is via these messages. URB
 * entries are populated by sending a message to the shared URB
 * function containing the new data, together with a control word,
 * often an unmodified copy of R0.
 *
 * R0 - GRF register 0. Typically holds control information used when
 * sending messages to other threads.
 *
 * EU or GEN4 EU: The name of the programmable subsystem of the
 * i965 hardware. Threads are executed by the EU; the registers
 * described above are part of the EU architecture.
 *
 * Fixed function units:
 *
 * CS - Command streamer. Notional first unit, little software
 * interaction. Holds the URB entries used for constant data, i.e. the
 * CURBEs.
 *
 * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of
 * this unit is responsible for pulling vertices out of vertex buffers
 * in vram and injecting them into the processing pipe as VUEs. If
 * enabled, it first passes them to a VS thread which is a good place
 * for the driver to implement any active vertex shader.
 *
 * GS - Geometry Shader. This corresponds to a new DX10 concept. If
 * enabled, incoming strips etc are passed to GS threads in individual
 * line/triangle/point units. The GS thread may perform arbitrary
 * computation and emit whatever primitives with whatever vertices it
 * chooses. This makes GS an excellent place to implement GL's
 * unfilled polygon modes, though of course it is capable of much
 * more. Additionally, GS is used to translate away primitives not
 * handled by later units, including Quads and Lineloops.
 *
 * CLIP - Clipper. Mesa's clipping algorithms are imported to run on
 * this unit.
The fixed function part performs cliptesting against + * the 6 fixed clipplanes and makes descisions on whether or not the + * incoming primitive needs to be passed to a thread for clipping. + * User clip planes are handled via cooperation with the VS thread. + * + * SF - Strips Fans or Setup: Triangles are prepared for + * rasterization. Interpolation coefficients are calculated. + * Flatshading and two-side lighting usually performed here. + * + * WM - Windower. Interpolation of vertex attributes performed here. + * Fragment shader implemented here. SIMD aspects of EU taken full + * advantage of, as pixels are processed in blocks of 16. + * + * CC - Color Calculator. No EU threads associated with this unit. + * Handles blending and (presumably) depth and stencil testing. + */ + +#define BRW_MAX_CURBE (32*16) + +struct brw_context; +struct brw_winsys; + + +/* Raised when we receive new state across the pipe interface: + */ +#define BRW_NEW_VIEWPORT 0x1 +#define BRW_NEW_RASTERIZER 0x2 +#define BRW_NEW_FS 0x4 +#define BRW_NEW_BLEND 0x8 +#define BRW_NEW_CLIP 0x10 +#define BRW_NEW_SCISSOR 0x20 +#define BRW_NEW_STIPPLE 0x40 +#define BRW_NEW_FRAMEBUFFER 0x80 +#define BRW_NEW_ALPHA_TEST 0x100 +#define BRW_NEW_DEPTH_STENCIL 0x200 +#define BRW_NEW_SAMPLER 0x400 +#define BRW_NEW_TEXTURE 0x800 +#define BRW_NEW_CONSTANTS 0x1000 +#define BRW_NEW_VBO 0x2000 +#define BRW_NEW_VS 0x4000 + +/* Raised for other internal events: + */ +#define BRW_NEW_URB_FENCE 0x10000 +#define BRW_NEW_PSP 0x20000 +#define BRW_NEW_CURBE_OFFSETS 0x40000 +#define BRW_NEW_REDUCED_PRIMITIVE 0x80000 +#define BRW_NEW_PRIMITIVE 0x100000 +#define BRW_NEW_SCENE 0x200000 +#define BRW_NEW_SF_LINKAGE 0x400000 + +extern int BRW_DEBUG; + +#define DEBUG_TEXTURE 0x1 +#define DEBUG_STATE 0x2 +#define DEBUG_IOCTL 0x4 +#define DEBUG_PRIMS 0x8 +#define DEBUG_VERTS 0x10 +#define DEBUG_FALLBACKS 0x20 +#define DEBUG_VERBOSE 0x40 +#define DEBUG_DRI 0x80 +#define DEBUG_DMA 0x100 +#define DEBUG_SANITY 0x200 +#define DEBUG_SYNC 
0x400 +#define DEBUG_SLEEP 0x800 +#define DEBUG_PIXEL 0x1000 +#define DEBUG_STATS 0x2000 +#define DEBUG_TILE 0x4000 +#define DEBUG_SINGLE_THREAD 0x8000 +#define DEBUG_WM 0x10000 +#define DEBUG_URB 0x20000 +#define DEBUG_VS 0x40000 +#define DEBUG_BATCH 0x80000 +#define DEBUG_BUFMGR 0x100000 +#define DEBUG_BLIT 0x200000 +#define DEBUG_REGION 0x400000 +#define DEBUG_MIPTREE 0x800000 + +#define DBG(...) do { \ + if (BRW_DEBUG & FILE_DEBUG_FLAG) \ + debug_printf(__VA_ARGS__); \ +} while(0) + +#define PRINT(...) do { \ + debug_printf(__VA_ARGS__); \ +} while(0) + +struct brw_state_flags { + unsigned cache; + unsigned brw; +}; + + +struct brw_vertex_program { + struct pipe_shader_state program; + struct tgsi_shader_info info; + int id; +}; + + +struct brw_fragment_program { + struct pipe_shader_state program; + struct tgsi_shader_info info; + + boolean UsesDepth; /* XXX add this to tgsi_shader_info? */ + int id; +}; + + +struct pipe_setup_linkage { + struct { + unsigned vp_output:5; + unsigned interp_mode:4; + unsigned bf_vp_output:5; + } fp_input[PIPE_MAX_SHADER_INPUTS]; + + unsigned fp_input_count:5; + unsigned max_vp_output:5; +}; + + + +struct brw_texture { + struct pipe_texture base; + + /* Derived from the above: + */ + unsigned stride; + unsigned depth_pitch; /* per-image on i945? */ + unsigned total_nblocksy; + + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + + /* Explicitly store the offset of each image for each cube face or + * depth value. 
Pretty much have to accept that hardware formats + * are going to be so diverse that there is no unified way to + * compute the offsets of depth/cube images within a mipmap level, + * so have to store them as a lookup table: + */ + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ + + /* Includes image offset tables: + */ + unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; +}; + +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ + +struct brw_wm_prog_data { + unsigned curb_read_length; + unsigned urb_read_length; + + unsigned first_curbe_grf; + unsigned total_grf; + unsigned total_scratch; + + /* Internally generated constants for the CURBE. These are loaded + * ahead of the data from the constant buffer. + */ + const float internal_const[8]; + unsigned nr_internal_consts; + unsigned max_const; + + boolean error; +}; + +struct brw_sf_prog_data { + unsigned urb_read_length; + unsigned total_grf; + + /* Each vertex may have upto 12 attributes, 4 components each, + * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 + * rows. + * + * Actually we use 4 for each, so call it 12 rows. + */ + unsigned urb_entry_size; +}; + +struct brw_clip_prog_data { + unsigned curb_read_length; /* user planes? 
*/ + unsigned clip_mode; + unsigned urb_read_length; + unsigned total_grf; +}; + +struct brw_gs_prog_data { + unsigned urb_read_length; + unsigned total_grf; +}; + +struct brw_vs_prog_data { + unsigned curb_read_length; + unsigned urb_read_length; + unsigned total_grf; + unsigned outputs_written; + + unsigned inputs_read; + + unsigned max_const; + + float imm_buf[PIPE_MAX_CONSTANT][4]; + unsigned num_imm; + unsigned num_consts; + + /* Used for calculating urb partitions: + */ + unsigned urb_entry_size; +}; + + +#define BRW_MAX_TEX_UNIT 8 +#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 + +/* Create a fixed sized struct for caching binding tables: + */ +struct brw_surface_binding_table { + unsigned surf_ss_offset[BRW_WM_MAX_SURF]; +}; + + +struct brw_cache; + +struct brw_mem_pool { + struct pipe_buffer *buffer; + + unsigned size; + unsigned offset; /* offset of first free byte */ + + struct brw_context *brw; +}; + +struct brw_cache_item { + unsigned hash; + unsigned key_size; /* for variable-sized keys */ + const void *key; + + unsigned offset; /* offset within pool's buffer */ + unsigned data_size; + + struct brw_cache_item *next; +}; + + + +struct brw_cache { + unsigned id; + + const char *name; + + struct brw_context *brw; + struct brw_mem_pool *pool; + + struct brw_cache_item **items; + unsigned size, n_items; + + unsigned key_size; /* for fixed-size keys */ + unsigned aux_size; + + unsigned last_addr; /* offset of active item */ +}; + + + + +/* Considered adding a member to this struct to document which flags + * an update might raise so that ordering of the state atoms can be + * checked or derived at runtime. Dropped the idea in favor of having + * a debug mode where the state is monitored for flags which are + * raised that have already been tested against. + */ +struct brw_tracked_state { + struct brw_state_flags dirty; + void (*update)( struct brw_context *brw ); +}; + + +/* Flags for brw->state.cache. 
+ */ +#define CACHE_NEW_CC_VP (1<<BRW_CC_VP) +#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) +#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) +#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR) +#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER) +#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT) +#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG) +#define CACHE_NEW_SF_VP (1<<BRW_SF_VP) +#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT) +#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT) +#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG) +#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT) +#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG) +#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) +#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT) +#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) +#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) +#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) + + + + +enum brw_mempool_id { + BRW_GS_POOL, + BRW_SS_POOL, + BRW_MAX_POOL +}; + + +struct brw_cached_batch_item { + struct header *header; + unsigned sz; + struct brw_cached_batch_item *next; +}; + + + +/* Protect against a future where PIPE_MAX_ATTRIBS > 32. Wouldn't life + * be easier if C allowed arrays of packed elements? 
+ */ +#define ATTRIB_BIT_DWORDS ((PIPE_MAX_ATTRIBS+31)/32) + + + + +struct brw_vertex_info { + unsigned varying; /* varying:1[PIPE_MAX_ATTRIBS] */ + unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_MAX_ATTRIBS] */ +}; + + + + + +struct brw_context +{ + struct pipe_context pipe; + struct brw_winsys *winsys; + + unsigned primitive; + unsigned reduced_primitive; + + boolean emit_state_always; + + struct { + struct brw_state_flags dirty; + } state; + + + struct { + const struct pipe_blend_state *Blend; + const struct pipe_depth_stencil_alpha_state *DepthStencil; + const struct pipe_poly_stipple *PolygonStipple; + const struct pipe_rasterizer_state *Raster; + const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS]; + const struct brw_vertex_program *VertexProgram; + const struct brw_fragment_program *FragmentProgram; + + struct pipe_clip_state Clip; + struct pipe_blend_color BlendColor; + struct pipe_scissor_state Scissor; + struct pipe_viewport_state Viewport; + struct pipe_framebuffer_state FrameBuffer; + + const struct pipe_constant_buffer *Constants[2]; + const struct brw_texture *Texture[PIPE_MAX_SAMPLERS]; + } attribs; + + unsigned num_samplers; + unsigned num_textures; + + struct brw_mem_pool pool[BRW_MAX_POOL]; + struct brw_cache cache[BRW_MAX_CACHE]; + struct brw_cached_batch_item *cached_batch_items; + + struct { + + /* Arrays with buffer objects to copy non-bufferobj arrays into + * for upload: + */ + const struct pipe_vertex_buffer *vbo_array[PIPE_MAX_ATTRIBS]; + + struct brw_vertex_element_state inputs[PIPE_MAX_ATTRIBS]; + +#define BRW_NR_UPLOAD_BUFS 17 +#define BRW_UPLOAD_INIT_SIZE (128*1024) + + /* Summary of size and varying of active arrays, so we can check + * for changes to this state: + */ + struct brw_vertex_info info; + } vb; + + + unsigned hardware_dirty; + unsigned dirty; + unsigned pci_id; + /* BRW_NEW_URB_ALLOCATIONS: + */ + struct { + unsigned vsize; /* vertex size plus header in urb registers */ + unsigned csize; /* constant buffer 
size in urb registers */ + unsigned sfsize; /* setup data size in urb registers */ + + boolean constrained; + + unsigned nr_vs_entries; + unsigned nr_gs_entries; + unsigned nr_clip_entries; + unsigned nr_sf_entries; + unsigned nr_cs_entries; + +/* unsigned vs_size; */ +/* unsigned gs_size; */ +/* unsigned clip_size; */ +/* unsigned sf_size; */ +/* unsigned cs_size; */ + + unsigned vs_start; + unsigned gs_start; + unsigned clip_start; + unsigned sf_start; + unsigned cs_start; + } urb; + + + /* BRW_NEW_CURBE_OFFSETS: + */ + struct { + unsigned wm_start; + unsigned wm_size; + unsigned clip_start; + unsigned clip_size; + unsigned vs_start; + unsigned vs_size; + unsigned total_size; + + unsigned gs_offset; + + float *last_buf; + unsigned last_bufsz; + } curbe; + + struct { + struct brw_vs_prog_data *prog_data; + + unsigned prog_gs_offset; + unsigned state_gs_offset; + } vs; + + struct { + struct brw_gs_prog_data *prog_data; + + boolean prog_active; + unsigned prog_gs_offset; + unsigned state_gs_offset; + } gs; + + struct { + struct brw_clip_prog_data *prog_data; + + unsigned prog_gs_offset; + unsigned vp_gs_offset; + unsigned state_gs_offset; + } clip; + + + struct { + struct brw_sf_prog_data *prog_data; + + struct pipe_setup_linkage linkage; + + unsigned prog_gs_offset; + unsigned vp_gs_offset; + unsigned state_gs_offset; + } sf; + + struct { + struct brw_wm_prog_data *prog_data; + +// struct brw_wm_compiler *compile_data; + + + /** + * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER + * cache + */ + struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; + + unsigned render_surf; + unsigned nr_surfaces; + + unsigned max_threads; + struct pipe_buffer *scratch_buffer; + unsigned scratch_buffer_size; + + unsigned sampler_count; + unsigned sampler_gs_offset; + + struct brw_surface_binding_table bind; + unsigned bind_ss_offset; + + unsigned prog_gs_offset; + unsigned state_gs_offset; + } wm; + + + struct { + unsigned vp_gs_offset; + unsigned 
state_gs_offset; + } cc; + + + /* Used to give every program string a unique id + */ + unsigned program_id; +}; + + +#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) + + +/*====================================================================== + * brw_vtbl.c + */ +void brw_do_flush( struct brw_context *brw, + unsigned flags ); + + +/*====================================================================== + * brw_state.c + */ +void brw_validate_state(struct brw_context *brw); +void brw_init_state(struct brw_context *brw); +void brw_destroy_state(struct brw_context *brw); + + +/*====================================================================== + * brw_tex.c + */ +void brwUpdateTextureState( struct brw_context *brw ); + + +/* brw_urb.c + */ +void brw_upload_urb_fence(struct brw_context *brw); + +void brw_upload_constant_buffer_state(struct brw_context *brw); + +void brw_init_surface_functions(struct brw_context *brw); +void brw_init_state_functions(struct brw_context *brw); +void brw_init_flush_functions(struct brw_context *brw); +void brw_init_string_functions(struct brw_context *brw); + +/*====================================================================== + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static inline struct brw_context * +brw_context( struct pipe_context *ctx ) +{ + return (struct brw_context *)ctx; +} + +#endif + diff --git a/src/gallium/drivers/i965simple/brw_curbe.c b/src/gallium/drivers/i965simple/brw_curbe.c new file mode 100644 index 0000000000..824ee7fd6d --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_curbe.c @@ -0,0 +1,369 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" +#include "brw_batch.h" +#include "brw_util.h" +#include "brw_wm.h" +#include "pipe/p_state.h" +#include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#define FILE_DEBUG_FLAG DEBUG_FALLBACKS + +/* Partition the CURBE between the various users of constant values: + */ +static void calculate_curbe_offsets( struct brw_context *brw ) +{ + /* CACHE_NEW_WM_PROG */ + unsigned nr_fp_regs = align(brw->wm.prog_data->max_const, 16); + + /* BRW_NEW_VERTEX_PROGRAM */ + unsigned nr_vp_regs = align(brw->vs.prog_data->max_const, 16); + unsigned nr_clip_regs = 0; + unsigned total_regs; + +#if 0 + /* BRW_NEW_CLIP ? 
*/ + if (brw->attribs.Transform->ClipPlanesEnabled) { + unsigned nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled); + nr_clip_regs = align(nr_planes * 4, 16); + } +#endif + + + total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs; + + /* This can happen - what to do? Probably rather than falling + * back, the best thing to do is emit programs which code the + * constants as immediate values. Could do this either as a static + * cap on WM and VS, or adaptively. + * + * Unfortunately, this is currently dependent on the results of the + * program generation process (in the case of wm), so this would + * introduce the need to re-generate programs in the event of a + * curbe allocation failure. + */ + /* Max size is 32 - just large enough to + * hold the 128 parameters allowed by + * the fragment and vertex program + * api's. It's not clear what happens + * when both VP and FP want to use 128 + * parameters, though. + */ + assert(total_regs <= 32); + + /* Lazy resize: + */ + if (nr_fp_regs > brw->curbe.wm_size || + nr_vp_regs > brw->curbe.vs_size || + nr_clip_regs != brw->curbe.clip_size || + (total_regs < brw->curbe.total_size / 4 && + brw->curbe.total_size > 16)) { + + unsigned reg = 0; + + /* Calculate a new layout: + */ + reg = 0; + brw->curbe.wm_start = reg; + brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs; + brw->curbe.clip_start = reg; + brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs; + brw->curbe.vs_start = reg; + brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs; + brw->curbe.total_size = reg; + +#if 0 + if (0) + DBG("curbe wm %d+%d clip %d+%d vs %d+%d\n", + brw->curbe.wm_start, + brw->curbe.wm_size, + brw->curbe.clip_start, + brw->curbe.clip_size, + brw->curbe.vs_start, + brw->curbe.vs_size ); +#endif + + brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; + } +} + + +const struct brw_tracked_state brw_curbe_offsets = { + .dirty = { + .brw = (BRW_NEW_CLIP | + BRW_NEW_VS), + .cache = CACHE_NEW_WM_PROG + }, + .update = 
calculate_curbe_offsets +}; + + + +/* Define the number of curbes within CS's urb allocation. Multiple + * urb entries -> multiple curbes. These will be used by + * fixed-function hardware in a double-buffering scheme to avoid a + * pipeline stall each time the contents of the curbe is changed. + */ +void brw_upload_constant_buffer_state(struct brw_context *brw) +{ + struct brw_constant_buffer_state cbs; + memset(&cbs, 0, sizeof(cbs)); + + /* It appears that this is the state packet for the CS unit, ie. the + * urb entries detailed here are housed in the CS range from the + * URB_FENCE command. + */ + cbs.header.opcode = CMD_CONST_BUFFER_STATE; + cbs.header.length = sizeof(cbs)/4 - 2; + + /* BRW_NEW_URB_FENCE */ + cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries; + cbs.bits0.urb_entry_size = brw->urb.csize - 1; + + assert(brw->urb.nr_cs_entries); + BRW_CACHED_BATCH_STRUCT(brw, &cbs); +} + + +static float fixed_plane[6][4] = { + { 0, 0, -1, 1 }, + { 0, 0, 1, 1 }, + { 0, -1, 0, 1 }, + { 0, 1, 0, 1 }, + {-1, 0, 0, 1 }, + { 1, 0, 0, 1 } +}; + +/* Upload a new set of constants. Too much variability to go into the + * cache mechanism, but maybe would benefit from a comparison against + * the current uploaded set of constants. 
+ */ +static void upload_constant_buffer(struct brw_context *brw) +{ + struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL]; + unsigned sz = brw->curbe.total_size; + unsigned bufsz = sz * sizeof(float); + float *buf; + unsigned i; + + + if (sz == 0) { + struct brw_constant_buffer cb; + cb.header.opcode = CMD_CONST_BUFFER; + cb.header.length = sizeof(cb)/4 - 2; + cb.header.valid = 0; + cb.bits0.buffer_length = 0; + cb.bits0.buffer_address = 0; + BRW_BATCH_STRUCT(brw, &cb); + + if (brw->curbe.last_buf) { + free(brw->curbe.last_buf); + brw->curbe.last_buf = NULL; + brw->curbe.last_bufsz = 0; + } + + return; + } + + buf = (float *)malloc(bufsz); + + memset(buf, 0, bufsz); + + if (brw->curbe.wm_size) { + unsigned offset = brw->curbe.wm_start * 16; + + /* First the constant buffer constants: + */ + + /* Then any internally generated constants: + */ + for (i = 0; i < brw->wm.prog_data->nr_internal_consts; i++) + buf[offset + i] = brw->wm.prog_data->internal_const[i]; + + assert(brw->wm.prog_data->max_const == + brw->wm.prog_data->nr_internal_consts); + } + + + /* The clipplanes are actually delivered to both CLIP and VS units. + * VS uses them to calculate the outcode bitmasks. 
+ */ + if (brw->curbe.clip_size) { + unsigned offset = brw->curbe.clip_start * 16; + unsigned j; + + /* If any planes are going this way, send them all this way: + */ + for (i = 0; i < 6; i++) { + buf[offset + i * 4 + 0] = fixed_plane[i][0]; + buf[offset + i * 4 + 1] = fixed_plane[i][1]; + buf[offset + i * 4 + 2] = fixed_plane[i][2]; + buf[offset + i * 4 + 3] = fixed_plane[i][3]; + } + + /* Clip planes: BRW_NEW_CLIP: + */ + for (j = 0; j < brw->attribs.Clip.nr; j++) { + buf[offset + i * 4 + 0] = brw->attribs.Clip.ucp[j][0]; + buf[offset + i * 4 + 1] = brw->attribs.Clip.ucp[j][1]; + buf[offset + i * 4 + 2] = brw->attribs.Clip.ucp[j][2]; + buf[offset + i * 4 + 3] = brw->attribs.Clip.ucp[j][3]; + i++; + } + } + + + if (brw->curbe.vs_size) { + unsigned offset = brw->curbe.vs_start * 16; + /*unsigned nr = vp->max_const;*/ + const struct pipe_constant_buffer *cbuffer = brw->attribs.Constants[0]; + struct pipe_winsys *ws = brw->pipe.winsys; + /* FIXME: buffer size is num_consts + num_immediates */ + if (brw->vs.prog_data->num_consts) { + /* map the vertex constant buffer and copy to curbe: */ + void *data = ws->buffer_map(ws, cbuffer->buffer, 0); + /* FIXME: this is wrong. the cbuffer->size currently + * represents size of consts + immediates. so if we'll + * have both we'll copy over the end of the buffer + * with the subsequent memcpy */ + memcpy(&buf[offset], data, cbuffer->size); + ws->buffer_unmap(ws, cbuffer->buffer); + offset += cbuffer->size; + } + /*immediates*/ + if (brw->vs.prog_data->num_imm) { + memcpy(&buf[offset], brw->vs.prog_data->imm_buf, + brw->vs.prog_data->num_imm * 4 * sizeof(float)); + } + } + + if (1) { + for (i = 0; i < sz; i+=4) + debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, + buf[i+0], buf[i+1], buf[i+2], buf[i+3]); + + debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n", + brw->curbe.last_buf, buf, + bufsz, brw->curbe.last_bufsz, + brw->curbe.last_buf ? 
memcmp(buf, brw->curbe.last_buf, bufsz) : -1); + } + + if (brw->curbe.last_buf && + bufsz == brw->curbe.last_bufsz && + memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { + free(buf); +/* return; */ + } + else { + if (brw->curbe.last_buf) + free(brw->curbe.last_buf); + brw->curbe.last_buf = buf; + brw->curbe.last_bufsz = bufsz; + + + if (!brw_pool_alloc(pool, + bufsz, + 1 << 6, + &brw->curbe.gs_offset)) { + debug_printf("out of GS memory for curbe\n"); + assert(0); + return; + } + + + /* Copy data to the buffer: + */ + brw->winsys->buffer_subdata_typed(brw->winsys, + pool->buffer, + brw->curbe.gs_offset, + bufsz, + buf, + BRW_CONSTANT_BUFFER ); + } + + /* TODO: only emit the constant_buffer packet when necessary, ie: + - contents have changed + - offset has changed + - hw requirements due to other packets emitted. + */ + { + struct brw_constant_buffer cb; + + memset(&cb, 0, sizeof(cb)); + + cb.header.opcode = CMD_CONST_BUFFER; + cb.header.length = sizeof(cb)/4 - 2; + cb.header.valid = 1; + cb.bits0.buffer_length = sz - 1; + cb.bits0.buffer_address = brw->curbe.gs_offset >> 6; + + /* Because this provokes an action (ie copy the constants into the + * URB), it shouldn't be shortcircuited if identical to the + * previous time - because eg. the urb destination may have + * changed, or the urb contents different to last time. + * + * Note that the data referred to is actually copied internally, + * not just used in place according to passed pointer. + * + * It appears that the CS unit takes care of using each available + * URB entry (Const URB Entry == CURBE) in turn, and issuing + * flushes as necessary when doublebuffering of CURBEs isn't + * possible. + */ + BRW_BATCH_STRUCT(brw, &cb); + } +} + +/* This tracked state is unique in that the state it monitors varies + * dynamically depending on the parameters tracked by the fragment and + * vertex programs. 
This is the template used as a starting point, + * each context will maintain a copy of this internally and update as + * required. + */ +const struct brw_tracked_state brw_constant_buffer = { + .dirty = { + .brw = (BRW_NEW_CLIP | + BRW_NEW_CONSTANTS | + BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ + BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */ + BRW_NEW_CURBE_OFFSETS), + .cache = (CACHE_NEW_WM_PROG) + }, + .update = upload_constant_buffer +}; + diff --git a/src/gallium/drivers/i965simple/brw_defines.h b/src/gallium/drivers/i965simple/brw_defines.h new file mode 100644 index 0000000000..9379a397f6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_defines.h @@ -0,0 +1,852 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_DEFINES_H +#define BRW_DEFINES_H + +/* + */ +#define MI_NOOP 0x00 +#define MI_USER_INTERRUPT 0x02 +#define MI_WAIT_FOR_EVENT 0x03 +#define MI_FLUSH 0x04 +#define MI_REPORT_HEAD 0x07 +#define MI_ARB_ON_OFF 0x08 +#define MI_BATCH_BUFFER_END 0x0A +#define MI_OVERLAY_FLIP 0x11 +#define MI_LOAD_SCAN_LINES_INCL 0x12 +#define MI_LOAD_SCAN_LINES_EXCL 0x13 +#define MI_DISPLAY_BUFFER_INFO 0x14 +#define MI_SET_CONTEXT 0x18 +#define MI_STORE_DATA_IMM 0x20 +#define MI_STORE_DATA_INDEX 0x21 +#define MI_LOAD_REGISTER_IMM 0x22 +#define MI_STORE_REGISTER_MEM 0x24 +#define MI_BATCH_BUFFER_START 0x31 + +#define MI_SYNCHRONOUS_FLIP 0x0 +#define MI_ASYNCHRONOUS_FLIP 0x1 + +#define MI_BUFFER_SECURE 0x0 +#define MI_BUFFER_NONSECURE 0x1 + +#define MI_ARBITRATE_AT_CHAIN_POINTS 0x0 +#define MI_ARBITRATE_BETWEEN_INSTS 0x1 +#define MI_NO_ARBITRATION 0x3 + +#define MI_CONDITION_CODE_WAIT_DISABLED 0x0 +#define MI_CONDITION_CODE_WAIT_0 0x1 +#define MI_CONDITION_CODE_WAIT_1 0x2 +#define MI_CONDITION_CODE_WAIT_2 0x3 +#define MI_CONDITION_CODE_WAIT_3 0x4 +#define MI_CONDITION_CODE_WAIT_4 0x5 + +#define MI_DISPLAY_PIPE_A 0x0 +#define MI_DISPLAY_PIPE_B 0x1 + +#define MI_DISPLAY_PLANE_A 0x0 +#define MI_DISPLAY_PLANE_B 0x1 +#define MI_DISPLAY_PLANE_C 0x2 + +#define MI_STANDARD_FLIP 0x0 +#define MI_ENQUEUE_FLIP_PERFORM_BASE_FRAME_NUMBER_LOAD 0x1 +#define MI_ENQUEUE_FLIP_TARGET_FRAME_NUMBER_RELATIVE 0x2 +#define MI_ENQUEUE_FLIP_ABSOLUTE_TARGET_FRAME_NUMBER 0x3 + +#define MI_PHYSICAL_ADDRESS 0x0 +#define MI_VIRTUAL_ADDRESS 0x1 + +#define MI_BUFFER_MEMORY_MAIN 0x0 +#define 
MI_BUFFER_MEMORY_GTT 0x2 +#define MI_BUFFER_MEMORY_PER_PROCESS_GTT 0x3 + +#define MI_FLIP_CONTINUE 0x0 +#define MI_FLIP_ON 0x1 +#define MI_FLIP_OFF 0x2 + +#define MI_UNTRUSTED_REGISTER_SPACE 0x0 +#define MI_TRUSTED_REGISTER_SPACE 0x1 + +/* 3D state: + */ +#define _3DOP_3DSTATE_PIPELINED 0x0 +#define _3DOP_3DSTATE_NONPIPELINED 0x1 +#define _3DOP_3DCONTROL 0x2 +#define _3DOP_3DPRIMITIVE 0x3 + +#define _3DSTATE_PIPELINED_POINTERS 0x00 +#define _3DSTATE_BINDING_TABLE_POINTERS 0x01 +#define _3DSTATE_VERTEX_BUFFERS 0x08 +#define _3DSTATE_VERTEX_ELEMENTS 0x09 +#define _3DSTATE_INDEX_BUFFER 0x0A +#define _3DSTATE_VF_STATISTICS 0x0B +#define _3DSTATE_DRAWING_RECTANGLE 0x00 +#define _3DSTATE_CONSTANT_COLOR 0x01 +#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02 +#define _3DSTATE_CHROMA_KEY 0x04 +#define _3DSTATE_DEPTH_BUFFER 0x05 +#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06 +#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07 +#define _3DSTATE_LINE_STIPPLE 0x08 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 +#define _3DCONTROL 0x00 +#define _3DPRIMITIVE 0x00 + +#define PIPE_CONTROL_NOWRITE 0x00 +#define PIPE_CONTROL_WRITEIMMEDIATE 0x01 +#define PIPE_CONTROL_WRITEDEPTH 0x02 +#define PIPE_CONTROL_WRITETIMESTAMP 0x03 + +#define PIPE_CONTROL_GTTWRITE_PROCESS_LOCAL 0x00 +#define PIPE_CONTROL_GTTWRITE_GLOBAL 0x01 + +#define _3DPRIM_POINTLIST 0x01 +#define _3DPRIM_LINELIST 0x02 +#define _3DPRIM_LINESTRIP 0x03 +#define _3DPRIM_TRILIST 0x04 +#define _3DPRIM_TRISTRIP 0x05 +#define _3DPRIM_TRIFAN 0x06 +#define _3DPRIM_QUADLIST 0x07 +#define _3DPRIM_QUADSTRIP 0x08 +#define _3DPRIM_LINELIST_ADJ 0x09 +#define _3DPRIM_LINESTRIP_ADJ 0x0A +#define _3DPRIM_TRILIST_ADJ 0x0B +#define _3DPRIM_TRISTRIP_ADJ 0x0C +#define _3DPRIM_TRISTRIP_REVERSE 0x0D +#define _3DPRIM_POLYGON 0x0E +#define _3DPRIM_RECTLIST 0x0F +#define _3DPRIM_LINELOOP 0x10 +#define _3DPRIM_POINTLIST_BF 0x11 +#define _3DPRIM_LINESTRIP_CONT 0x12 +#define _3DPRIM_LINESTRIP_BF 0x13 +#define _3DPRIM_LINESTRIP_CONT_BF 0x14 +#define 
_3DPRIM_TRIFAN_NOSTIPPLE 0x15 + +#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0 +#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1 + +#define BRW_ANISORATIO_2 0 +#define BRW_ANISORATIO_4 1 +#define BRW_ANISORATIO_6 2 +#define BRW_ANISORATIO_8 3 +#define BRW_ANISORATIO_10 4 +#define BRW_ANISORATIO_12 5 +#define BRW_ANISORATIO_14 6 +#define BRW_ANISORATIO_16 7 + +#define BRW_BLENDFACTOR_ONE 0x1 +#define BRW_BLENDFACTOR_SRC_COLOR 0x2 +#define BRW_BLENDFACTOR_SRC_ALPHA 0x3 +#define BRW_BLENDFACTOR_DST_ALPHA 0x4 +#define BRW_BLENDFACTOR_DST_COLOR 0x5 +#define BRW_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 +#define BRW_BLENDFACTOR_CONST_COLOR 0x7 +#define BRW_BLENDFACTOR_CONST_ALPHA 0x8 +#define BRW_BLENDFACTOR_SRC1_COLOR 0x9 +#define BRW_BLENDFACTOR_SRC1_ALPHA 0x0A +#define BRW_BLENDFACTOR_ZERO 0x11 +#define BRW_BLENDFACTOR_INV_SRC_COLOR 0x12 +#define BRW_BLENDFACTOR_INV_SRC_ALPHA 0x13 +#define BRW_BLENDFACTOR_INV_DST_ALPHA 0x14 +#define BRW_BLENDFACTOR_INV_DST_COLOR 0x15 +#define BRW_BLENDFACTOR_INV_CONST_COLOR 0x17 +#define BRW_BLENDFACTOR_INV_CONST_ALPHA 0x18 +#define BRW_BLENDFACTOR_INV_SRC1_COLOR 0x19 +#define BRW_BLENDFACTOR_INV_SRC1_ALPHA 0x1A + +#define BRW_BLENDFUNCTION_ADD 0 +#define BRW_BLENDFUNCTION_SUBTRACT 1 +#define BRW_BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define BRW_BLENDFUNCTION_MIN 3 +#define BRW_BLENDFUNCTION_MAX 4 + +#define BRW_ALPHATEST_FORMAT_UNORM8 0 +#define BRW_ALPHATEST_FORMAT_FLOAT32 1 + +#define BRW_CHROMAKEY_KILL_ON_ANY_MATCH 0 +#define BRW_CHROMAKEY_REPLACE_BLACK 1 + +#define BRW_CLIP_API_OGL 0 +#define BRW_CLIP_API_DX 1 + +#define BRW_CLIPMODE_NORMAL 0 +#define BRW_CLIPMODE_CLIP_ALL 1 +#define BRW_CLIPMODE_CLIP_NON_REJECTED 2 +#define BRW_CLIPMODE_REJECT_ALL 3 +#define BRW_CLIPMODE_ACCEPT_ALL 4 + +#define BRW_CLIP_NDCSPACE 0 +#define BRW_CLIP_SCREENSPACE 1 + +#define BRW_COMPAREFUNCTION_ALWAYS 0 +#define BRW_COMPAREFUNCTION_NEVER 1 +#define BRW_COMPAREFUNCTION_LESS 2 +#define BRW_COMPAREFUNCTION_EQUAL 3 +#define BRW_COMPAREFUNCTION_LEQUAL 4 +#define 
BRW_COMPAREFUNCTION_GREATER 5 +#define BRW_COMPAREFUNCTION_NOTEQUAL 6 +#define BRW_COMPAREFUNCTION_GEQUAL 7 + +#define BRW_COVERAGE_PIXELS_HALF 0 +#define BRW_COVERAGE_PIXELS_1 1 +#define BRW_COVERAGE_PIXELS_2 2 +#define BRW_COVERAGE_PIXELS_4 3 + +#define BRW_CULLMODE_BOTH 0 +#define BRW_CULLMODE_NONE 1 +#define BRW_CULLMODE_FRONT 2 +#define BRW_CULLMODE_BACK 3 + +#define BRW_DEFAULTCOLOR_R8G8B8A8_UNORM 0 +#define BRW_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 + +#define BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 +#define BRW_DEPTHFORMAT_D32_FLOAT 1 +#define BRW_DEPTHFORMAT_D24_UNORM_S8_UINT 2 +#define BRW_DEPTHFORMAT_D16_UNORM 5 + +#define BRW_FLOATING_POINT_IEEE_754 0 +#define BRW_FLOATING_POINT_NON_IEEE_754 1 + +#define BRW_FRONTWINDING_CW 0 +#define BRW_FRONTWINDING_CCW 1 + +#define BRW_SPRITE_POINT_ENABLE 16 + +#define BRW_INDEX_BYTE 0 +#define BRW_INDEX_WORD 1 +#define BRW_INDEX_DWORD 2 + +#define BRW_LOGICOPFUNCTION_CLEAR 0 +#define BRW_LOGICOPFUNCTION_NOR 1 +#define BRW_LOGICOPFUNCTION_AND_INVERTED 2 +#define BRW_LOGICOPFUNCTION_COPY_INVERTED 3 +#define BRW_LOGICOPFUNCTION_AND_REVERSE 4 +#define BRW_LOGICOPFUNCTION_INVERT 5 +#define BRW_LOGICOPFUNCTION_XOR 6 +#define BRW_LOGICOPFUNCTION_NAND 7 +#define BRW_LOGICOPFUNCTION_AND 8 +#define BRW_LOGICOPFUNCTION_EQUIV 9 +#define BRW_LOGICOPFUNCTION_NOOP 10 +#define BRW_LOGICOPFUNCTION_OR_INVERTED 11 +#define BRW_LOGICOPFUNCTION_COPY 12 +#define BRW_LOGICOPFUNCTION_OR_REVERSE 13 +#define BRW_LOGICOPFUNCTION_OR 14 +#define BRW_LOGICOPFUNCTION_SET 15 + +#define BRW_MAPFILTER_NEAREST 0x0 +#define BRW_MAPFILTER_LINEAR 0x1 +#define BRW_MAPFILTER_ANISOTROPIC 0x2 + +#define BRW_MIPFILTER_NONE 0 +#define BRW_MIPFILTER_NEAREST 1 +#define BRW_MIPFILTER_LINEAR 3 + +#define BRW_POLYGON_FRONT_FACING 0 +#define BRW_POLYGON_BACK_FACING 1 + +#define BRW_PREFILTER_ALWAYS 0x0 +#define BRW_PREFILTER_NEVER 0x1 +#define BRW_PREFILTER_LESS 0x2 +#define BRW_PREFILTER_EQUAL 0x3 +#define BRW_PREFILTER_LEQUAL 0x4 +#define BRW_PREFILTER_GREATER 0x5 
+#define BRW_PREFILTER_NOTEQUAL 0x6 +#define BRW_PREFILTER_GEQUAL 0x7 + +#define BRW_PROVOKING_VERTEX_0 0 +#define BRW_PROVOKING_VERTEX_1 1 +#define BRW_PROVOKING_VERTEX_2 2 + +#define BRW_RASTRULE_UPPER_LEFT 0 +#define BRW_RASTRULE_UPPER_RIGHT 1 + +#define BRW_RENDERTARGET_CLAMPRANGE_UNORM 0 +#define BRW_RENDERTARGET_CLAMPRANGE_SNORM 1 +#define BRW_RENDERTARGET_CLAMPRANGE_FORMAT 2 + +#define BRW_STENCILOP_KEEP 0 +#define BRW_STENCILOP_ZERO 1 +#define BRW_STENCILOP_REPLACE 2 +#define BRW_STENCILOP_INCRSAT 3 +#define BRW_STENCILOP_DECRSAT 4 +#define BRW_STENCILOP_INCR 5 +#define BRW_STENCILOP_DECR 6 +#define BRW_STENCILOP_INVERT 7 + +#define BRW_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define BRW_SURFACE_MIPMAPLAYOUT_RIGHT 1 + +#define BRW_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 +#define BRW_SURFACEFORMAT_R32G32B32A32_SINT 0x001 +#define BRW_SURFACEFORMAT_R32G32B32A32_UINT 0x002 +#define BRW_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 +#define BRW_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 +#define BRW_SURFACEFORMAT_R64G64_FLOAT 0x005 +#define BRW_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 +#define BRW_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 +#define BRW_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 +#define BRW_SURFACEFORMAT_R32G32B32_FLOAT 0x040 +#define BRW_SURFACEFORMAT_R32G32B32_SINT 0x041 +#define BRW_SURFACEFORMAT_R32G32B32_UINT 0x042 +#define BRW_SURFACEFORMAT_R32G32B32_UNORM 0x043 +#define BRW_SURFACEFORMAT_R32G32B32_SNORM 0x044 +#define BRW_SURFACEFORMAT_R32G32B32_SSCALED 0x045 +#define BRW_SURFACEFORMAT_R32G32B32_USCALED 0x046 +#define BRW_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 +#define BRW_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 +#define BRW_SURFACEFORMAT_R16G16B16A16_SINT 0x082 +#define BRW_SURFACEFORMAT_R16G16B16A16_UINT 0x083 +#define BRW_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 +#define BRW_SURFACEFORMAT_R32G32_FLOAT 0x085 +#define BRW_SURFACEFORMAT_R32G32_SINT 0x086 +#define BRW_SURFACEFORMAT_R32G32_UINT 0x087 +#define BRW_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 +#define 
BRW_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 +#define BRW_SURFACEFORMAT_L32A32_FLOAT 0x08A +#define BRW_SURFACEFORMAT_R32G32_UNORM 0x08B +#define BRW_SURFACEFORMAT_R32G32_SNORM 0x08C +#define BRW_SURFACEFORMAT_R64_FLOAT 0x08D +#define BRW_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E +#define BRW_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F +#define BRW_SURFACEFORMAT_A32X32_FLOAT 0x090 +#define BRW_SURFACEFORMAT_L32X32_FLOAT 0x091 +#define BRW_SURFACEFORMAT_I32X32_FLOAT 0x092 +#define BRW_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 +#define BRW_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 +#define BRW_SURFACEFORMAT_R32G32_SSCALED 0x095 +#define BRW_SURFACEFORMAT_R32G32_USCALED 0x096 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 +#define BRW_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 +#define BRW_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 +#define BRW_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 +#define BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 +#define BRW_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 +#define BRW_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA +#define BRW_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB +#define BRW_SURFACEFORMAT_R16G16_UNORM 0x0CC +#define BRW_SURFACEFORMAT_R16G16_SNORM 0x0CD +#define BRW_SURFACEFORMAT_R16G16_SINT 0x0CE +#define BRW_SURFACEFORMAT_R16G16_UINT 0x0CF +#define BRW_SURFACEFORMAT_R16G16_FLOAT 0x0D0 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 +#define BRW_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 +#define BRW_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 +#define BRW_SURFACEFORMAT_R32_SINT 0x0D6 +#define BRW_SURFACEFORMAT_R32_UINT 0x0D7 +#define BRW_SURFACEFORMAT_R32_FLOAT 0x0D8 +#define BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 +#define BRW_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA +#define BRW_SURFACEFORMAT_L16A16_UNORM 0x0DF +#define BRW_SURFACEFORMAT_I24X8_UNORM 0x0E0 +#define BRW_SURFACEFORMAT_L24X8_UNORM 0x0E1 +#define 
BRW_SURFACEFORMAT_A24X8_UNORM 0x0E2 +#define BRW_SURFACEFORMAT_I32_FLOAT 0x0E3 +#define BRW_SURFACEFORMAT_L32_FLOAT 0x0E4 +#define BRW_SURFACEFORMAT_A32_FLOAT 0x0E5 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 +#define BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB +#define BRW_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC +#define BRW_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED +#define BRW_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE +#define BRW_SURFACEFORMAT_L16A16_FLOAT 0x0F0 +#define BRW_SURFACEFORMAT_R32_UNORM 0x0F1 +#define BRW_SURFACEFORMAT_R32_SNORM 0x0F2 +#define BRW_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 +#define BRW_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 +#define BRW_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 +#define BRW_SURFACEFORMAT_R16G16_SSCALED 0x0F6 +#define BRW_SURFACEFORMAT_R16G16_USCALED 0x0F7 +#define BRW_SURFACEFORMAT_R32_SSCALED 0x0F8 +#define BRW_SURFACEFORMAT_R32_USCALED 0x0F9 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM 0x100 +#define BRW_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 +#define BRW_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 +#define BRW_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 +#define BRW_SURFACEFORMAT_R8G8_UNORM 0x106 +#define BRW_SURFACEFORMAT_R8G8_SNORM 0x107 +#define BRW_SURFACEFORMAT_R8G8_SINT 0x108 +#define BRW_SURFACEFORMAT_R8G8_UINT 0x109 +#define BRW_SURFACEFORMAT_R16_UNORM 0x10A +#define BRW_SURFACEFORMAT_R16_SNORM 0x10B +#define BRW_SURFACEFORMAT_R16_SINT 0x10C +#define BRW_SURFACEFORMAT_R16_UINT 0x10D +#define BRW_SURFACEFORMAT_R16_FLOAT 0x10E +#define BRW_SURFACEFORMAT_I16_UNORM 0x111 +#define BRW_SURFACEFORMAT_L16_UNORM 0x112 +#define BRW_SURFACEFORMAT_A16_UNORM 0x113 +#define BRW_SURFACEFORMAT_L8A8_UNORM 0x114 +#define BRW_SURFACEFORMAT_I16_FLOAT 0x115 +#define BRW_SURFACEFORMAT_L16_FLOAT 0x116 +#define BRW_SURFACEFORMAT_A16_FLOAT 0x117 +#define BRW_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define 
BRW_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define BRW_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B +#define BRW_SURFACEFORMAT_R8G8_SSCALED 0x11C +#define BRW_SURFACEFORMAT_R8G8_USCALED 0x11D +#define BRW_SURFACEFORMAT_R16_SSCALED 0x11E +#define BRW_SURFACEFORMAT_R16_USCALED 0x11F +#define BRW_SURFACEFORMAT_R8_UNORM 0x140 +#define BRW_SURFACEFORMAT_R8_SNORM 0x141 +#define BRW_SURFACEFORMAT_R8_SINT 0x142 +#define BRW_SURFACEFORMAT_R8_UINT 0x143 +#define BRW_SURFACEFORMAT_A8_UNORM 0x144 +#define BRW_SURFACEFORMAT_I8_UNORM 0x145 +#define BRW_SURFACEFORMAT_L8_UNORM 0x146 +#define BRW_SURFACEFORMAT_P4A4_UNORM 0x147 +#define BRW_SURFACEFORMAT_A4P4_UNORM 0x148 +#define BRW_SURFACEFORMAT_R8_SSCALED 0x149 +#define BRW_SURFACEFORMAT_R8_USCALED 0x14A +#define BRW_SURFACEFORMAT_R1_UINT 0x181 +#define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 +#define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 +#define BRW_SURFACEFORMAT_BC1_UNORM 0x186 +#define BRW_SURFACEFORMAT_BC2_UNORM 0x187 +#define BRW_SURFACEFORMAT_BC3_UNORM 0x188 +#define BRW_SURFACEFORMAT_BC4_UNORM 0x189 +#define BRW_SURFACEFORMAT_BC5_UNORM 0x18A +#define BRW_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B +#define BRW_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C +#define BRW_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D +#define BRW_SURFACEFORMAT_MONO8 0x18E +#define BRW_SURFACEFORMAT_YCRCB_SWAPUV 0x18F +#define BRW_SURFACEFORMAT_YCRCB_SWAPY 0x190 +#define BRW_SURFACEFORMAT_DXT1_RGB 0x191 +#define BRW_SURFACEFORMAT_FXT1 0x192 +#define BRW_SURFACEFORMAT_R8G8B8_UNORM 0x193 +#define BRW_SURFACEFORMAT_R8G8B8_SNORM 0x194 +#define BRW_SURFACEFORMAT_R8G8B8_SSCALED 0x195 +#define BRW_SURFACEFORMAT_R8G8B8_USCALED 0x196 +#define BRW_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 +#define BRW_SURFACEFORMAT_R64G64B64_FLOAT 0x198 +#define BRW_SURFACEFORMAT_BC4_SNORM 0x199 +#define BRW_SURFACEFORMAT_BC5_SNORM 0x19A +#define BRW_SURFACEFORMAT_R16G16B16_UNORM 0x19C +#define BRW_SURFACEFORMAT_R16G16B16_SNORM 0x19D +#define BRW_SURFACEFORMAT_R16G16B16_SSCALED 0x19E +#define 
BRW_SURFACEFORMAT_R16G16B16_USCALED 0x19F + +#define BRW_SURFACERETURNFORMAT_FLOAT32 0 +#define BRW_SURFACERETURNFORMAT_S1 1 + +#define BRW_SURFACE_1D 0 +#define BRW_SURFACE_2D 1 +#define BRW_SURFACE_3D 2 +#define BRW_SURFACE_CUBE 3 +#define BRW_SURFACE_BUFFER 4 +#define BRW_SURFACE_NULL 7 + +#define BRW_TEXCOORDMODE_WRAP 0 +#define BRW_TEXCOORDMODE_MIRROR 1 +#define BRW_TEXCOORDMODE_CLAMP 2 +#define BRW_TEXCOORDMODE_CUBE 3 +#define BRW_TEXCOORDMODE_CLAMP_BORDER 4 +#define BRW_TEXCOORDMODE_MIRROR_ONCE 5 + +#define BRW_THREAD_PRIORITY_NORMAL 0 +#define BRW_THREAD_PRIORITY_HIGH 1 + +#define BRW_TILEWALK_XMAJOR 0 +#define BRW_TILEWALK_YMAJOR 1 + +#define BRW_VERTEX_SUBPIXEL_PRECISION_8BITS 0 +#define BRW_VERTEX_SUBPIXEL_PRECISION_4BITS 1 + +#define BRW_VERTEXBUFFER_ACCESS_VERTEXDATA 0 +#define BRW_VERTEXBUFFER_ACCESS_INSTANCEDATA 1 + +#define BRW_VFCOMPONENT_NOSTORE 0 +#define BRW_VFCOMPONENT_STORE_SRC 1 +#define BRW_VFCOMPONENT_STORE_0 2 +#define BRW_VFCOMPONENT_STORE_1_FLT 3 +#define BRW_VFCOMPONENT_STORE_1_INT 4 +#define BRW_VFCOMPONENT_STORE_VID 5 +#define BRW_VFCOMPONENT_STORE_IID 6 +#define BRW_VFCOMPONENT_STORE_PID 7 + + + +/* Execution Unit (EU) defines + */ + +#define BRW_ALIGN_1 0 +#define BRW_ALIGN_16 1 + +#define BRW_ADDRESS_DIRECT 0 +#define BRW_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define BRW_CHANNEL_X 0 +#define BRW_CHANNEL_Y 1 +#define BRW_CHANNEL_Z 2 +#define BRW_CHANNEL_W 3 + +#define BRW_COMPRESSION_NONE 0 +#define BRW_COMPRESSION_2NDHALF 1 +#define BRW_COMPRESSION_COMPRESSED 2 + +#define BRW_CONDITIONAL_NONE 0 +#define BRW_CONDITIONAL_Z 1 +#define BRW_CONDITIONAL_NZ 2 +#define BRW_CONDITIONAL_EQ 1 /* Z */ +#define BRW_CONDITIONAL_NEQ 2 /* NZ */ +#define BRW_CONDITIONAL_G 3 +#define BRW_CONDITIONAL_GE 4 +#define BRW_CONDITIONAL_L 5 +#define BRW_CONDITIONAL_LE 6 +#define BRW_CONDITIONAL_C 7 +#define BRW_CONDITIONAL_O 8 + +#define BRW_DEBUG_NONE 0 +#define BRW_DEBUG_BREAKPOINT 1 + +#define BRW_DEPENDENCY_NORMAL 0 +#define BRW_DEPENDENCY_NOTCLEARED 
1 +#define BRW_DEPENDENCY_NOTCHECKED 2 +#define BRW_DEPENDENCY_DISABLE 3 + +#define BRW_EXECUTE_1 0 +#define BRW_EXECUTE_2 1 +#define BRW_EXECUTE_4 2 +#define BRW_EXECUTE_8 3 +#define BRW_EXECUTE_16 4 +#define BRW_EXECUTE_32 5 + +#define BRW_HORIZONTAL_STRIDE_0 0 +#define BRW_HORIZONTAL_STRIDE_1 1 +#define BRW_HORIZONTAL_STRIDE_2 2 +#define BRW_HORIZONTAL_STRIDE_4 3 + +#define BRW_INSTRUCTION_NORMAL 0 +#define BRW_INSTRUCTION_SATURATE 1 + +#define BRW_MASK_ENABLE 0 +#define BRW_MASK_DISABLE 1 + +#define BRW_OPCODE_MOV 1 +#define BRW_OPCODE_SEL 2 +#define BRW_OPCODE_NOT 4 +#define BRW_OPCODE_AND 5 +#define BRW_OPCODE_OR 6 +#define BRW_OPCODE_XOR 7 +#define BRW_OPCODE_SHR 8 +#define BRW_OPCODE_SHL 9 +#define BRW_OPCODE_RSR 10 +#define BRW_OPCODE_RSL 11 +#define BRW_OPCODE_ASR 12 +#define BRW_OPCODE_CMP 16 +#define BRW_OPCODE_JMPI 32 +#define BRW_OPCODE_IF 34 +#define BRW_OPCODE_IFF 35 +#define BRW_OPCODE_ELSE 36 +#define BRW_OPCODE_ENDIF 37 +#define BRW_OPCODE_DO 38 +#define BRW_OPCODE_WHILE 39 +#define BRW_OPCODE_BREAK 40 +#define BRW_OPCODE_CONTINUE 41 +#define BRW_OPCODE_HALT 42 +#define BRW_OPCODE_MSAVE 44 +#define BRW_OPCODE_MRESTORE 45 +#define BRW_OPCODE_PUSH 46 +#define BRW_OPCODE_POP 47 +#define BRW_OPCODE_WAIT 48 +#define BRW_OPCODE_SEND 49 +#define BRW_OPCODE_ADD 64 +#define BRW_OPCODE_MUL 65 +#define BRW_OPCODE_AVG 66 +#define BRW_OPCODE_FRC 67 +#define BRW_OPCODE_RNDU 68 +#define BRW_OPCODE_RNDD 69 +#define BRW_OPCODE_RNDE 70 +#define BRW_OPCODE_RNDZ 71 +#define BRW_OPCODE_MAC 72 +#define BRW_OPCODE_MACH 73 +#define BRW_OPCODE_LZD 74 +#define BRW_OPCODE_SAD2 80 +#define BRW_OPCODE_SADA2 81 +#define BRW_OPCODE_DP4 84 +#define BRW_OPCODE_DPH 85 +#define BRW_OPCODE_DP3 86 +#define BRW_OPCODE_DP2 87 +#define BRW_OPCODE_DPA2 88 +#define BRW_OPCODE_LINE 89 +#define BRW_OPCODE_NOP 126 + +#define BRW_PREDICATE_NONE 0 +#define BRW_PREDICATE_NORMAL 1 +#define BRW_PREDICATE_ALIGN1_ANYV 2 +#define BRW_PREDICATE_ALIGN1_ALLV 3 +#define BRW_PREDICATE_ALIGN1_ANY2H 4 
+#define BRW_PREDICATE_ALIGN1_ALL2H 5 +#define BRW_PREDICATE_ALIGN1_ANY4H 6 +#define BRW_PREDICATE_ALIGN1_ALL4H 7 +#define BRW_PREDICATE_ALIGN1_ANY8H 8 +#define BRW_PREDICATE_ALIGN1_ALL8H 9 +#define BRW_PREDICATE_ALIGN1_ANY16H 10 +#define BRW_PREDICATE_ALIGN1_ALL16H 11 +#define BRW_PREDICATE_ALIGN16_REPLICATE_X 2 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define BRW_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define BRW_PREDICATE_ALIGN16_REPLICATE_W 5 +#define BRW_PREDICATE_ALIGN16_ANY4H 6 +#define BRW_PREDICATE_ALIGN16_ALL4H 7 + +#define BRW_ARCHITECTURE_REGISTER_FILE 0 +#define BRW_GENERAL_REGISTER_FILE 1 +#define BRW_MESSAGE_REGISTER_FILE 2 +#define BRW_IMMEDIATE_VALUE 3 + +#define BRW_REGISTER_TYPE_UD 0 +#define BRW_REGISTER_TYPE_D 1 +#define BRW_REGISTER_TYPE_UW 2 +#define BRW_REGISTER_TYPE_W 3 +#define BRW_REGISTER_TYPE_UB 4 +#define BRW_REGISTER_TYPE_B 5 +#define BRW_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */ +#define BRW_REGISTER_TYPE_HF 6 +#define BRW_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define BRW_REGISTER_TYPE_F 7 + +#define BRW_ARF_NULL 0x00 +#define BRW_ARF_ADDRESS 0x10 +#define BRW_ARF_ACCUMULATOR 0x20 +#define BRW_ARF_FLAG 0x30 +#define BRW_ARF_MASK 0x40 +#define BRW_ARF_MASK_STACK 0x50 +#define BRW_ARF_MASK_STACK_DEPTH 0x60 +#define BRW_ARF_STATE 0x70 +#define BRW_ARF_CONTROL 0x80 +#define BRW_ARF_NOTIFICATION_COUNT 0x90 +#define BRW_ARF_IP 0xA0 + +#define BRW_AMASK 0 +#define BRW_IMASK 1 +#define BRW_LMASK 2 +#define BRW_CMASK 3 + + + +#define BRW_THREAD_NORMAL 0 +#define BRW_THREAD_ATOMIC 1 +#define BRW_THREAD_SWITCH 2 + +#define BRW_VERTICAL_STRIDE_0 0 +#define BRW_VERTICAL_STRIDE_1 1 +#define BRW_VERTICAL_STRIDE_2 2 +#define BRW_VERTICAL_STRIDE_4 3 +#define BRW_VERTICAL_STRIDE_8 4 +#define BRW_VERTICAL_STRIDE_16 5 +#define BRW_VERTICAL_STRIDE_32 6 +#define BRW_VERTICAL_STRIDE_64 7 +#define BRW_VERTICAL_STRIDE_128 8 +#define BRW_VERTICAL_STRIDE_256 9 +#define 
BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define BRW_WIDTH_1 0 +#define BRW_WIDTH_2 1 +#define BRW_WIDTH_4 2 +#define BRW_WIDTH_8 3 +#define BRW_WIDTH_16 4 + +#define BRW_STATELESS_BUFFER_BOUNDARY_1K 0 +#define BRW_STATELESS_BUFFER_BOUNDARY_2K 1 +#define BRW_STATELESS_BUFFER_BOUNDARY_4K 2 +#define BRW_STATELESS_BUFFER_BOUNDARY_8K 3 +#define BRW_STATELESS_BUFFER_BOUNDARY_16K 4 +#define BRW_STATELESS_BUFFER_BOUNDARY_32K 5 +#define BRW_STATELESS_BUFFER_BOUNDARY_64K 6 +#define BRW_STATELESS_BUFFER_BOUNDARY_128K 7 +#define BRW_STATELESS_BUFFER_BOUNDARY_256K 8 +#define BRW_STATELESS_BUFFER_BOUNDARY_512K 9 +#define BRW_STATELESS_BUFFER_BOUNDARY_1M 10 +#define BRW_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define BRW_POLYGON_FACING_FRONT 0 +#define BRW_POLYGON_FACING_BACK 1 + +#define BRW_MESSAGE_TARGET_NULL 0 +#define BRW_MESSAGE_TARGET_MATH 1 +#define BRW_MESSAGE_TARGET_SAMPLER 2 +#define BRW_MESSAGE_TARGET_GATEWAY 3 +#define BRW_MESSAGE_TARGET_DATAPORT_READ 4 +#define BRW_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define BRW_MESSAGE_TARGET_URB 6 +#define BRW_MESSAGE_TARGET_THREAD_SPAWNER 7 + +#define BRW_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define BRW_SAMPLER_RETURN_FORMAT_UINT32 2 +#define BRW_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 +#define BRW_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD8_LD 3 +#define BRW_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define 
BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define BRW_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define BRW_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +#define BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 +#define BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 + +#define BRW_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define BRW_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 +#define BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2 +#define BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 +#define BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 +#define BRW_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 +#define BRW_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 + +#define BRW_MATH_FUNCTION_INV 1 +#define BRW_MATH_FUNCTION_LOG 2 +#define BRW_MATH_FUNCTION_EXP 3 +#define BRW_MATH_FUNCTION_SQRT 4 +#define BRW_MATH_FUNCTION_RSQ 5 +#define BRW_MATH_FUNCTION_SIN 6 /* was 7 */ +#define BRW_MATH_FUNCTION_COS 7 /* was 8 */ +#define BRW_MATH_FUNCTION_SINCOS 8 /* was 6 */ +#define BRW_MATH_FUNCTION_TAN 9 +#define BRW_MATH_FUNCTION_POW 10 +#define 
BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 +#define BRW_MATH_FUNCTION_INT_DIV_QUOTIENT 12 +#define BRW_MATH_FUNCTION_INT_DIV_REMAINDER 13 + +#define BRW_MATH_INTEGER_UNSIGNED 0 +#define BRW_MATH_INTEGER_SIGNED 1 + +#define BRW_MATH_PRECISION_FULL 0 +#define BRW_MATH_PRECISION_PARTIAL 1 + +#define BRW_MATH_SATURATE_NONE 0 +#define BRW_MATH_SATURATE_SATURATE 1 + +#define BRW_MATH_DATA_VECTOR 0 +#define BRW_MATH_DATA_SCALAR 1 + +#define BRW_URB_OPCODE_WRITE 0 + +#define BRW_URB_SWIZZLE_NONE 0 +#define BRW_URB_SWIZZLE_INTERLEAVE 1 +#define BRW_URB_SWIZZLE_TRANSPOSE 2 + +#define BRW_SCRATCH_SPACE_SIZE_1K 0 +#define BRW_SCRATCH_SPACE_SIZE_2K 1 +#define BRW_SCRATCH_SPACE_SIZE_4K 2 +#define BRW_SCRATCH_SPACE_SIZE_8K 3 +#define BRW_SCRATCH_SPACE_SIZE_16K 4 +#define BRW_SCRATCH_SPACE_SIZE_32K 5 +#define BRW_SCRATCH_SPACE_SIZE_64K 6 +#define BRW_SCRATCH_SPACE_SIZE_128K 7 +#define BRW_SCRATCH_SPACE_SIZE_256K 8 +#define BRW_SCRATCH_SPACE_SIZE_512K 9 +#define BRW_SCRATCH_SPACE_SIZE_1M 10 +#define BRW_SCRATCH_SPACE_SIZE_2M 11 + + + + +#define CMD_URB_FENCE 0x6000 +#define CMD_CONST_BUFFER_STATE 0x6001 +#define CMD_CONST_BUFFER 0x6002 + +#define CMD_STATE_BASE_ADDRESS 0x6101 +#define CMD_STATE_INSN_POINTER 0x6102 +#define CMD_PIPELINE_SELECT 0x6104 + +#define CMD_PIPELINED_STATE_POINTERS 0x7800 +#define CMD_BINDING_TABLE_PTRS 0x7801 +#define CMD_VERTEX_BUFFER 0x7808 +#define CMD_VERTEX_ELEMENT 0x7809 +#define CMD_INDEX_BUFFER 0x780a +#define CMD_VF_STATISTICS 0x780b + +#define CMD_DRAW_RECT 0x7900 +#define CMD_BLEND_CONSTANT_COLOR 0x7901 +#define CMD_CHROMA_KEY 0x7904 +#define CMD_DEPTH_BUFFER 0x7905 +#define CMD_POLY_STIPPLE_OFFSET 0x7906 +#define CMD_POLY_STIPPLE_PATTERN 0x7907 +#define CMD_LINE_STIPPLE_PATTERN 0x7908 +#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 + +#define CMD_PIPE_CONTROL 0x7a00 + +#define CMD_3D_PRIM 0x7b00 + +#define CMD_MI_FLUSH 0x0200 + + +/* Various values from the R0 vertex header: + */ +#define R02_PRIM_END 0x1 +#define R02_PRIM_START 0x2 + + + 
+#endif diff --git a/src/gallium/drivers/i965simple/brw_draw.c b/src/gallium/drivers/i965simple/brw_draw.c new file mode 100644 index 0000000000..7598e3dc8a --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_draw.c @@ -0,0 +1,239 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include <stdlib.h> + +#include "brw_batch.h" +#include "brw_draw.h" +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_state.h" + +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" +/* Indexed by the current primitive (brw->primitive); gives the _3DPRIM_* topology that brw_emit_prim() writes into the packet header. Entry order assumes the PIPE_PRIM_* numbering runs POINTS..POLYGON -- confirm against the pipe headers. */ +static const unsigned hw_prim[PIPE_PRIM_POLYGON+1] = { + _3DPRIM_POINTLIST, + _3DPRIM_LINELIST, + _3DPRIM_LINELOOP, + _3DPRIM_LINESTRIP, + _3DPRIM_TRILIST, + _3DPRIM_TRISTRIP, + _3DPRIM_TRIFAN, + _3DPRIM_QUADLIST, + _3DPRIM_QUADSTRIP, + _3DPRIM_POLYGON +}; + +/* Same indexing as hw_prim; collapses every primitive to PIPE_PRIM_POINTS, PIPE_PRIM_LINES or PIPE_PRIM_TRIANGLES. Read by brw_set_prim() to maintain brw->reduced_primitive. */ +static const int reduced_prim[PIPE_PRIM_POLYGON+1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES +}; + + +/* When the primitive changes, set a state bit and re-validate. Not + * the nicest and would rather deal with this by having all the + * programs be immune to the active primitive (ie. cope with all + * possibilities). That may not be realistic however. + */ +static void brw_set_prim(struct brw_context *brw, int prim) +{ + if (BRW_DEBUG & DEBUG_PRIMS) PRINT("PRIM: %d\n", prim); + + /* Slight optimization to avoid the GS program when not needed: a filled, smooth-shaded quad strip draws the same as a triangle strip. Flat shading must keep the quad strip, because the two triangles of a quad would otherwise take their colour from different vertices. + */ + if (prim == PIPE_PRIM_QUAD_STRIP && + !brw->attribs.Raster->flatshade && + brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_FILL && + brw->attribs.Raster->fill_ccw == PIPE_POLYGON_MODE_FILL) + prim = PIPE_PRIM_TRIANGLE_STRIP; + + if (prim != brw->primitive) { + brw->primitive = prim; + brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; + + if (reduced_prim[prim] != brw->reduced_primitive) { + brw->reduced_primitive = reduced_prim[prim]; + brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE; + } + + brw_validate_state(brw); + } + +} + +/* Drop trailing vertices that cannot complete a primitive: quad strips need at least 4 vertices and an even count, quads a multiple of 4; every other primitive keeps its length unchanged. */ +static unsigned trim(int prim, unsigned length) +{ + if (prim == PIPE_PRIM_QUAD_STRIP) + return length > 3 ? 
(length - length % 2) : 0; + else if (prim == PIPE_PRIM_QUADS) + return length - length % 4; + else + return length; +} + + +/* Fill a brw_3d_primitive packet for the current brw->primitive and hand it to brw_batchbuffer_data(). Returns TRUE without emitting anything when trim() leaves no vertices; otherwise returns the batch call's result. */ +static boolean brw_emit_prim( struct brw_context *brw, + boolean indexed, + unsigned start, + unsigned count ) + +{ + struct brw_3d_primitive prim_packet; + + if (BRW_DEBUG & DEBUG_PRIMS) + PRINT("PRIM: %d %d %d\n", brw->primitive, start, count); + + prim_packet.header.opcode = CMD_3D_PRIM; + prim_packet.header.length = sizeof(prim_packet)/4 - 2; /* packet size in 4-byte units, minus 2 */ + prim_packet.header.pad = 0; + prim_packet.header.topology = hw_prim[brw->primitive]; + prim_packet.header.indexed = indexed; + + prim_packet.verts_per_instance = trim(brw->primitive, count); + prim_packet.start_vert_location = start; + prim_packet.instance_count = 1; + prim_packet.start_instance_location = 0; + prim_packet.base_vert_location = 0; + + if (prim_packet.verts_per_instance == 0) + return TRUE; + + return brw_batchbuffer_data( brw->winsys, + &prim_packet, + sizeof(prim_packet) ); +} + + +/* May fail if out of video memory for texture or vbo upload, or on + * fallback conditions. Returns FALSE as soon as any upload step or the final emit reports failure. + */ +static boolean brw_try_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *index_buffer, + unsigned index_size, + unsigned mode, + unsigned start, + unsigned count ) +{ + struct brw_context *brw = brw_context(pipe); + + /* Set the first primitive ahead of validate_state: + */ + brw_set_prim(brw, mode); + + /* Upload index, vertex data: + */ + if (index_buffer && + !brw_upload_indices( brw, index_buffer, index_size, start, count )) + return FALSE; + + if (!brw_upload_vertex_buffers(brw)) + return FALSE; + + if (!brw_upload_vertex_elements( brw )) + return FALSE; + + /* XXX: Need to separate validate and upload of state. 
+ */ + if (brw->state.dirty.brw) + brw_validate_state( brw ); + + if (!brw_emit_prim(brw, index_buffer != NULL, + start, count)) + return FALSE; + + return TRUE; +} + + +/* draw_elements hook: attempt the draw once and, if that fails, once more; asserts and returns FALSE when the second attempt fails too. */ +static boolean brw_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, + unsigned start, + unsigned count ) +{ + if (!brw_try_draw_elements( pipe, + indexBuffer, + indexSize, + mode, start, count )) + { + /* flush ? -- NOTE(review): nothing is flushed here, so the retry below runs against the same state as the failed attempt */ + + if (!brw_try_draw_elements( pipe, + indexBuffer, + indexSize, + mode, start, + count )) { + assert(0); + return FALSE; + } + } + + return TRUE; +} + + +/* draw_arrays hook: same two-attempt policy as brw_draw_elements(), with no index buffer. */ +static boolean brw_draw_arrays( struct pipe_context *pipe, + unsigned mode, + unsigned start, + unsigned count ) +{ + if (!brw_try_draw_elements( pipe, NULL, 0, mode, start, count )) { + /* flush ? -- NOTE(review): nothing is flushed here, so the retry below runs against the same state as the failed attempt */ + + if (!brw_try_draw_elements( pipe, NULL, 0, mode, start, count )) { + assert(0); + return FALSE; + } + } + + return TRUE; +} + + +/* Install the two draw hooks defined above into brw->pipe. */ +void brw_init_draw_functions( struct brw_context *brw ) +{ + brw->pipe.draw_arrays = brw_draw_arrays; + brw->pipe.draw_elements = brw_draw_elements; +} + + diff --git a/src/gallium/drivers/i965simple/brw_draw.h b/src/gallium/drivers/i965simple/brw_draw.h new file mode 100644 index 0000000000..62fe0d5d0e --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_draw.h @@ -0,0 +1,55 @@ + /************************************************************************** + * + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef BRW_DRAW_H +#define BRW_DRAW_H + +#include "pipe/p_context.h" + +struct brw_context; + + + +void brw_init_draw_functions( struct brw_context *brw ); + + +boolean brw_upload_vertices( struct brw_context *brw, + unsigned min_index, + unsigned max_index ); + +boolean brw_upload_indices(struct brw_context *brw, + const struct pipe_buffer *index_buffer, + int ib_size, int start, int count); + +boolean brw_upload_vertex_buffers( struct brw_context *brw ); +boolean brw_upload_vertex_elements( struct brw_context *brw ); + +unsigned brw_translate_surface_format( unsigned id ); + + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_draw_upload.c b/src/gallium/drivers/i965simple/brw_draw_upload.c new file mode 100644 index 0000000000..7c20ea52af --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_draw_upload.c @@ -0,0 +1,300 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <stdlib.h> + +#include "brw_batch.h" +#include "brw_draw.h" +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_state.h" + + +struct brw_array_state { + union header_union header; + + struct { + union { + struct { + unsigned pitch:11; + unsigned pad:15; + unsigned access_type:1; + unsigned vb_index:5; + } bits; + unsigned dword; + } vb0; + + struct pipe_buffer *buffer; + unsigned offset; + + unsigned max_index; + unsigned instance_data_step_rate; + + } vb[BRW_VBP_MAX]; +}; + + + +unsigned brw_translate_surface_format( unsigned id ) +{ + switch (id) { + case PIPE_FORMAT_R64_FLOAT: + return BRW_SURFACEFORMAT_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return BRW_SURFACEFORMAT_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return BRW_SURFACEFORMAT_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return BRW_SURFACEFORMAT_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return BRW_SURFACEFORMAT_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return BRW_SURFACEFORMAT_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return BRW_SURFACEFORMAT_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return BRW_SURFACEFORMAT_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return 
BRW_SURFACEFORMAT_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return BRW_SURFACEFORMAT_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return BRW_SURFACEFORMAT_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return BRW_SURFACEFORMAT_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return BRW_SURFACEFORMAT_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return BRW_SURFACEFORMAT_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return BRW_SURFACEFORMAT_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return BRW_SURFACEFORMAT_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return BRW_SURFACEFORMAT_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return BRW_SURFACEFORMAT_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return BRW_SURFACEFORMAT_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return BRW_SURFACEFORMAT_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return BRW_SURFACEFORMAT_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return BRW_SURFACEFORMAT_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return BRW_SURFACEFORMAT_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return BRW_SURFACEFORMAT_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return 
BRW_SURFACEFORMAT_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return BRW_SURFACEFORMAT_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return BRW_SURFACEFORMAT_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return BRW_SURFACEFORMAT_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return BRW_SURFACEFORMAT_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return BRW_SURFACEFORMAT_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return BRW_SURFACEFORMAT_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return BRW_SURFACEFORMAT_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return BRW_SURFACEFORMAT_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return BRW_SURFACEFORMAT_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return BRW_SURFACEFORMAT_R8G8B8A8_SSCALED; + + default: + assert(0); + return 0; + } +} + +static unsigned get_index_type(int type) +{ + switch (type) { + case 1: return BRW_INDEX_BYTE; + case 2: return BRW_INDEX_WORD; + case 4: return BRW_INDEX_DWORD; + default: assert(0); return 0; + } +} + + +boolean brw_upload_vertex_buffers( struct brw_context *brw ) +{ + struct brw_array_state vbp; + unsigned nr_enabled = 0; + unsigned i; + + memset(&vbp, 0, sizeof(vbp)); + + /* This is a hardware limit: + */ + + for (i = 0; i < BRW_VEP_MAX; i++) + { + if (brw->vb.vbo_array[i] == NULL) { + nr_enabled = i; + break; + } + + vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i]->pitch; + vbp.vb[i].vb0.bits.pad = 0; + 
vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA; + vbp.vb[i].vb0.bits.vb_index = i; + vbp.vb[i].offset = brw->vb.vbo_array[i]->buffer_offset; + vbp.vb[i].buffer = brw->vb.vbo_array[i]->buffer; + vbp.vb[i].max_index = brw->vb.vbo_array[i]->max_index; + } + + + vbp.header.bits.length = (1 + nr_enabled * 4) - 2; + vbp.header.bits.opcode = CMD_VERTEX_BUFFER; + + BEGIN_BATCH(vbp.header.bits.length+2, 0); + OUT_BATCH( vbp.header.dword ); + + for (i = 0; i < nr_enabled; i++) { + OUT_BATCH( vbp.vb[i].vb0.dword ); + OUT_RELOC( vbp.vb[i].buffer, PIPE_BUFFER_USAGE_GPU_READ, + vbp.vb[i].offset); + OUT_BATCH( vbp.vb[i].max_index ); + OUT_BATCH( vbp.vb[i].instance_data_step_rate ); + } + ADVANCE_BATCH(); + return TRUE; +} + + + +boolean brw_upload_vertex_elements( struct brw_context *brw ) +{ + struct brw_vertex_element_packet vep; + + unsigned i; + unsigned nr_enabled = brw->attribs.VertexProgram->info.num_inputs; + + memset(&vep, 0, sizeof(vep)); + + for (i = 0; i < nr_enabled; i++) + vep.ve[i] = brw->vb.inputs[i]; + + + vep.header.length = (1 + nr_enabled * sizeof(vep.ve[0])/4) - 2; + vep.header.opcode = CMD_VERTEX_ELEMENT; + brw_cached_batch_struct(brw, &vep, 4 + nr_enabled * sizeof(vep.ve[0])); + + return TRUE; +} + +boolean brw_upload_indices( struct brw_context *brw, + const struct pipe_buffer *index_buffer, + int ib_size, int start, int count) +{ + /* Emit the indexbuffer packet: + */ + { + struct brw_indexbuffer ib; + + memset(&ib, 0, sizeof(ib)); + + ib.header.bits.opcode = CMD_INDEX_BUFFER; + ib.header.bits.length = sizeof(ib)/4 - 2; + ib.header.bits.index_format = get_index_type(ib_size); + ib.header.bits.cut_index_enable = 0; + + + BEGIN_BATCH(4, 0); + OUT_BATCH( ib.header.dword ); + OUT_RELOC( index_buffer, PIPE_BUFFER_USAGE_GPU_READ, start); + OUT_RELOC( index_buffer, PIPE_BUFFER_USAGE_GPU_READ, start + count); + OUT_BATCH( 0 ); + ADVANCE_BATCH(); + } + return TRUE; +} diff --git a/src/gallium/drivers/i965simple/brw_eu.c 
b/src/gallium/drivers/i965simple/brw_eu.c new file mode 100644 index 0000000000..e2002d1821 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu.c @@ -0,0 +1,130 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + + +/* How does predicate control work when execution_size != 8? Do I + * need to test/set for 0xffff when execution_size is 16? 
+ */ +void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ) +{ + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + if (value != 0xff) { + if (value != p->flag_value) { + brw_push_insn_state(p); + brw_MOV(p, brw_flag_reg(), brw_imm_uw(value)); + p->flag_value = value; + brw_pop_insn_state(p); + } + + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + } +} + +void brw_set_predicate_control( struct brw_compile *p, unsigned pc ) +{ + p->current->header.predicate_control = pc; +} + +void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ) +{ + p->current->header.destreg__conditonalmod = conditional; +} + +void brw_set_access_mode( struct brw_compile *p, unsigned access_mode ) +{ + p->current->header.access_mode = access_mode; +} + +void brw_set_compression_control( struct brw_compile *p, boolean compression_control ) +{ + p->current->header.compression_control = compression_control; +} + +void brw_set_mask_control( struct brw_compile *p, unsigned value ) +{ + p->current->header.mask_control = value; +} + +void brw_set_saturate( struct brw_compile *p, unsigned value ) +{ + p->current->header.saturate = value; +} + +void brw_push_insn_state( struct brw_compile *p ) +{ + assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]); + memcpy(p->current+1, p->current, sizeof(struct brw_instruction)); + p->current++; +} + +void brw_pop_insn_state( struct brw_compile *p ) +{ + assert(p->current != p->stack); + p->current--; +} + + +/*********************************************************************** + */ +void brw_init_compile( struct brw_compile *p ) +{ + p->nr_insn = 0; + p->current = p->stack; + memset(p->current, 0, sizeof(p->current[0])); + + /* Some defaults? + */ + brw_set_mask_control(p, BRW_MASK_ENABLE); /* what does this do? 
*/ + brw_set_saturate(p, 0); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_predicate_control_flag_value(p, 0xff); +} + + +const unsigned *brw_get_program( struct brw_compile *p, + unsigned *sz ) +{ + unsigned i; + + for (i = 0; i < 8; i++) + brw_NOP(p); + + *sz = p->nr_insn * sizeof(struct brw_instruction); + return (const unsigned *)p->store; +} + diff --git a/src/gallium/drivers/i965simple/brw_eu.h b/src/gallium/drivers/i965simple/brw_eu.h new file mode 100644 index 0000000000..23151ae9ed --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu.h @@ -0,0 +1,888 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_EU_H +#define BRW_EU_H + +#include "brw_structs.h" +#include "brw_defines.h" + +#include "pipe/p_compiler.h" +#include "pipe/p_shader_tokens.h" + +#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) +#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) +#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) +#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) + + +#define REG_SIZE (8*4) + + +/* These aren't hardware structs, just something useful for us to pass around: + * + * Align1 operation has a lot of control over input ranges. Used in + * WM programs to implement shaders decomposed into "channel serial" + * or "structure of array" form: + */ +struct brw_reg +{ + unsigned type:4; + unsigned file:2; + unsigned nr:8; + unsigned subnr:5; /* :1 in align16 */ + unsigned negate:1; /* source only */ + unsigned abs:1; /* source only */ + unsigned vstride:4; /* source only */ + unsigned width:3; /* src only, align1 only */ + unsigned hstride:2; /* src only, align1 only */ + unsigned address_mode:1; /* relative addressing, hopefully! 
*/ + unsigned pad0:1; + + union { + struct { + unsigned swizzle:8; /* src only, align16 only */ + unsigned writemask:4; /* dest only, align16 only */ + int indirect_offset:10; /* relative addressing offset */ + unsigned pad1:10; /* two dwords total */ + } bits; + + float f; + int d; + unsigned ud; + } dw1; +}; + + +struct brw_indirect { + unsigned addr_subnr:4; + int addr_offset:10; + unsigned pad:18; +}; + + +#define BRW_EU_MAX_INSN_STACK 5 +#define BRW_EU_MAX_INSN 1200 + +struct brw_compile { + struct brw_instruction store[BRW_EU_MAX_INSN]; + unsigned nr_insn; + + /* Allow clients to push/pop instruction state: + */ + struct brw_instruction stack[BRW_EU_MAX_INSN_STACK]; + struct brw_instruction *current; + + unsigned flag_value; + boolean single_program_flow; +}; + + + +static __inline int type_sz( unsigned type ) +{ + switch( type ) { + case BRW_REGISTER_TYPE_UD: + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_F: + return 4; + case BRW_REGISTER_TYPE_HF: + case BRW_REGISTER_TYPE_UW: + case BRW_REGISTER_TYPE_W: + return 2; + case BRW_REGISTER_TYPE_UB: + case BRW_REGISTER_TYPE_B: + return 1; + default: + return 0; + } +} + +static __inline struct brw_reg brw_reg( unsigned file, + unsigned nr, + unsigned subnr, + unsigned type, + unsigned vstride, + unsigned width, + unsigned hstride, + unsigned swizzle, + unsigned writemask) +{ + + struct brw_reg reg; + reg.type = type; + reg.file = file; + reg.nr = nr; + reg.subnr = subnr * type_sz(type); + reg.negate = 0; + reg.abs = 0; + reg.vstride = vstride; + reg.width = width; + reg.hstride = hstride; + reg.address_mode = BRW_ADDRESS_DIRECT; + reg.pad0 = 0; + + /* Could do better: If the reg is r5.3<0;1,0>, we probably want to + * set swizzle and writemask to W, as the lower bits of subnr will + * be lost when converted to align16. This is probably too much to + * keep track of as you'd want it adjusted by suboffset(), etc. + * Perhaps fix up when converting to align16? 
+ */ + reg.dw1.bits.swizzle = swizzle; + reg.dw1.bits.writemask = writemask; + reg.dw1.bits.indirect_offset = 0; + reg.dw1.bits.pad1 = 0; + return reg; +} + +static __inline struct brw_reg brw_vec16_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_16, + BRW_WIDTH_16, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + TGSI_WRITEMASK_XYZW); +} + +static __inline struct brw_reg brw_vec8_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + TGSI_WRITEMASK_XYZW); +} + + +static __inline struct brw_reg brw_vec4_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_4, + BRW_WIDTH_4, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYZW, + TGSI_WRITEMASK_XYZW); +} + + +static __inline struct brw_reg brw_vec2_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_2, + BRW_WIDTH_2, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XYXY, + TGSI_WRITEMASK_XY); +} + +static __inline struct brw_reg brw_vec1_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return brw_reg(file, + nr, + subnr, + BRW_REGISTER_TYPE_F, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XXXX, + TGSI_WRITEMASK_X); +} + + +static __inline struct brw_reg retype( struct brw_reg reg, + unsigned type ) +{ + reg.type = type; + return reg; +} + +static __inline struct brw_reg suboffset( struct brw_reg reg, + unsigned delta ) +{ + reg.subnr += delta * type_sz(reg.type); + return reg; +} + + +static __inline struct brw_reg offset( struct brw_reg reg, + unsigned delta ) +{ + reg.nr += delta; + return reg; +} + + +static __inline struct brw_reg byte_offset( struct brw_reg reg, + 
unsigned bytes ) +{ + unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; + reg.nr = newoffset / REG_SIZE; + reg.subnr = newoffset % REG_SIZE; + return reg; +} + + +static __inline struct brw_reg brw_uw16_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static __inline struct brw_reg brw_uw8_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static __inline struct brw_reg brw_uw1_reg( unsigned file, + unsigned nr, + unsigned subnr ) +{ + return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); +} + +static __inline struct brw_reg brw_imm_reg( unsigned type ) +{ + return brw_reg( BRW_IMMEDIATE_VALUE, + 0, + 0, + type, + BRW_VERTICAL_STRIDE_0, + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + 0, + 0); +} + +static __inline struct brw_reg brw_imm_f( float f ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); + imm.dw1.f = f; + return imm; +} + +static __inline struct brw_reg brw_imm_d( int d ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); + imm.dw1.d = d; + return imm; +} + +static __inline struct brw_reg brw_imm_ud( unsigned ud ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); + imm.dw1.ud = ud; + return imm; +} + +static __inline struct brw_reg brw_imm_uw( ushort uw ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); + imm.dw1.ud = uw; + return imm; +} + +static __inline struct brw_reg brw_imm_w( short w ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); + imm.dw1.d = w; + return imm; +} + +/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type + * numbers alias with _V and _VF below: + */ + +/* Vector of eight signed half-byte values: + */ +static __inline struct brw_reg brw_imm_v( unsigned v ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); + imm.vstride = 
BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_8; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +/* Vector of four 8-bit float values: + */ +static __inline struct brw_reg brw_imm_vf( unsigned v ) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = v; + return imm; +} + +#define VF_ZERO 0x0 +#define VF_ONE 0x30 +#define VF_NEG (1<<7) + +static __inline struct brw_reg brw_imm_vf4( unsigned v0, + unsigned v1, + unsigned v2, + unsigned v3) +{ + struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); + imm.vstride = BRW_VERTICAL_STRIDE_0; + imm.width = BRW_WIDTH_4; + imm.hstride = BRW_HORIZONTAL_STRIDE_1; + imm.dw1.ud = ((v0 << 0) | + (v1 << 8) | + (v2 << 16) | + (v3 << 24)); + return imm; +} + + +static __inline struct brw_reg brw_address( struct brw_reg reg ) +{ + return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); +} + + +static __inline struct brw_reg brw_vec1_grf( unsigned nr, + unsigned subnr ) +{ + return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_vec8_grf( unsigned nr, + unsigned subnr ) +{ + return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_vec4_grf( unsigned nr, + unsigned subnr ) +{ + return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + + +static __inline struct brw_reg brw_vec2_grf( unsigned nr, + unsigned subnr ) +{ + return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_uw8_grf( unsigned nr, + unsigned subnr ) +{ + return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); +} + +static __inline struct brw_reg brw_null_reg( void ) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_NULL, + 0); +} + +static __inline struct brw_reg brw_address_reg( unsigned subnr ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + 
BRW_ARF_ADDRESS, + subnr); +} + +/* If/else instructions break in align16 mode if writemask & swizzle + * aren't xyzw. This goes against the convention for other scalar + * regs: + */ +static __inline struct brw_reg brw_ip_reg( void ) +{ + return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_IP, + 0, + BRW_REGISTER_TYPE_UD, + BRW_VERTICAL_STRIDE_4, /* ? */ + BRW_WIDTH_1, + BRW_HORIZONTAL_STRIDE_0, + BRW_SWIZZLE_XYZW, /* NOTE! */ + TGSI_WRITEMASK_XYZW); /* NOTE! */ +} + +static __inline struct brw_reg brw_acc_reg( void ) +{ + return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ACCUMULATOR, + 0); +} + + +static __inline struct brw_reg brw_flag_reg( void ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_FLAG, + 0); +} + + +static __inline struct brw_reg brw_mask_reg( unsigned subnr ) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_MASK, + subnr); +} + +static __inline struct brw_reg brw_message_reg( unsigned nr ) +{ + return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, + nr, + 0); +} + + + + +/* This is almost always called with a numeric constant argument, so + * make things easy to evaluate at compile time: + */ +static __inline unsigned cvt( unsigned val ) +{ + switch (val) { + case 0: return 0; + case 1: return 1; + case 2: return 2; + case 4: return 3; + case 8: return 4; + case 16: return 5; + case 32: return 6; + } + return 0; +} + +static __inline struct brw_reg stride( struct brw_reg reg, + unsigned vstride, + unsigned width, + unsigned hstride ) +{ + + reg.vstride = cvt(vstride); + reg.width = cvt(width) - 1; + reg.hstride = cvt(hstride); + return reg; +} + +static __inline struct brw_reg vec16( struct brw_reg reg ) +{ + return stride(reg, 16,16,1); +} + +static __inline struct brw_reg vec8( struct brw_reg reg ) +{ + return stride(reg, 8,8,1); +} + +static __inline struct brw_reg vec4( struct brw_reg reg ) +{ + return stride(reg, 4,4,1); +} + +static __inline struct brw_reg vec2( struct brw_reg reg ) +{ + return 
stride(reg, 2,2,1); +} + +static __inline struct brw_reg vec1( struct brw_reg reg ) +{ + return stride(reg, 0,1,0); +} + +static __inline struct brw_reg get_element( struct brw_reg reg, unsigned elt ) +{ + return vec1(suboffset(reg, elt)); +} + +static __inline struct brw_reg get_element_ud( struct brw_reg reg, unsigned elt ) +{ + return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); +} + + +static __inline struct brw_reg brw_swizzle( struct brw_reg reg, + unsigned x, + unsigned y, + unsigned z, + unsigned w) +{ + reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x), + BRW_GET_SWZ(reg.dw1.bits.swizzle, y), + BRW_GET_SWZ(reg.dw1.bits.swizzle, z), + BRW_GET_SWZ(reg.dw1.bits.swizzle, w)); + return reg; +} + + +static __inline struct brw_reg brw_swizzle1( struct brw_reg reg, + unsigned x ) +{ + return brw_swizzle(reg, x, x, x, x); +} + +static __inline struct brw_reg brw_writemask( struct brw_reg reg, + unsigned mask ) +{ + reg.dw1.bits.writemask &= mask; + return reg; +} + +static __inline struct brw_reg brw_set_writemask( struct brw_reg reg, + unsigned mask ) +{ + reg.dw1.bits.writemask = mask; + return reg; +} + +static __inline struct brw_reg negate( struct brw_reg reg ) +{ + reg.negate ^= 1; + return reg; +} + +static __inline struct brw_reg brw_abs( struct brw_reg reg ) +{ + reg.abs = 1; + return reg; +} + +/*********************************************************************** + */ +static __inline struct brw_reg brw_vec4_indirect( unsigned subnr, + int offset ) +{ + struct brw_reg reg = brw_vec4_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static __inline struct brw_reg brw_vec1_indirect( unsigned subnr, + int offset ) +{ + struct brw_reg reg = brw_vec1_grf(0, 0); + reg.subnr = subnr; + reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; + reg.dw1.bits.indirect_offset = offset; + return reg; +} + +static __inline 
struct brw_reg deref_4f(struct brw_indirect ptr, int offset) +{ + return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static __inline struct brw_reg deref_1f(struct brw_indirect ptr, int offset) +{ + return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); +} + +static __inline struct brw_reg deref_4b(struct brw_indirect ptr, int offset) +{ + return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); +} + +static __inline struct brw_reg deref_1uw(struct brw_indirect ptr, int offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); +} + +static __inline struct brw_reg deref_1ud(struct brw_indirect ptr, int offset) +{ + return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); +} + +static __inline struct brw_reg get_addr_reg(struct brw_indirect ptr) +{ + return brw_address_reg(ptr.addr_subnr); +} + +static __inline struct brw_indirect brw_indirect_offset( struct brw_indirect ptr, int offset ) +{ + ptr.addr_offset += offset; + return ptr; +} + +static __inline struct brw_indirect brw_indirect( unsigned addr_subnr, int offset ) +{ + struct brw_indirect ptr; + ptr.addr_subnr = addr_subnr; + ptr.addr_offset = offset; + ptr.pad = 0; + return ptr; +} + +static __inline struct brw_instruction *current_insn( struct brw_compile *p) +{ + return &p->store[p->nr_insn]; +} + +void brw_pop_insn_state( struct brw_compile *p ); +void brw_push_insn_state( struct brw_compile *p ); +void brw_set_mask_control( struct brw_compile *p, unsigned value ); +void brw_set_saturate( struct brw_compile *p, unsigned value ); +void brw_set_access_mode( struct brw_compile *p, unsigned access_mode ); +void brw_set_compression_control( struct brw_compile *p, boolean control ); +void brw_set_predicate_control_flag_value( struct brw_compile *p, unsigned value ); +void brw_set_predicate_control( struct brw_compile *p, unsigned pc ); +void brw_set_conditionalmod( struct brw_compile *p, unsigned conditional ); + +void brw_init_compile( struct brw_compile 
*p ); +const unsigned *brw_get_program( struct brw_compile *p, unsigned *sz ); + + +struct brw_instruction *brw_alu1( struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src ); + +struct brw_instruction *brw_alu2(struct brw_compile *p, + unsigned opcode, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1 ); + +/* Helpers for regular instructions: + */ +#define ALU1(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0); + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1); + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(JMPI) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + +#undef ALU1 +#undef ALU2 + + + +/* Helpers for SEND instruction: + */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + boolean allocate, + boolean used, + unsigned msg_length, + unsigned response_length, + boolean eot, + boolean writes_complete, + unsigned offset, + unsigned swizzle); + +void brw_fb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned msg_length, + unsigned response_length, + boolean eot); + +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned sampler, + unsigned writemask, + unsigned msg_type, + unsigned response_length, + unsigned msg_length, + boolean eot); + +void brw_math_16( struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned precision ); + +void brw_math( struct 
brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned data_type, + unsigned precision ); + +void brw_dp_READ_16( struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + unsigned scratch_offset ); + +void brw_dp_WRITE_16( struct brw_compile *p, + struct brw_reg src, + unsigned msg_reg_nr, + unsigned scratch_offset ); + +/* If/else/endif. Works by manipulating the execution flags on each + * channel. + */ +struct brw_instruction *brw_IF(struct brw_compile *p, + unsigned execute_size); + +struct brw_instruction *brw_ELSE(struct brw_compile *p, + struct brw_instruction *if_insn); + +void brw_ENDIF(struct brw_compile *p, + struct brw_instruction *if_or_else_insn); + + +/* DO/WHILE loops: + */ +struct brw_instruction *brw_DO(struct brw_compile *p, + unsigned execute_size); + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *patch_insn); + +struct brw_instruction *brw_BREAK(struct brw_compile *p); +struct brw_instruction *brw_CONT(struct brw_compile *p); +/* Forward jumps: + */ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn); + + + +void brw_NOP(struct brw_compile *p); + +/* Special case: there is never a destination, execution size will be + * taken from src0: + */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + unsigned conditional, + struct brw_reg src0, + struct brw_reg src1); + +void brw_print_reg( struct brw_reg reg ); + + +/*********************************************************************** + * brw_eu_util.c: + */ + +void brw_copy_indirect_to_indirect(struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + unsigned count); + +void brw_copy_from_indirect(struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + unsigned count); + +void brw_copy4(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + unsigned count); + +void 
brw_copy8(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + unsigned count); + +void brw_math_invert( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src); + +void brw_set_src1( struct brw_instruction *insn, + struct brw_reg reg ); +#endif diff --git a/src/gallium/drivers/i965simple/brw_eu_debug.c b/src/gallium/drivers/i965simple/brw_eu_debug.c new file mode 100644 index 0000000000..4a94ddefa6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu_debug.c @@ -0,0 +1,90 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_debug.h" + +#include "brw_eu.h" + +void brw_print_reg( struct brw_reg hwreg ) +{ + static const char *file[] = { + "arf", + "grf", + "msg", + "imm" + }; + + static const char *type[] = { + "ud", + "d", + "uw", + "w", + "ub", + "vf", + "hf", + "f" + }; + + debug_printf("%s%s", + hwreg.abs ? "abs/" : "", + hwreg.negate ? "-" : ""); + + if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.nr % 2 == 0 && + hwreg.subnr == 0 && + hwreg.vstride == BRW_VERTICAL_STRIDE_8 && + hwreg.width == BRW_WIDTH_8 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_1 && + hwreg.type == BRW_REGISTER_TYPE_F) { + debug_printf("vec%d", hwreg.nr); + } + else if (hwreg.file == BRW_GENERAL_REGISTER_FILE && + hwreg.vstride == BRW_VERTICAL_STRIDE_0 && + hwreg.width == BRW_WIDTH_1 && + hwreg.hstride == BRW_HORIZONTAL_STRIDE_0 && + hwreg.type == BRW_REGISTER_TYPE_F) { + debug_printf("scl%d.%d", hwreg.nr, hwreg.subnr / 4); + } + else { + debug_printf("%s%d.%d<%d;%d,%d>:%s", + file[hwreg.file], + hwreg.nr, + hwreg.subnr / type_sz(hwreg.type), + hwreg.vstride ? (1<<(hwreg.vstride-1)) : 0, + 1<<hwreg.width, + hwreg.hstride ? (1<<(hwreg.hstride-1)) : 0, + type[hwreg.type]); + } +} + + + diff --git a/src/gallium/drivers/i965simple/brw_eu_emit.c b/src/gallium/drivers/i965simple/brw_eu_emit.c new file mode 100644 index 0000000000..400a80b6fb --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu_emit.c @@ -0,0 +1,1080 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keith@tungstengraphics.com>
  */


#include "brw_context.h"
#include "brw_defines.h"
#include "brw_eu.h"




/***********************************************************************
 * Internal helper for constructing instructions
 */

/* Derive the instruction's execution size from a register's width.
 *
 * A width-8 register in an instruction whose compression control is
 * BRW_COMPRESSION_COMPRESSED yields BRW_EXECUTE_16; in every other
 * case the register's width encoding is copied into execution_size
 * unchanged.
 */
static void guess_execution_size( struct brw_instruction *insn,
                                  struct brw_reg reg )
{
   if (reg.width == BRW_WIDTH_8 &&
       insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
      insn->header.execution_size = BRW_EXECUTE_16;
   else
      insn->header.execution_size = reg.width;	/* note - definitions are compatible */
}


/* Encode the destination operand 'dest' into the bits1 word of 'insn'.
 *
 * Which bitfield view is written depends on the register's address mode
 * (direct vs. indirect) and on the instruction's access mode (align1
 * vs. align16), so insn->header.access_mode must already hold its final
 * value when this is called.  As a side effect the instruction's
 * execution size is (re)derived from dest via guess_execution_size().
 */
static void brw_set_dest( struct brw_instruction *insn,
                          struct brw_reg dest )
{
   insn->bits1.da1.dest_reg_file = dest.file;
   insn->bits1.da1.dest_reg_type = dest.type;
   insn->bits1.da1.dest_address_mode = dest.address_mode;

   if (dest.address_mode == BRW_ADDRESS_DIRECT) {
      insn->bits1.da1.dest_reg_nr = dest.nr;

      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits1.da1.dest_subreg_nr = dest.subnr;
         insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
      }
      else {
         /* NOTE(review): presumably subnr is a byte offset and the
          * align16 field counts 16-byte units -- confirm against the
          * struct brw_instruction definition.
          */
         insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
         insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
      }
   }
   else {
      insn->bits1.ia1.dest_subreg_nr = dest.subnr;

      /* These are different sizes in align1 vs align16:
       */
      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
         insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
      }
      else {
         insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
      }
   }

   /* NEW: Set the execution size based on dest.width and
    * insn->compression_control:
    */
   guess_execution_size(insn, dest);
}

/* Encode the first source operand 'reg' into 'insn'.
 *
 * Message registers are rejected by assertion.  An immediate operand
 * stores its 32-bit payload in bits3.ud and also fills in the src1
 * file/type fields in bits1.  For a non-immediate operand the
 * register number / sub-register (or indirect offset) and then either
 * the align1 region description or the align16 swizzle are written.
 *
 * The align1 region test reads insn->header.execution_size, so the
 * destination must have been set (see brw_set_dest) before this call.
 */
static void brw_set_src0( struct brw_instruction *insn,
                          struct brw_reg reg )
{
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

   insn->bits1.da1.src0_reg_file = reg.file;
   insn->bits1.da1.src0_reg_type = reg.type;
   insn->bits2.da1.src0_abs = reg.abs;
   insn->bits2.da1.src0_negate = reg.negate;
   insn->bits2.da1.src0_address_mode = reg.address_mode;

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;

      /* Required to set some fields in src1 as well:
       */
      insn->bits1.da1.src1_reg_file = 0; /* arf */
      insn->bits1.da1.src1_reg_type = reg.type;
   }
   else
   {
      if (reg.address_mode == BRW_ADDRESS_DIRECT) {
         if (insn->header.access_mode == BRW_ALIGN_1) {
            insn->bits2.da1.src0_subreg_nr = reg.subnr;
            insn->bits2.da1.src0_reg_nr = reg.nr;
         }
         else {
            insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
            insn->bits2.da16.src0_reg_nr = reg.nr;
         }
      }
      else {
         insn->bits2.ia1.src0_subreg_nr = reg.subnr;

         if (insn->header.access_mode == BRW_ALIGN_1) {
            insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
         }
         else {
            /* NOTE(review): this stores the indirect offset into the
             * align16 *sub-register* field, whereas the align1 branch
             * above and both indirect branches of brw_set_dest() write
             * an *_indirect_offset field.  Looks like it may have been
             * meant to target an ia16 indirect-offset field -- confirm
             * against the bits2.ia16 layout before changing.
             */
            insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
         }
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
         /* A width-1 source in a single-channel instruction is forced
          * to the scalar region (strides 0, width 1) regardless of the
          * strides recorded in 'reg'.
          */
         if (reg.width == BRW_WIDTH_1 &&
             insn->header.execution_size == BRW_EXECUTE_1) {
            insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
            insn->bits2.da1.src0_width = BRW_WIDTH_1;
            insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
         }
         else {
            insn->bits2.da1.src0_horiz_stride = reg.hstride;
            insn->bits2.da1.src0_width = reg.width;
            insn->bits2.da1.src0_vert_stride = reg.vstride;
         }
      }
      else {
         insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
         insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
         insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
         insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

         /* This is an oddity of the fact we're using the same
          * descriptions for registers in align_16 as align_1:
          */
         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
            insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
         else
            insn->bits2.da16.src0_vert_stride = reg.vstride;
      }
   }
}


/* Encode the second source operand 'reg' into 'insn' (bits1 for the
 * file/type, bits3 for everything else).
 *
 * Asserts that 'reg' is not a message register, that src0 has not
 * already been encoded as an immediate, and -- for non-immediate
 * operands -- that 'reg' is directly addressed.
 *
 * For an immediate, the payload store to bits3.ud comes after the
 * abs/negate stores into the bits3.da1 view of the same word, so the
 * order of these statements matters.
 */
void brw_set_src1( struct brw_instruction *insn,
                   struct brw_reg reg )
{
   assert(reg.file != BRW_MESSAGE_REGISTER_FILE);

   insn->bits1.da1.src1_reg_file = reg.file;
   insn->bits1.da1.src1_reg_type = reg.type;
   insn->bits3.da1.src1_abs = reg.abs;
   insn->bits3.da1.src1_negate = reg.negate;

   /* Only src1 can be immediate in two-argument instructions.
    */
   assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);

   if (reg.file == BRW_IMMEDIATE_VALUE) {
      insn->bits3.ud = reg.dw1.ud;
   }
   else {
      /* This is a hardware restriction, which may or may not be lifted
       * in the future:
       */
      assert (reg.address_mode == BRW_ADDRESS_DIRECT);
      //assert (reg.file == BRW_GENERAL_REGISTER_FILE);

      if (insn->header.access_mode == BRW_ALIGN_1) {
         insn->bits3.da1.src1_subreg_nr = reg.subnr;
         insn->bits3.da1.src1_reg_nr = reg.nr;
      }
      else {
         insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
         insn->bits3.da16.src1_reg_nr = reg.nr;
      }

      if (insn->header.access_mode == BRW_ALIGN_1) {
         /* Same scalar-region special case as in brw_set_src0(). */
         if (reg.width == BRW_WIDTH_1 &&
             insn->header.execution_size == BRW_EXECUTE_1) {
            insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
            insn->bits3.da1.src1_width = BRW_WIDTH_1;
            insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
         }
         else {
            insn->bits3.da1.src1_horiz_stride = reg.hstride;
            insn->bits3.da1.src1_width = reg.width;
            insn->bits3.da1.src1_vert_stride = reg.vstride;
         }
      }
      else {
         insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
         insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
         insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
         insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);

         /* This is an oddity of the fact we're using the same
          * descriptions for registers in align_16 as align_1:
          */
         if (reg.vstride == BRW_VERTICAL_STRIDE_8)
            insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
         else
            insn->bits3.da16.src1_vert_stride = reg.vstride;
      }
   }
}



/* Fill in the bits3 message descriptor of 'insn' for the math unit.
 *
 * src1 is first set to an immediate 0 (which writes bits3.ud = 0), and
 * the individual descriptor fields are then stored on top of it; the
 * brw_set_src1() call must therefore stay first.  end_of_thread is
 * always 0 for math messages.
 */
static void brw_set_math_message( struct brw_instruction *insn,
                                  unsigned msg_length,
                                  unsigned response_length,
                                  unsigned function,
                                  unsigned integer_type,
                                  boolean low_precision,
                                  boolean saturate,
                                  unsigned dataType )
{
   brw_set_src1(insn, brw_imm_d(0));

   insn->bits3.math.function = function;
   insn->bits3.math.int_type = integer_type;
   insn->bits3.math.precision = low_precision;
   insn->bits3.math.saturate = saturate;
   insn->bits3.math.data_type = dataType;
   insn->bits3.math.response_length = response_length;
   insn->bits3.math.msg_length = msg_length;
   insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
   insn->bits3.math.end_of_thread = 0;
}

/* Fill in the bits3 message descriptor of 'insn' for a URB message.
 *
 * As with the other descriptor helpers, src1 is reset to immediate 0
 * before the fields are written.  The urb opcode field is always 0.
 */
static void brw_set_urb_message( struct brw_instruction *insn,
                                 boolean allocate,
                                 boolean used,
                                 unsigned msg_length,
                                 unsigned response_length,
                                 boolean end_of_thread,
                                 boolean complete,
                                 unsigned offset,
                                 unsigned swizzle_control )
{
   brw_set_src1(insn, brw_imm_d(0));

   insn->bits3.urb.opcode = 0;	/* ? */
   insn->bits3.urb.offset = offset;
   insn->bits3.urb.swizzle_control = swizzle_control;
   insn->bits3.urb.allocate = allocate;
   insn->bits3.urb.used = used;	/* ? */
   insn->bits3.urb.complete = complete;
   insn->bits3.urb.response_length = response_length;
   insn->bits3.urb.msg_length = msg_length;
   insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
   insn->bits3.urb.end_of_thread = end_of_thread;
}

/* Fill in the bits3 message descriptor of 'insn' for a data-port write.
 *
 * src1 is reset to immediate 0 first; send_commit_msg is always 0.
 */
static void brw_set_dp_write_message( struct brw_instruction *insn,
                                      unsigned binding_table_index,
                                      unsigned msg_control,
                                      unsigned msg_type,
                                      unsigned msg_length,
                                      unsigned pixel_scoreboard_clear,
                                      unsigned response_length,
                                      unsigned end_of_thread )
{
   brw_set_src1(insn, brw_imm_d(0));

   insn->bits3.dp_write.binding_table_index = binding_table_index;
   insn->bits3.dp_write.msg_control = msg_control;
   insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
   insn->bits3.dp_write.msg_type = msg_type;
   insn->bits3.dp_write.send_commit_msg = 0;
   insn->bits3.dp_write.response_length = response_length;
   insn->bits3.dp_write.msg_length = msg_length;
   insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
   /* NOTE(review): end_of_thread is stored through the 'urb' view here
    * while every other field uses 'dp_write' (and the read helper uses
    * dp_read.end_of_thread).  Presumably the bit sits at the same
    * position in both views -- confirm against the bits3 union layout.
    */
   insn->bits3.urb.end_of_thread = end_of_thread;
}

/* Fill in the bits3 message descriptor of 'insn' for a data-port read.
 *
 * src1 is reset to immediate 0 before the fields are written.
 */
static void brw_set_dp_read_message( struct brw_instruction *insn,
                                     unsigned binding_table_index,
                                     unsigned msg_control,
                                     unsigned msg_type,
                                     unsigned target_cache,
                                     unsigned msg_length,
                                     unsigned response_length,
                                     unsigned end_of_thread )
{
   brw_set_src1(insn, brw_imm_d(0));

   insn->bits3.dp_read.binding_table_index = binding_table_index;
   insn->bits3.dp_read.msg_control = msg_control;
   insn->bits3.dp_read.msg_type = msg_type;
   insn->bits3.dp_read.target_cache = target_cache;
   insn->bits3.dp_read.response_length = response_length;
   insn->bits3.dp_read.msg_length = msg_length;
   insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
   insn->bits3.dp_read.end_of_thread = end_of_thread;
}

/* Fill in the bits3 message descriptor of 'insn' for a sampler message.
 *
 * src1 is reset to immediate 0 first.  The return format is always
 * BRW_SAMPLER_RETURN_FORMAT_FLOAT32.
 */
static void brw_set_sampler_message( struct brw_instruction *insn,
                                     unsigned binding_table_index,
                                     unsigned sampler,
                                     unsigned msg_type,
                                     unsigned response_length,
                                     unsigned msg_length,
                                     boolean eot)
{
   brw_set_src1(insn, brw_imm_d(0));

   insn->bits3.sampler.binding_table_index = binding_table_index;
   insn->bits3.sampler.sampler = sampler;
   insn->bits3.sampler.msg_type = msg_type;
   insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
   insn->bits3.sampler.response_length = response_length;
   insn->bits3.sampler.msg_length = msg_length;
   insn->bits3.sampler.end_of_thread = eot;
   insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
}



/* Claim the next slot in p->store and return it, initialised as a copy
 * of the current default instruction state (*p->current) with its
 * opcode set to 'opcode'.
 *
 * Asserts that the store still has room.  If the default state carried
 * a conditional modifier, that modifier is cleared from the default
 * state (not from the returned instruction) and the default predicate
 * control is set to BRW_PREDICATE_NORMAL.
 */
static struct brw_instruction *next_insn( struct brw_compile *p,
                                          unsigned opcode )
{
   struct brw_instruction *insn;

   assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);

   insn = &p->store[p->nr_insn++];
   memcpy(insn, p->current, sizeof(*insn));

   /* Reset this one-shot flag:
    */

   if (p->current->header.destreg__conditonalmod) {
      p->current->header.destreg__conditonalmod = 0;
      p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
   }

   insn->header.opcode = opcode;
   return insn;
}


/* Emit a one-source instruction: allocate it with next_insn(), then
 * encode dest and src0 (in that order).  Returns the new instruction.
 */
struct brw_instruction *brw_alu1( struct brw_compile *p,
                                  unsigned opcode,
                                  struct brw_reg dest,
                                  struct brw_reg src )
{
   struct brw_instruction *insn = next_insn(p, opcode);
   brw_set_dest(insn, dest);
   brw_set_src0(insn, src);
   return insn;
}

/* Emit a two-source instruction: allocate it with next_insn(), then
 * encode dest, src0 and src1 (in that order).  Returns the new
 * instruction.
 */
struct brw_instruction *brw_alu2(struct brw_compile *p,
                                 unsigned opcode,
                                 struct brw_reg dest,
                                 struct brw_reg src0,
                                 struct brw_reg src1 )
{
   struct brw_instruction *insn = next_insn(p, opcode);
   brw_set_dest(insn, dest);
   brw_set_src0(insn, src0);
   brw_set_src1(insn, src1);
   return insn;
}


/***********************************************************************
 * Convenience routines.
+ */ +#define ALU1(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0) \ +{ \ + return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \ +} + +#define ALU2(OP) \ +struct brw_instruction *brw_##OP(struct brw_compile *p, \ + struct brw_reg dest, \ + struct brw_reg src0, \ + struct brw_reg src1) \ +{ \ + return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \ +} + + +ALU1(MOV) +ALU2(SEL) +ALU1(NOT) +ALU2(AND) +ALU2(OR) +ALU2(XOR) +ALU2(SHR) +ALU2(SHL) +ALU2(RSR) +ALU2(RSL) +ALU2(ASR) +ALU2(ADD) +ALU2(MUL) +ALU1(FRC) +ALU1(RNDD) +ALU2(MAC) +ALU2(MACH) +ALU1(LZD) +ALU2(DP4) +ALU2(DPH) +ALU2(DP3) +ALU2(DP2) +ALU2(LINE) + + + + +void brw_NOP(struct brw_compile *p) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_ud(0x0)); +} + + + + + +/*********************************************************************** + * Comparisons, if/else/endif + */ + +struct brw_instruction *brw_JMPI(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1); + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + +/* EU takes the value from the flag register and pushes it onto some + * sort of a stack (presumably merging with any flag value already on + * the stack). Within an if block, the flags at the top of the stack + * control execution on each channel of the unit, eg. on each of the + * 16 pixel values in our wm programs. + * + * When the matching 'else' instruction is reached (presumably by + * countdown of the instruction count patched in by our ELSE/ENDIF + * functions), the relevent flags are inverted. + * + * When the matching 'endif' instruction is reached, the flags are + * popped off. 
If the stack is now empty, normal execution resumes. + * + * No attempt is made to deal with stack overflow (14 elements?). + */ +struct brw_instruction *brw_IF(struct brw_compile *p, unsigned execute_size) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) { + assert(execute_size == BRW_EXECUTE_1); + + insn = next_insn(p, BRW_OPCODE_ADD); + insn->header.predicate_inverse = 1; + } else { + insn = next_insn(p, BRW_OPCODE_IF); + } + + /* Override the defaults for this instruction: + */ + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.execution_size = execute_size; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.predicate_control = BRW_PREDICATE_NORMAL; + insn->header.mask_control = BRW_MASK_ENABLE; + + p->current->header.predicate_control = BRW_PREDICATE_NONE; + + return insn; +} + + +struct brw_instruction *brw_ELSE(struct brw_compile *p, + struct brw_instruction *if_insn) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) { + insn = next_insn(p, BRW_OPCODE_ADD); + } else { + insn = next_insn(p, BRW_OPCODE_ELSE); + } + + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = if_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + + /* Patch the if instruction to point at this instruction. 
+ */ + if (p->single_program_flow) { + assert(if_insn->header.opcode == BRW_OPCODE_ADD); + + if_insn->bits3.ud = (insn - if_insn + 1) * 16; + } else { + assert(if_insn->header.opcode == BRW_OPCODE_IF); + + if_insn->bits3.if_else.jump_count = insn - if_insn; + if_insn->bits3.if_else.pop_count = 1; + if_insn->bits3.if_else.pad0 = 0; + } + + return insn; +} + +void brw_ENDIF(struct brw_compile *p, + struct brw_instruction *patch_insn) +{ + if (p->single_program_flow) { + /* In single program flow mode, there's no need to execute an ENDIF, + * since we don't need to do any stack operations, and if we're executing + * currently, we want to just continue executing. + */ + struct brw_instruction *next = &p->store[p->nr_insn]; + + assert(patch_insn->header.opcode == BRW_OPCODE_ADD); + + patch_insn->bits3.ud = (next - patch_insn) * 16; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); + + brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = patch_insn->header.execution_size; + insn->header.mask_control = BRW_MASK_ENABLE; + + assert(patch_insn->bits3.if_else.jump_count == 0); + + /* Patch the if or else instructions to point at this or the next + * instruction respectively. 
+ */ + if (patch_insn->header.opcode == BRW_OPCODE_IF) { + /* Automagically turn it into an IFF: + */ + patch_insn->header.opcode = BRW_OPCODE_IFF; + patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.pop_count = 0; + patch_insn->bits3.if_else.pad0 = 0; + } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) { + patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1; + patch_insn->bits3.if_else.pop_count = 1; + patch_insn->bits3.if_else.pad0 = 0; + } else { + assert(0); + } + + /* Also pop item off the stack in the endif instruction: + */ + insn->bits3.if_else.jump_count = 0; + insn->bits3.if_else.pop_count = 1; + insn->bits3.if_else.pad0 = 0; + } +} + +struct brw_instruction *brw_BREAK(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_BREAK); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + insn->header.mask_control = BRW_MASK_DISABLE; + insn->bits3.if_else.pad0 = 0; + return insn; +} + +struct brw_instruction *brw_CONT(struct brw_compile *p) +{ + struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + insn->header.mask_control = BRW_MASK_DISABLE; + insn->bits3.if_else.pad0 = 0; + return insn; +} + +/* DO/WHILE loop: + */ +struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size) +{ + if (p->single_program_flow) { + return &p->store[p->nr_insn]; + } else { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); + + /* Override the defaults for this instruction: + */ + brw_set_dest(insn, brw_null_reg()); + brw_set_src0(insn, brw_null_reg()); + 
brw_set_src1(insn, brw_null_reg()); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = execute_size; + insn->header.predicate_control = BRW_PREDICATE_NONE; + /* insn->header.mask_control = BRW_MASK_ENABLE; */ + insn->header.mask_control = BRW_MASK_DISABLE; + + return insn; + } +} + + + +struct brw_instruction *brw_WHILE(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + + if (p->single_program_flow) + insn = next_insn(p, BRW_OPCODE_ADD); + else + insn = next_insn(p, BRW_OPCODE_WHILE); + + brw_set_dest(insn, brw_ip_reg()); + brw_set_src0(insn, brw_ip_reg()); + brw_set_src1(insn, brw_imm_d(0x0)); + + insn->header.compression_control = BRW_COMPRESSION_NONE; + + if (p->single_program_flow) { + insn->header.execution_size = BRW_EXECUTE_1; + + insn->bits3.d = (do_insn - insn) * 16; + } else { + insn->header.execution_size = do_insn->header.execution_size; + + assert(do_insn->header.opcode == BRW_OPCODE_DO); + insn->bits3.if_else.jump_count = do_insn - insn; + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } + +/* insn->header.mask_control = BRW_MASK_ENABLE; */ + + insn->header.mask_control = BRW_MASK_DISABLE; + p->current->header.predicate_control = BRW_PREDICATE_NONE; + return insn; +} + + +/* FORWARD JUMPS: + */ +void brw_land_fwd_jump(struct brw_compile *p, + struct brw_instruction *jmp_insn) +{ + struct brw_instruction *landing = &p->store[p->nr_insn]; + + assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI); + assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE); + + jmp_insn->bits3.ud = (landing - jmp_insn) - 1; +} + + + +/* To integrate with the above, it makes sense that the comparison + * instruction should populate the flag register. It might be simpler + * just to use the flag reg for most WM tasks? 
+ */ +void brw_CMP(struct brw_compile *p, + struct brw_reg dest, + unsigned conditional, + struct brw_reg src0, + struct brw_reg src1) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); + + insn->header.destreg__conditonalmod = conditional; + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, src1); + +/* guess_execution_size(insn, src0); */ + + + /* Make it so that future instructions will use the computed flag + * value until brw_set_predicate_control_flag_value() is called + * again. + */ + if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE && + dest.nr == 0) { + p->current->header.predicate_control = BRW_PREDICATE_NORMAL; + p->flag_value = 0xff; + } +} + + + +/*********************************************************************** + * Helpers for the various SEND message types: + */ + +/* Invert 8 values + */ +void brw_math( struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned data_type, + unsigned precision ) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; + unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + + /* Example code doesn't set predicate_control for send + * instructions. + */ + insn->header.predicate_control = 0; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_math_message(insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + data_type); +} + +/* Use 2 send instructions to invert 16 elements + */ +void brw_math_16( struct brw_compile *p, + struct brw_reg dest, + unsigned function, + unsigned saturate, + unsigned msg_reg_nr, + struct brw_reg src, + unsigned precision ) +{ + struct brw_instruction *insn; + unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 
2 : 1; + unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1; + + /* First instruction: + */ + brw_push_insn_state(p); + brw_set_predicate_control_flag_value(p, 0xff); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + insn = next_insn(p, BRW_OPCODE_SEND); + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + brw_set_math_message(insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + /* Second instruction: + */ + insn = next_insn(p, BRW_OPCODE_SEND); + insn->header.compression_control = BRW_COMPRESSION_2NDHALF; + insn->header.destreg__conditonalmod = msg_reg_nr+1; + + brw_set_dest(insn, offset(dest,1)); + brw_set_src0(insn, src); + brw_set_math_message(insn, + msg_length, response_length, + function, + BRW_MATH_INTEGER_UNSIGNED, + precision, + saturate, + BRW_MATH_DATA_VECTOR); + + brw_pop_insn_state(p); +} + + + + +void brw_dp_WRITE_16( struct brw_compile *p, + struct brw_reg src, + unsigned msg_reg_nr, + unsigned scratch_offset ) +{ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + brw_MOV(p, + retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), + brw_imm_d(scratch_offset)); + + brw_pop_insn_state(p); + } + + { + unsigned msg_length = 3; + struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src); + + brw_set_dp_write_message(insn, + 255, /* bti */ + BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ + msg_length, + 0, /* pixel scoreboard */ + 0, /* response_length */ + 
0); /* eot */ + } + +} + + +void brw_dp_READ_16( struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + unsigned scratch_offset ) +{ + { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, + retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D), + brw_imm_d(scratch_offset)); + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); /* UW? */ + brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); + + brw_set_dp_read_message(insn, + 255, /* bti */ + 3, /* msg_control */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 1, /* target cache */ + 1, /* msg_length */ + 2, /* response_length */ + 0); /* eot */ + } +} + + +void brw_fb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned msg_length, + unsigned response_length, + boolean eot) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_dp_write_message(insn, + binding_table_index, + BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */ + BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */ + msg_length, + 1, /* pixel scoreboard */ + response_length, + eot); +} + + + +void brw_SAMPLE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + unsigned binding_table_index, + unsigned sampler, + unsigned writemask, + unsigned msg_type, + unsigned response_length, + unsigned 
msg_length, + boolean eot) +{ + boolean need_stall = 0; + + if(writemask == 0) { +/* debug_printf("%s: zero writemask??\n", __FUNCTION__); */ + return; + } + + /* Hardware doesn't do destination dependency checking on send + * instructions properly. Add a workaround which generates the + * dependency by other means. In practice it seems like this bug + * only crops up for texture samples, and only where registers are + * written by the send and then written again later without being + * read in between. Luckily for us, we already track that + * information and use it to modify the writemask for the + * instruction, so that is a guide for whether a workaround is + * needed. + */ + if (writemask != TGSI_WRITEMASK_XYZW) { + unsigned dst_offset = 0; + unsigned i, newmask = 0, len = 0; + + for (i = 0; i < 4; i++) { + if (writemask & (1<<i)) + break; + dst_offset += 2; + } + for (; i < 4; i++) { + if (!(writemask & (1<<i))) + break; + newmask |= 1<<i; + len++; + } + + if (newmask != writemask) { + need_stall = 1; +/* debug_printf("need stall %x %x\n", newmask , writemask); */ + } + else { + struct brw_reg m1 = brw_message_reg(msg_reg_nr); + + newmask = ~newmask & TGSI_WRITEMASK_XYZW; + + brw_push_insn_state(p); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + brw_MOV(p, m1, brw_vec8_grf(0,0)); + brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); + + brw_pop_insn_state(p); + + src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW); + dest = offset(dest, dst_offset); + response_length = len * 2; + } + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = 0; /* XXX */ + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_sampler_message(insn, + binding_table_index, + sampler, + msg_type, + response_length, + msg_length, + eot); + } 
+ + if (need_stall) + { + struct brw_reg reg = vec8(offset(dest, response_length-1)); + + /* mov (8) r9.0<1>:f r9.0<8;8,1>:f { Align1 } + */ + brw_push_insn_state(p); + brw_set_compression_control(p, FALSE); + brw_MOV(p, reg, reg); + brw_pop_insn_state(p); + } + +} + +/* All these variables are pretty confusing - we might be better off + * using bitmasks and macros for this, in the old style. Or perhaps + * just having the caller instantiate the fields in dword3 itself. + */ +void brw_urb_WRITE(struct brw_compile *p, + struct brw_reg dest, + unsigned msg_reg_nr, + struct brw_reg src0, + boolean allocate, + boolean used, + unsigned msg_length, + unsigned response_length, + boolean eot, + boolean writes_complete, + unsigned offset, + unsigned swizzle) +{ + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + assert(msg_length < 16); + + brw_set_dest(insn, dest); + brw_set_src0(insn, src0); + brw_set_src1(insn, brw_imm_d(0)); + + insn->header.destreg__conditonalmod = msg_reg_nr; + + brw_set_urb_message(insn, + allocate, + used, + msg_length, + response_length, + eot, + writes_complete, + offset, + swizzle); +} + diff --git a/src/gallium/drivers/i965simple/brw_eu_util.c b/src/gallium/drivers/i965simple/brw_eu_util.c new file mode 100644 index 0000000000..3a65b141f0 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_eu_util.c @@ -0,0 +1,126 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_eu.h" + + +void brw_math_invert( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src) +{ + brw_math( p, + dst, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + src, + BRW_MATH_PRECISION_FULL, + BRW_MATH_DATA_VECTOR ); +} + + + +void brw_copy4(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + unsigned count) +{ + unsigned i; + + dst = vec4(dst); + src = vec4(src); + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); + brw_MOV(p, byte_offset(dst, delta+16), byte_offset(src, delta+16)); + } +} + + +void brw_copy8(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src, + unsigned count) +{ + unsigned i; + + dst = vec8(dst); + src = vec8(src); + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, byte_offset(dst, delta), byte_offset(src, delta)); + } +} + + +void brw_copy_indirect_to_indirect(struct brw_compile *p, + struct brw_indirect dst_ptr, + struct brw_indirect src_ptr, + unsigned count) +{ + unsigned i; + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, deref_4f(dst_ptr, delta), deref_4f(src_ptr, delta)); + brw_MOV(p, deref_4f(dst_ptr, delta+16), deref_4f(src_ptr, delta+16)); + } +} + + +void brw_copy_from_indirect(struct brw_compile *p, + struct brw_reg dst, + struct brw_indirect ptr, + unsigned count) +{ + unsigned i; + + dst = vec4(dst); + + for (i = 0; i < count; i++) + { + unsigned delta = i*32; + brw_MOV(p, byte_offset(dst, delta), deref_4f(ptr, delta)); + brw_MOV(p, byte_offset(dst, delta+16), deref_4f(ptr, delta+16)); + } +} + + + + diff --git a/src/gallium/drivers/i965simple/brw_flush.c b/src/gallium/drivers/i965simple/brw_flush.c new file mode 100644 index 
0000000000..e6001c30d9 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_flush.c @@ -0,0 +1,73 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_defines.h" +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_batch.h" + + +static void brw_flush( struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence ) +{ + struct brw_context *brw = brw_context(pipe); + + /* Do we need to emit an MI_FLUSH command to flush the hardware + * caches? 
+ */ + if (flags & (PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE)) { + struct brw_mi_flush flush; + + memset(&flush, 0, sizeof(flush)); + flush.opcode = CMD_MI_FLUSH; + + if (!(flags & PIPE_FLUSH_RENDER_CACHE)) + flush.flags |= BRW_INHIBIT_FLUSH_RENDER_CACHE; + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) + flush.flags |= BRW_FLUSH_READ_CACHE; + + BRW_BATCH_STRUCT(brw, &flush); + } + + /* If there are no flags, just flush pending commands to hardware: + */ + FLUSH_BATCH( fence ); +} + + + +void brw_init_flush_functions( struct brw_context *brw ) +{ + brw->pipe.flush = brw_flush; +} diff --git a/src/gallium/drivers/i965simple/brw_gs.c b/src/gallium/drivers/i965simple/brw_gs.c new file mode 100644 index 0000000000..de60868ccc --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs.c @@ -0,0 +1,196 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_state.h" +#include "brw_gs.h" + + + +static void compile_gs_prog( struct brw_context *brw, + struct brw_gs_prog_key *key ) +{ + struct brw_gs_compile c; + const unsigned *program; + unsigned program_size; + + memset(&c, 0, sizeof(c)); + + c.key = *key; + + /* Need to locate the two positions present in vertex + header. + * These are currently hardcoded: + */ + c.nr_attrs = brw_count_bits(c.key.attrs); + c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */ + c.nr_bytes = c.nr_regs * REG_SIZE; + + + /* Begin the compilation: + */ + brw_init_compile(&c.func); + + c.func.single_program_flow = 1; + + /* For some reason the thread is spawned with only 4 channels + * unmasked. 
+ */ + brw_set_mask_control(&c.func, BRW_MASK_DISABLE); + + + /* Note that primitives which don't require a GS program have + * already been weeded out by this stage: + */ + switch (key->primitive) { + case PIPE_PRIM_QUADS: + brw_gs_quads( &c ); + break; + case PIPE_PRIM_QUAD_STRIP: + brw_gs_quad_strip( &c ); + break; + case PIPE_PRIM_LINE_LOOP: + brw_gs_lines( &c ); + break; + case PIPE_PRIM_LINES: + if (key->hint_gs_always) + brw_gs_lines( &c ); + else { + return; + } + break; + case PIPE_PRIM_TRIANGLES: + if (key->hint_gs_always) + brw_gs_tris( &c ); + else { + return; + } + break; + case PIPE_PRIM_POINTS: + if (key->hint_gs_always) + brw_gs_points( &c ); + else { + return; + } + break; + default: + return; + } + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* Upload + */ + brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->gs.prog_data ); +} + + +static boolean search_cache( struct brw_context *brw, + struct brw_gs_prog_key *key ) +{ + return brw_search_cache(&brw->cache[BRW_GS_PROG], + key, sizeof(*key), + &brw->gs.prog_data, + &brw->gs.prog_gs_offset); +} + + +static const int gs_prim[PIPE_PRIM_POLYGON+1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINE_LOOP, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_QUADS, + PIPE_PRIM_QUAD_STRIP, + PIPE_PRIM_TRIANGLES +}; + +static void populate_key( struct brw_context *brw, + struct brw_gs_prog_key *key ) +{ + memset(key, 0, sizeof(*key)); + + /* CACHE_NEW_VS_PROG */ + key->attrs = brw->vs.prog_data->outputs_written; + + /* BRW_NEW_PRIMITIVE */ + key->primitive = gs_prim[brw->primitive]; + + key->hint_gs_always = 0; /* debug code? 
*/ + + key->need_gs_prog = (key->hint_gs_always || + brw->primitive == PIPE_PRIM_QUADS || + brw->primitive == PIPE_PRIM_QUAD_STRIP || + brw->primitive == PIPE_PRIM_LINE_LOOP); +} + +/* Calculate interpolants for triangle and line rasterization. + */ +static void upload_gs_prog( struct brw_context *brw ) +{ + struct brw_gs_prog_key key; + + /* Populate the key: + */ + populate_key(brw, &key); + + if (brw->gs.prog_active != key.need_gs_prog) { + brw->state.dirty.cache |= CACHE_NEW_GS_PROG; + brw->gs.prog_active = key.need_gs_prog; + } + + if (brw->gs.prog_active) { + if (!search_cache(brw, &key)) + compile_gs_prog( brw, &key ); + } +} + + +const struct brw_tracked_state brw_gs_prog = { + .dirty = { + .brw = BRW_NEW_PRIMITIVE, + .cache = CACHE_NEW_VS_PROG + }, + .update = upload_gs_prog +}; diff --git a/src/gallium/drivers/i965simple/brw_gs.h b/src/gallium/drivers/i965simple/brw_gs.h new file mode 100644 index 0000000000..f09141c6aa --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs.h @@ -0,0 +1,75 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_GS_H +#define BRW_GS_H + + +#include "brw_context.h" +#include "brw_eu.h" + +#define MAX_GS_VERTS (4) + +struct brw_gs_prog_key { + unsigned attrs:32; + unsigned primitive:4; + unsigned hint_gs_always:1; + unsigned need_gs_prog:1; + unsigned pad:26; +}; + +struct brw_gs_compile { + struct brw_compile func; + struct brw_gs_prog_key key; + struct brw_gs_prog_data prog_data; + + struct { + struct brw_reg R0; + struct brw_reg vertex[MAX_GS_VERTS]; + } reg; + + /* 3 different ways of expressing vertex size: + */ + unsigned nr_attrs; + unsigned nr_regs; + unsigned nr_bytes; +}; + +#define ATTR_SIZE (4*4) + +void brw_gs_quads( struct brw_gs_compile *c ); +void brw_gs_quad_strip( struct brw_gs_compile *c ); +void brw_gs_tris( struct brw_gs_compile *c ); +void brw_gs_lines( struct brw_gs_compile *c ); +void brw_gs_points( struct brw_gs_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_gs_emit.c b/src/gallium/drivers/i965simple/brw_gs_emit.c new file mode 100644 index 0000000000..c3cc90b10f --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs_emit.c @@ -0,0 +1,148 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_gs.h" + +static void brw_gs_alloc_regs( struct brw_gs_compile *c, + unsigned nr_verts ) +{ + unsigned i = 0,j; + + /* Register usage is static, precompute here: + */ + c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; + + /* Payload vertices plus space for more generated vertices: + */ + for (j = 0; j < nr_verts; j++) { + c->reg.vertex[j] = brw_vec4_grf(i, 0); + i += c->nr_regs; + } + + c->prog_data.urb_read_length = c->nr_regs; + c->prog_data.total_grf = i; +} + + +static void brw_gs_emit_vue(struct brw_gs_compile *c, + struct brw_reg vert, + boolean last, + unsigned header) +{ + struct brw_compile *p = &c->func; + boolean allocate = !last; + + /* Overwrite PrimType and PrimStart in the message header, for + * each vertex in turn: + */ + brw_MOV(p, get_element_ud(c->reg.R0, 2), brw_imm_ud(header)); + + /* Copy the vertex from vertn into m1..mN+1: + */ + brw_copy8(p, brw_message_reg(1), vert, c->nr_regs); + + /* Send each vertex as a seperate write to the urb. This is + * different to the concept in brw_sf_emit.c, where subsequent + * writes are used to build up a single urb entry. Each of these + * writes instantiates a seperate urb entry, and a new one must be + * allocated each time. + */ + brw_urb_WRITE(p, + allocate ? c->reg.R0 : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), + 0, + c->reg.R0, + allocate, + 1, /* used */ + c->nr_regs + 1, /* msg length */ + allocate ? 1 : 0, /* response length */ + allocate ? 0 : 1, /* eot */ + 1, /* writes_complete */ + 0, /* urb offset */ + BRW_URB_SWIZZLE_NONE); +} + + + +void brw_gs_quads( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 4); + + /* Use polygons for correct edgeflag behaviour. 
Note that vertex 3 + * is the PV for quads, but vertex 0 for polygons: + */ + brw_gs_emit_vue(c, c->reg.vertex[3], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); +} + +void brw_gs_quad_strip( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 4); + + brw_gs_emit_vue(c, c->reg.vertex[2], 0, ((_3DPRIM_POLYGON << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[3], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, (_3DPRIM_POLYGON << 2)); + brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_POLYGON << 2) | R02_PRIM_END)); +} + +void brw_gs_tris( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 3); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_TRILIST << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[1], 0, (_3DPRIM_TRILIST << 2)); + brw_gs_emit_vue(c, c->reg.vertex[2], 1, ((_3DPRIM_TRILIST << 2) | R02_PRIM_END)); +} + +void brw_gs_lines( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 2); + brw_gs_emit_vue(c, c->reg.vertex[0], 0, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_START)); + brw_gs_emit_vue(c, c->reg.vertex[1], 1, ((_3DPRIM_LINESTRIP << 2) | R02_PRIM_END)); +} + +void brw_gs_points( struct brw_gs_compile *c ) +{ + brw_gs_alloc_regs(c, 1); + brw_gs_emit_vue(c, c->reg.vertex[0], 1, ((_3DPRIM_POINTLIST << 2) | R02_PRIM_START | R02_PRIM_END)); +} + + + + + + + + diff --git a/src/gallium/drivers/i965simple/brw_gs_state.c b/src/gallium/drivers/i965simple/brw_gs_state.c new file mode 100644 index 0000000000..5b8016b2e9 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_gs_state.c @@ -0,0 +1,90 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + + + +static void upload_gs_unit( struct brw_context *brw ) +{ + struct brw_gs_unit_state gs; + + memset(&gs, 0, sizeof(gs)); + + /* CACHE_NEW_GS_PROG */ + if (brw->gs.prog_active) { + gs.thread0.grf_reg_count = + align(brw->gs.prog_data->total_grf, 16) / 16 - 1; + gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6; + gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length; + } + else { + gs.thread0.grf_reg_count = 0; + gs.thread0.kernel_start_pointer = 0; + gs.thread3.urb_entry_read_length = 1; + } + + /* BRW_NEW_URB_FENCE */ + gs.thread4.nr_urb_entries = brw->urb.nr_gs_entries; + gs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + + gs.thread4.max_threads = 0; /* Hardware requirement */ + + if (BRW_DEBUG & DEBUG_STATS) + gs.thread4.stats_enable = 1; + + /* CONSTANT */ + gs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + gs.thread1.single_program_flow = 1; + gs.thread3.dispatch_grf_start_reg = 1; + gs.thread3.const_urb_entry_read_offset = 0; + gs.thread3.const_urb_entry_read_length = 0; + gs.thread3.urb_entry_read_offset = 0; + + + brw->gs.state_gs_offset = brw_cache_data( &brw->cache[BRW_GS_UNIT], &gs ); +} + + +const struct brw_tracked_state brw_gs_unit = { + .dirty = { + .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_GS_PROG + }, + .update = upload_gs_unit +}; diff --git a/src/gallium/drivers/i965simple/brw_misc_state.c b/src/gallium/drivers/i965simple/brw_misc_state.c new file mode 100644 index 0000000000..be812c5da9 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_misc_state.c @@ -0,0 +1,488 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. 
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_batch.h" +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + + + + + +/*********************************************************************** + * Blend color + */ + +static void upload_blend_constant_color(struct brw_context *brw) +{ + struct brw_blend_constant_color bcc; + + memset(&bcc, 0, sizeof(bcc)); + bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; + bcc.header.length = sizeof(bcc)/4-2; + bcc.blend_constant_color[0] = brw->attribs.BlendColor.color[0]; + bcc.blend_constant_color[1] = brw->attribs.BlendColor.color[1]; + bcc.blend_constant_color[2] = brw->attribs.BlendColor.color[2]; + bcc.blend_constant_color[3] = brw->attribs.BlendColor.color[3]; + + BRW_CACHED_BATCH_STRUCT(brw, &bcc); +} + + +const struct brw_tracked_state brw_blend_constant_color = { + .dirty = { + .brw = BRW_NEW_BLEND, + .cache = 0 + }, + .update = upload_blend_constant_color +}; + + +/*********************************************************************** + * Drawing rectangle + */ +static void upload_drawing_rect(struct brw_context *brw) +{ + struct brw_drawrect bdr; + + memset(&bdr, 0, sizeof(bdr)); + bdr.header.opcode = CMD_DRAW_RECT; + bdr.header.length = sizeof(bdr)/4 - 2; + bdr.xmin = 0; + bdr.ymin = 0; + bdr.xmax = brw->attribs.FrameBuffer.cbufs[0]->width; + bdr.ymax = brw->attribs.FrameBuffer.cbufs[0]->height; + bdr.xorg = 0; + bdr.yorg = 0; + + /* Can't use BRW_CACHED_BATCH_STRUCT because this is also emitted + * uncached in brw_draw.c: + */ + BRW_BATCH_STRUCT(brw, &bdr); +} + +const struct brw_tracked_state brw_drawing_rect = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_drawing_rect +}; + +/** + * Upload the binding table pointers, which point each stage's array of surface + * state pointers. 
+ * + * The binding table pointers are relative to the surface state base address, + * which is the BRW_SS_POOL cache buffer. + */ +static void upload_binding_table_pointers(struct brw_context *brw) +{ + struct brw_binding_table_pointers btp; + memset(&btp, 0, sizeof(btp)); + + btp.header.opcode = CMD_BINDING_TABLE_PTRS; + btp.header.length = sizeof(btp)/4 - 2; + btp.vs = 0; + btp.gs = 0; + btp.clp = 0; + btp.sf = 0; + btp.wm = brw->wm.bind_ss_offset; + + BRW_CACHED_BATCH_STRUCT(brw, &btp); +} + +const struct brw_tracked_state brw_binding_table_pointers = { + .dirty = { + .brw = 0, + .cache = CACHE_NEW_SURF_BIND + }, + .update = upload_binding_table_pointers, +}; + + +/** + * Upload pointers to the per-stage state. + * + * The state pointers in this packet are all relative to the general state + * base address set by CMD_STATE_BASE_ADDRESS, which is the BRW_GS_POOL buffer. + */ +static void upload_pipelined_state_pointers(struct brw_context *brw ) +{ + struct brw_pipelined_state_pointers psp; + memset(&psp, 0, sizeof(psp)); + + psp.header.opcode = CMD_PIPELINED_STATE_POINTERS; + psp.header.length = sizeof(psp)/4 - 2; + + psp.vs.offset = brw->vs.state_gs_offset >> 5; + psp.sf.offset = brw->sf.state_gs_offset >> 5; + psp.wm.offset = brw->wm.state_gs_offset >> 5; + psp.cc.offset = brw->cc.state_gs_offset >> 5; + + /* GS gets turned on and off regularly. Need to re-emit URB fence + * after this occurs. 
+ */ + if (brw->gs.prog_active) { + psp.gs.offset = brw->gs.state_gs_offset >> 5; + psp.gs.enable = 1; + } + + if (0) { + psp.clp.offset = brw->clip.state_gs_offset >> 5; + psp.clp.enable = 1; + } + + + if (BRW_CACHED_BATCH_STRUCT(brw, &psp)) + brw->state.dirty.brw |= BRW_NEW_PSP; +} + +const struct brw_tracked_state brw_pipelined_state_pointers = { + .dirty = { + .brw = 0, + .cache = (CACHE_NEW_VS_UNIT | + CACHE_NEW_GS_UNIT | + CACHE_NEW_GS_PROG | + CACHE_NEW_CLIP_UNIT | + CACHE_NEW_SF_UNIT | + CACHE_NEW_WM_UNIT | + CACHE_NEW_CC_UNIT) + }, + .update = upload_pipelined_state_pointers +}; + +static void upload_psp_urb_cbs(struct brw_context *brw ) +{ + upload_pipelined_state_pointers(brw); + brw_upload_urb_fence(brw); + brw_upload_constant_buffer_state(brw); +} + + +const struct brw_tracked_state brw_psp_urb_cbs = { + .dirty = { + .brw = BRW_NEW_URB_FENCE, + .cache = (CACHE_NEW_VS_UNIT | + CACHE_NEW_GS_UNIT | + CACHE_NEW_GS_PROG | + CACHE_NEW_CLIP_UNIT | + CACHE_NEW_SF_UNIT | + CACHE_NEW_WM_UNIT | + CACHE_NEW_CC_UNIT) + }, + .update = upload_psp_urb_cbs +}; + +/** + * Upload the depthbuffer offset and format. + * + * We have to do this per state validation as we need to emit the relocation + * in the batch buffer. 
+ */ +static void upload_depthbuffer(struct brw_context *brw) +{ + struct pipe_surface *depth_surface = brw->attribs.FrameBuffer.zsbuf; + + BEGIN_BATCH(5, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (5 - 2)); + if (depth_surface == NULL) { + OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | + (BRW_SURFACE_NULL << 29)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + } else { + unsigned int format; + + assert(depth_surface->block.width == 1); + assert(depth_surface->block.height == 1); + switch (depth_surface->block.size) { + case 2: + format = BRW_DEPTHFORMAT_D16_UNORM; + break; + case 4: + if (depth_surface->format == PIPE_FORMAT_Z32_FLOAT) + format = BRW_DEPTHFORMAT_D32_FLOAT; + else + format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; + break; + default: + assert(0); + return; + } + + OUT_BATCH((depth_surface->stride - 1) | + (format << 18) | + (BRW_TILEWALK_YMAJOR << 26) | +// (depth_surface->region->tiled << 27) | + (BRW_SURFACE_2D << 29)); + OUT_RELOC(depth_surface->buffer, + PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE, 0); + OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | + ((depth_surface->stride/depth_surface->block.size - 1) << 6) | + ((depth_surface->height - 1) << 19)); + OUT_BATCH(0); + } + ADVANCE_BATCH(); +} + +const struct brw_tracked_state brw_depthbuffer = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_depthbuffer, +}; + + + + +/*********************************************************************** + * Polygon stipple packet + */ + +static void upload_polygon_stipple(struct brw_context *brw) +{ + struct brw_polygon_stipple bps; + unsigned i; + + memset(&bps, 0, sizeof(bps)); + bps.header.opcode = CMD_POLY_STIPPLE_PATTERN; + bps.header.length = sizeof(bps)/4-2; + + /* XXX: state tracker should send *all* state down initially! 
+ */ + if (brw->attribs.PolygonStipple) + for (i = 0; i < 32; i++) + bps.stipple[i] = brw->attribs.PolygonStipple->stipple[31 - i]; /* invert */ + + BRW_CACHED_BATCH_STRUCT(brw, &bps); +} + +const struct brw_tracked_state brw_polygon_stipple = { + .dirty = { + .brw = BRW_NEW_STIPPLE, + .cache = 0 + }, + .update = upload_polygon_stipple +}; + + +/*********************************************************************** + * Line stipple packet + */ + +static void upload_line_stipple(struct brw_context *brw) +{ + struct brw_line_stipple bls; + float tmp; + int tmpi; + + memset(&bls, 0, sizeof(bls)); + bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; + bls.header.length = sizeof(bls)/4 - 2; + + bls.bits0.pattern = brw->attribs.Raster->line_stipple_pattern; + bls.bits1.repeat_count = brw->attribs.Raster->line_stipple_factor; + + tmp = 1.0 / (float) brw->attribs.Raster->line_stipple_factor; + tmpi = tmp * (1<<13); + + + bls.bits1.inverse_repeat_count = tmpi; + + BRW_CACHED_BATCH_STRUCT(brw, &bls); +} + +const struct brw_tracked_state brw_line_stipple = { + .dirty = { + .brw = BRW_NEW_STIPPLE, + .cache = 0 + }, + .update = upload_line_stipple +}; + + +/*********************************************************************** + * Misc constant state packets + */ + +static void upload_pipe_control(struct brw_context *brw) +{ + struct brw_pipe_control pc; + + return; + + memset(&pc, 0, sizeof(pc)); + + pc.header.opcode = CMD_PIPE_CONTROL; + pc.header.length = sizeof(pc)/4 - 2; + pc.header.post_sync_operation = PIPE_CONTROL_NOWRITE; + + pc.header.instruction_state_cache_flush_enable = 1; + + pc.bits1.dest_addr_type = PIPE_CONTROL_GTTWRITE_GLOBAL; + + BRW_BATCH_STRUCT(brw, &pc); +} + +const struct brw_tracked_state brw_pipe_control = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_pipe_control +}; + + +/*********************************************************************** + * Misc invarient state packets + */ + +static void upload_invarient_state( struct 
brw_context *brw ) +{ + { + struct brw_mi_flush flush; + + memset(&flush, 0, sizeof(flush)); + flush.opcode = CMD_MI_FLUSH; + flush.flags = BRW_FLUSH_STATE_CACHE | BRW_FLUSH_READ_CACHE; + BRW_BATCH_STRUCT(brw, &flush); + } + + { + /* 0x61040000 Pipeline Select */ + /* PipelineSelect : 0 */ + struct brw_pipeline_select ps; + + memset(&ps, 0, sizeof(ps)); + ps.header.opcode = CMD_PIPELINE_SELECT; + ps.header.pipeline_select = 0; + BRW_BATCH_STRUCT(brw, &ps); + } + + { + struct brw_global_depth_offset_clamp gdo; + memset(&gdo, 0, sizeof(gdo)); + + /* Disable depth offset clamping. + */ + gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP; + gdo.header.length = sizeof(gdo)/4 - 2; + gdo.depth_offset_clamp = 0.0; + + BRW_BATCH_STRUCT(brw, &gdo); + } + + + /* 0x61020000 State Instruction Pointer */ + { + struct brw_system_instruction_pointer sip; + memset(&sip, 0, sizeof(sip)); + + sip.header.opcode = CMD_STATE_INSN_POINTER; + sip.header.length = 0; + sip.bits0.pad = 0; + sip.bits0.system_instruction_pointer = 0; + BRW_BATCH_STRUCT(brw, &sip); + } + + + { + struct brw_vf_statistics vfs; + memset(&vfs, 0, sizeof(vfs)); + + vfs.opcode = CMD_VF_STATISTICS; + if (BRW_DEBUG & DEBUG_STATS) + vfs.statistics_enable = 1; + + BRW_BATCH_STRUCT(brw, &vfs); + } + + + { + struct brw_polygon_stipple_offset bpso; + + memset(&bpso, 0, sizeof(bpso)); + bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; + bpso.header.length = sizeof(bpso)/4-2; + bpso.bits0.x_offset = 0; + bpso.bits0.y_offset = 0; + + BRW_BATCH_STRUCT(brw, &bpso); + } +} + +const struct brw_tracked_state brw_invarient_state = { + .dirty = { + .brw = BRW_NEW_SCENE, + .cache = 0 + }, + .update = upload_invarient_state +}; + +/** + * Define the base addresses which some state is referenced from. + * + * This allows us to avoid having to emit relocations in many places for + * cached state, and instead emit pointers inside of large, mostly-static + * state pools. 
/**
 * Define the base addresses which some state is referenced from.
 *
 * This allows us to avoid having to emit relocations in many places for
 * cached state, and instead emit pointers inside of large, mostly-static
 * state pools.  This comes at the expense of memory, and more expensive cache
 * misses.
 *
 * The packet is six dwords: the command header, two relocations (general
 * state pool, then surface state pool) and three literal dwords.
 */
static void upload_state_base_address( struct brw_context *brw )
{
   /* Output the structure (brw_state_base_address) directly to the
    * batchbuffer, so we can emit relocations inline.
    */
   BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS);
   OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
   /* NOTE(review): the trailing 1 on each entry below is presumably a
    * "modify enable" bit in the low bits of the dword -- confirm against
    * the hardware documentation.
    */
   OUT_RELOC(brw->pool[BRW_GS_POOL].buffer,
             PIPE_BUFFER_USAGE_GPU_READ,
             1); /* General state base address */
   OUT_RELOC(brw->pool[BRW_SS_POOL].buffer,
             PIPE_BUFFER_USAGE_GPU_READ,
             1); /* Surface state base address */
   OUT_BATCH(1); /* Indirect object base address */
   OUT_BATCH(1); /* General state upper bound */
   OUT_BATCH(1); /* Indirect object upper bound */
   ADVANCE_BATCH();
}


/* Re-run whenever BRW_NEW_SCENE is flagged. */
const struct brw_tracked_state brw_state_base_address = {
   .dirty = {
      .brw = BRW_NEW_SCENE,
      .cache = 0
   },
   .update = upload_state_base_address
};
/*
 * Command opcodes and bit fields used when building MI, 2D blit and
 * 3D primitive commands.
 */
#ifndef BRW_REG_H
#define BRW_REG_H

/* Command type, placed at bit 29 and up of a command dword. */
#define CMD_MI                          (0x0 << 29)
#define CMD_2D                          (0x2 << 29)
#define CMD_3D                          (0x3 << 29)

#define MI_BATCH_BUFFER_END             (CMD_MI | (0xA << 23))

/* Stalls command execution waiting for the given events to have occurred. */
#define MI_WAIT_FOR_EVENT               (CMD_MI | (0x3 << 23))
#define MI_WAIT_FOR_PLANE_B_FLIP        (1<<6)
#define MI_WAIT_FOR_PLANE_A_FLIP        (1<<2)

/* Primitive dispatch on 830-945 */
#define _3DPRIMITIVE                    (CMD_3D | (0x1f << 24))
#define PRIM_INDIRECT                   (1<<23)
#define PRIM_INLINE                     (0<<23)
#define PRIM_INDIRECT_SEQUENTIAL        (0<<17)
#define PRIM_INDIRECT_ELTS              (1<<17)

/* Primitive type, a five-bit field at bit 18 (see PRIM3D_MASK). */
#define PRIM3D_TRILIST                  (0x0<<18)
#define PRIM3D_TRISTRIP                 (0x1<<18)
#define PRIM3D_TRISTRIP_RVRSE           (0x2<<18)
#define PRIM3D_TRIFAN                   (0x3<<18)
#define PRIM3D_POLY                     (0x4<<18)
#define PRIM3D_LINELIST                 (0x5<<18)
#define PRIM3D_LINESTRIP                (0x6<<18)
#define PRIM3D_RECTLIST                 (0x7<<18)
#define PRIM3D_POINTLIST                (0x8<<18)
#define PRIM3D_DIB                      (0x9<<18)
#define PRIM3D_MASK                     (0x1f<<18)

/* 2D blit commands; the low bits carry a small literal (6, 4, 6). */
#define XY_SETUP_BLT_CMD                (CMD_2D | (0x01 << 22) | 6)

#define XY_COLOR_BLT_CMD                (CMD_2D | (0x50 << 22) | 4)

#define XY_SRC_COPY_BLT_CMD             (CMD_2D | (0x53 << 22) | 6)

/* BR00 */
#define XY_BLT_WRITE_ALPHA              (1 << 21)
#define XY_BLT_WRITE_RGB                (1 << 20)
#define XY_SRC_TILED                    (1 << 15)
#define XY_DST_TILED                    (1 << 11)

/* BR13 */
#define BR13_565                        (0x1 << 24)
#define BR13_8888                       (0x3 << 24)

#define FENCE_LINEAR 0
#define FENCE_XMAJOR 1
#define FENCE_YMAJOR 2

#endif /* BRW_REG_H */
+ * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "pipe/p_winsys.h" +#include "util/u_string.h" + +#include "brw_context.h" +#include "brw_screen.h" +#include "brw_tex_layout.h" + + +static const char * +brw_get_vendor( struct pipe_screen *screen ) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char * +brw_get_name( struct pipe_screen *screen ) +{ + static char buffer[128]; + const char *chipset; + + switch (brw_screen(screen)->pci_id) { + case PCI_CHIP_I965_Q: + chipset = "Intel(R) 965Q"; + break; + case PCI_CHIP_I965_G: + case PCI_CHIP_I965_G_1: + chipset = "Intel(R) 965G"; + break; + case PCI_CHIP_I965_GM: + chipset = "Intel(R) 965GM"; + break; + case PCI_CHIP_I965_GME: + chipset = "Intel(R) 965GME/GLE"; + break; + default: + chipset = "unknown"; + break; + } + + util_snprintf(buffer, sizeof(buffer), "i965 (chipset: %s)", chipset); + return buffer; +} + + +static int +brw_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 11; /* max 1024x1024 */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 11; /* max 1024x1024 */ + default: + return 0; + } +} + + +static float +brw_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 7.5; + + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case 
PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; + + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 4.0; + + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + + default: + return 0; + } +} + + +static boolean +brw_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) +{ +#if 0 + /* XXX: This is broken -- rewrite if still needed. */ + static const unsigned tex_supported[] = { + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_L8_UNORM, + PIPE_FORMAT_A8_UNORM, + PIPE_FORMAT_I8_UNORM, + PIPE_FORMAT_L8A8_UNORM, + PIPE_FORMAT_YCBCR, + PIPE_FORMAT_YCBCR_REV, + PIPE_FORMAT_S8_Z24, + }; + + + /* Actually a lot more than this - add later: + */ + static const unsigned render_supported[] = { + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + }; + + /* + */ + static const unsigned z_stencil_supported[] = { + PIPE_FORMAT_Z16_UNORM, + PIPE_FORMAT_Z32_UNORM, + PIPE_FORMAT_S8Z24_UNORM, + }; + + switch (type) { + case PIPE_RENDER_FORMAT: + *numFormats = Elements(render_supported); + return render_supported; + + case PIPE_TEX_FORMAT: + *numFormats = Elements(tex_supported); + return render_supported; + + case PIPE_Z_STENCIL_FORMAT: + *numFormats = Elements(render_supported); + return render_supported; + + default: + *numFormats = 0; + return NULL; + } +#else + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + return TRUE; + default: + return FALSE; + }; + return FALSE; +#endif +} + + +static void +brw_destroy_screen( struct pipe_screen *screen ) +{ + struct pipe_winsys *winsys = screen->winsys; + + if(winsys->destroy) + winsys->destroy(winsys); + + FREE(screen); +} + + +/** + * Create a new brw_screen object + */ +struct pipe_screen * +brw_create_screen(struct pipe_winsys *winsys, uint pci_id) +{ + struct brw_screen *brwscreen = CALLOC_STRUCT(brw_screen); 
+ + if (!brwscreen) + return NULL; + + brwscreen->pci_id = pci_id; + + brwscreen->screen.winsys = winsys; + + brwscreen->screen.destroy = brw_destroy_screen; + + brwscreen->screen.get_name = brw_get_name; + brwscreen->screen.get_vendor = brw_get_vendor; + brwscreen->screen.get_param = brw_get_param; + brwscreen->screen.get_paramf = brw_get_paramf; + brwscreen->screen.is_format_supported = brw_is_format_supported; + + brw_init_screen_texture_funcs(&brwscreen->screen); + + return &brwscreen->screen; +} diff --git a/src/gallium/drivers/i965simple/brw_screen.h b/src/gallium/drivers/i965simple/brw_screen.h new file mode 100644 index 0000000000..d3c70387e6 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_screen.h @@ -0,0 +1,68 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
#ifndef BRW_SCREEN_H
#define BRW_SCREEN_H


#include "pipe/p_screen.h"


#ifdef __cplusplus
extern "C" {
#endif


/**
 * Subclass of pipe_screen
 *
 * The base pipe_screen is the first member, which is what lets
 * brw_screen() convert a pipe_screen pointer with a plain cast.
 */
struct brw_screen
{
   struct pipe_screen screen;

   /* PCI id handed to brw_create_screen(). */
   uint pci_id;
};


/** cast wrapper: view a pipe_screen pointer as its enclosing brw_screen */
static INLINE struct brw_screen *
brw_screen(struct pipe_screen *pscreen)
{
   return (struct brw_screen *) pscreen;
}


/** Create a brw_screen for the given winsys and PCI id. */
extern struct pipe_screen *
brw_create_screen(struct pipe_winsys *winsys, uint pci_id);


#ifdef __cplusplus
}
#endif

#endif /* BRW_SCREEN_H */
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_sf.h" +#include "brw_state.h" +#include "tgsi/tgsi_parse.h" + + +static void compile_sf_prog( struct brw_context *brw, + struct brw_sf_prog_key *key ) +{ + struct brw_sf_compile c; + const unsigned *program; + unsigned program_size; + + memset(&c, 0, sizeof(c)); + + /* Begin the compilation: + */ + brw_init_compile(&c.func); + + c.key = *key; + + + c.nr_attrs = c.key.vp_output_count; + c.nr_attr_regs = (c.nr_attrs+1)/2; + + c.nr_setup_attrs = c.key.fp_input_count + 1; /* +1 for position */ + c.nr_setup_regs = (c.nr_setup_attrs+1)/2; + + c.prog_data.urb_read_length = c.nr_attr_regs; + c.prog_data.urb_entry_size = c.nr_setup_regs * 2; + + + /* Which primitive? Or all three? 
+ */ + switch (key->primitive) { + case SF_TRIANGLES: + c.nr_verts = 3; + brw_emit_tri_setup( &c ); + break; + case SF_LINES: + c.nr_verts = 2; + brw_emit_line_setup( &c ); + break; + case SF_POINTS: + c.nr_verts = 1; + brw_emit_point_setup( &c ); + break; + + case SF_UNFILLED_TRIS: + default: + assert(0); + return; + } + + + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* Upload + */ + brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->sf.prog_data ); +} + + +static boolean search_cache( struct brw_context *brw, + struct brw_sf_prog_key *key ) +{ + return brw_search_cache(&brw->cache[BRW_SF_PROG], + key, sizeof(*key), + &brw->sf.prog_data, + &brw->sf.prog_gs_offset); +} + + +/* Calculate interpolants for triangle and line rasterization. + */ +static void upload_sf_prog( struct brw_context *brw ) +{ + const struct brw_fragment_program *fs = brw->attribs.FragmentProgram; + struct brw_sf_prog_key key; + struct tgsi_parse_context parse; + int i, done = 0; + + + memset(&key, 0, sizeof(key)); + + /* Populate the key, noting state dependencies: + */ + /* CACHE_NEW_VS_PROG */ + key.vp_output_count = brw->vs.prog_data->outputs_written; + + /* BRW_NEW_FS */ + key.fp_input_count = brw->attribs.FragmentProgram->info.file_max[TGSI_FILE_INPUT] + 1; + + + /* BRW_NEW_REDUCED_PRIMITIVE */ + switch (brw->reduced_primitive) { + case PIPE_PRIM_TRIANGLES: +// if (key.attrs & (1<<VERT_RESULT_EDGE)) +// key.primitive = SF_UNFILLED_TRIS; +// else + key.primitive = SF_TRIANGLES; + break; + case PIPE_PRIM_LINES: + key.primitive = SF_LINES; + break; + case PIPE_PRIM_POINTS: + key.primitive = SF_POINTS; + break; + } + + + + /* Scan fp inputs to figure out what interpolation modes are + * required for each incoming vp output. 
There is an assumption + * that the state tracker makes sure there is a 1:1 linkage between + * these sets of attributes (XXX: position??) + */ + tgsi_parse_init( &parse, fs->program.tokens ); + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT) + { + int first = parse.FullToken.FullDeclaration.DeclarationRange.First; + int last = parse.FullToken.FullDeclaration.DeclarationRange.Last; + int interp_mode = parse.FullToken.FullDeclaration.Declaration.Interpolate; + //int semantic = parse.FullToken.FullDeclaration.Semantic.SemanticName; + //int semantic_index = parse.FullToken.FullDeclaration.Semantic.SemanticIndex; + + debug_printf("fs input %d..%d interp mode %d\n", first, last, interp_mode); + + switch (interp_mode) { + case TGSI_INTERPOLATE_CONSTANT: + for (i = first; i <= last; i++) + key.const_mask |= (1 << i); + break; + case TGSI_INTERPOLATE_LINEAR: + for (i = first; i <= last; i++) + key.linear_mask |= (1 << i); + break; + case TGSI_INTERPOLATE_PERSPECTIVE: + for (i = first; i <= last; i++) + key.persp_mask |= (1 << i); + break; + default: + break; + } + + /* Also need stuff for flat shading, twosided color. + */ + + } + break; + default: + done = 1; + break; + } + } + + /* Hack: Adjust for position. Optimize away when not required (ie + * for perspective interpolation). 
+ */ + key.persp_mask <<= 1; + key.linear_mask <<= 1; + key.linear_mask |= 1; + key.const_mask <<= 1; + + debug_printf("key.persp_mask: %x\n", key.persp_mask); + debug_printf("key.linear_mask: %x\n", key.linear_mask); + debug_printf("key.const_mask: %x\n", key.const_mask); + + +// key.do_point_sprite = brw->attribs.Point->PointSprite; +// key.SpriteOrigin = brw->attribs.Point->SpriteOrigin; + +// key.do_flat_shading = (brw->attribs.Raster->flatshade); +// key.do_twoside_color = (brw->attribs.Light->Enabled && brw->attribs.Light->Model.TwoSide); + +// if (key.do_twoside_color) +// key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW); + + + if (!search_cache(brw, &key)) + compile_sf_prog( brw, &key ); +} + + +const struct brw_tracked_state brw_sf_prog = { + .dirty = { + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_REDUCED_PRIMITIVE | + BRW_NEW_VS | + BRW_NEW_FS), + .cache = 0, + }, + .update = upload_sf_prog +}; + + + +#if 0 +/* Build a struct like the one we'd like the state tracker to pass to + * us. 
#if 0
/* Build a struct like the one we'd like the state tracker to pass to
 * us.
 *
 * This whole section is compiled out.  As written it is unfinished:
 * NOTE(review): fp_semantic[] is read in the matching loop but never
 * filled in, and state.fp_input_count is still 0 when that loop runs.
 */
static void update_sf_linkage( struct brw_context *brw )
{
   const struct brw_vertex_program *vs = brw->attribs.VertexProgram;
   const struct brw_fragment_program *fs = brw->attribs.FragmentProgram;
   struct pipe_setup_linkage state;
   struct tgsi_parse_context parse;

   int i, j;
   int nr_vp_outputs = 0;
   int done = 0;

   /* Semantic name/index recorded per fragment input and vertex output. */
   struct {
      unsigned semantic:8;
      unsigned semantic_index:16;
   } fp_semantic[32], vp_semantic[32];

   memset(&state, 0, sizeof(state));

   state.fp_input_count = 0;






   assert(state.fp_input_count == fs->program.num_inputs);


   /* Then scan vp outputs
    *
    * NOTE(review): the test below selects TGSI_FILE_INPUT declarations
    * although the comment speaks of outputs -- confirm which is meant.
    * NOTE(review): the loop stops before 'last', whereas upload_sf_prog
    * treats DeclarationRange.Last as inclusive -- confirm.
    */
   done = 0;
   tgsi_parse_init( &parse, vs->program.tokens );
   while( !done &&
          !tgsi_parse_end_of_tokens( &parse ) )
   {
      tgsi_parse_token( &parse );

      switch( parse.FullToken.Token.Type ) {
      case TGSI_TOKEN_TYPE_DECLARATION:
         if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_INPUT)
         {
            int first = parse.FullToken.FullDeclaration.DeclarationRange.First;
            int last = parse.FullToken.FullDeclaration.DeclarationRange.Last;

            for (i = first; i < last; i++) {
               vp_semantic[i].semantic =
                  parse.FullToken.FullDeclaration.Semantic.SemanticName;
               vp_semantic[i].semantic_index =
                  parse.FullToken.FullDeclaration.Semantic.SemanticIndex;
            }

            assert(last > nr_vp_outputs);
            nr_vp_outputs = last;
         }
         break;
      default:
         /* The scan ends at the first token that is not a declaration. */
         done = 1;
         break;
      }
   }


   /* Now match based on semantic information.
    *
    * Each fragment input is paired with the vertex output that has the
    * same semantic name and index; COLOR inputs are additionally paired
    * with the BCOLOR output of the same index.
    */
   for (i = 0; i< state.fp_input_count; i++) {
      for (j = 0; j < nr_vp_outputs; j++) {
         if (fp_semantic[i].semantic == vp_semantic[j].semantic &&
             fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) {
            state.fp_input[i].vp_output = j;
         }
      }
      if (fp_semantic[i].semantic == TGSI_SEMANTIC_COLOR) {
         for (j = 0; j < nr_vp_outputs; j++) {
            if (TGSI_SEMANTIC_BCOLOR == vp_semantic[j].semantic &&
                fp_semantic[i].semantic_index == vp_semantic[j].semantic_index) {
               state.fp_input[i].bf_vp_output = j;
            }
         }
      }
   }

   /* Only store the linkage and flag it dirty when it actually changed. */
   if (memcmp(&brw->sf.linkage, &state, sizeof(state)) != 0) {
      brw->sf.linkage = state;
      brw->state.dirty.brw |= BRW_NEW_SF_LINKAGE;
   }
}


/* Would re-run on VS or FS changes. */
const struct brw_tracked_state brw_sf_linkage = {
   .dirty = {
      .brw   = (BRW_NEW_VS |
                BRW_NEW_FS),
      .cache = 0,
   },
   .update = update_sf_linkage
};


#endif
#ifndef BRW_SF_H
#define BRW_SF_H

#include "brw_context.h"
#include "brw_eu.h"


/* Values for brw_sf_prog_key.primitive (a 2-bit field). */
#define SF_POINTS    0
#define SF_LINES     1
#define SF_TRIANGLES 2
#define SF_UNFILLED_TRIS   3



/* Key identifying one SF program variant.  brw_sf.c zeroes it with
 * memset before filling it in and passes it, with its size, to the
 * program cache.
 */
struct brw_sf_prog_key {
   unsigned vp_output_count:5;
   unsigned fp_input_count:5;

   unsigned primitive:2;        /* one of the SF_* values above */
   unsigned do_twoside_color:1;
   unsigned do_flat_shading:1;
   unsigned frontface_ccw:1;
   unsigned do_point_sprite:1;

   /* Interpolation masks;
    * one bit per attribute, grouped by interpolation mode.  brw_sf.c
    * shifts each mask left by one to make room for position and sets
    * bit 0 of linear_mask.
    */
   unsigned linear_mask;
   unsigned persp_mask;
   unsigned const_mask;


//   int SpriteOrigin;
};

struct brw_sf_point_tex {
   boolean CoordReplace;
};

/* Working state for compiling one SF program. */
struct brw_sf_compile {
   struct brw_compile func;
   struct brw_sf_prog_key key;          /* copy of the key being compiled */
   struct brw_sf_prog_data prog_data;

   struct brw_reg pv;
   struct brw_reg det;
   struct brw_reg dx0;
   struct brw_reg dx2;
   struct brw_reg dy0;
   struct brw_reg dy2;

   /* z and 1/w passed in separately:
    */
   struct brw_reg z[3];
   struct brw_reg inv_w[3];

   /* The vertices:
    */
   struct brw_reg vert[3];

   /* Temporaries, allocated after last vertex reg.
    */
   struct brw_reg inv_det;
   struct brw_reg a1_sub_a0;
   struct brw_reg a2_sub_a0;
   struct brw_reg tmp;

   struct brw_reg m1Cx;
   struct brw_reg m2Cy;
   struct brw_reg m3C0;

   unsigned nr_verts;           /* 3, 2 or 1, set per primitive in brw_sf.c */
   unsigned nr_attrs;
   unsigned nr_attr_regs;       /* (nr_attrs + 1) / 2 */
   unsigned nr_setup_attrs;     /* fragment inputs plus one for position */
   unsigned nr_setup_regs;      /* (nr_setup_attrs + 1) / 2 */
#if 0
   ubyte attr_to_idx[VERT_RESULT_MAX];
   ubyte idx_to_attr[VERT_RESULT_MAX];
   struct brw_sf_point_tex point_attrs[VERT_RESULT_MAX];
#endif
};


/* Per-primitive setup emitters. */
void brw_emit_tri_setup( struct brw_sf_compile *c );
void brw_emit_line_setup( struct brw_sf_compile *c );
void brw_emit_point_setup( struct brw_sf_compile *c );
void brw_emit_point_sprite_setup( struct brw_sf_compile *c );
void brw_emit_anyprim_setup( struct brw_sf_compile *c );

#endif
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_defines.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_util.h" +#include "brw_sf.h" + + + +/*********************************************************************** + * Triangle setup. + */ + + +static void alloc_regs( struct brw_sf_compile *c ) +{ + unsigned reg, i; + + /* Values computed by fixed function unit: + */ + c->pv = retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_UD); + c->det = brw_vec1_grf(1, 2); + c->dx0 = brw_vec1_grf(1, 3); + c->dx2 = brw_vec1_grf(1, 4); + c->dy0 = brw_vec1_grf(1, 5); + c->dy2 = brw_vec1_grf(1, 6); + + /* z and 1/w passed in seperately: + */ + c->z[0] = brw_vec1_grf(2, 0); + c->inv_w[0] = brw_vec1_grf(2, 1); + c->z[1] = brw_vec1_grf(2, 2); + c->inv_w[1] = brw_vec1_grf(2, 3); + c->z[2] = brw_vec1_grf(2, 4); + c->inv_w[2] = brw_vec1_grf(2, 5); + + /* The vertices: + */ + reg = 3; + for (i = 0; i < c->nr_verts; i++) { + c->vert[i] = brw_vec8_grf(reg, 0); + reg += c->nr_attr_regs; + } + + /* Temporaries, allocated after last vertex reg. 
+ */ + c->inv_det = brw_vec1_grf(reg, 0); reg++; + c->a1_sub_a0 = brw_vec8_grf(reg, 0); reg++; + c->a2_sub_a0 = brw_vec8_grf(reg, 0); reg++; + c->tmp = brw_vec8_grf(reg, 0); reg++; + + /* Note grf allocation: + */ + c->prog_data.total_grf = reg; + + + /* Outputs of this program - interpolation coefficients for + * rasterization: + */ + c->m1Cx = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 1, 0); + c->m2Cy = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 2, 0); + c->m3C0 = brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, 3, 0); +} + + +static void copy_z_inv_w( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + unsigned i; + + brw_push_insn_state(p); + + /* Copy both scalars with a single MOV: + */ + for (i = 0; i < c->nr_verts; i++) + brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i])); + + brw_pop_insn_state(p); +} + + +static void invert_det( struct brw_sf_compile *c) +{ + brw_math(&c->func, + c->inv_det, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 0, + c->det, + BRW_MATH_DATA_SCALAR, + BRW_MATH_PRECISION_FULL); + +} + +#define NON_PERPECTIVE_ATTRS (FRAG_BIT_WPOS | \ + FRAG_BIT_COL0 | \ + FRAG_BIT_COL1) + +static boolean calculate_masks( struct brw_sf_compile *c, + unsigned reg, + ushort *pc, + ushort *pc_persp, + ushort *pc_linear) +{ + boolean is_last_attr = (reg == c->nr_setup_regs - 1); + unsigned persp_mask = c->key.persp_mask; + unsigned linear_mask = c->key.linear_mask; + + debug_printf("persp_mask: %x\n", persp_mask); + debug_printf("linear_mask: %x\n", linear_mask); + + *pc_persp = 0; + *pc_linear = 0; + *pc = 0xf; + + if (persp_mask & (1 << (reg*2))) + *pc_persp = 0xf; + + if (linear_mask & (1 << (reg*2))) + *pc_linear = 0xf; + + /* Maybe only processs one attribute on the final round: + */ + if (reg*2+1 < c->nr_setup_attrs) { + *pc |= 0xf0; + + if (persp_mask & (1 << (reg*2+1))) + *pc_persp |= 0xf0; + + if (linear_mask & (1 << (reg*2+1))) + *pc_linear |= 0xf0; + } + + debug_printf("pc: %x\n", *pc); + debug_printf("pc_persp: %x\n", 
*pc_persp); + debug_printf("pc_linear: %x\n", *pc_linear); + + + return is_last_attr; +} + + + +void brw_emit_tri_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + unsigned i; + + debug_printf("%s START ==============\n", __FUNCTION__); + + c->nr_verts = 3; + alloc_regs(c); + invert_det(c); + copy_z_inv_w(c); + + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* Pair of incoming attributes: + */ + struct brw_reg a0 = offset(c->vert[0], i); + struct brw_reg a1 = offset(c->vert[1], i); + struct brw_reg a2 = offset(c->vert[2], i); + ushort pc = 0, pc_persp = 0, pc_linear = 0; + boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + brw_MUL(p, a1, a1, c->inv_w[1]); + brw_MUL(p, a2, a2, c->inv_w[2]); + } + + + /* Calculate coefficients for interpolated values: + */ + if (pc_linear) + { + brw_set_predicate_control_flag_value(p, pc_linear); + + brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + brw_ADD(p, c->a2_sub_a0, a2, negate(a0)); + + /* calculate dA/dx + */ + brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2); + brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0)); + brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + + /* calculate dA/dy + */ + brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0); + brw_MAC(p, c->tmp, c->a1_sub_a0, negate(c->dx2)); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); + } + + { + brw_set_predicate_control_flag_value(p, pc); + /* start point for interpolation + */ + brw_MOV(p, c->m3C0, a0); + + /* Copy m0..m3 to URB. 
m0 is implicitly copied from r0 in + * the send instruction: + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* offset */ + BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ + } + } + + debug_printf("%s DONE ==============\n", __FUNCTION__); + +} + + + +void brw_emit_line_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + unsigned i; + + + c->nr_verts = 2; + alloc_regs(c); + invert_det(c); + copy_z_inv_w(c); + + for (i = 0; i < c->nr_setup_regs; i++) + { + /* Pair of incoming attributes: + */ + struct brw_reg a0 = offset(c->vert[0], i); + struct brw_reg a1 = offset(c->vert[1], i); + ushort pc, pc_persp, pc_linear; + boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + brw_MUL(p, a1, a1, c->inv_w[1]); + } + + /* Calculate coefficients for position, color: + */ + if (pc_linear) { + brw_set_predicate_control_flag_value(p, pc_linear); + + brw_ADD(p, c->a1_sub_a0, a1, negate(a0)); + + brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0); + brw_MUL(p, c->m1Cx, c->tmp, c->inv_det); + + brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0); + brw_MUL(p, c->m2Cy, c->tmp, c->inv_det); + } + + { + brw_set_predicate_control_flag_value(p, pc); + + /* start point for interpolation + */ + brw_MOV(p, c->m3C0, a0); + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} + + +/* Points setup - several simplifications as all attributes are + * constant across the face of the point (point sprites excluded!) 
+ */ +void brw_emit_point_setup( struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + unsigned i; + + c->nr_verts = 1; + alloc_regs(c); + copy_z_inv_w(c); + + brw_MOV(p, c->m1Cx, brw_imm_ud(0)); /* zero - move out of loop */ + brw_MOV(p, c->m2Cy, brw_imm_ud(0)); /* zero - move out of loop */ + + for (i = 0; i < c->nr_setup_regs; i++) + { + struct brw_reg a0 = offset(c->vert[0], i); + ushort pc, pc_persp, pc_linear; + boolean last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); + + if (pc_persp) + { + /* This seems odd as the values are all constant, but the + * fragment shader will be expecting it: + */ + brw_set_predicate_control_flag_value(p, pc_persp); + brw_MUL(p, a0, a0, c->inv_w[0]); + } + + + /* The delta values are always zero, just send the starting + * coordinate. Again, this is to fit in with the interpolation + * code in the fragment shader. + */ + { + brw_set_predicate_control_flag_value(p, pc); + + brw_MOV(p, c->m3C0, a0); /* constant value */ + + /* Copy m0..m3 to URB. + */ + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + i*4, /* urb destination offset */ + BRW_URB_SWIZZLE_TRANSPOSE); + } + } +} diff --git a/src/gallium/drivers/i965simple/brw_sf_state.c b/src/gallium/drivers/i965simple/brw_sf_state.c new file mode 100644 index 0000000000..2a5de61c21 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_sf_state.c @@ -0,0 +1,181 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + + +static void upload_sf_vp(struct brw_context *brw) +{ + struct brw_sf_viewport sfv; + + memset(&sfv, 0, sizeof(sfv)); + + + /* BRW_NEW_VIEWPORT */ + { + const float *scale = brw->attribs.Viewport.scale; + const float *trans = brw->attribs.Viewport.translate; + + sfv.viewport.m00 = scale[0]; + sfv.viewport.m11 = scale[1]; + sfv.viewport.m22 = scale[2]; + sfv.viewport.m30 = trans[0]; + sfv.viewport.m31 = trans[1]; + sfv.viewport.m32 = trans[2]; + } + + /* _NEW_SCISSOR */ + sfv.scissor.xmin = brw->attribs.Scissor.minx; + sfv.scissor.xmax = brw->attribs.Scissor.maxx - 1; + sfv.scissor.ymin = brw->attribs.Scissor.miny; + sfv.scissor.ymax = brw->attribs.Scissor.maxy - 1; + + brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv ); +} + +const struct brw_tracked_state brw_sf_vp = { + .dirty = { + .brw = (BRW_NEW_SCISSOR | + BRW_NEW_VIEWPORT), + .cache = 0 + }, + .update = upload_sf_vp +}; + +static void upload_sf_unit( struct brw_context *brw ) +{ + struct brw_sf_unit_state sf; + memset(&sf, 0, sizeof(sf)); + + /* CACHE_NEW_SF_PROG */ + sf.thread0.grf_reg_count = align(brw->sf.prog_data->total_grf, 16) / 16 - 1; + sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6; + sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length; + + sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + sf.thread3.dispatch_grf_start_reg = 3; + sf.thread3.urb_entry_read_offset = 1; + + /* BRW_NEW_URB_FENCE */ + sf.thread4.nr_urb_entries = brw->urb.nr_sf_entries; + sf.thread4.urb_entry_allocation_size = brw->urb.sfsize - 1; + sf.thread4.max_threads = MIN2(12, brw->urb.nr_sf_entries / 2) - 1; + + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) + sf.thread4.max_threads = 0; + + 
if (BRW_DEBUG & DEBUG_STATS) + sf.thread4.stats_enable = 1; + + /* CACHE_NEW_SF_VP */ + sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5; + sf.sf5.viewport_transform = 1; + + /* BRW_NEW_RASTER */ + if (brw->attribs.Raster->scissor) + sf.sf6.scissor = 1; + +#if 0 + if (brw->attribs.Polygon->FrontFace == GL_CCW) + sf.sf5.front_winding = BRW_FRONTWINDING_CCW; + else + sf.sf5.front_winding = BRW_FRONTWINDING_CW; + + + if (brw->attribs.Polygon->CullFlag) { + switch (brw->attribs.Polygon->CullFaceMode) { + case GL_FRONT: + sf.sf6.cull_mode = BRW_CULLMODE_FRONT; + break; + case GL_BACK: + sf.sf6.cull_mode = BRW_CULLMODE_BACK; + break; + case GL_FRONT_AND_BACK: + sf.sf6.cull_mode = BRW_CULLMODE_BOTH; + break; + default: + assert(0); + break; + } + } + else + sf.sf6.cull_mode = BRW_CULLMODE_NONE; +#else + sf.sf5.front_winding = BRW_FRONTWINDING_CCW; + sf.sf6.cull_mode = BRW_CULLMODE_NONE; +#endif + + sf.sf6.line_width = CLAMP(brw->attribs.Raster->line_width, 1.0, 5.0) * (1<<1); + + sf.sf6.line_endcap_aa_region_width = 1; + if (brw->attribs.Raster->line_smooth) + sf.sf6.aa_enable = 1; + else if (sf.sf6.line_width <= 0x2) + sf.sf6.line_width = 0; + + sf.sf6.point_rast_rule = 1; /* opengl conventions */ + + sf.sf7.sprite_point = brw->attribs.Raster->point_sprite; + sf.sf7.point_size = CLAMP(brw->attribs.Raster->line_width, 1.0, 255.0) * (1<<3); + sf.sf7.use_point_size_state = !brw->attribs.Raster->point_size_per_vertex; + + /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: + */ + sf.sf7.trifan_pv = 2; + sf.sf7.linestrip_pv = 1; + sf.sf7.tristrip_pv = 2; + sf.sf7.line_last_pixel_enable = 0; + + /* Set bias for OpenGL rasterization rules: + */ + sf.sf6.dest_org_vbias = 0x8; + sf.sf6.dest_org_hbias = 0x8; + + brw->sf.state_gs_offset = brw_cache_data( &brw->cache[BRW_SF_UNIT], &sf ); +} + + +const struct brw_tracked_state brw_sf_unit = { + .dirty = { + .brw = (BRW_NEW_RASTERIZER | + BRW_NEW_URB_FENCE), + .cache = (CACHE_NEW_SF_VP | + CACHE_NEW_SF_PROG) 
+ }, + .update = upload_sf_unit +}; + + diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c new file mode 100644 index 0000000000..86d877d7ef --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -0,0 +1,48 @@ + +#include "brw_context.h" +#include "brw_state.h" +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" + + +/** + * XXX this obsolete new and no longer compiled. + */ +void brw_shader_info(const struct tgsi_token *tokens, + struct brw_shader_info *info ) +{ + struct tgsi_parse_context parse; + int done = 0; + + tgsi_parse_init( &parse, tokens ); + + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; + unsigned last = decl->DeclarationRange.Last; + + // Broken by crazy wpos init: + //assert( info->nr_regs[decl->Declaration.File] <= last); + + info->nr_regs[decl->Declaration.File] = MAX2(info->nr_regs[decl->Declaration.File], + last+1); + break; + } + case TGSI_TOKEN_TYPE_IMMEDIATE: + case TGSI_TOKEN_TYPE_INSTRUCTION: + default: + done = 1; + break; + } + } + + tgsi_parse_free (&parse); + +} diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c new file mode 100644 index 0000000000..af46cb546f --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -0,0 +1,469 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: Zack Rusin <zack@tungstengraphics.com> + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_winsys.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" +#include "brw_draw.h" + + +#define DUP( TYPE, VAL ) \ +do { \ + struct TYPE *x = malloc(sizeof(*x)); \ + memcpy(x, VAL, sizeof(*x) ); \ + return x; \ +} while (0) + +/************************************************************************ + * Blend + */ +static void * +brw_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + DUP( pipe_blend_state, blend ); +} + +static void brw_bind_blend_state(struct pipe_context *pipe, + void *blend) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Blend = (struct pipe_blend_state*)blend; + brw->state.dirty.brw |= BRW_NEW_BLEND; +} + + +static void brw_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + free(blend); +} + +static void brw_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.BlendColor = *blend_color; + + brw->state.dirty.brw |= BRW_NEW_BLEND; +} + +/************************************************************************ + * Sampler + */ + +static void * +brw_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + DUP( pipe_sampler_state, sampler ); +} + +static void brw_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct brw_context *brw = brw_context(pipe); + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == brw->num_samplers && + !memcmp(brw->attribs.Samplers, sampler, num * sizeof(void *))) + 
return; + + memcpy(brw->attribs.Samplers, sampler, num * sizeof(void *)); + memset(&brw->attribs.Samplers[num], 0, (PIPE_MAX_SAMPLERS - num) * + sizeof(void *)); + + brw->num_samplers = num; + + brw->state.dirty.brw |= BRW_NEW_SAMPLER; +} + +static void brw_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + free(sampler); +} + + +/************************************************************************ + * Depth stencil + */ + +static void * +brw_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + DUP( pipe_depth_stencil_alpha_state, depth_stencil ); +} + +static void brw_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.DepthStencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + + brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL; +} + +static void brw_delete_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + free(depth_stencil); +} + +/************************************************************************ + * Scissor + */ +static void brw_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct brw_context *brw = brw_context(pipe); + + memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) ); + brw->state.dirty.brw |= BRW_NEW_SCISSOR; +} + + +/************************************************************************ + * Stipple + */ + +static void brw_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ +} + + +/************************************************************************ + * Fragment shader + */ + +static void * brw_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *shader) +{ + struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program); + + brw_fp->program.tokens = tgsi_dup_tokens(shader->tokens); + 
brw_fp->id = brw_context(pipe)->program_id++; + + tgsi_scan_shader(shader->tokens, &brw_fp->info); + +#if 0 + brw_shader_info(shader->tokens, + &brw_fp->info2); +#endif + + tgsi_dump(shader->tokens, 0); + + + return (void *)brw_fp; +} + +static void brw_bind_fs_state(struct pipe_context *pipe, void *shader) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader; + brw->state.dirty.brw |= BRW_NEW_FS; +} + +static void brw_delete_fs_state(struct pipe_context *pipe, void *shader) +{ + struct brw_fragment_program *brw_fp = (struct brw_fragment_program *) shader; + + FREE((void *) brw_fp->program.tokens); + FREE(brw_fp); +} + + +/************************************************************************ + * Vertex shader and other TNL state + */ + +static void *brw_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *shader) +{ + struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program); + + brw_vp->program.tokens = tgsi_dup_tokens(shader->tokens); + brw_vp->id = brw_context(pipe)->program_id++; + + tgsi_scan_shader(shader->tokens, &brw_vp->info); + +#if 0 + brw_shader_info(shader->tokens, + &brw_vp->info2); +#endif + tgsi_dump(shader->tokens, 0); + + return (void *)brw_vp; +} + +static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.VertexProgram = (struct brw_vertex_program *)vs; + brw->state.dirty.brw |= BRW_NEW_VS; + + debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n"); +} + +static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) +{ + struct brw_vertex_program *brw_vp = (struct brw_vertex_program *) shader; + + FREE((void *) brw_vp->program.tokens); + FREE(brw_vp); +} + + +static void brw_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Clip = *clip; +} + + +static void 
brw_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Viewport = *viewport; /* struct copy */ + brw->state.dirty.brw |= BRW_NEW_VIEWPORT; + + /* pass the viewport info to the draw module */ + //draw_set_viewport_state(brw->draw, viewport); +} + + +static void brw_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct brw_context *brw = brw_context(pipe); + memcpy(brw->vb.vbo_array, buffers, count * sizeof(buffers[0])); +} + +static void brw_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements) +{ + /* flush ? */ + struct brw_context *brw = brw_context(pipe); + uint i; + + assert(count <= PIPE_MAX_ATTRIBS); + + for (i = 0; i < count; i++) { + struct brw_vertex_element_state el; + memset(&el, 0, sizeof(el)); + + el.ve0.src_offset = elements[i].src_offset; + el.ve0.src_format = brw_translate_surface_format(elements[i].src_format); + el.ve0.valid = 1; + el.ve0.vertex_buffer_index = elements[i].vertex_buffer_index; + + el.ve1.dst_offset = i * 4; + + el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; + + switch (elements[i].nr_components) { + case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; + case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; + case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; + break; + } + + brw->vb.inputs[i] = el; + } +} + + + +/************************************************************************ + * Constant buffers + */ + +static void brw_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct brw_context *brw = brw_context(pipe); + + assert(buf == 0 || index == 0); + + 
brw->attribs.Constants[shader] = buf; + brw->state.dirty.brw |= BRW_NEW_CONSTANTS; +} + + +/************************************************************************ + * Texture surfaces + */ + + +static void brw_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ + struct brw_context *brw = brw_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == brw->num_textures && + !memcmp(brw->attribs.Texture, texture, num * + sizeof(struct pipe_texture *))) + return; + + for (i = 0; i < num; i++) + pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], + texture[i]); + + for (i = num; i < brw->num_textures; i++) + pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], + NULL); + + brw->num_textures = num; + + brw->state.dirty.brw |= BRW_NEW_TEXTURE; +} + + +/************************************************************************ + * Render targets, etc + */ + +static void brw_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.FrameBuffer = *fb; /* struct copy */ + + brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER; +} + + + +/************************************************************************ + * Rasterizer state + */ + +static void * +brw_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rasterizer) +{ + DUP(pipe_rasterizer_state, rasterizer); +} + +static void brw_bind_rasterizer_state( struct pipe_context *pipe, + void *setup ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Raster = (struct pipe_rasterizer_state *)setup; + + /* Also pass-through to draw module: + */ + //draw_set_rasterizer_state(brw->draw, setup); + + brw->state.dirty.brw |= BRW_NEW_RASTERIZER; +} + +static void brw_delete_rasterizer_state(struct pipe_context *pipe, + void *setup) +{ + free(setup); +} + + + 
+void +brw_init_state_functions( struct brw_context *brw ) +{ + brw->pipe.create_blend_state = brw_create_blend_state; + brw->pipe.bind_blend_state = brw_bind_blend_state; + brw->pipe.delete_blend_state = brw_delete_blend_state; + + brw->pipe.create_sampler_state = brw_create_sampler_state; + brw->pipe.bind_sampler_states = brw_bind_sampler_states; + brw->pipe.delete_sampler_state = brw_delete_sampler_state; + + brw->pipe.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; + brw->pipe.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; + brw->pipe.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; + + brw->pipe.create_rasterizer_state = brw_create_rasterizer_state; + brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state; + brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state; + brw->pipe.create_fs_state = brw_create_fs_state; + brw->pipe.bind_fs_state = brw_bind_fs_state; + brw->pipe.delete_fs_state = brw_delete_fs_state; + brw->pipe.create_vs_state = brw_create_vs_state; + brw->pipe.bind_vs_state = brw_bind_vs_state; + brw->pipe.delete_vs_state = brw_delete_vs_state; + + brw->pipe.set_blend_color = brw_set_blend_color; + brw->pipe.set_clip_state = brw_set_clip_state; + brw->pipe.set_constant_buffer = brw_set_constant_buffer; + brw->pipe.set_framebuffer_state = brw_set_framebuffer_state; + +// brw->pipe.set_feedback_state = brw_set_feedback_state; +// brw->pipe.set_feedback_buffer = brw_set_feedback_buffer; + + brw->pipe.set_polygon_stipple = brw_set_polygon_stipple; + brw->pipe.set_scissor_state = brw_set_scissor_state; + brw->pipe.set_sampler_textures = brw_set_sampler_textures; + brw->pipe.set_viewport_state = brw_set_viewport_state; + brw->pipe.set_vertex_buffers = brw_set_vertex_buffers; + brw->pipe.set_vertex_elements = brw_set_vertex_elements; +} diff --git a/src/gallium/drivers/i965simple/brw_state.h b/src/gallium/drivers/i965simple/brw_state.h new file mode 100644 index 0000000000..de0a6371b8 --- 
/dev/null +++ b/src/gallium/drivers/i965simple/brw_state.h @@ -0,0 +1,151 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+#ifndef BRW_STATE_H
+#define BRW_STATE_H
+
+#include "brw_context.h"
+#include "brw_winsys.h"
+
+
+/* State atoms.
+ *
+ * These are declarations only: `extern` keeps this header from emitting a
+ * tentative definition of every atom in each translation unit that
+ * includes it (which fails to link when common symbols are disabled).
+ * Each atom is expected to be defined once, in the source file that
+ * implements it.
+ */
+extern const struct brw_tracked_state brw_blend_constant_color;
+extern const struct brw_tracked_state brw_cc_unit;
+extern const struct brw_tracked_state brw_cc_vp;
+extern const struct brw_tracked_state brw_clip_prog;
+extern const struct brw_tracked_state brw_clip_unit;
+extern const struct brw_tracked_state brw_constant_buffer_state;
+extern const struct brw_tracked_state brw_constant_buffer;
+extern const struct brw_tracked_state brw_curbe_offsets;
+extern const struct brw_tracked_state brw_invarient_state;
+extern const struct brw_tracked_state brw_gs_prog;
+extern const struct brw_tracked_state brw_gs_unit;
+extern const struct brw_tracked_state brw_drawing_rect;
+extern const struct brw_tracked_state brw_line_stipple;
+extern const struct brw_tracked_state brw_pipelined_state_pointers;
+extern const struct brw_tracked_state brw_binding_table_pointers;
+extern const struct brw_tracked_state brw_depthbuffer;
+extern const struct brw_tracked_state brw_polygon_stipple_offset;
+extern const struct brw_tracked_state brw_polygon_stipple;
+extern const struct brw_tracked_state brw_program_parameters;
+extern const struct brw_tracked_state brw_recalculate_urb_fence;
+extern const struct brw_tracked_state brw_sf_prog;
+extern const struct brw_tracked_state brw_sf_unit;
+extern const struct brw_tracked_state brw_sf_vp;
+extern const struct brw_tracked_state brw_state_base_address;
+extern const struct brw_tracked_state brw_urb_fence;
+extern const struct brw_tracked_state brw_vertex_state;
+extern const struct brw_tracked_state brw_vs_prog;
+extern const struct brw_tracked_state brw_vs_unit;
+extern const struct brw_tracked_state brw_wm_prog;
+extern const struct brw_tracked_state brw_wm_samplers;
+extern const struct brw_tracked_state brw_wm_surfaces;
+extern const struct brw_tracked_state brw_wm_unit;
+
+extern const struct brw_tracked_state brw_psp_urb_cbs;
+
+extern const struct brw_tracked_state brw_active_vertprog;
+extern const struct brw_tracked_state brw_tnl_vertprog;
+extern const struct brw_tracked_state brw_pipe_control;
+
+extern const struct brw_tracked_state brw_clear_surface_cache;
+extern const struct brw_tracked_state brw_clear_batch_cache;
+
+/***********************************************************************
+ * brw_state_cache.c
+ */
+
+/* Look up `data` in the cache using the cache's configured key size,
+ * uploading it on a miss.  Returns the offset of the data in the pool.
+ */
+unsigned brw_cache_data(struct brw_cache *cache,
+                        const void *data );
+
+/* As brw_cache_data(), but with an explicit size. */
+unsigned brw_cache_data_sz(struct brw_cache *cache,
+                           const void *data,
+                           unsigned data_sz);
+
+/* Unconditionally upload `data` and record it under `key` (plus optional
+ * aux data).  Returns the offset of the data in the pool.
+ */
+unsigned brw_upload_cache( struct brw_cache *cache,
+                           const void *key,
+                           unsigned key_sz,
+                           const void *data,
+                           unsigned data_sz,
+                           const void *aux,
+                           void *aux_return );
+
+/* Returns TRUE and fills *offset_return when `key` is already cached. */
+boolean brw_search_cache( struct brw_cache *cache,
+                          const void *key,
+                          unsigned key_size,
+                          void *aux_return,
+                          unsigned *offset_return);
+
+void brw_init_caches( struct brw_context *brw );
+void brw_destroy_caches( struct brw_context *brw );
+
+/* Buffer backing the pool that cache `id` allocates from. */
+static inline struct pipe_buffer *brw_cache_buffer(struct brw_context *brw,
+                                                   enum brw_cache_id id)
+{
+   return brw->cache[id].pool->buffer;
+}
+
+/***********************************************************************
+ * brw_state_batch.c
+ */
+#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) )
+
+/* Emit a command packet unless an identical one was the last emitted for
+ * its opcode.  Returns TRUE when the packet was written to the batch.
+ */
+boolean brw_cached_batch_struct( struct brw_context *brw,
+                                 const void *data,
+                                 unsigned sz );
+
+void brw_destroy_batch_cache( struct brw_context *brw );
+
+
+/***********************************************************************
+ * brw_state_pool.c
+ */
+void brw_init_pools( struct brw_context *brw );
+void brw_destroy_pools( struct brw_context *brw );
+
+/* Reserve `size` bytes at the given alignment; the offset of the
+ * reservation within the pool is stored in *offset_return.
+ */
+boolean brw_pool_alloc( struct brw_mem_pool *pool,
+                        unsigned size,
+                        unsigned alignment,
+                        unsigned *offset_return);
+
+void brw_pool_fence( struct brw_context *brw,
+                     struct brw_mem_pool *pool,
+                     unsigned fence );
+
+
+/* Flags BRW_NEW_SCENE once the pool is more than three quarters full. */
+void brw_pool_check_wrap( struct brw_context *brw,
+                          struct brw_mem_pool *pool );
+
+void brw_clear_all_caches( struct brw_context *brw );
+void brw_invalidate_pools( struct brw_context *brw );
+void brw_clear_batch_cache_flush( struct brw_context *brw ); + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_state_batch.c b/src/gallium/drivers/i965simple/brw_state_batch.c new file mode 100644 index 0000000000..43a1c89fc4 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state_batch.c @@ -0,0 +1,113 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_state.h" +#include "brw_winsys.h" + +#include "util/u_memory.h" + +/* A facility similar to the data caching code above, which aims to + * prevent identical commands being issued repeatedly. 
+ */
+
+/**
+ * Emit a command packet to the batchbuffer, skipping it when the most
+ * recently emitted packet with the same opcode was byte-identical.
+ *
+ * \param brw   context owning the list of cached packets
+ * \param data  packet to emit; must begin with a struct header
+ * \param sz    size of the packet in bytes
+ * \return TRUE if the packet was written to the batch, FALSE if it was
+ *         suppressed as a duplicate.
+ *
+ * If memory for the cached copy cannot be obtained the packet is still
+ * emitted; it is simply not remembered, so the next identical packet will
+ * be emitted again rather than wrongly suppressed.
+ */
+boolean brw_cached_batch_struct( struct brw_context *brw,
+                                 const void *data,
+                                 unsigned sz )
+{
+   struct brw_cached_batch_item **link = &brw->cached_batch_items;
+   struct brw_cached_batch_item *item;
+   const struct header *newheader = (const struct header *)data;
+
+   if (brw->emit_state_always) {
+      brw_batchbuffer_data(brw->winsys, data, sz);
+      return TRUE;
+   }
+
+   /* One cached entry per opcode; `link` tracks the pointer that refers
+    * to `item` so the entry can be unlinked if it has to be dropped.
+    */
+   while ((item = *link) != NULL &&
+          item->header->opcode != newheader->opcode)
+      link = &item->next;
+
+   if (item) {
+      if (item->sz == sz && memcmp(item->header, newheader, sz) == 0)
+         return FALSE;
+
+      if (item->sz != sz) {
+         struct header *resized = MALLOC(sz);
+
+         if (resized) {
+            FREE(item->header);
+            item->header = resized;
+            item->sz = sz;
+         }
+         else {
+            /* The old copy no longer describes what is in the batch, so
+             * forget the entry instead of keeping stale contents.
+             */
+            *link = item->next;
+            FREE(item->header);
+            FREE(item);
+            item = NULL;
+         }
+      }
+   }
+   else {
+      item = CALLOC_STRUCT(brw_cached_batch_item);
+
+      if (item) {
+         item->header = MALLOC(sz);
+
+         if (item->header) {
+            item->sz = sz;
+            item->next = brw->cached_batch_items;
+            brw->cached_batch_items = item;
+         }
+         else {
+            FREE(item);
+            item = NULL;
+         }
+      }
+   }
+
+   if (item)
+      memcpy(item->header, newheader, sz);
+
+   brw_batchbuffer_data(brw->winsys, data, sz);
+   return TRUE;
+}
+
+/* Drop every cached packet, then clear the state caches and reset the
+ * pools they allocate from.
+ */
+static void clear_batch_cache( struct brw_context *brw )
+{
+   struct brw_cached_batch_item *item = brw->cached_batch_items;
+
+   while (item) {
+      struct brw_cached_batch_item *next = item->next;
+
+      /* Allocated with MALLOC/CALLOC_STRUCT, so release with FREE. */
+      FREE(item->header);
+      FREE(item);
+      item = next;
+   }
+
+   brw->cached_batch_items = NULL;
+
+
+   brw_clear_all_caches(brw);
+
+   brw_invalidate_pools(brw);
+}
+
+/* Clear the batch cache and mark all state dirty so that it is
+ * re-emitted.
+ */
+void brw_clear_batch_cache_flush( struct brw_context *brw )
+{
+   clear_batch_cache(brw);
+
+/*    brw_do_flush(brw, BRW_FLUSH_STATE_CACHE|BRW_FLUSH_READ_CACHE); */
+
+   brw->state.dirty.brw |= ~0;
+   brw->state.dirty.cache |= ~0;
+}
+
+
+
+void brw_destroy_batch_cache( struct brw_context *brw )
+{
+   clear_batch_cache(brw);
+}
diff --git a/src/gallium/drivers/i965simple/brw_state_cache.c b/src/gallium/drivers/i965simple/brw_state_cache.c
new file mode 100644
index 0000000000..094248fa69
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_state_cache.c
@@ -0,0 +1,443 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_state.h" + +#include "brw_wm.h" +#include "brw_vs.h" +#include "brw_clip.h" +#include "brw_sf.h" +#include "brw_gs.h" + +#include "util/u_memory.h" + + + +/*********************************************************************** + * Check cache for uploaded version of struct, else upload new one. + * Fail when memory is exhausted. + * + * XXX: FIXME: Currently search is so slow it would be quicker to + * regenerate the data every time... 
+ */
+
+/* XOR-fold the key, one 32-bit word at a time.  key_size must be a
+ * multiple of four.
+ */
+static unsigned hash_key( const void *key, unsigned key_size )
+{
+   const unsigned *ikey = (const unsigned *)key;
+   unsigned hash = 0, i;
+
+   assert(key_size % 4 == 0);
+
+   /* I'm sure this can be improved on:
+    */
+   for (i = 0; i < key_size/4; i++)
+      hash ^= ikey[i];
+
+   return hash;
+}
+
+/* Walk the bucket selected by `hash` looking for an exact key match. */
+static struct brw_cache_item *search_cache( struct brw_cache *cache,
+                                            unsigned hash,
+                                            const void *key,
+                                            unsigned key_size)
+{
+   struct brw_cache_item *c;
+
+   for (c = cache->items[hash % cache->size]; c; c = c->next) {
+      if (c->hash == hash &&
+          c->key_size == key_size &&
+          memcmp(c->key, key, key_size) == 0)
+         return c;
+   }
+
+   return NULL;
+}
+
+
+/* Triple the number of buckets and redistribute the existing items.
+ * If the larger table cannot be allocated the cache keeps its current
+ * table, which is still fully usable.
+ */
+static void rehash( struct brw_cache *cache )
+{
+   struct brw_cache_item **items;
+   struct brw_cache_item *c, *next;
+   unsigned size, i;
+
+   size = cache->size * 3;
+   items = (struct brw_cache_item **) CALLOC(size, sizeof(*items));
+   if (!items)
+      return;
+
+   for (i = 0; i < cache->size; i++)
+      for (c = cache->items[i]; c; c = next) {
+         next = c->next;
+         c->next = items[c->hash % size];
+         items[c->hash % size] = c;
+      }
+
+   FREE(cache->items);
+   cache->items = items;
+   cache->size = size;
+}
+
+
+/**
+ * Look `key` up in the cache.
+ *
+ * On a hit, *offset_return receives the item's pool offset and, when
+ * aux_return is non-NULL, *(void **)aux_return is pointed at the aux data
+ * stored directly after the key.  On a miss *offset_return is left
+ * untouched.
+ *
+ * The cache's dirty bit is raised on a miss, or whenever the offset
+ * differs from the one most recently returned for this cache.
+ */
+boolean brw_search_cache( struct brw_cache *cache,
+                          const void *key,
+                          unsigned key_size,
+                          void *aux_return,
+                          unsigned *offset_return)
+{
+   struct brw_cache_item *item;
+   unsigned addr = 0;
+   unsigned hash = hash_key(key, key_size);
+
+   item = search_cache(cache, hash, key, key_size);
+
+   if (item) {
+      if (aux_return)
+         *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+
+      *offset_return = addr = item->offset;
+   }
+
+   if (item == NULL || addr != cache->last_addr) {
+      cache->brw->state.dirty.cache |= 1<<cache->id;
+      cache->last_addr = addr;
+   }
+
+   return item != NULL;
+}
+
+/**
+ * Upload `data` into the cache's pool and record it under `key`.
+ *
+ * The key and cache->aux_size bytes of `aux` are copied into a single
+ * allocation owned by the new item; aux_return, when non-NULL, is pointed
+ * at the aux part of that copy.
+ *
+ * \return offset of the uploaded data within the pool buffer.
+ *
+ * Running out of pool space or heap memory is fatal: a message is
+ * printed and the process exits.
+ */
+unsigned brw_upload_cache( struct brw_cache *cache,
+                           const void *key,
+                           unsigned key_size,
+                           const void *data,
+                           unsigned data_size,
+                           const void *aux,
+                           void *aux_return )
+{
+   unsigned offset;
+   struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
+   unsigned hash = hash_key(key, key_size);
+   /* char * so that the offset arithmetic below is standard C. */
+   char *tmp = MALLOC(key_size + cache->aux_size);
+
+   /* A zero-byte request may legitimately yield NULL. */
+   if (!item || (!tmp && key_size + cache->aux_size != 0)) {
+      debug_printf("%s: out of memory\n", __FUNCTION__);
+      exit(1);
+   }
+
+   if (!brw_pool_alloc(cache->pool, data_size, 1 << 6, &offset)) {
+      /* Should not be possible:
+       */
+      debug_printf("brw_pool_alloc failed\n");
+      exit(1);
+   }
+
+   memcpy(tmp, key, key_size);
+
+   if (cache->aux_size)
+      memcpy(tmp + key_size, aux, cache->aux_size);
+
+   item->key = tmp;
+   item->hash = hash;
+   item->key_size = key_size;
+   item->offset = offset;
+   item->data_size = data_size;
+
+   if (++cache->n_items > cache->size * 1.5)
+      rehash(cache);
+
+   hash %= cache->size;
+   item->next = cache->items[hash];
+   cache->items[hash] = item;
+
+   if (aux_return) {
+      assert(cache->aux_size);
+      *(void **)aux_return = (void *)((char *)item->key + item->key_size);
+   }
+
+   if (BRW_DEBUG & DEBUG_STATE)
+      debug_printf("upload %s: %d bytes to pool buffer %p offset %x\n",
+                   cache->name,
+                   data_size,
+                   (void*)cache->pool->buffer,
+                   offset);
+
+   /* Copy data to the buffer:
+    */
+   cache->brw->winsys->buffer_subdata_typed(cache->brw->winsys,
+                                            cache->pool->buffer,
+                                            offset,
+                                            data_size,
+                                            data,
+                                            cache->id);
+
+   cache->brw->state.dirty.cache |= 1<<cache->id;
+   cache->last_addr = offset;
+
+   return offset;
+}
+
+/* This doesn't really work with aux data.
Use search/upload instead
+ */
+unsigned brw_cache_data_sz(struct brw_cache *cache,
+                           const void *data,
+                           unsigned data_size)
+{
+   unsigned addr;
+
+   /* The data serves as its own key. */
+   if (!brw_search_cache(cache, data, data_size, NULL, &addr)) {
+      addr = brw_upload_cache(cache,
+                              data, data_size,
+                              data, data_size,
+                              NULL, NULL);
+   }
+
+   return addr;
+}
+
+/* As brw_cache_data_sz(), using the key size the cache was created with. */
+unsigned brw_cache_data(struct brw_cache *cache,
+                        const void *data)
+{
+   return brw_cache_data_sz(cache, data, cache->key_size);
+}
+
+/* Which memory pool a cache allocates its uploads from. */
+enum pool_type {
+   DW_SURFACE_STATE,
+   DW_GENERAL_STATE
+};
+
+/* Set up brw->cache[id] as an empty cache with seven buckets. */
+static void brw_init_cache( struct brw_context *brw,
+                            const char *name,
+                            unsigned id,
+                            unsigned key_size,
+                            unsigned aux_size,
+                            enum pool_type pool_type)
+{
+   struct brw_cache *cache = &brw->cache[id];
+   cache->brw = brw;
+   cache->id = id;
+   cache->name = name;
+
+   cache->size = 7;
+   cache->n_items = 0;
+
+   /* The table holds bucket head pointers, not items. */
+   cache->items = (struct brw_cache_item **)
+      CALLOC(cache->size, sizeof(*cache->items));
+   assert(cache->items);
+
+
+   cache->key_size = key_size;
+   cache->aux_size = aux_size;
+   switch (pool_type) {
+   case DW_GENERAL_STATE: cache->pool = &brw->pool[BRW_GS_POOL]; break;
+   case DW_SURFACE_STATE: cache->pool = &brw->pool[BRW_SS_POOL]; break;
+   default: assert(0); break;
+   }
+}
+
+/* Create every state cache.  The pools must already be initialised
+ * because each cache stores a pointer to the pool it uses.
+ */
+void brw_init_caches( struct brw_context *brw )
+{
+   static const struct {
+      const char *name;
+      unsigned id;
+      unsigned key_size;
+      unsigned aux_size;
+      enum pool_type pool_type;
+   } layout[] = {
+      { "CC_VP", BRW_CC_VP,
+        sizeof(struct brw_cc_viewport), 0, DW_GENERAL_STATE },
+      { "CC_UNIT", BRW_CC_UNIT,
+        sizeof(struct brw_cc_unit_state), 0, DW_GENERAL_STATE },
+      { "WM_PROG", BRW_WM_PROG,
+        sizeof(struct brw_wm_prog_key),
+        sizeof(struct brw_wm_prog_data), DW_GENERAL_STATE },
+      { "SAMPLER_DEFAULT_COLOR", BRW_SAMPLER_DEFAULT_COLOR,
+        sizeof(struct brw_sampler_default_color), 0, DW_GENERAL_STATE },
+      /* variable key/data size */
+      { "SAMPLER", BRW_SAMPLER,
+        0, 0, DW_GENERAL_STATE },
+      { "WM_UNIT", BRW_WM_UNIT,
+        sizeof(struct brw_wm_unit_state), 0, DW_GENERAL_STATE },
+      { "SF_PROG", BRW_SF_PROG,
+        sizeof(struct brw_sf_prog_key),
+        sizeof(struct brw_sf_prog_data), DW_GENERAL_STATE },
+      { "SF_VP", BRW_SF_VP,
+        sizeof(struct brw_sf_viewport), 0, DW_GENERAL_STATE },
+      { "SF_UNIT", BRW_SF_UNIT,
+        sizeof(struct brw_sf_unit_state), 0, DW_GENERAL_STATE },
+      { "VS_UNIT", BRW_VS_UNIT,
+        sizeof(struct brw_vs_unit_state), 0, DW_GENERAL_STATE },
+      { "VS_PROG", BRW_VS_PROG,
+        sizeof(struct brw_vs_prog_key),
+        sizeof(struct brw_vs_prog_data), DW_GENERAL_STATE },
+      { "CLIP_UNIT", BRW_CLIP_UNIT,
+        sizeof(struct brw_clip_unit_state), 0, DW_GENERAL_STATE },
+      { "CLIP_PROG", BRW_CLIP_PROG,
+        sizeof(struct brw_clip_prog_key),
+        sizeof(struct brw_clip_prog_data), DW_GENERAL_STATE },
+      { "GS_UNIT", BRW_GS_UNIT,
+        sizeof(struct brw_gs_unit_state), 0, DW_GENERAL_STATE },
+      { "GS_PROG", BRW_GS_PROG,
+        sizeof(struct brw_gs_prog_key),
+        sizeof(struct brw_gs_prog_data), DW_GENERAL_STATE },
+      { "SS_SURFACE", BRW_SS_SURFACE,
+        sizeof(struct brw_surface_state), 0, DW_SURFACE_STATE },
+      { "SS_SURF_BIND", BRW_SS_SURF_BIND,
+        sizeof(struct brw_surface_binding_table), 0, DW_SURFACE_STATE },
+   };
+   unsigned i;
+
+   for (i = 0; i < sizeof(layout) / sizeof(layout[0]); i++)
+      brw_init_cache(brw,
+                     layout[i].name,
+                     layout[i].id,
+                     layout[i].key_size,
+                     layout[i].aux_size,
+                     layout[i].pool_type);
+}
+
+
+/* When we lose hardware context, need to invalidate the surface cache
+ * as these structs must be explicitly re-uploaded. They are subject
+ * to fixup by the memory manager as they contain absolute agp
+ * offsets, so we need to ensure there is a fresh version of the
+ * struct available to receive the fixup.
+ *
+ * XXX: Need to ensure that there aren't two versions of a surface or
+ * bufferobj with different backing data active in the same buffer at
+ * once? Otherwise the cache could confuse them. Maybe better not to
+ * cache at all?
+ *
+ * --> Isn't this the same as saying need to ensure batch is flushed
+ *     before new data is uploaded to an existing buffer? We
+ *     already try to make sure of that.
+ */
+static void clear_cache( struct brw_cache *cache )
+{
+   struct brw_cache_item *c, *next;
+   unsigned i;
+
+   /* Nothing to do for a cache whose table was never created or has
+    * already been released by brw_destroy_caches().
+    */
+   if (!cache->items)
+      return;
+
+   for (i = 0; i < cache->size; i++) {
+      for (c = cache->items[i]; c; c = next) {
+         next = c->next;
+         /* Items and keys come from CALLOC_STRUCT/MALLOC, so release
+          * them with the matching FREE.
+          */
+         FREE((void *)c->key);
+         FREE(c);
+      }
+      cache->items[i] = NULL;
+   }
+
+   cache->n_items = 0;
+}
+
+/* Empty every cache, drop the saved curbe buffer and mark all state
+ * dirty so that it is regenerated and uploaded again.
+ */
+void brw_clear_all_caches( struct brw_context *brw )
+{
+   int i;
+
+   if (BRW_DEBUG & DEBUG_STATE)
+      debug_printf("%s\n", __FUNCTION__);
+
+   for (i = 0; i < BRW_MAX_CACHE; i++)
+      clear_cache(&brw->cache[i]);
+
+   if (brw->curbe.last_buf) {
+      FREE(brw->curbe.last_buf);
+      brw->curbe.last_buf = NULL;
+   }
+
+   brw->state.dirty.brw |= ~0;
+   brw->state.dirty.cache |= ~0;
+}
+
+
+
+
+
+/* Release all cache items and the bucket tables themselves.  The caches
+ * must not be searched or uploaded to afterwards; clearing them again is
+ * harmless.
+ */
+void brw_destroy_caches( struct brw_context *brw )
+{
+   unsigned i;
+
+   for (i = 0; i < BRW_MAX_CACHE; i++) {
+      struct brw_cache *cache = &brw->cache[i];
+
+      clear_cache(cache);
+
+      if (cache->items) {
+         FREE(cache->items);
+         cache->items = NULL;
+      }
+      cache->size = 0;
+   }
+}
diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c
new file mode 100644
index 0000000000..007dc8f9de
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_state_pool.c
@@ -0,0 +1,138 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +/** @file brw_state_pool.c + * Implements the state pool allocator. + * + * For the 965, we create two state pools for state cache entries. Objects + * will be allocated into the pools depending on which state base address + * their pointer is relative to in other 965 state. + * + * The state pools are relatively simple: As objects are allocated, increment + * the offset to allocate space. When the pool is "full" (rather, close to + * full), we reset the pool and reset the state cache entries that point into + * the pool. 
+ */
+
+#include "pipe/p_winsys.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipe/p_inlines.h"
+#include "brw_context.h"
+#include "brw_state.h"
+
+/**
+ * Reserve space in a pool.
+ *
+ * \param pool           pool to allocate from
+ * \param size           bytes wanted; rounded up to a multiple of four
+ * \param alignment      alignment of the returned offset
+ * \param offset_return  receives the offset of the reservation
+ * \return TRUE on success.
+ *
+ * Exhausting the pool is treated as fatal: a message is printed and the
+ * process exits with a failure status, so FALSE is never returned.
+ */
+boolean brw_pool_alloc( struct brw_mem_pool *pool,
+                        unsigned size,
+                        unsigned alignment,
+                        unsigned *offset_return)
+{
+   unsigned fixup = align(pool->offset, alignment) - pool->offset;
+
+   size = align(size, 4);
+
+   if (pool->offset + fixup + size >= pool->size) {
+      debug_printf("%s failed\n", __FUNCTION__);
+      assert(0);
+      /* Report the failure to the parent process as a failure. */
+      exit(1);
+   }
+
+   pool->offset += fixup;
+   *offset_return = pool->offset;
+   pool->offset += size;
+
+   return TRUE;
+}
+
+/* Rewind the pool to empty and drop every cache entry, since the cached
+ * offsets point into space that will now be reused.
+ */
+static
+void brw_invalidate_pool( struct brw_mem_pool *pool )
+{
+   if (BRW_DEBUG & DEBUG_STATE)
+      debug_printf("\n\n\n %s \n\n\n", __FUNCTION__);
+
+   pool->offset = 0;
+
+   brw_clear_all_caches(pool->brw);
+}
+
+
+/* Create the buffer backing pool `pool_id` and mark the pool empty. */
+static void brw_init_pool( struct brw_context *brw,
+                           unsigned pool_id,
+                           unsigned size )
+{
+   struct brw_mem_pool *pool = &brw->pool[pool_id];
+
+   pool->size = size;
+   pool->offset = 0;
+   pool->brw = brw;
+
+   pool->buffer = pipe_buffer_create(brw->pipe.screen,
+                                     4096,
+                                     0 /* DRM_BO_FLAG_MEM_TT */,
+                                     size);
+}
+
+/* Drop the pool's reference to its backing buffer. */
+static void brw_destroy_pool( struct brw_context *brw,
+                              unsigned pool_id )
+{
+   struct brw_mem_pool *pool = &brw->pool[pool_id];
+
+   pipe_buffer_reference( pool->brw->pipe.screen,
+                          &pool->buffer,
+                          NULL );
+}
+
+
+/* Raise BRW_NEW_SCENE once more than three quarters of the pool is in
+ * use.
+ */
+void brw_pool_check_wrap( struct brw_context *brw,
+                          struct brw_mem_pool *pool )
+{
+   if (pool->offset > (pool->size * 3) / 4) {
+      brw->state.dirty.brw |= BRW_NEW_SCENE;
+   }
+
+}
+
+void brw_init_pools( struct brw_context *brw )
+{
+   brw_init_pool(brw, BRW_GS_POOL, 0x80000);
+   brw_init_pool(brw, BRW_SS_POOL, 0x80000);
+}
+
+void brw_destroy_pools( struct brw_context *brw )
+{
+   brw_destroy_pool(brw, BRW_GS_POOL);
+   brw_destroy_pool(brw, BRW_SS_POOL);
+}
+
+
+void brw_invalidate_pools( struct brw_context *brw )
+{
+   brw_invalidate_pool(&brw->pool[BRW_GS_POOL]);
+   brw_invalidate_pool(&brw->pool[BRW_SS_POOL]);
+}
diff --git
a/src/gallium/drivers/i965simple/brw_state_upload.c b/src/gallium/drivers/i965simple/brw_state_upload.c new file mode 100644 index 0000000000..bac9161b5f --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_state_upload.c @@ -0,0 +1,202 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_state.h" + +#include "util/u_memory.h" + +/* This is used to initialize brw->state.atoms[]. 
We could use this
+ * list directly except for a single atom, brw_constant_buffer, which
+ * has a .dirty value which changes according to the parameters of the
+ * current fragment and vertex programs, and so cannot be a static
+ * value.
+ */
+const struct brw_tracked_state *atoms[] =
+{
+   &brw_vs_prog,
+   &brw_gs_prog,
+   &brw_clip_prog,
+   &brw_sf_prog,
+   &brw_wm_prog,
+
+   /* With every program built, the required urb entry sizes are known,
+    * so this is the point at which a new urb layout can be chosen.
+    */
+   &brw_curbe_offsets,
+   &brw_recalculate_urb_fence,
+
+
+   &brw_cc_vp,
+   &brw_cc_unit,
+
+   &brw_wm_surfaces, /* must do before samplers */
+   &brw_wm_samplers,
+
+   &brw_wm_unit,
+   &brw_sf_vp,
+   &brw_sf_unit,
+   &brw_vs_unit, /* always required, enabled or not */
+   &brw_clip_unit,
+   &brw_gs_unit,
+
+   /* Command packets:
+    */
+   &brw_invarient_state,
+   &brw_state_base_address,
+   &brw_pipe_control,
+
+   &brw_binding_table_pointers,
+   &brw_blend_constant_color,
+
+   &brw_drawing_rect,
+   &brw_depthbuffer,
+
+   &brw_polygon_stipple,
+   &brw_line_stipple,
+
+   &brw_psp_urb_cbs,
+
+   &brw_constant_buffer
+};
+
+
+/* Create the pools and caches and flag all brw state as dirty. */
+void brw_init_state( struct brw_context *brw )
+{
+   brw_init_pools(brw);
+   brw_init_caches(brw);
+
+   brw->state.dirty.brw = ~0;
+   brw->emit_state_always = 0;
+}
+
+
+/* Tear down in order: caches, batch cache, pools. */
+void brw_destroy_state( struct brw_context *brw )
+{
+   brw_destroy_caches(brw);
+   brw_destroy_batch_cache(brw);
+   brw_destroy_pools(brw);
+}
+
+/***********************************************************************
+ */
+
+/* True when the two flag sets share any brw bit or any cache bit. */
+static boolean check_state( const struct brw_state_flags *a,
+                            const struct brw_state_flags *b )
+{
+   if (a->brw & b->brw)
+      return 1;
+
+   if (a->cache & b->cache)
+      return 1;
+
+   return 0;
+}
+
+/* a |= b, field by field. */
+static void accumulate_state( struct brw_state_flags *a,
+                              const struct brw_state_flags *b )
+{
+   a->cache |= b->cache;
+   a->brw |= b->brw;
+}
+
+
+/* result = a ^ b, field by field. */
+static void xor_states( struct brw_state_flags *result,
+                        const struct brw_state_flags *a,
+                        const struct brw_state_flags *b )
+{
+   result->cache = a->cache ^ b->cache;
+   result->brw = a->brw ^ b->brw;
+}
+
+
+/***********************************************************************
+ * Emit all state:
+ */
+void brw_validate_state( struct brw_context *brw )
+{
+   struct brw_state_flags *dirty = &brw->state.dirty;
+   struct brw_state_flags examined, prev;
+   boolean sanity_checks;
+   unsigned n;
+
+   if (brw->emit_state_always)
+      dirty->brw |= ~0;
+
+   /* Nothing changed since the last validation. */
+   if (dirty->cache == 0 &&
+       dirty->brw == 0)
+      return;
+
+   if (brw->state.dirty.brw & BRW_NEW_SCENE)
+      brw_clear_batch_cache_flush(brw);
+
+   /* With debugging enabled the loop also verifies atom ordering: an
+    * atom must not raise a flag that an earlier atom in the list has
+    * already examined.
+    */
+   sanity_checks = BRW_DEBUG ? 1 : 0;
+   memset(&examined, 0, sizeof(examined));
+   prev = *dirty;
+
+   for (n = 0; n < Elements(atoms); n++) {
+      const struct brw_tracked_state *atom = atoms[n];
+
+      assert(atom->dirty.brw ||
+             atom->dirty.cache);
+      assert(atom->update);
+
+      if (check_state(dirty, &atom->dirty))
+         atom->update( brw );
+
+      if (sanity_checks) {
+         struct brw_state_flags generated;
+
+         accumulate_state(&examined, &atom->dirty);
+
+         /* generated = flags that changed while this atom ran */
+         xor_states(&generated, &prev, dirty);
+         assert(!check_state(&examined, &generated));
+         prev = *dirty;
+      }
+   }
+
+   memset(dirty, 0, sizeof(*dirty));
+}
diff --git a/src/gallium/drivers/i965simple/brw_structs.h b/src/gallium/drivers/i965simple/brw_structs.h
new file mode 100644
index 0000000000..bbb087e95d
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_structs.h
@@ -0,0 +1,1348 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_STRUCTS_H +#define BRW_STRUCTS_H + +#include "pipe/p_compiler.h" + +/* Command packets: + */ +struct header +{ + unsigned length:16; + unsigned opcode:16; +}; + + +union header_union +{ + struct header bits; + unsigned dword; +}; + +struct brw_3d_control +{ + struct + { + unsigned length:8; + unsigned notify_enable:1; + unsigned pad:3; + unsigned wc_flush_enable:1; + unsigned depth_stall_enable:1; + unsigned operation:2; + unsigned opcode:16; + } header; + + struct + { + unsigned pad:2; + unsigned dest_addr_type:1; + unsigned dest_addr:29; + } dest; + + unsigned dword2; + unsigned dword3; +}; + + +struct brw_3d_primitive +{ + struct + { + unsigned length:8; + unsigned pad:2; + unsigned topology:5; + unsigned indexed:1; + unsigned opcode:16; + } header; + + unsigned verts_per_instance; + unsigned start_vert_location; + unsigned instance_count; + unsigned start_instance_location; + unsigned base_vert_location; +}; + +/* These seem to be passed around as function args, so it works out + * better to keep them as #defines: + */ +#define BRW_FLUSH_READ_CACHE 0x1 +#define BRW_FLUSH_STATE_CACHE 0x2 +#define BRW_INHIBIT_FLUSH_RENDER_CACHE 0x4 +#define BRW_FLUSH_SNAPSHOT_COUNTERS 0x8 + +struct brw_mi_flush +{ + unsigned flags:4; + unsigned pad:12; + unsigned opcode:16; +}; + +struct brw_vf_statistics +{ + unsigned statistics_enable:1; + unsigned pad:15; + unsigned opcode:16; +}; + + + +struct brw_binding_table_pointers +{ + struct header header; + unsigned vs; + unsigned gs; + unsigned clp; + unsigned sf; + unsigned wm; +}; + + +struct brw_blend_constant_color +{ + struct header header; + float blend_constant_color[4]; +}; + + +struct brw_depthbuffer +{ + union header_union header; + + union { + struct { + unsigned pitch:18; + unsigned format:3; + unsigned pad:4; + unsigned depth_offset_disable:1; + unsigned 
tile_walk:1; + unsigned tiled_surface:1; + unsigned pad2:1; + unsigned surface_type:3; + } bits; + unsigned dword; + } dword1; + + unsigned dword2_base_addr; + + union { + struct { + unsigned pad:1; + unsigned mipmap_layout:1; + unsigned lod:4; + unsigned width:13; + unsigned height:13; + } bits; + unsigned dword; + } dword3; + + union { + struct { + unsigned pad:12; + unsigned min_array_element:9; + unsigned depth:11; + } bits; + unsigned dword; + } dword4; +}; + +struct brw_drawrect +{ + struct header header; + unsigned xmin:16; + unsigned ymin:16; + unsigned xmax:16; + unsigned ymax:16; + unsigned xorg:16; + unsigned yorg:16; +}; + + + + +struct brw_global_depth_offset_clamp +{ + struct header header; + float depth_offset_clamp; +}; + +struct brw_indexbuffer +{ + union { + struct + { + unsigned length:8; + unsigned index_format:2; + unsigned cut_index_enable:1; + unsigned pad:5; + unsigned opcode:16; + } bits; + unsigned dword; + + } header; + + unsigned buffer_start; + unsigned buffer_end; +}; + + +struct brw_line_stipple +{ + struct header header; + + struct + { + unsigned pattern:16; + unsigned pad:16; + } bits0; + + struct + { + unsigned repeat_count:9; + unsigned pad:7; + unsigned inverse_repeat_count:16; + } bits1; +}; + + +struct brw_pipelined_state_pointers +{ + struct header header; + + struct { + unsigned pad:5; + unsigned offset:27; + } vs; + + struct + { + unsigned enable:1; + unsigned pad:4; + unsigned offset:27; + } gs; + + struct + { + unsigned enable:1; + unsigned pad:4; + unsigned offset:27; + } clp; + + struct + { + unsigned pad:5; + unsigned offset:27; + } sf; + + struct + { + unsigned pad:5; + unsigned offset:27; + } wm; + + struct + { + unsigned pad:5; + unsigned offset:27; /* KW: check me! 
*/ + } cc; +}; + + +struct brw_polygon_stipple_offset +{ + struct header header; + + struct { + unsigned y_offset:5; + unsigned pad:3; + unsigned x_offset:5; + unsigned pad0:19; + } bits0; +}; + + + +struct brw_polygon_stipple +{ + struct header header; + unsigned stipple[32]; +}; + + + +struct brw_pipeline_select +{ + struct + { + unsigned pipeline_select:1; + unsigned pad:15; + unsigned opcode:16; + } header; +}; + + +struct brw_pipe_control +{ + struct + { + unsigned length:8; + unsigned notify_enable:1; + unsigned pad:2; + unsigned instruction_state_cache_flush_enable:1; + unsigned write_cache_flush_enable:1; + unsigned depth_stall_enable:1; + unsigned post_sync_operation:2; + + unsigned opcode:16; + } header; + + struct + { + unsigned pad:2; + unsigned dest_addr_type:1; + unsigned dest_addr:29; + } bits1; + + unsigned data0; + unsigned data1; +}; + + +struct brw_urb_fence +{ + struct + { + unsigned length:8; + unsigned vs_realloc:1; + unsigned gs_realloc:1; + unsigned clp_realloc:1; + unsigned sf_realloc:1; + unsigned vfe_realloc:1; + unsigned cs_realloc:1; + unsigned pad:2; + unsigned opcode:16; + } header; + + struct + { + unsigned vs_fence:10; + unsigned gs_fence:10; + unsigned clp_fence:10; + unsigned pad:2; + } bits0; + + struct + { + unsigned sf_fence:10; + unsigned vf_fence:10; + unsigned cs_fence:10; + unsigned pad:2; + } bits1; +}; + +struct brw_constant_buffer_state /* previously brw_command_streamer */ +{ + struct header header; + + struct + { + unsigned nr_urb_entries:3; + unsigned pad:1; + unsigned urb_entry_size:5; + unsigned pad0:23; + } bits0; +}; + +struct brw_constant_buffer +{ + struct + { + unsigned length:8; + unsigned valid:1; + unsigned pad:7; + unsigned opcode:16; + } header; + + struct + { + unsigned buffer_length:6; + unsigned buffer_address:26; + } bits0; +}; + +struct brw_state_base_address +{ + struct header header; + + struct + { + unsigned modify_enable:1; + unsigned pad:4; + unsigned general_state_address:27; + } bits0; + + 
struct + { + unsigned modify_enable:1; + unsigned pad:4; + unsigned surface_state_address:27; + } bits1; + + struct + { + unsigned modify_enable:1; + unsigned pad:4; + unsigned indirect_object_state_address:27; + } bits2; + + struct + { + unsigned modify_enable:1; + unsigned pad:11; + unsigned general_state_upper_bound:20; + } bits3; + + struct + { + unsigned modify_enable:1; + unsigned pad:11; + unsigned indirect_object_state_upper_bound:20; + } bits4; +}; + +struct brw_state_prefetch +{ + struct header header; + + struct + { + unsigned prefetch_count:3; + unsigned pad:3; + unsigned prefetch_pointer:26; + } bits0; +}; + +struct brw_system_instruction_pointer +{ + struct header header; + + struct + { + unsigned pad:4; + unsigned system_instruction_pointer:28; + } bits0; +}; + + + + +/* State structs for the various fixed function units: + */ + + +struct thread0 +{ + unsigned pad0:1; + unsigned grf_reg_count:3; + unsigned pad1:2; + unsigned kernel_start_pointer:26; +}; + +struct thread1 +{ + unsigned ext_halt_exception_enable:1; + unsigned sw_exception_enable:1; + unsigned mask_stack_exception_enable:1; + unsigned timeout_exception_enable:1; + unsigned illegal_op_exception_enable:1; + unsigned pad0:3; + unsigned depth_coef_urb_read_offset:6; /* WM only */ + unsigned pad1:2; + unsigned floating_point_mode:1; + unsigned thread_priority:1; + unsigned binding_table_entry_count:8; + unsigned pad3:5; + unsigned single_program_flow:1; +}; + +struct thread2 +{ + unsigned per_thread_scratch_space:4; + unsigned pad0:6; + unsigned scratch_space_base_pointer:22; +}; + + +struct thread3 +{ + unsigned dispatch_grf_start_reg:4; + unsigned urb_entry_read_offset:6; + unsigned pad0:1; + unsigned urb_entry_read_length:6; + unsigned pad1:1; + unsigned const_urb_entry_read_offset:6; + unsigned pad2:1; + unsigned const_urb_entry_read_length:6; + unsigned pad3:1; +}; + + + +struct brw_clip_unit_state +{ + struct thread0 thread0; + struct + { + unsigned pad0:7; + unsigned 
sw_exception_enable:1; + unsigned pad1:3; + unsigned mask_stack_exception_enable:1; + unsigned pad2:1; + unsigned illegal_op_exception_enable:1; + unsigned pad3:2; + unsigned floating_point_mode:1; + unsigned thread_priority:1; + unsigned binding_table_entry_count:8; + unsigned pad4:5; + unsigned single_program_flow:1; + } thread1; + + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:9; + unsigned gs_output_stats:1; /* not always */ + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:1; /* may be less */ + unsigned pad3:6; + } thread4; + + struct + { + unsigned pad0:13; + unsigned clip_mode:3; + unsigned userclip_enable_flags:8; + unsigned userclip_must_clip:1; + unsigned pad1:1; + unsigned guard_band_enable:1; + unsigned viewport_z_clip_enable:1; + unsigned viewport_xy_clip_enable:1; + unsigned vertex_position_space:1; + unsigned api_mode:1; + unsigned pad2:1; + } clip5; + + struct + { + unsigned pad0:5; + unsigned clipper_viewport_state_ptr:27; + } clip6; + + + float viewport_xmin; + float viewport_xmax; + float viewport_ymin; + float viewport_ymax; +}; + + + +struct brw_cc_unit_state +{ + struct + { + unsigned pad0:3; + unsigned bf_stencil_pass_depth_pass_op:3; + unsigned bf_stencil_pass_depth_fail_op:3; + unsigned bf_stencil_fail_op:3; + unsigned bf_stencil_func:3; + unsigned bf_stencil_enable:1; + unsigned pad1:2; + unsigned stencil_write_enable:1; + unsigned stencil_pass_depth_pass_op:3; + unsigned stencil_pass_depth_fail_op:3; + unsigned stencil_fail_op:3; + unsigned stencil_func:3; + unsigned stencil_enable:1; + } cc0; + + + struct + { + unsigned bf_stencil_ref:8; + unsigned stencil_write_mask:8; + unsigned stencil_test_mask:8; + unsigned stencil_ref:8; + } cc1; + + + struct + { + unsigned logicop_enable:1; + unsigned pad0:10; + unsigned depth_write_enable:1; + unsigned depth_test_function:3; + unsigned depth_test:1; + 
unsigned bf_stencil_write_mask:8; + unsigned bf_stencil_test_mask:8; + } cc2; + + + struct + { + unsigned pad0:8; + unsigned alpha_test_func:3; + unsigned alpha_test:1; + unsigned blend_enable:1; + unsigned ia_blend_enable:1; + unsigned pad1:1; + unsigned alpha_test_format:1; + unsigned pad2:16; + } cc3; + + struct + { + unsigned pad0:5; + unsigned cc_viewport_state_offset:27; + } cc4; + + struct + { + unsigned pad0:2; + unsigned ia_dest_blend_factor:5; + unsigned ia_src_blend_factor:5; + unsigned ia_blend_function:3; + unsigned statistics_enable:1; + unsigned logicop_func:4; + unsigned pad1:11; + unsigned dither_enable:1; + } cc5; + + struct + { + unsigned clamp_post_alpha_blend:1; + unsigned clamp_pre_alpha_blend:1; + unsigned clamp_range:2; + unsigned pad0:11; + unsigned y_dither_offset:2; + unsigned x_dither_offset:2; + unsigned dest_blend_factor:5; + unsigned src_blend_factor:5; + unsigned blend_function:3; + } cc6; + + struct { + union { + float f; + ubyte ub[4]; + } alpha_ref; + } cc7; +}; + + + +struct brw_sf_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:10; + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:6; + unsigned pad3:1; + } thread4; + + struct + { + unsigned front_winding:1; + unsigned viewport_transform:1; + unsigned pad0:3; + unsigned sf_viewport_state_offset:27; + } sf5; + + struct + { + unsigned pad0:9; + unsigned dest_org_vbias:4; + unsigned dest_org_hbias:4; + unsigned scissor:1; + unsigned disable_2x2_trifilter:1; + unsigned disable_zero_pix_trifilter:1; + unsigned point_rast_rule:2; + unsigned line_endcap_aa_region_width:2; + unsigned line_width:4; + unsigned fast_scissor_disable:1; + unsigned cull_mode:2; + unsigned aa_enable:1; + } sf6; + + struct + { + unsigned point_size:11; + unsigned use_point_size_state:1; + unsigned 
subpixel_precision:1; + unsigned sprite_point:1; + unsigned pad0:11; + unsigned trifan_pv:2; + unsigned linestrip_pv:2; + unsigned tristrip_pv:2; + unsigned line_last_pixel_enable:1; + } sf7; + +}; + + +struct brw_gs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:10; + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:1; + unsigned pad3:6; + } thread4; + + struct + { + unsigned sampler_count:3; + unsigned pad0:2; + unsigned sampler_state_pointer:27; + } gs5; + + + struct + { + unsigned max_vp_index:4; + unsigned pad0:26; + unsigned reorder_enable:1; + unsigned pad1:1; + } gs6; +}; + + +struct brw_vs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned pad0:10; + unsigned stats_enable:1; + unsigned nr_urb_entries:7; + unsigned pad1:1; + unsigned urb_entry_allocation_size:5; + unsigned pad2:1; + unsigned max_threads:4; + unsigned pad3:3; + } thread4; + + struct + { + unsigned sampler_count:3; + unsigned pad0:2; + unsigned sampler_state_pointer:27; + } vs5; + + struct + { + unsigned vs_enable:1; + unsigned vert_cache_disable:1; + unsigned pad0:30; + } vs6; +}; + + +struct brw_wm_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct { + unsigned stats_enable:1; + unsigned pad0:1; + unsigned sampler_count:3; + unsigned sampler_state_pointer:27; + } wm4; + + struct + { + unsigned enable_8_pix:1; + unsigned enable_16_pix:1; + unsigned enable_32_pix:1; + unsigned pad0:7; + unsigned legacy_global_depth_bias:1; + unsigned line_stipple:1; + unsigned depth_offset:1; + unsigned polygon_stipple:1; + unsigned line_aa_region_width:2; + unsigned line_endcap_aa_region_width:2; + unsigned early_depth_test:1; + 
unsigned thread_dispatch_enable:1; + unsigned program_uses_depth:1; + unsigned program_computes_depth:1; + unsigned program_uses_killpixel:1; + unsigned legacy_line_rast: 1; + unsigned pad1:1; + unsigned max_threads:6; + unsigned pad2:1; + } wm5; + + float global_depth_offset_constant; + float global_depth_offset_scale; +}; + +struct brw_sampler_default_color { + float color[4]; +}; + +struct brw_sampler_state +{ + + struct + { + unsigned shadow_function:3; + unsigned lod_bias:11; + unsigned min_filter:3; + unsigned mag_filter:3; + unsigned mip_filter:2; + unsigned base_level:5; + unsigned pad:1; + unsigned lod_preclamp:1; + unsigned default_color_mode:1; + unsigned pad0:1; + unsigned disable:1; + } ss0; + + struct + { + unsigned r_wrap_mode:3; + unsigned t_wrap_mode:3; + unsigned s_wrap_mode:3; + unsigned pad:3; + unsigned max_lod:10; + unsigned min_lod:10; + } ss1; + + + struct + { + unsigned pad:5; + unsigned default_color_pointer:27; + } ss2; + + struct + { + unsigned pad:19; + unsigned max_aniso:3; + unsigned chroma_key_mode:1; + unsigned chroma_key_index:2; + unsigned chroma_key_enable:1; + unsigned monochrome_filter_width:3; + unsigned monochrome_filter_height:3; + } ss3; +}; + + +struct brw_clipper_viewport +{ + float xmin; + float xmax; + float ymin; + float ymax; +}; + +struct brw_cc_viewport +{ + float min_depth; + float max_depth; +}; + +struct brw_sf_viewport +{ + struct { + float m00; + float m11; + float m22; + float m30; + float m31; + float m32; + } viewport; + + struct { + short xmin; + short ymin; + short xmax; + short ymax; + } scissor; +}; + +/* Documented in the subsystem/shared-functions/sampler chapter... 
+ */ +struct brw_surface_state +{ + struct { + unsigned cube_pos_z:1; + unsigned cube_neg_z:1; + unsigned cube_pos_y:1; + unsigned cube_neg_y:1; + unsigned cube_pos_x:1; + unsigned cube_neg_x:1; + unsigned pad:4; + unsigned mipmap_layout_mode:1; + unsigned vert_line_stride_ofs:1; + unsigned vert_line_stride:1; + unsigned color_blend:1; + unsigned writedisable_blue:1; + unsigned writedisable_green:1; + unsigned writedisable_red:1; + unsigned writedisable_alpha:1; + unsigned surface_format:9; + unsigned data_return_format:1; + unsigned pad0:1; + unsigned surface_type:3; + } ss0; + + struct { + unsigned base_addr; + } ss1; + + struct { + unsigned pad:2; + unsigned mip_count:4; + unsigned width:13; + unsigned height:13; + } ss2; + + struct { + unsigned tile_walk:1; + unsigned tiled_surface:1; + unsigned pad:1; + unsigned pitch:18; + unsigned depth:11; + } ss3; + + struct { + unsigned pad:19; + unsigned min_array_elt:9; + unsigned min_lod:4; + } ss4; +}; + + + +struct brw_vertex_buffer_state +{ + struct { + unsigned pitch:11; + unsigned pad:15; + unsigned access_type:1; + unsigned vb_index:5; + } vb0; + + unsigned start_addr; + unsigned max_index; +#if 1 + unsigned instance_data_step_rate; /* not included for sequential/random vertices? 
*/ +#endif +}; + +#define BRW_VBP_MAX 17 + +struct brw_vb_array_state { + struct header header; + struct brw_vertex_buffer_state vb[BRW_VBP_MAX]; +}; + + +struct brw_vertex_element_state +{ + struct + { + unsigned src_offset:11; + unsigned pad:5; + unsigned src_format:9; + unsigned pad0:1; + unsigned valid:1; + unsigned vertex_buffer_index:5; + } ve0; + + struct + { + unsigned dst_offset:8; + unsigned pad:8; + unsigned vfcomponent3:4; + unsigned vfcomponent2:4; + unsigned vfcomponent1:4; + unsigned vfcomponent0:4; + } ve1; +}; + +#define BRW_VEP_MAX 18 + +struct brw_vertex_element_packet { + struct header header; + struct brw_vertex_element_state ve[BRW_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */ +}; + + +struct brw_urb_immediate { + unsigned opcode:4; + unsigned offset:6; + unsigned swizzle_control:2; + unsigned pad:1; + unsigned allocate:1; + unsigned used:1; + unsigned complete:1; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; +}; + +/* Instruction format for the execution units: + */ + +struct brw_instruction +{ + struct + { + unsigned opcode:7; + unsigned pad:1; + unsigned access_mode:1; + unsigned mask_control:1; + unsigned dependency_control:2; + unsigned compression_control:2; + unsigned thread_control:2; + unsigned predicate_control:4; + unsigned predicate_inverse:1; + unsigned execution_size:3; + unsigned destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */ + unsigned pad0:2; + unsigned debug_control:1; + unsigned saturate:1; + } header; + + union { + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad:1; + unsigned dest_subreg_nr:5; + unsigned dest_reg_nr:8; + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } da1; + + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned 
src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned pad:6; + int dest_indirect_offset:10; /* offset against the deref'd address reg */ + unsigned dest_subreg_nr:3; /* subnr for the address reg a0.x */ + unsigned dest_horiz_stride:2; + unsigned dest_address_mode:1; + } ia1; + + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned src1_reg_file:2; + unsigned src1_reg_type:3; + unsigned pad0:1; + unsigned dest_writemask:4; + unsigned dest_subreg_nr:1; + unsigned dest_reg_nr:8; + unsigned pad1:2; + unsigned dest_address_mode:1; + } da16; + + struct + { + unsigned dest_reg_file:2; + unsigned dest_reg_type:3; + unsigned src0_reg_file:2; + unsigned src0_reg_type:3; + unsigned pad0:6; + unsigned dest_writemask:4; + int dest_indirect_offset:6; + unsigned dest_subreg_nr:3; + unsigned pad1:2; + unsigned dest_address_mode:1; + } ia16; + } bits1; + + + union { + struct + { + unsigned src0_subreg_nr:5; + unsigned src0_reg_nr:8; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_horiz_stride:2; + unsigned src0_width:3; + unsigned src0_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad:6; + } da1; + + struct + { + int src0_indirect_offset:10; + unsigned src0_subreg_nr:3; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_horiz_stride:2; + unsigned src0_width:3; + unsigned src0_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad:6; + } ia1; + + struct + { + unsigned src0_swz_x:2; + unsigned src0_swz_y:2; + unsigned src0_subreg_nr:1; + unsigned src0_reg_nr:8; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_swz_z:2; + unsigned src0_swz_w:2; + unsigned pad0:1; + unsigned src0_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad1:6; + } da16; + + struct + { + unsigned src0_swz_x:2; + unsigned src0_swz_y:2; + int src0_indirect_offset:6; + unsigned 
src0_subreg_nr:3; + unsigned src0_abs:1; + unsigned src0_negate:1; + unsigned src0_address_mode:1; + unsigned src0_swz_z:2; + unsigned src0_swz_w:2; + unsigned pad0:1; + unsigned src0_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad1:6; + } ia16; + + } bits2; + + union + { + struct + { + unsigned src1_subreg_nr:5; + unsigned src1_reg_nr:8; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad:1; + unsigned src1_horiz_stride:2; + unsigned src1_width:3; + unsigned src1_vert_stride:4; + unsigned pad0:7; + } da1; + + struct + { + unsigned src1_swz_x:2; + unsigned src1_swz_y:2; + unsigned src1_subreg_nr:1; + unsigned src1_reg_nr:8; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad0:1; + unsigned src1_swz_z:2; + unsigned src1_swz_w:2; + unsigned pad1:1; + unsigned src1_vert_stride:4; + unsigned pad2:7; + } da16; + + struct + { + int src1_indirect_offset:10; + unsigned src1_subreg_nr:3; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad0:1; + unsigned src1_horiz_stride:2; + unsigned src1_width:3; + unsigned src1_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad1:6; + } ia1; + + struct + { + unsigned src1_swz_x:2; + unsigned src1_swz_y:2; + int src1_indirect_offset:6; + unsigned src1_subreg_nr:3; + unsigned src1_abs:1; + unsigned src1_negate:1; + unsigned pad0:1; + unsigned src1_swz_z:2; + unsigned src1_swz_w:2; + unsigned pad1:1; + unsigned src1_vert_stride:4; + unsigned flag_reg_nr:1; + unsigned pad2:6; + } ia16; + + + struct + { + int jump_count:16; /* note: signed */ + unsigned pop_count:4; + unsigned pad0:12; + } if_else; + + struct { + unsigned function:4; + unsigned int_type:1; + unsigned precision:1; + unsigned saturate:1; + unsigned data_type:1; + unsigned pad0:8; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } math; + + struct { + unsigned binding_table_index:8; + unsigned sampler:4; + unsigned return_format:2; + unsigned 
msg_type:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } sampler; + + struct brw_urb_immediate urb; + + struct { + unsigned binding_table_index:8; + unsigned msg_control:4; + unsigned msg_type:2; + unsigned target_cache:2; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } dp_read; + + struct { + unsigned binding_table_index:8; + unsigned msg_control:3; + unsigned pixel_scoreboard_clear:1; + unsigned msg_type:3; + unsigned send_commit_msg:1; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } dp_write; + + struct { + unsigned pad:16; + unsigned response_length:4; + unsigned msg_length:4; + unsigned msg_target:4; + unsigned pad1:3; + unsigned end_of_thread:1; + } generic; + + int d; + unsigned ud; + } bits3; +}; + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c new file mode 100644 index 0000000000..b89756c47b --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -0,0 +1,124 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "brw_blit.h"
+#include "brw_context.h"
+#include "brw_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+#include "util/u_tile.h"
+#include "util/u_rect.h"
+
+
+
+/* Copy a width x height rectangle from (srcx, srcy) in 'src' to
+ * (dstx, dsty) in 'dst' by handing it to brw_copy_blit() with
+ * PIPE_LOGICOP_COPY.  'do_flip' is passed straight through to the blit.
+ *
+ * The two surfaces must be distinct and share the same block layout; the
+ * blit path additionally requires 1x1 blocks.  The map/pipe_copy_rect
+ * branch is compiled but disabled ("if (0)").
+ *
+ * Assumes all values are within bounds -- no checking at this level -
+ * do it higher up if required.
+ */
+static void
+brw_surface_copy(struct pipe_context *pipe,
+                 boolean do_flip,
+                 struct pipe_surface *dst,
+                 unsigned dstx, unsigned dsty,
+                 struct pipe_surface *src,
+                 unsigned srcx, unsigned srcy, unsigned width, unsigned height)
+{
+   assert( dst != src );
+   assert( dst->block.size == src->block.size );
+   /* Compare like with like: width against width, height against height. */
+   assert( dst->block.width == src->block.width );
+   assert( dst->block.height == src->block.height );
+
+   if (0) {
+      void *dst_map = pipe->screen->surface_map( pipe->screen,
+                                                 dst,
+                                                 PIPE_BUFFER_USAGE_CPU_WRITE );
+
+      const void *src_map = pipe->screen->surface_map( pipe->screen,
+                                                       src,
+                                                       PIPE_BUFFER_USAGE_CPU_READ );
+
+      pipe_copy_rect(dst_map,
+                     &dst->block,
+                     dst->stride,
+                     dstx, dsty,
+                     width, height,
+                     src_map,
+                     do_flip ? -(int) src->stride : src->stride,
+                     srcx, do_flip ? height - 1 - srcy : srcy);
+
+      pipe->screen->surface_unmap(pipe->screen, src);
+      pipe->screen->surface_unmap(pipe->screen, dst);
+   }
+   else {
+      assert(dst->block.width == 1);
+      assert(dst->block.height == 1);
+      /* Divide before narrowing: a cast binds tighter than '/', so
+       * "(short) stride/size" would truncate the byte stride to 16 bits
+       * first and corrupt the pitch for strides of 32768 bytes or more.
+       */
+      brw_copy_blit(brw_context(pipe),
+                    do_flip,
+                    dst->block.size,
+                    (short) (src->stride / src->block.size), src->buffer, src->offset, FALSE,
+                    (short) (dst->stride / dst->block.size), dst->buffer, dst->offset, FALSE,
+                    (short) srcx, (short) srcy, (short) dstx, (short) dsty,
+                    (short) width, (short) height, PIPE_LOGICOP_COPY);
+   }
+}
+
+
+/* Fill a width x height rectangle at (dstx, dsty) in 'dst' with 'value'
+ * by handing it to brw_fill_blit().  The blit path requires 1x1 blocks;
+ * the map/pipe_fill_rect branch is compiled but disabled ("if (0)").
+ * No bounds checking is done here.
+ */
+static void
+brw_surface_fill(struct pipe_context *pipe,
+                 struct pipe_surface *dst,
+                 unsigned dstx, unsigned dsty,
+                 unsigned width, unsigned height, unsigned value)
+{
+   if (0) {
+      void *dst_map = pipe->screen->surface_map( pipe->screen,
+                                                 dst,
+                                                 PIPE_BUFFER_USAGE_CPU_WRITE );
+
+      pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value);
+
+      pipe->screen->surface_unmap(pipe->screen, dst);
+   }
+   else {
+      assert(dst->block.width == 1);
+      assert(dst->block.height == 1);
+      /* Divide first, then narrow (see brw_surface_copy). */
+      brw_fill_blit(brw_context(pipe),
+                    dst->block.size,
+                    (short) (dst->stride / dst->block.size),
+                    dst->buffer, dst->offset, FALSE,
+                    (short) dstx, (short) dsty,
+                    (short) width, (short) height,
+                    value);
+   }
+}
+
+
+/* Install the surface copy/fill entry points on the context's pipe.
+ */
+void
+brw_init_surface_functions(struct brw_context *brw)
+{
+   brw->pipe.surface_copy  = brw_surface_copy;
+   brw->pipe.surface_fill  = brw_surface_fill;
+}
diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c
new file mode 100644
index 0000000000..cc0c665e02
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_tex_layout.c
@@ -0,0 +1,400 @@
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+/* Code to layout images in a mipmap tree for i965.
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "brw_context.h"
+#include "brw_tex_layout.h"
+
+
+#define FILE_DEBUG_FLAG DEBUG_TEXTURE
+
+#if 0
+unsigned intel_compressed_alignment(unsigned internalFormat)
+{
+    unsigned alignment = 4;
+
+    switch (internalFormat) {
+    case GL_COMPRESSED_RGB_FXT1_3DFX:
+    case GL_COMPRESSED_RGBA_FXT1_3DFX:
+      alignment = 8;
+      break;
+
+    default:
+      break;
+    }
+
+    return alignment;
+}
+#endif
+
+/* Halve a dimension for the next mipmap level, never going below 1.
+ */
+static unsigned minify( unsigned d )
+{
+   return MAX2(1, d>>1);
+}
+
+
+/* Record the byte offset of image 'img' (cube face or 3D slice) within
+ * mipmap level 'level'.  x and y are multiplied by the block size and
+ * the texture stride respectively, so tex->stride must already be set.
+ */
+static void intel_miptree_set_image_offset(struct brw_texture *tex,
+                                           unsigned level,
+                                           unsigned img,
+                                           unsigned x, unsigned y)
+{
+   struct pipe_texture *pt = &tex->base;
+   if (img == 0 && level == 0)
+      assert(x == 0 && y == 0);
+   assert(img < tex->nr_images[level]);
+
+   tex->image_offset[level][img] = y * tex->stride + x * pt->block.size;
+}
+
+/* Record size, block counts and byte offset of mipmap level 'level' and
+ * (re)allocate its per-image offset table with 'nr_images' entries;
+ * entry 0 is initialised to 0.
+ *
+ * Returns FALSE if the offset table cannot be allocated, in which case
+ * tex->image_offset[level] is left NULL.
+ */
+static boolean intel_miptree_set_level_info(struct brw_texture *tex,
+                                            unsigned level,
+                                            unsigned nr_images,
+                                            unsigned x, unsigned y,
+                                            unsigned w, unsigned h, unsigned d)
+{
+   struct pipe_texture *pt = &tex->base;
+
+   assert(level < PIPE_MAX_TEXTURE_LEVELS);
+
+   pt->width[level] = w;
+   pt->height[level] = h;
+   pt->depth[level] = d;
+
+   pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w);
+   pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h);
+
+   tex->level_offset[level] = y * tex->stride + x * tex->base.block.size;
+   tex->nr_images[level] = nr_images;
+
+   /*
+   DBG("%s level %d size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
+       level, w, h, d, x, y, tex->level_offset[level]);
+   */
+
+   /* Not sure when this would happen, but anyway:
+    */
+   if (tex->image_offset[level]) {
+      FREE(tex->image_offset[level]);
+      tex->image_offset[level] = NULL;
+   }
+
+   assert(nr_images);
+   assert(!tex->image_offset[level]);
+
+   tex->image_offset[level] = (unsigned *) MALLOC(nr_images * sizeof(unsigned));
+   if (!tex->image_offset[level])
+      return FALSE;
+
+   tex->image_offset[level][0] = 0;
+   return TRUE;
+}
+
+/* Release every per-level image offset table owned by 'tex'.  Shared by
+ * the creation failure path and by texture release so both use the same
+ * allocator wrapper (FREE) that allocated the tables.
+ */
+static void brw_texture_free_image_offsets(struct brw_texture *tex)
+{
+   unsigned level;
+
+   for (level = 0; level < PIPE_MAX_TEXTURE_LEVELS; level++) {
+      if (tex->image_offset[level]) {
+         FREE(tex->image_offset[level]);
+         tex->image_offset[level] = NULL;
+      }
+   }
+}
+
+/* Lay out a one-image-per-level mipmap tree: levels are stacked
+ * downwards, except that the step after level 1 moves right instead of
+ * down.  Sets tex->stride (bytes) and tex->total_nblocksy.
+ *
+ * Returns FALSE if per-level bookkeeping could not be allocated.
+ */
+static boolean i945_miptree_layout_2d(struct brw_texture *tex)
+{
+   struct pipe_texture *pt = &tex->base;
+   const int align_x = 2, align_y = 4;
+   unsigned level;
+   unsigned x = 0;
+   unsigned y = 0;
+   unsigned width = pt->width[0];
+   unsigned height = pt->height[0];
+   unsigned nblocksx = pt->nblocksx[0];
+   unsigned nblocksy = pt->nblocksy[0];
+
+   tex->stride = align(pt->nblocksx[0] * pt->block.size, 4);
+
+   /* May need to adjust pitch to accommodate the placement of
+    * the 2nd mipmap level.  This occurs when the alignment
+    * constraints of mipmap placement push the right edge of the
+    * 2nd mipmap level out past the width of its parent.
+    */
+   if (pt->last_level > 0) {
+      unsigned mip1_nblocksx
+         = align(pf_get_nblocksx(&pt->block, minify(width)), align_x)
+         + pf_get_nblocksx(&pt->block, minify(minify(width)));
+
+      if (mip1_nblocksx > nblocksx)
+         tex->stride = mip1_nblocksx * pt->block.size;
+   }
+
+   /* Pitch must be a whole number of dwords; it is rounded up to a
+    * multiple of 64 bytes here.
+    */
+   tex->stride = align(tex->stride, 64);
+   tex->total_nblocksy = 0;
+
+   for (level = 0; level <= pt->last_level; level++) {
+      if (!intel_miptree_set_level_info(tex, level, 1, x, y, width,
+                                        height, 1))
+         return FALSE;
+
+      nblocksy = align(nblocksy, align_y);
+
+      /* Because the images are packed better, the final offset
+       * might not be the maximal one:
+       */
+      tex->total_nblocksy = MAX2(tex->total_nblocksy, y + nblocksy);
+
+      /* Layout_below: step right after second mipmap level.
+       */
+      if (level == 1) {
+         x += align(nblocksx, align_x);
+      }
+      else {
+         y += nblocksy;
+      }
+
+      width  = minify(width);
+      height = minify(height);
+      nblocksx = pf_get_nblocksx(&pt->block, width);
+      nblocksy = pf_get_nblocksy(&pt->block, height);
+   }
+
+   return TRUE;
+}
+
+/* Compute tex->stride, tex->total_nblocksy and all level/image offsets.
+ *
+ * Cube and 3D textures store the images of a level (6 faces, or 'depth'
+ * slices) pack_x_nr to a row, rows pack_y_pitch apart, with each level
+ * starting below the previous one.  Every other target uses
+ * i945_miptree_layout_2d().
+ *
+ * Returns FALSE if per-level bookkeeping could not be allocated; tables
+ * allocated for earlier levels are left for the caller to free.
+ */
+static boolean brw_miptree_layout(struct brw_texture *tex)
+{
+   struct pipe_texture *pt = &tex->base;
+   /* XXX: these vary depending on image format:
+    */
+/*    int align_w = 4; */
+
+   switch (pt->target) {
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_3D: {
+      unsigned width  = pt->width[0];
+      unsigned height = pt->height[0];
+      unsigned depth = pt->depth[0];
+      unsigned nblocksx = pt->nblocksx[0];
+      unsigned nblocksy = pt->nblocksy[0];
+      unsigned pack_x_pitch, pack_x_nr;
+      unsigned pack_y_pitch;
+      unsigned level;
+      unsigned align_h = 2;
+      unsigned align_w = 4;
+
+      tex->total_nblocksy = 0;
+
+      /* The stride is in bytes everywhere else in this file (image
+       * offsets, buffer size, and pack_x_pitch just below divides it by
+       * the block size), so scale the block count by the block size as
+       * the 2D path does.  Identical to the old value when block.size
+       * is 1.
+       */
+      tex->stride = align(pt->nblocksx[0] * pt->block.size, 4);
+      pack_y_pitch = align(pt->nblocksy[0], align_h);
+
+      pack_x_pitch = tex->stride / pt->block.size;
+      pack_x_nr = 1;
+
+      for (level = 0; level <= pt->last_level; level++) {
+         unsigned nr_images = pt->target == PIPE_TEXTURE_3D ? depth : 6;
+         int x = 0;
+         int y = 0;
+         uint q, j;
+
+         if (!intel_miptree_set_level_info(tex, level, nr_images,
+                                           0, tex->total_nblocksy,
+                                           width, height, depth))
+            return FALSE;
+
+         for (q = 0; q < nr_images;) {
+            for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) {
+               intel_miptree_set_image_offset(tex, level, q, x, y);
+               x += pack_x_pitch;
+            }
+
+            x = 0;
+            y += pack_y_pitch;
+         }
+
+
+         tex->total_nblocksy += y;
+         width  = minify(width);
+         height = minify(height);
+         depth  = minify(depth);
+         nblocksx = pf_get_nblocksx(&pt->block, width);
+         nblocksy = pf_get_nblocksy(&pt->block, height);
+
+         if (pt->compressed) {
+            pack_y_pitch = (height + 3) / 4;
+
+            if (pack_x_pitch > align(width, align_w)) {
+               pack_x_pitch = align(width, align_w);
+               pack_x_nr <<= 1;
+            }
+         } else {
+            if (pack_x_pitch > 4) {
+               pack_x_pitch >>= 1;
+               pack_x_nr <<= 1;
+               assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride);
+            }
+
+            if (pack_y_pitch > 2) {
+               pack_y_pitch >>= 1;
+               pack_y_pitch = align(pack_y_pitch, align_h);
+            }
+         }
+
+      }
+      break;
+   }
+
+   default:
+      if (!i945_miptree_layout_2d(tex))
+         return FALSE;
+      break;
+   }
+#if 0
+   PRINT("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__,
+       pt->pitch,
+       pt->total_nblocksy,
+       pt->block.size,
+       pt->stride * pt->total_nblocksy );
+#endif
+
+   return TRUE;
+}
+
+
+/* Create a texture from 'templat': copy the template, compute the
+ * mipmap layout and allocate a winsys buffer of stride * total_nblocksy
+ * bytes.  The returned texture has refcount 1.
+ *
+ * Returns NULL if the texture struct, the layout bookkeeping or the
+ * buffer cannot be allocated; nothing is leaked in that case.
+ */
+static struct pipe_texture *
+brw_texture_create_screen(struct pipe_screen *screen,
+                          const struct pipe_texture *templat)
+{
+   struct pipe_winsys *ws = screen->winsys;
+   struct brw_texture *tex = CALLOC_STRUCT(brw_texture);
+
+   /* Do not form &tex->base from a NULL pointer. */
+   if (!tex)
+      return NULL;
+
+   tex->base = *templat;
+   tex->base.refcount = 1;
+
+   tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]);
+   tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]);
+
+   if (brw_miptree_layout(tex))
+      tex->buffer = ws->buffer_create(ws, 64,
+                                      PIPE_BUFFER_USAGE_PIXEL,
+                                      tex->stride *
+                                      tex->total_nblocksy);
+
+   if (!tex->buffer) {
+      /* The layout pass may already have allocated offset tables. */
+      brw_texture_free_image_offsets(tex);
+      FREE(tex);
+      return NULL;
+   }
+
+   return &tex->base;
+}
+
+
+/* Drop one reference to *pt and clear the caller's pointer.  When the
+ * count reaches zero the buffer reference, the per-level offset tables
+ * and the texture itself are released.  A NULL *pt is ignored.
+ */
+static void
+brw_texture_release_screen(struct pipe_screen *screen,
+                           struct pipe_texture **pt)
+{
+   if (!*pt)
+      return;
+
+   /*
+   DBG("%s %p refcount will be %d\n",
+       __FUNCTION__, (void *) *pt, (*pt)->refcount - 1);
+   */
+   if (--(*pt)->refcount <= 0) {
+      struct pipe_winsys *ws = screen->winsys;
+      struct brw_texture *tex = (struct brw_texture *)*pt;
+
+      /*
+      DBG("%s deleting %p\n", __FUNCTION__, (void *) tex);
+      */
+
+      winsys_buffer_reference(ws, &tex->buffer, NULL);
+
+      /* Allocated with MALLOC/CALLOC_STRUCT, so release with FREE. */
+      brw_texture_free_image_offsets(tex);
+      FREE(tex);
+   }
+   *pt = NULL;
+}
+
+
+/* Allocate a surface describing one image of texture 'pt': the given
+ * cube 'face' or 3D 'zslice' at mipmap 'level' (both must be 0 for any
+ * other target).  The surface references the texture's buffer and
+ * carries the level's size, block counts, stride and byte offset.
+ *
+ * Returns whatever ws->surface_alloc() returned, i.e. NULL on failure.
+ * 'level', 'face' and 'zslice' are not range-checked here.
+ */
+static struct pipe_surface *
+brw_get_tex_surface_screen(struct pipe_screen *screen,
+                           struct pipe_texture *pt,
+                           unsigned face, unsigned level, unsigned zslice)
+{
+   struct pipe_winsys *ws = screen->winsys;
+   struct brw_texture *tex = (struct brw_texture *)pt;
+   struct pipe_surface *ps;
+   unsigned offset;  /* in bytes */
+
+   offset = tex->level_offset[level];
+
+   if (pt->target == PIPE_TEXTURE_CUBE) {
+      offset += tex->image_offset[level][face];
+   }
+   else if (pt->target == PIPE_TEXTURE_3D) {
+      offset += tex->image_offset[level][zslice];
+   }
+   else {
+      assert(face == 0);
+      assert(zslice == 0);
+   }
+
+   ps = ws->surface_alloc(ws);
+   if (ps) {
+      assert(ps->format);
+      assert(ps->refcount);
+      winsys_buffer_reference(ws, &ps->buffer, tex->buffer);
+      ps->format = pt->format;
+      ps->width = pt->width[level];
+      ps->height = pt->height[level];
+      ps->block = pt->block;
+      ps->nblocksx = pt->nblocksx[level];
+      ps->nblocksy = pt->nblocksy[level];
+      ps->stride = tex->stride;
+      ps->offset = offset;
+   }
+   return ps;
+}
+
+
+/* Context-level texture hooks: currently none (the only assignment is
+ * commented out).
+ */
+void
+brw_init_texture_functions(struct brw_context *brw)
+{
+//   brw->pipe.texture_update = brw_texture_update;
+}
+
+
+/* Install the texture create/release/get-surface entry points on the
+ * screen.
+ */
+void
+brw_init_screen_texture_funcs(struct pipe_screen *screen)
+{
+   screen->texture_create  = brw_texture_create_screen;
+   screen->texture_release = brw_texture_release_screen;
+   screen->get_tex_surface = brw_get_tex_surface_screen;
+}
+
+diff --git
a/src/gallium/drivers/i965simple/brw_tex_layout.h b/src/gallium/drivers/i965simple/brw_tex_layout.h new file mode 100644 index 0000000000..a6b6ba8146 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_tex_layout.h @@ -0,0 +1,44 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + + +#ifndef BRW_TEX_LAYOUT_H +#define BRW_TEX_LAYOUT_H + + +struct brw_context; +struct pipe_screen; + + +extern void +brw_init_texture_functions(struct brw_context *brw); + +extern void +brw_init_screen_texture_funcs(struct pipe_screen *screen); + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_urb.c b/src/gallium/drivers/i965simple/brw_urb.c new file mode 100644 index 0000000000..101a4367b9 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_urb.c @@ -0,0 +1,186 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +//#include "brw_state.h" +#include "brw_batch.h" +#include "brw_defines.h" + +#define VS 0 +#define GS 1 +#define CLP 2 +#define SF 3 +#define CS 4 + +/* XXX: Are the min_entry_size numbers useful? + * XXX: Verify min_nr_entries, esp for VS. + * XXX: Verify SF min_entry_size. + */ +static const struct { + unsigned min_nr_entries; + unsigned preferred_nr_entries; + unsigned min_entry_size; + unsigned max_entry_size; +} limits[CS+1] = { + { 8, 32, 1, 5 }, /* vs */ + { 4, 8, 1, 5 }, /* gs */ + { 6, 8, 1, 5 }, /* clp */ + { 1, 8, 1, 12 }, /* sf */ + { 1, 4, 1, 32 } /* cs */ +}; + + +static boolean check_urb_layout( struct brw_context *brw ) +{ + brw->urb.vs_start = 0; + brw->urb.gs_start = brw->urb.nr_vs_entries * brw->urb.vsize; + brw->urb.clip_start = brw->urb.gs_start + brw->urb.nr_gs_entries * brw->urb.vsize; + brw->urb.sf_start = brw->urb.clip_start + brw->urb.nr_clip_entries * brw->urb.vsize; + brw->urb.cs_start = brw->urb.sf_start + brw->urb.nr_sf_entries * brw->urb.sfsize; + + return brw->urb.cs_start + brw->urb.nr_cs_entries * brw->urb.csize <= 256; +} + +/* Most minimal update, forces re-emit of URB fence packet after GS + * unit turned on/off. 
+ */ +static void recalculate_urb_fence( struct brw_context *brw ) +{ + unsigned csize = brw->curbe.total_size; + unsigned vsize = brw->vs.prog_data->urb_entry_size; + unsigned sfsize = brw->sf.prog_data->urb_entry_size; + + if (csize < limits[CS].min_entry_size) + csize = limits[CS].min_entry_size; + + if (vsize < limits[VS].min_entry_size) + vsize = limits[VS].min_entry_size; + + if (sfsize < limits[SF].min_entry_size) + sfsize = limits[SF].min_entry_size; + + if (brw->urb.vsize < vsize || + brw->urb.sfsize < sfsize || + brw->urb.csize < csize || + (brw->urb.constrained && (brw->urb.vsize > brw->urb.vsize || + brw->urb.sfsize > brw->urb.sfsize || + brw->urb.csize > brw->urb.csize))) { + + + brw->urb.csize = csize; + brw->urb.sfsize = sfsize; + brw->urb.vsize = vsize; + + brw->urb.nr_vs_entries = limits[VS].preferred_nr_entries; + brw->urb.nr_gs_entries = limits[GS].preferred_nr_entries; + brw->urb.nr_clip_entries = limits[CLP].preferred_nr_entries; + brw->urb.nr_sf_entries = limits[SF].preferred_nr_entries; + brw->urb.nr_cs_entries = limits[CS].preferred_nr_entries; + + if (!check_urb_layout(brw)) { + brw->urb.nr_vs_entries = limits[VS].min_nr_entries; + brw->urb.nr_gs_entries = limits[GS].min_nr_entries; + brw->urb.nr_clip_entries = limits[CLP].min_nr_entries; + brw->urb.nr_sf_entries = limits[SF].min_nr_entries; + brw->urb.nr_cs_entries = limits[CS].min_nr_entries; + + brw->urb.constrained = 1; + + if (!check_urb_layout(brw)) { + /* This is impossible, given the maximal sizes of urb + * entries and the values for minimum nr of entries + * provided above. + */ + debug_printf("couldn't calculate URB layout!\n"); + exit(1); + } + + if (BRW_DEBUG & (DEBUG_URB|DEBUG_FALLBACKS)) + debug_printf("URB CONSTRAINED\n"); + } + else + brw->urb.constrained = 0; + + if (BRW_DEBUG & DEBUG_URB) + debug_printf("URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. 
%d\n", + brw->urb.vs_start, + brw->urb.gs_start, + brw->urb.clip_start, + brw->urb.sf_start, + brw->urb.cs_start, + 256); + + brw->state.dirty.brw |= BRW_NEW_URB_FENCE; + } +} + + +const struct brw_tracked_state brw_recalculate_urb_fence = { + .dirty = { + .brw = BRW_NEW_CURBE_OFFSETS, + .cache = (CACHE_NEW_VS_PROG | + CACHE_NEW_SF_PROG) + }, + .update = recalculate_urb_fence +}; + + + + + +void brw_upload_urb_fence(struct brw_context *brw) +{ + struct brw_urb_fence uf; + memset(&uf, 0, sizeof(uf)); + + uf.header.opcode = CMD_URB_FENCE; + uf.header.length = sizeof(uf)/4-2; + uf.header.vs_realloc = 1; + uf.header.gs_realloc = 1; + uf.header.clp_realloc = 1; + uf.header.sf_realloc = 1; + uf.header.vfe_realloc = 1; + uf.header.cs_realloc = 1; + + /* The ordering below is correct, not the layout in the + * instruction. + * + * There are 256 urb reg pairs in total. + */ + uf.bits0.vs_fence = brw->urb.gs_start; + uf.bits0.gs_fence = brw->urb.clip_start; + uf.bits0.clp_fence = brw->urb.sf_start; + uf.bits1.sf_fence = brw->urb.cs_start; + uf.bits1.cs_fence = 256; + + BRW_BATCH_STRUCT(brw, &uf); +} diff --git a/src/gallium/drivers/i965simple/brw_util.c b/src/gallium/drivers/i965simple/brw_util.c new file mode 100644 index 0000000000..42391d7c8c --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_util.c @@ -0,0 +1,104 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_util.h" +#include "brw_defines.h" + +#include "pipe/p_defines.h" + +unsigned brw_count_bits( unsigned val ) +{ + unsigned i; + for (i = 0; val ; val >>= 1) + if (val & 1) + i++; + return i; +} + + +unsigned brw_translate_blend_equation( int mode ) +{ + switch (mode) { + case PIPE_BLEND_ADD: + return BRW_BLENDFUNCTION_ADD; + case PIPE_BLEND_MIN: + return BRW_BLENDFUNCTION_MIN; + case PIPE_BLEND_MAX: + return BRW_BLENDFUNCTION_MAX; + case PIPE_BLEND_SUBTRACT: + return BRW_BLENDFUNCTION_SUBTRACT; + case PIPE_BLEND_REVERSE_SUBTRACT: + return BRW_BLENDFUNCTION_REVERSE_SUBTRACT; + default: + assert(0); + return BRW_BLENDFUNCTION_ADD; + } +} + +unsigned brw_translate_blend_factor( int factor ) +{ + switch(factor) { + case PIPE_BLENDFACTOR_ZERO: + return BRW_BLENDFACTOR_ZERO; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return BRW_BLENDFACTOR_SRC_ALPHA; + case PIPE_BLENDFACTOR_ONE: + return BRW_BLENDFACTOR_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return BRW_BLENDFACTOR_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return BRW_BLENDFACTOR_INV_SRC_COLOR; + case PIPE_BLENDFACTOR_DST_COLOR: + return BRW_BLENDFACTOR_DST_COLOR; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return BRW_BLENDFACTOR_INV_DST_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return BRW_BLENDFACTOR_INV_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return BRW_BLENDFACTOR_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return BRW_BLENDFACTOR_INV_DST_ALPHA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return BRW_BLENDFACTOR_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return BRW_BLENDFACTOR_INV_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return BRW_BLENDFACTOR_CONST_ALPHA; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return 
BRW_BLENDFACTOR_INV_CONST_ALPHA; + default: + assert(0); + return BRW_BLENDFACTOR_ZERO; + } +} diff --git a/src/gallium/drivers/i965simple/brw_util.h b/src/gallium/drivers/i965simple/brw_util.h new file mode 100644 index 0000000000..d60e5934db --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_util.h @@ -0,0 +1,43 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_UTIL_H +#define BRW_UTIL_H + +#include "pipe/p_state.h" + +extern unsigned brw_count_bits( unsigned val ); +extern unsigned brw_translate_blend_factor( int factor ); +extern unsigned brw_translate_blend_equation( int mode ); + + +#endif diff --git a/src/gallium/drivers/i965simple/brw_vs.c b/src/gallium/drivers/i965simple/brw_vs.c new file mode 100644 index 0000000000..92327e896d --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs.c @@ -0,0 +1,120 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_vs.h" +#include "brw_util.h" +#include "brw_state.h" + + +static void do_vs_prog( struct brw_context *brw, + const struct brw_vertex_program *vp, + struct brw_vs_prog_key *key ) +{ + unsigned program_size; + const unsigned *program; + struct brw_vs_compile c; + + memset(&c, 0, sizeof(c)); + memcpy(&c.key, key, sizeof(*key)); + + brw_init_compile(&c.func); + c.vp = vp; + + c.prog_data.outputs_written = vp->info.num_outputs; + c.prog_data.inputs_read = vp->info.num_inputs; + +#if 0 + if (c.key.copy_edgeflag) { + c.prog_data.outputs_written |= 1<<VERT_RESULT_EDGE; + c.prog_data.inputs_read |= 1<<VERT_ATTRIB_EDGEFLAG; + } +#endif + + /* Emit GEN4 code. + */ + brw_vs_emit(&c); + + /* get the program + */ + program = brw_get_program(&c.func, &program_size); + + /* + */ + brw->vs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_VS_PROG], + &c.key, + sizeof(c.key), + program, + program_size, + &c.prog_data, + &brw->vs.prog_data); +} + + +static void brw_upload_vs_prog( struct brw_context *brw ) +{ + struct brw_vs_prog_key key; + const struct brw_vertex_program *vp = brw->attribs.VertexProgram; + + assert(vp); + + memset(&key, 0, sizeof(key)); + + /* Just upload the program verbatim for now. Always send it all + * the inputs it asks for, whether they are varying or not. + */ + key.program_string_id = vp->id; + key.nr_userclip = brw->attribs.Clip.nr; + key.copy_edgeflag = (brw->attribs.Raster->fill_cw != PIPE_POLYGON_MODE_FILL || + brw->attribs.Raster->fill_ccw != PIPE_POLYGON_MODE_FILL); + + /* Make an early check for the key. 
+ */ + if (brw_search_cache(&brw->cache[BRW_VS_PROG], + &key, sizeof(key), + &brw->vs.prog_data, + &brw->vs.prog_gs_offset)) + return; + + do_vs_prog(brw, vp, &key); +} + + +/* See brw_vs.c: + */ +const struct brw_tracked_state brw_vs_prog = { + .dirty = { + .brw = BRW_NEW_VS, + .cache = 0 + }, + .update = brw_upload_vs_prog +}; diff --git a/src/gallium/drivers/i965simple/brw_vs.h b/src/gallium/drivers/i965simple/brw_vs.h new file mode 100644 index 0000000000..070f9dfcae --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs.h @@ -0,0 +1,82 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_VS_H +#define BRW_VS_H + + +#include "brw_context.h" +#include "brw_eu.h" + + +struct brw_vs_prog_key { + unsigned program_string_id; + unsigned nr_userclip:4; + unsigned copy_edgeflag:1; + unsigned know_w_is_one:1; + unsigned pad:26; +}; + + +struct brw_vs_compile { + struct brw_compile func; + struct brw_vs_prog_key key; + struct brw_vs_prog_data prog_data; + + const struct brw_vertex_program *vp; + + unsigned nr_inputs; + + unsigned first_output; + unsigned nr_outputs; + + unsigned first_tmp; + unsigned last_tmp; + + struct brw_reg r0; + struct brw_reg r1; + struct brw_reg regs[12][128]; + struct brw_reg tmp; + struct brw_reg stack; + + struct { + boolean used_in_src; + struct brw_reg reg; + } output_regs[128]; + + struct brw_reg userplane[6]; + +}; + +void brw_vs_emit( struct brw_vs_compile *c ); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c new file mode 100644 index 0000000000..34dbc0624d --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -0,0 +1,1330 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_context.h" +#include "brw_vs.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" + +struct brw_prog_info { + unsigned num_temps; + unsigned num_addrs; + unsigned num_consts; + + unsigned writes_psize; + + unsigned pos_idx; + unsigned result_edge_idx; + unsigned edge_flag_idx; + unsigned psize_idx; +}; + +/* Do things as simply as possible. Allocate and populate all regs + * ahead of time. 
+ */ +static void brw_vs_alloc_regs( struct brw_vs_compile *c, + struct brw_prog_info *info ) +{ + unsigned i, reg = 0, mrf; + unsigned nr_params; + + /* r0 -- reserved as usual + */ + c->r0 = brw_vec8_grf(reg, 0); reg++; + + /* User clip planes from curbe: + */ + if (c->key.nr_userclip) { + for (i = 0; i < c->key.nr_userclip; i++) { + c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1); + } + + /* Deal with curbe alignment: + */ + reg += ((6+c->key.nr_userclip+3)/4)*2; + } + + /* Vertex program parameters from curbe: + */ + nr_params = c->prog_data.max_const; + for (i = 0; i < nr_params; i++) { + c->regs[TGSI_FILE_CONSTANT][i] = stride(brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); + } + reg += (nr_params+1)/2; + c->prog_data.curb_read_length = reg - 1; + + + + /* Allocate input regs: + */ + c->nr_inputs = c->vp->info.num_inputs; + for (i = 0; i < c->nr_inputs; i++) { + c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } + + + /* Allocate outputs: TODO: could organize the non-position outputs + * to go straight into message regs. + */ + c->nr_outputs = 0; + c->first_output = reg; + mrf = 4; + for (i = 0; i < c->vp->info.num_outputs; i++) { + c->nr_outputs++; +#if 0 + if (i == VERT_RESULT_HPOS) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } + else if (i == VERT_RESULT_PSIZ) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + mrf++; /* just a placeholder? XXX fix later stages & remove this */ + } + else { + c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); + mrf++; + } +#else + /*treat pos differently for now */ + if (i == info->pos_idx) { + c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0); + reg++; + } else { + c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf); + mrf++; + } +#endif + } + + /* Allocate program temporaries: + */ + for (i = 0; i < info->num_temps; i++) { + c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0); + reg++; + } + + /* Address reg(s). 
Don't try to use the internal address reg until + * deref time. + */ + for (i = 0; i < info->num_addrs; i++) { + c->regs[TGSI_FILE_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE, + reg, + 0, + BRW_REGISTER_TYPE_D, + BRW_VERTICAL_STRIDE_8, + BRW_WIDTH_8, + BRW_HORIZONTAL_STRIDE_1, + BRW_SWIZZLE_XXXX, + TGSI_WRITEMASK_X); + reg++; + } + + for (i = 0; i < 128; i++) { + if (c->output_regs[i].used_in_src) { + c->output_regs[i].reg = brw_vec8_grf(reg, 0); + reg++; + } + } + + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0); + reg += 2; + + + /* Some opcodes need an internal temporary: + */ + c->first_tmp = reg; + c->last_tmp = reg; /* for allocation purposes */ + + /* Each input reg holds data from two vertices. The + * urb_read_length is the number of registers read from *each* + * vertex urb, so is half the amount: + */ + c->prog_data.urb_read_length = (c->nr_inputs+1)/2; + + c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4; + c->prog_data.total_grf = reg; +} + + +static struct brw_reg get_tmp( struct brw_vs_compile *c ) +{ + struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; + + return tmp; +} + +static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + +static void release_tmps( struct brw_vs_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + + +static void unalias1( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + void (*func)( struct brw_vs_compile *, + struct brw_reg, + struct brw_reg )) +{ + if (dst.file == arg0.file && dst.nr == arg0.nr) { + struct brw_compile *p = &c->func; + struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); + func(c, tmp, arg0); + brw_MOV(p, dst, tmp); + } + else { + func(c, dst, arg0); + } +} + +static void unalias2( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + void (*func)( 
struct brw_vs_compile *, + struct brw_reg, + struct brw_reg, + struct brw_reg )) +{ + if ((dst.file == arg0.file && dst.nr == arg0.nr) || + (dst.file == arg1.file && dst.nr == arg1.nr)) { + struct brw_compile *p = &c->func; + struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask); + func(c, tmp, arg0, arg1); + brw_MOV(p, dst, tmp); + } + else { + func(c, dst, arg0, arg1); + } +} + +static void emit_sop( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + unsigned cond) +{ + brw_push_insn_state(p); + brw_CMP(p, brw_null_reg(), cond, arg0, arg1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst, brw_imm_f(1.0f)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, brw_imm_f(0.0f)); + brw_pop_insn_state(p); +} + +static void emit_seq( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ); +} + +static void emit_sne( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ); +} +static void emit_slt( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L); +} + +static void emit_sle( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE); +} + +static void emit_sgt( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G); +} + +static void emit_sge( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE); +} + +static void emit_max( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + brw_CMP(p, 
brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); + brw_SEL(p, dst, arg1, arg0); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +} + +static void emit_min( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1 ) +{ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1); + brw_SEL(p, dst, arg0, arg1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +} + + +static void emit_math1( struct brw_vs_compile *c, + unsigned function, + struct brw_reg dst, + struct brw_reg arg0, + unsigned precision) +{ + /* There are various odd behaviours with SEND on the simulator. In + * addition there are documented issues with the fact that the GEN4 + * processor doesn't do dependency control properly on SEND + * results. So, on balance, this kludge to get around failures + * with writemasked math results looks like it might be necessary + * whether that turns out to be a simulator bug or not: + */ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + boolean need_tmp = (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_math(p, + tmp, + function, + BRW_MATH_SATURATE_NONE, + 2, + arg0, + BRW_MATH_DATA_SCALAR, + precision); + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + +static void emit_math2( struct brw_vs_compile *c, + unsigned function, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1, + unsigned precision) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + boolean need_tmp = (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_MOV(p, brw_message_reg(3), arg1); + + brw_math(p, + tmp, + function, + BRW_MATH_SATURATE_NONE, + 2, + arg0, + BRW_MATH_DATA_SCALAR, + precision); + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + + + +static void emit_exp_noalias( struct brw_vs_compile *c, + struct brw_reg 
dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) { + struct brw_reg tmp = get_tmp(c); + struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D); + + /* tmp_d = floor(arg0.x) */ + brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0)); + + /* result[0] = 2.0 ^ tmp */ + + /* Adjust exponent for floating point: + * exp += 127 + */ + brw_ADD(p, brw_writemask(tmp_d, TGSI_WRITEMASK_X), tmp_d, brw_imm_d(127)); + + /* Install exponent and sign. + * Excess drops off the edge: + */ + brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), TGSI_WRITEMASK_X), + tmp_d, brw_imm_d(23)); + + release_tmp(c, tmp); + } + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) { + /* result[1] = arg0.x - floor(arg0.x) */ + brw_FRC(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0, 0)); + } + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) { + /* As with the LOG instruction, we might be better off just + * doing a taylor expansion here, seeing as we have to do all + * the prep work. 
+       *
+       * If mathbox partial precision is too low, consider also:
+       * result[2] = result[0] * EXP(result[1])
+       */
+      emit_math1(c,
+                 BRW_MATH_FUNCTION_EXP,
+                 brw_writemask(dst, TGSI_WRITEMASK_Z),
+                 brw_swizzle1(arg0, 0),
+                 BRW_MATH_PRECISION_PARTIAL);
+   }
+
+   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) {
+      /* result[3] = 1.0; */
+      brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), brw_imm_f(1));
+   }
+}
+
+
+/* Emit the LOG opcode for the channels enabled in dst's writemask,
+ * reading only arg0.x:
+ *   x = unbiased exponent of |arg0.x|
+ *   y = mantissa of |arg0.x| rebuilt as a float with exponent 0
+ *   z = x + LOG2(y)
+ *   w = 1.0
+ * Results are built in a temporary and copied to dst when dst is not a
+ * fully write-enabled general register.
+ */
+static void emit_log_noalias( struct brw_vs_compile *c,
+                              struct brw_reg dst,
+                              struct brw_reg arg0 )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg tmp = dst;
+   struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+   struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD);
+   boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+                       dst.file != BRW_GENERAL_REGISTER_FILE);
+
+   if (need_tmp) {
+      tmp = get_tmp(c);
+      tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+   }
+
+   /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mant
+    * according to spec:
+    *
+    * These almost look like they could be joined up, but that is not
+    * really practical:
+    *
+    * result[0].f = ((x.i & ((1<<31)-1)) >> 23) - 127
+    * result[1].i = (x.i & ((1<<23)-1)) + (127<<23)
+    */
+   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_XZ) {
+      brw_AND(p,
+              brw_writemask(tmp_ud, TGSI_WRITEMASK_X),
+              brw_swizzle1(arg0_ud, 0),
+              brw_imm_ud((1U<<31)-1));
+
+      brw_SHR(p,
+              brw_writemask(tmp_ud, TGSI_WRITEMASK_X),
+              tmp_ud,
+              brw_imm_ud(23));
+
+      brw_ADD(p,
+              brw_writemask(tmp, TGSI_WRITEMASK_X),
+              retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? */
+              brw_imm_d(-127));
+   }
+
+   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_YZ) {
+      brw_AND(p,
+              brw_writemask(tmp_ud, TGSI_WRITEMASK_Y),
+              brw_swizzle1(arg0_ud, 0),
+              brw_imm_ud((1<<23)-1));
+
+      brw_OR(p,
+             brw_writemask(tmp_ud, TGSI_WRITEMASK_Y),
+             tmp_ud,
+             brw_imm_ud(127<<23));
+   }
+
+   if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) {
+      /* result[2] = result[0] + LOG2(result[1]); */
+
+      /* Why bother?
The above is just a hint how to do this with a + * taylor series. Maybe we *should* use a taylor series as by + * the time all the above has been done it's almost certainly + * quicker than calling the mathbox, even with low precision. + * + * Options are: + * - result[0] + mathbox.LOG2(result[1]) + * - mathbox.LOG2(arg0.x) + * - result[0] + inline_taylor_approx(result[1]) + */ + emit_math1(c, + BRW_MATH_FUNCTION_LOG, + brw_writemask(tmp, TGSI_WRITEMASK_Z), + brw_swizzle1(tmp, 1), + BRW_MATH_PRECISION_FULL); + + brw_ADD(p, + brw_writemask(tmp, TGSI_WRITEMASK_Z), + brw_swizzle1(tmp, 2), + brw_swizzle1(tmp, 0)); + } + + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) { + /* result[3] = 1.0; */ + brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_W), brw_imm_f(1)); + } + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + + + + +/* Need to unalias - consider swizzles: r0 = DST r0.xxxx r1 + */ +static void emit_dst_noalias( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0, + struct brw_reg arg1) +{ + struct brw_compile *p = &c->func; + + /* There must be a better way to do this: + */ + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_X), brw_imm_f(1.0)); + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) + brw_MUL(p, brw_writemask(dst, TGSI_WRITEMASK_Y), arg0, arg1); + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Z), arg0); + if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), arg1); +} + +static void emit_xpd( struct brw_compile *p, + struct brw_reg dst, + struct brw_reg t, + struct brw_reg u) +{ + brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3), brw_swizzle(u,2,0,1,3)); + brw_MAC(p, dst, negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3)); +} + + + +static void emit_lit_noalias( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; 
+ struct brw_instruction *if_insn; + struct brw_reg tmp = dst; + boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_YZ), brw_imm_f(0)); + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_XW), brw_imm_f(1)); + + /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order + * to get all channels active inside the IF. In the clipping code + * we run with NoMask, so it's not an option and we can use + * BRW_EXECUTE_1 for all comparisions. + */ + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0)); + if_insn = brw_IF(p, BRW_EXECUTE_8); + { + brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0,0)); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0)); + brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_Z), brw_swizzle1(arg0,1)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + emit_math2(c, + BRW_MATH_FUNCTION_POW, + brw_writemask(dst, TGSI_WRITEMASK_Z), + brw_swizzle1(tmp, 2), + brw_swizzle1(arg0, 3), + BRW_MATH_PRECISION_PARTIAL); + } + + brw_ENDIF(p, if_insn); +} + + + + + +/* TODO: relative addressing! 
+ */ +static struct brw_reg get_reg( struct brw_vs_compile *c, + unsigned file, + unsigned index ) +{ + switch (file) { + case TGSI_FILE_TEMPORARY: + case TGSI_FILE_INPUT: + case TGSI_FILE_OUTPUT: + assert(c->regs[file][index].nr != 0); + return c->regs[file][index]; + case TGSI_FILE_CONSTANT: + assert(c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm].nr != 0); + return c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm]; + case TGSI_FILE_IMMEDIATE: + assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0); + return c->regs[TGSI_FILE_CONSTANT][index]; + case TGSI_FILE_ADDRESS: + assert(index == 0); + return c->regs[file][index]; + + case TGSI_FILE_NULL: /* undef values */ + return brw_null_reg(); + + default: + assert(0); + return brw_null_reg(); + } +} + + + +static struct brw_reg deref( struct brw_vs_compile *c, + struct brw_reg arg, + int offset) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = vec4(get_tmp(c)); + struct brw_reg vp_address = retype(vec1(get_reg(c, TGSI_FILE_ADDRESS, 0)), BRW_REGISTER_TYPE_UW); + unsigned byte_offset = arg.nr * 32 + arg.subnr + offset * 16; + struct brw_reg indirect = brw_vec4_indirect(0,0); + + { + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + + /* This is pretty clunky - load the address register twice and + * fetch each 4-dword value in turn. There must be a way to do + * this in a single pass, but I couldn't get it to work. 
+ */ + brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset)); + brw_MOV(p, tmp, indirect); + + brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset)); + brw_MOV(p, suboffset(tmp, 4), indirect); + + brw_pop_insn_state(p); + } + + return vec8(tmp); +} + + +static void emit_arl( struct brw_vs_compile *c, + struct brw_reg dst, + struct brw_reg arg0 ) +{ + struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); + + brw_RNDD(p, tmp, arg0); + brw_MUL(p, dst, tmp, brw_imm_d(16)); + + if (need_tmp) + release_tmp(c, tmp); +} + + +/* Will return mangled results for SWZ op. The emit_swz() function + * ignores this result and recalculates taking extended swizzles into + * account. + */ +static struct brw_reg get_arg( struct brw_vs_compile *c, + struct tgsi_src_register *src ) +{ + struct brw_reg reg; + + if (src->File == TGSI_FILE_NULL) + return brw_null_reg(); + +#if 0 + if (src->RelAddr) + reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); + else +#endif + reg = get_reg(c, src->File, src->Index); + + /* Convert 3-bit swizzle to 2-bit. + */ + reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SwizzleX, + src->SwizzleY, + src->SwizzleZ, + src->SwizzleW); + + /* Note this is ok for non-swizzle instructions: + */ + reg.negate = src->Negate ? 
1 : 0; + + return reg; +} + + +static struct brw_reg get_dst( struct brw_vs_compile *c, + const struct tgsi_dst_register *dst ) +{ + struct brw_reg reg = get_reg(c, dst->File, dst->Index); + + reg.dw1.bits.writemask = dst->WriteMask; + + return reg; +} + + + + +static void emit_swz( struct brw_vs_compile *c, + struct brw_reg dst, + struct tgsi_src_register src ) +{ + struct brw_compile *p = &c->func; + unsigned zeros_mask = 0; + unsigned ones_mask = 0; + unsigned src_mask = 0; + ubyte src_swz[4]; + boolean need_tmp = (src.Negate && + dst.file != BRW_GENERAL_REGISTER_FILE); + struct brw_reg tmp = dst; + unsigned i; + + if (need_tmp) + tmp = get_tmp(c); + + for (i = 0; i < 4; i++) { + if (dst.dw1.bits.writemask & (1<<i)) { + ubyte s = 0; + switch(i) { + case 0: + s = src.SwizzleX; + break; + s = src.SwizzleY; + case 1: + break; + s = src.SwizzleZ; + case 2: + break; + s = src.SwizzleW; + case 3: + break; + } + switch (s) { + case TGSI_SWIZZLE_X: + case TGSI_SWIZZLE_Y: + case TGSI_SWIZZLE_Z: + case TGSI_SWIZZLE_W: + src_mask |= 1<<i; + src_swz[i] = s; + break; + case TGSI_EXTSWIZZLE_ZERO: + zeros_mask |= 1<<i; + break; + case TGSI_EXTSWIZZLE_ONE: + ones_mask |= 1<<i; + break; + } + } + } + + /* Do src first, in case dst aliases src: + */ + if (src_mask) { + struct brw_reg arg0; + +#if 0 + if (src.RelAddr) + arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); + else +#endif + arg0 = get_reg(c, src.File, src.Index); + + arg0 = brw_swizzle(arg0, + src_swz[0], src_swz[1], + src_swz[2], src_swz[3]); + + brw_MOV(p, brw_writemask(tmp, src_mask), arg0); + } + + if (zeros_mask) + brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0)); + + if (ones_mask) + brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1)); + + if (src.Negate) + brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp)); + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); + } +} + + + +/* Post-vertex-program processing. Send the results to the URB. 
+ */ +static void emit_vertex_write( struct brw_vs_compile *c, struct brw_prog_info *info) +{ + struct brw_compile *p = &c->func; + struct brw_reg m0 = brw_message_reg(0); + struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][info->pos_idx]; + struct brw_reg ndc; + + if (c->key.copy_edgeflag) { + brw_MOV(p, + get_reg(c, TGSI_FILE_OUTPUT, info->result_edge_idx), + get_reg(c, TGSI_FILE_INPUT, info->edge_flag_idx)); + } + + + /* Build ndc coords? TODO: Shortcircuit when w is known to be one. + */ + if (!c->key.know_w_is_one) { + ndc = get_tmp(c); + emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); + brw_MUL(p, brw_writemask(ndc, TGSI_WRITEMASK_XYZ), pos, ndc); + } + else { + ndc = pos; + } + + /* This includes the workaround for -ve rhw, so is no longer an + * optional step: + */ + if (info->writes_psize || + c->key.nr_userclip || + !c->key.know_w_is_one) + { + struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); + unsigned i; + + brw_MOV(p, header1, brw_imm_ud(0)); + + brw_set_access_mode(p, BRW_ALIGN_16); + + if (info->writes_psize) { + struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][info->psize_idx]; + brw_MUL(p, brw_writemask(header1, TGSI_WRITEMASK_W), + brw_swizzle1(psiz, 0), brw_imm_f(1<<11)); + brw_AND(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, + brw_imm_ud(0x7ff<<8)); + } + + + for (i = 0; i < c->key.nr_userclip; i++) { + brw_set_conditionalmod(p, BRW_CONDITIONAL_L); + brw_DP4(p, brw_null_reg(), pos, c->userplane[i]); + brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<i)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + + + /* i965 clipping workaround: + * 1) Test for -ve rhw + * 2) If set, + * set ndc = (0,0,0,0) + * set ucp[6] = 1 + * + * Later, clipping will detect ucp[6] and ensure the primitive is + * clipped against all fixed planes. 
+ */ + if (!c->key.know_w_is_one) { + brw_CMP(p, + vec8(brw_null_reg()), + BRW_CONDITIONAL_L, + brw_swizzle1(ndc, 3), + brw_imm_f(0)); + + brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<6)); + brw_MOV(p, ndc, brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } + + brw_set_access_mode(p, BRW_ALIGN_1); /* why? */ + brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1); + brw_set_access_mode(p, BRW_ALIGN_16); + + release_tmp(c, header1); + } + else { + brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); + } + + + /* Emit the (interleaved) headers for the two vertices - an 8-reg + * of zeros followed by two sets of NDC coordinates: + */ + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, offset(m0, 2), ndc); + brw_MOV(p, offset(m0, 3), pos); + + + brw_urb_WRITE(p, + brw_null_reg(), /* dest */ + 0, /* starting mrf reg nr */ + c->r0, /* src */ + 0, /* allocate */ + 1, /* used */ + c->nr_outputs + 3, /* msg len */ + 0, /* response len */ + 1, /* eot */ + 1, /* writes complete */ + 0, /* urb destination offset */ + BRW_URB_SWIZZLE_INTERLEAVE); + +} + +static void +post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst ) +{ + struct tgsi_parse_context parse; + const struct tgsi_token *tokens = c->vp->program.tokens; + tgsi_parse_init(&parse, tokens); + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) { +#if 0 + struct brw_instruction *brw_inst1, *brw_inst2; + const struct tgsi_full_instruction *inst1, *inst2; + int offset; + inst1 = &parse.FullToken.FullInstruction; + brw_inst1 = inst1->Data; + switch (inst1->Opcode) { + case TGSI_OPCODE_CAL: + case TGSI_OPCODE_BRA: + target_insn = inst1->BranchTarget; + inst2 = &c->vp->program.Base.Instructions[target_insn]; + brw_inst2 = inst2->Data; + offset = brw_inst2 - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + case 
TGSI_OPCODE_END: + offset = end_inst - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + default: + break; + } +#endif + } + } + tgsi_parse_free(&parse); +} + +static void process_declaration(const struct tgsi_full_declaration *decl, + struct brw_prog_info *info) +{ + int first = decl->DeclarationRange.First; + int last = decl->DeclarationRange.Last; + + switch(decl->Declaration.File) { + case TGSI_FILE_CONSTANT: + info->num_consts += last - first + 1; + break; + case TGSI_FILE_INPUT: { + } + break; + case TGSI_FILE_OUTPUT: { + assert(last == first); /* for now */ + if (decl->Declaration.Semantic) { + switch (decl->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: { + info->pos_idx = first; + } + break; + case TGSI_SEMANTIC_COLOR: + break; + case TGSI_SEMANTIC_BCOLOR: + break; + case TGSI_SEMANTIC_FOG: + break; + case TGSI_SEMANTIC_PSIZE: { + info->writes_psize = TRUE; + info->psize_idx = first; + } + break; + case TGSI_SEMANTIC_GENERIC: + break; + } + } + } + break; + case TGSI_FILE_TEMPORARY: { + info->num_temps += (last - first) + 1; + } + break; + case TGSI_FILE_SAMPLER: { + } + break; + case TGSI_FILE_ADDRESS: { + info->num_addrs += (last - first) + 1; + } + break; + case TGSI_FILE_IMMEDIATE: { + } + break; + case TGSI_FILE_NULL: { + } + break; + } +} + +static void process_instruction(struct brw_vs_compile *c, + struct tgsi_full_instruction *inst, + struct brw_prog_info *info) +{ + struct brw_reg args[3], dst; + struct brw_compile *p = &c->func; + /*struct brw_indirect stack_index = brw_indirect(0, 0);*/ + unsigned i; + unsigned index; + unsigned file; + /*FIXME: might not be the only one*/ + const struct tgsi_dst_register *dst_reg = &inst->FullDstRegisters[0].DstRegister; + /* + struct brw_instruction *if_inst[MAX_IFSN]; + unsigned insn, if_insn = 0; + */ + + for (i = 0; i < 3; i++) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + index = src->SrcRegister.Index; + file = src->SrcRegister.File; + if (file == 
TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src) + args[i] = c->output_regs[index].reg; + else + args[i] = get_arg(c, &src->SrcRegister); + } + + /* Get dest regs. Note that it is possible for a reg to be both + * dst and arg, given the static allocation of registers. So + * care needs to be taken emitting multi-operation instructions. + */ + index = dst_reg->Index; + file = dst_reg->File; + if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src) + dst = c->output_regs[index].reg; + else + dst = get_dst(c, dst_reg); + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + brw_MOV(p, dst, brw_abs(args[0])); + break; + case TGSI_OPCODE_ADD: + brw_ADD(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DP3: + brw_DP3(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DP4: + brw_DP4(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DPH: + brw_DPH(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_DST: + unalias2(c, dst, args[0], args[1], emit_dst_noalias); + break; + case TGSI_OPCODE_EXP: + unalias1(c, dst, args[0], emit_exp_noalias); + break; + case TGSI_OPCODE_EX2: + emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_ARL: + emit_arl(c, dst, args[0]); + break; + case TGSI_OPCODE_FLR: + brw_RNDD(p, dst, args[0]); + break; + case TGSI_OPCODE_FRC: + brw_FRC(p, dst, args[0]); + break; + case TGSI_OPCODE_LOG: + unalias1(c, dst, args[0], emit_log_noalias); + break; + case TGSI_OPCODE_LG2: + emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_LIT: + unalias1(c, dst, args[0], emit_lit_noalias); + break; + case TGSI_OPCODE_MAD: + brw_MOV(p, brw_acc_reg(), args[2]); + brw_MAC(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_MAX: + emit_max(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_MIN: + emit_min(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: +#if 0 + /* The args[0] value 
can't be used here as it won't have + * correctly encoded the full swizzle: + */ + emit_swz(c, dst, inst->SrcReg[0] ); +#endif + brw_MOV(p, dst, args[0]); + break; + case TGSI_OPCODE_MUL: + brw_MUL(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_POW: + emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_RCP: + emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + case TGSI_OPCODE_RSQ: + emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL); + break; + + case TGSI_OPCODE_SEQ: + emit_seq(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SNE: + emit_sne(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SGE: + emit_sge(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SGT: + emit_sgt(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SLT: + emit_slt(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SLE: + emit_sle(p, dst, args[0], args[1]); + break; + case TGSI_OPCODE_SUB: + brw_ADD(p, dst, args[0], negate(args[1])); + break; + case TGSI_OPCODE_XPD: + emit_xpd(p, dst, args[0], args[1]); + break; +#if 0 + case TGSI_OPCODE_IF: + assert(if_insn < MAX_IFSN); + if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8); + break; + case TGSI_OPCODE_ELSE: + if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]); + break; + case TGSI_OPCODE_ENDIF: + assert(if_insn > 0); + brw_ENDIF(p, if_inst[--if_insn]); + break; + case TGSI_OPCODE_BRA: + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_set_predicate_control_flag_value(p, 0xff); + break; + case TGSI_OPCODE_CAL: + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1uw(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(4)); + inst->Data = &p->store[p->nr_insn]; + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), 
brw_imm_d(1*16)); + break; +#endif + case TGSI_OPCODE_RET: +#if 0 + brw_ADD(p, get_addr_reg(stack_index), + get_addr_reg(stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1uw(stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); +#else + /*brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));*/ +#endif + break; + case TGSI_OPCODE_END: + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + break; + case TGSI_OPCODE_BGNSUB: + case TGSI_OPCODE_ENDSUB: + break; + default: + debug_printf("Unsupport opcode %d in vertex shader\n", inst->Instruction.Opcode); + break; + } + + if (dst_reg->File == TGSI_FILE_OUTPUT + && dst_reg->Index != info->pos_idx + && c->output_regs[dst_reg->Index].used_in_src) + brw_MOV(p, get_dst(c, dst_reg), dst); + + release_tmps(c); +} + +/* Emit the fragment program instructions here. + */ +void brw_vs_emit(struct brw_vs_compile *c) +{ +#define MAX_IFSN 32 + struct brw_compile *p = &c->func; + struct brw_instruction *end_inst; + struct tgsi_parse_context parse; + struct brw_indirect stack_index = brw_indirect(0, 0); + const struct tgsi_token *tokens = c->vp->program.tokens; + struct brw_prog_info prog_info; + unsigned allocated_registers = 0; + memset(&prog_info, 0, sizeof(struct brw_prog_info)); + + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_access_mode(p, BRW_ALIGN_16); + + tgsi_parse_init(&parse, tokens); + /* Message registers can't be read, so copy the output into GRF register + if they are used in source registers */ + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + unsigned i; + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: { + const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; + for (i = 0; i < 3; ++i) { + const struct tgsi_src_register *src = &inst->FullSrcRegisters[i].SrcRegister; + unsigned index = src->Index; + unsigned file = src->File; + if (file == TGSI_FILE_OUTPUT) + 
c->output_regs[index].used_in_src = TRUE; + } + } + break; + default: + /* nothing */ + break; + } + } + tgsi_parse_free(&parse); + + tgsi_parse_init(&parse, tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: { + struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; + process_declaration(decl, &prog_info); + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: { + struct tgsi_full_immediate *imm = &parse.FullToken.FullImmediate; + /*assert(imm->Immediate.Size == 4);*/ + c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u.ImmediateFloat32[0].Float; + c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u.ImmediateFloat32[1].Float; + c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u.ImmediateFloat32[2].Float; + c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u.ImmediateFloat32[3].Float; + c->prog_data.num_imm++; + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: { + struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction; + if (!allocated_registers) { + /* first instruction (declerations finished). 
+ * now that we know what vars are being used allocate + * registers for them.*/ + c->prog_data.num_consts = prog_info.num_consts; + c->prog_data.max_const = prog_info.num_consts + c->prog_data.num_imm; + brw_vs_alloc_regs(c, &prog_info); + + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack)); + brw_set_access_mode(p, BRW_ALIGN_16); + allocated_registers = 1; + } + process_instruction(c, inst, &prog_info); + } + break; + } + } + + end_inst = &p->store[p->nr_insn]; + emit_vertex_write(c, &prog_info); + post_vs_emit(c, end_inst); + tgsi_parse_free(&parse); + +} diff --git a/src/gallium/drivers/i965simple/brw_vs_state.c b/src/gallium/drivers/i965simple/brw_vs_state.c new file mode 100644 index 0000000000..1eaff87892 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_vs_state.c @@ -0,0 +1,103 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + +static void upload_vs_unit( struct brw_context *brw ) +{ + struct brw_vs_unit_state vs; + + memset(&vs, 0, sizeof(vs)); + + /* CACHE_NEW_VS_PROG */ + vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6; + vs.thread0.grf_reg_count = align(brw->vs.prog_data->total_grf, 16) / 16 - 1; + vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; + vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; + vs.thread3.dispatch_grf_start_reg = 1; + + + /* BRW_NEW_URB_FENCE */ + vs.thread4.nr_urb_entries = brw->urb.nr_vs_entries; + vs.thread4.urb_entry_allocation_size = brw->urb.vsize - 1; + vs.thread4.max_threads = MIN2( + MAX2(0, (brw->urb.nr_vs_entries - 6) / 2 - 1), + 15); + + + + if (BRW_DEBUG & DEBUG_SINGLE_THREAD) + vs.thread4.max_threads = 0; + + /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ + if (0 /*brw->attribs.Clip->ClipPlanesEnabled*/) { + /* Note that we read in the userclip planes as well, hence + * clip_start: + */ + vs.thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; + } + else { + vs.thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2; + } + + vs.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + vs.thread3.urb_entry_read_offset = 0; + + /* No samplers for ARB_vp programs: + */ + vs.vs5.sampler_count = 0; + + if (BRW_DEBUG & DEBUG_STATS) + vs.thread4.stats_enable = 1; + + /* Vertex program always enabled: + */ + 
vs.vs6.vs_enable = 1; + + brw->vs.state_gs_offset = brw_cache_data( &brw->cache[BRW_VS_UNIT], &vs ); +} + + +const struct brw_tracked_state brw_vs_unit = { + .dirty = { + .brw = (BRW_NEW_CLIP | + BRW_NEW_CURBE_OFFSETS | + BRW_NEW_URB_FENCE), + .cache = CACHE_NEW_VS_PROG + }, + .update = upload_vs_unit +}; diff --git a/src/gallium/drivers/i965simple/brw_winsys.h b/src/gallium/drivers/i965simple/brw_winsys.h new file mode 100644 index 0000000000..ec1e400418 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_winsys.h @@ -0,0 +1,209 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * \file + * This is the interface that i965simple requires any window system + * hosting it to implement. This is the only include file in i965simple + * which is public. + * + */ + +#ifndef BRW_WINSYS_H +#define BRW_WINSYS_H + + +#include "pipe/p_defines.h" + + +/* Pipe drivers are (meant to be!) independent of both GL and the + * window system. The window system provides a buffer manager and a + * set of additional hooks for things like command buffer submission, + * etc. + * + * There clearly has to be some agreement between the window system + * driver and the hardware driver about the format of command buffers, + * etc. + */ + +struct pipe_buffer; +struct pipe_fence_handle; +struct pipe_winsys; +struct pipe_screen; + + +/* The pipe driver currently understands the following chipsets: + */ +#define PCI_CHIP_I965_G 0x29A2 +#define PCI_CHIP_I965_Q 0x2992 +#define PCI_CHIP_I965_G_1 0x2982 +#define PCI_CHIP_I965_GM 0x2A02 +#define PCI_CHIP_I965_GME 0x2A12 + + +/* These are the names of all the state caches managed by the driver. + * + * When data is uploaded to a buffer with buffer_subdata, we use the + * special version of that function below so that information about + * what type of data this is can be passed to the winsys backend. + * That in turn allows the correct flags to be set in the aub file + * dump to allow human-readable file dumps later on. + */ + +enum brw_cache_id { + BRW_CC_VP, + BRW_CC_UNIT, + BRW_WM_PROG, + BRW_SAMPLER_DEFAULT_COLOR, + BRW_SAMPLER, + BRW_WM_UNIT, + BRW_SF_PROG, + BRW_SF_VP, + BRW_SF_UNIT, + BRW_VS_UNIT, + BRW_VS_PROG, + BRW_GS_UNIT, + BRW_GS_PROG, + BRW_CLIP_VP, + BRW_CLIP_UNIT, + BRW_CLIP_PROG, + BRW_SS_SURFACE, + BRW_SS_SURF_BIND, + + BRW_MAX_CACHE +}; + +#define BRW_CONSTANT_BUFFER BRW_MAX_CACHE + +/** + * Additional winsys interface for i965simple. + * + * It is an over-simple batchbuffer mechanism. 
Will want to improve the + * performance of this, perhaps based on the cmdstream stuff. It + * would be pretty impossible to implement swz on top of this + * interface. + * + * Will also need additions/changes to implement static/dynamic + * indirect state. + */ +struct brw_winsys { + + void (*destroy)(struct brw_winsys *); + + /** + * Reserve space on batch buffer. + * + * Returns a null pointer if there is insufficient space in the batch buffer + * to hold the requested number of dwords and relocations. + * + * The number of dwords should also include the number of relocations. + */ + unsigned *(*batch_start)(struct brw_winsys *sws, + unsigned dwords, + unsigned relocs); + + void (*batch_dword)(struct brw_winsys *sws, + unsigned dword); + + /** + * Emit a relocation to a buffer. + * + * Used not only when the buffer addresses are not pinned, but also to + * ensure refered buffers will not be destroyed until the current batch + * buffer execution is finished. + * + * The access flags is a combination of I915_BUFFER_ACCESS_WRITE and + * I915_BUFFER_ACCESS_READ macros. + */ + void (*batch_reloc)(struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned access_flags, + unsigned delta); + + + /* Not used yet, but really want this: + */ + void (*batch_end)( struct brw_winsys *sws ); + + /** + * Flush the batch buffer. + * + * Fence argument must point to NULL or to a previous fence, and the caller + * must call fence_reference when done with the fence. 
+ */ + void (*batch_flush)(struct brw_winsys *sws, + struct pipe_fence_handle **fence); + + + /* A version of buffer_subdata that includes information for the + * simulator: + */ + void (*buffer_subdata_typed)(struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned long offset, + unsigned long size, + const void *data, + unsigned data_type); + + + /* A cheat so we don't have to think about relocations in a couple + * of places yet: + */ + unsigned (*get_buffer_offset)( struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned flags ); + +}; + +#define BRW_BUFFER_ACCESS_WRITE 0x1 +#define BRW_BUFFER_ACCESS_READ 0x2 + +#define BRW_BUFFER_USAGE_LIT_VERTEX (PIPE_BUFFER_USAGE_CUSTOM << 0) + + +struct pipe_context *brw_create(struct pipe_screen *, + struct brw_winsys *, + unsigned pci_id); + +static inline boolean brw_batchbuffer_data(struct brw_winsys *winsys, + const void *data, + unsigned bytes) +{ + static const unsigned incr = sizeof(unsigned); + uint i; + const unsigned *udata = (const unsigned*)(data); + unsigned size = bytes/incr; + + winsys->batch_start(winsys, size, 0); + for (i = 0; i < size; ++i) { + winsys->batch_dword(winsys, udata[i]); + } + winsys->batch_end(winsys); + + return (i == size); +} +#endif diff --git a/src/gallium/drivers/i965simple/brw_wm.c b/src/gallium/drivers/i965simple/brw_wm.c new file mode 100644 index 0000000000..8de565b96c --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm.c @@ -0,0 +1,209 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_util.h" +#include "brw_wm.h" +#include "brw_eu.h" +#include "brw_state.h" +#include "util/u_memory.h" + + + +static void do_wm_prog( struct brw_context *brw, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key) +{ + struct brw_wm_compile *c = CALLOC_STRUCT(brw_wm_compile); + const unsigned *program; + unsigned program_size; + + c->key = *key; + c->fp = fp; + + c->delta_xy[0] = brw_null_reg(); + c->delta_xy[1] = brw_null_reg(); + c->pixel_xy[0] = brw_null_reg(); + c->pixel_xy[1] = brw_null_reg(); + c->pixel_w = brw_null_reg(); + + + debug_printf("XXXXXXXX FP\n"); + + brw_wm_glsl_emit(c); + + /* get the program + */ + program = brw_get_program(&c->func, &program_size); + + /* + */ + brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG], + &c->key, + sizeof(c->key), + program, + program_size, + &c->prog_data, + &brw->wm.prog_data ); + + FREE(c); +} + + + +static void brw_wm_populate_key( struct brw_context *brw, + struct brw_wm_prog_key *key ) +{ + /* BRW_NEW_FRAGMENT_PROGRAM */ + struct brw_fragment_program *fp = + (struct brw_fragment_program *)brw->attribs.FragmentProgram; + unsigned lookup = 0; + unsigned line_aa; + + memset(key, 0, sizeof(*key)); + + /* Build the index for table lookup + */ + /* BRW_NEW_DEPTH_STENCIL */ + if (fp->info.uses_kill || + brw->attribs.DepthStencil->alpha.enabled) + lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fp->info.writes_z) + lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + if (brw->attribs.DepthStencil->depth.enabled) + lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + + if (brw->attribs.DepthStencil->depth.enabled && + brw->attribs.DepthStencil->depth.writemask) /* ?? 
*/ + lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + + if (brw->attribs.DepthStencil->stencil[0].enabled) { + lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + + if (brw->attribs.DepthStencil->stencil[0].write_mask || + brw->attribs.DepthStencil->stencil[1].write_mask) + lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + } + + /* XXX: when should this be disabled? + */ + if (1) + lookup |= IZ_EARLY_DEPTH_TEST_BIT; + + + line_aa = AA_NEVER; + + /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ + if (brw->attribs.Raster->line_smooth) { + if (brw->reduced_primitive == PIPE_PRIM_LINES) { + line_aa = AA_ALWAYS; + } + else if (brw->reduced_primitive == PIPE_PRIM_TRIANGLES) { + if (brw->attribs.Raster->fill_ccw == PIPE_POLYGON_MODE_LINE) { + line_aa = AA_SOMETIMES; + + if (brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_LINE || + (brw->attribs.Raster->cull_mode == PIPE_WINDING_CW)) + line_aa = AA_ALWAYS; + } + else if (brw->attribs.Raster->fill_cw == PIPE_POLYGON_MODE_LINE) { + line_aa = AA_SOMETIMES; + + if (brw->attribs.Raster->cull_mode == PIPE_WINDING_CCW) + line_aa = AA_ALWAYS; + } + } + } + + brw_wm_lookup_iz(line_aa, + lookup, + key); + + +#if 0 + /* BRW_NEW_SAMPLER + * + * Not doing any of this at the moment: + */ + for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + const struct pipe_sampler_state *unit = brw->attribs.Samplers[i]; + + if (unit) { + + if (unit->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + key->shadowtex_mask |= 1<<i; + } + if (t->Image[0][t->BaseLevel]->InternalFormat == GL_YCBCR_MESA) + key->yuvtex_mask |= 1<<i; + } + } +#endif + + + /* Extra info: + */ + key->program_string_id = fp->id; + +} + + +static void brw_upload_wm_prog( struct brw_context *brw ) +{ + struct brw_wm_prog_key key; + struct brw_fragment_program *fp = (struct brw_fragment_program *) + brw->attribs.FragmentProgram; + + brw_wm_populate_key(brw, &key); + + /* Make an early check for the key. 
+ */ + if (brw_search_cache(&brw->cache[BRW_WM_PROG], + &key, sizeof(key), + &brw->wm.prog_data, + &brw->wm.prog_gs_offset)) + return; + + do_wm_prog(brw, fp, &key); +} + + +const struct brw_tracked_state brw_wm_prog = { + .dirty = { + .brw = (BRW_NEW_FS | + BRW_NEW_REDUCED_PRIMITIVE), + .cache = 0 + }, + .update = brw_upload_wm_prog +}; + diff --git a/src/gallium/drivers/i965simple/brw_wm.h b/src/gallium/drivers/i965simple/brw_wm.h new file mode 100644 index 0000000000..b29c4393f0 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm.h @@ -0,0 +1,142 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef BRW_WM_H +#define BRW_WM_H + + +#include "brw_context.h" +#include "brw_eu.h" + +/* A big lookup table is used to figure out which and how many + * additional regs will inserted before the main payload in the WM + * program execution. These mainly relate to depth and stencil + * processing and the early-depth-test optimization. + */ +#define IZ_PS_KILL_ALPHATEST_BIT 0x1 +#define IZ_PS_COMPUTES_DEPTH_BIT 0x2 +#define IZ_DEPTH_WRITE_ENABLE_BIT 0x4 +#define IZ_DEPTH_TEST_ENABLE_BIT 0x8 +#define IZ_STENCIL_WRITE_ENABLE_BIT 0x10 +#define IZ_STENCIL_TEST_ENABLE_BIT 0x20 +#define IZ_EARLY_DEPTH_TEST_BIT 0x40 +#define IZ_BIT_MAX 0x80 + +#define AA_NEVER 0 +#define AA_SOMETIMES 1 +#define AA_ALWAYS 2 + +struct brw_wm_prog_key { + unsigned source_depth_reg:3; + unsigned aa_dest_stencil_reg:3; + unsigned dest_depth_reg:3; + unsigned nr_depth_regs:3; + unsigned shadowtex_mask:8; + unsigned computes_depth:1; /* could be derived from program string */ + unsigned source_depth_to_render_target:1; + unsigned runtime_check_aads_emit:1; + + unsigned yuvtex_mask:8; + + unsigned program_string_id; +}; + + + + + +#define PROGRAM_INTERNAL_PARAM +#define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */ +#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + PIPE_MAX_ATTRIBS + 3) +#define BRW_WM_MAX_GRF 128 /* hardware limit */ +#define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) +#define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) +#define BRW_WM_MAX_PARAM 256 +#define BRW_WM_MAX_CONST 256 +#define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS + +#define PAYLOAD_DEPTH (PIPE_MAX_ATTRIBS) + +#define MAX_IFSN 32 +#define MAX_LOOP_DEPTH 32 + +struct brw_wm_compile { + struct brw_compile func; + struct brw_wm_prog_key key; + struct brw_wm_prog_data prog_data; /* result */ + + struct brw_fragment_program *fp; + + 
unsigned grf_limit; + unsigned max_wm_grf; + + + struct brw_reg pixel_xy[2]; + struct brw_reg delta_xy[2]; + struct brw_reg pixel_w; + + + struct brw_reg wm_regs[8][32][4]; + + struct brw_reg payload_depth[4]; + struct brw_reg payload_coef[16]; + + struct brw_reg emit_mask_reg; + + struct brw_instruction *if_inst[MAX_IFSN]; + int if_insn; + + struct brw_instruction *loop_inst[MAX_LOOP_DEPTH]; + int loop_insn; + + struct brw_instruction *inst0; + struct brw_instruction *inst1; + + struct brw_reg stack; + struct brw_indirect stack_index; + + unsigned reg_index; + + unsigned tmp_start; + unsigned tmp_index; +}; + + + +void brw_wm_lookup_iz( unsigned line_aa, + unsigned lookup, + struct brw_wm_prog_key *key ); + +void brw_wm_glsl_emit(struct brw_wm_compile *c); +void brw_wm_emit_decls(struct brw_wm_compile *c); + +#endif diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c new file mode 100644 index 0000000000..d50e66f613 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -0,0 +1,392 @@ + +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" + +static struct brw_reg alloc_tmp(struct brw_wm_compile *c) +{ + c->tmp_index++; + c->reg_index = MAX2(c->reg_index, c->tmp_start + c->tmp_index); + return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); +} + +static void release_tmps(struct brw_wm_compile *c) +{ + c->tmp_index = 0; +} + + + +static int is_null( struct brw_reg reg ) +{ + return (reg.file == BRW_ARCHITECTURE_REGISTER_FILE && + reg.nr == BRW_ARF_NULL); +} + +static void emit_pixel_xy( struct brw_wm_compile *c ) +{ + if (is_null(c->pixel_xy[0])) { + + struct brw_compile *p = &c->func; + struct brw_reg r1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); + + c->pixel_xy[0] = vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); + c->pixel_xy[1] = 
vec8(retype(alloc_tmp(c), BRW_REGISTER_TYPE_UW)); + + /* Calculate pixel centers by adding 1 or 0 to each of the + * micro-tile coordinates passed in r1. + */ + brw_ADD(p, + c->pixel_xy[0], + stride(suboffset(r1_uw, 4), 2, 4, 0), + brw_imm_v(0x10101010)); + + brw_ADD(p, + c->pixel_xy[1], + stride(suboffset(r1_uw, 5), 2, 4, 0), + brw_imm_v(0x11001100)); + } +} + + + + + + +static void emit_delta_xy( struct brw_wm_compile *c ) +{ + if (is_null(c->delta_xy[0])) { + struct brw_compile *p = &c->func; + struct brw_reg r1 = brw_vec1_grf(1, 0); + + emit_pixel_xy(c); + + c->delta_xy[0] = alloc_tmp(c); + c->delta_xy[1] = alloc_tmp(c); + + /* Calc delta X,Y by subtracting origin in r1 from the pixel + * centers. + */ + brw_ADD(p, + c->delta_xy[0], + retype(c->pixel_xy[0], BRW_REGISTER_TYPE_UW), + negate(r1)); + + brw_ADD(p, + c->delta_xy[1], + retype(c->pixel_xy[1], BRW_REGISTER_TYPE_UW), + negate(suboffset(r1,1))); + } +} + + + +#if 0 +static void emit_pixel_w( struct brw_wm_compile *c ) +{ + if (is_null(c->pixel_w)) { + struct brw_compile *p = &c->func; + + struct brw_reg interp_wpos = c->coef_wpos; + + c->pixel_w = alloc_tmp(c); + + emit_delta_xy(c); + + /* Calc 1/w - just linterp wpos[3] optimized by putting the + * result straight into a message reg. 
+ */ + struct brw_reg interp3 = brw_vec1_grf(interp_wpos.nr+1, 4); + brw_LINE(p, brw_null_reg(), interp3, c->delta_xy[0]); + brw_MAC(p, brw_message_reg(2), suboffset(interp3, 1), c->delta_xy[1]); + + /* Calc w */ + brw_math_16( p, + c->pixel_w, + BRW_MATH_FUNCTION_INV, + BRW_MATH_SATURATE_NONE, + 2, + brw_null_reg(), + BRW_MATH_PRECISION_FULL); + } +} +#endif + + +static void emit_cinterp(struct brw_wm_compile *c, + int idx, + int mask ) +{ + struct brw_compile *p = &c->func; + struct brw_reg interp[4]; + struct brw_reg coef = c->payload_coef[idx]; + int i; + + interp[0] = brw_vec1_grf(coef.nr, 0); + interp[1] = brw_vec1_grf(coef.nr, 4); + interp[2] = brw_vec1_grf(coef.nr+1, 0); + interp[3] = brw_vec1_grf(coef.nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i]; + brw_MOV(p, dst, suboffset(interp[i],3)); + } + } +} + +static void emit_linterp(struct brw_wm_compile *c, + int idx, + int mask ) +{ + struct brw_compile *p = &c->func; + struct brw_reg interp[4]; + struct brw_reg coef = c->payload_coef[idx]; + int i; + + emit_delta_xy(c); + + interp[0] = brw_vec1_grf(coef.nr, 0); + interp[1] = brw_vec1_grf(coef.nr, 4); + interp[2] = brw_vec1_grf(coef.nr+1, 0); + interp[3] = brw_vec1_grf(coef.nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + struct brw_reg dst = c->wm_regs[TGSI_FILE_INPUT][idx][i]; + brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]); + brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); + } + } +} + +#if 0 +static void emit_pinterp(struct brw_wm_compile *c, + int idx, + int mask ) +{ + struct brw_compile *p = &c->func; + struct brw_reg interp[4]; + struct brw_reg coef = c->payload_coef[idx]; + int i; + + get_delta_xy(c); + get_pixel_w(c); + + interp[0] = brw_vec1_grf(coef.nr, 0); + interp[1] = brw_vec1_grf(coef.nr, 4); + interp[2] = brw_vec1_grf(coef.nr+1, 0); + interp[3] = brw_vec1_grf(coef.nr+1, 4); + + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + struct 
brw_reg dst = allocate_reg(c, TGSI_FILE_INPUT, idx, i); + brw_LINE(p, brw_null_reg(), interp[i], c->delta_xy[0]); + brw_MAC(p, dst, suboffset(interp[i],1), c->delta_xy[1]); + brw_MUL(p, dst, dst, c->pixel_w); + } + } +} +#endif + + + +#if 0 +static void emit_wpos( ) +{ + struct prog_dst_register dst = dst_reg(PROGRAM_INPUT, idx); + struct tgsi_full_src_register interp = src_reg(PROGRAM_PAYLOAD, idx); + struct tgsi_full_src_register deltas = get_delta_xy(c); + struct tgsi_full_src_register arg2; + unsigned opcode; + + opcode = WM_LINTERP; + arg2 = src_undef(); + + /* Have to treat wpos.xy specially: + */ + emit_op(c, + WM_WPOSXY, + dst_mask(dst, WRITEMASK_XY), + 0, 0, 0, + get_pixel_xy(c), + src_undef(), + src_undef()); + + dst = dst_mask(dst, WRITEMASK_ZW); + + /* PROGRAM_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw + */ + emit_op(c, + WM_LINTERP, + dst, + 0, 0, 0, + interp, + deltas, + arg2); +} +#endif + + + + +/* Perform register allocation: + * + * -- r0??? + * -- passthrough depth regs (and stencil/aa??) + * -- curbe ?? + * -- inputs (coefficients) + * + * Use a totally static register allocation. This will perform poorly + * but is an easy way to get started (again). + */ +static void prealloc_reg(struct brw_wm_compile *c) +{ + int i, j; + int nr_curbe_regs = 0; + + /* R0, then some depth related regs: + */ + for (i = 0; i < c->key.nr_depth_regs; i++) { + c->payload_depth[i] = brw_vec8_grf(i*2, 0); + c->reg_index += 2; + } + + + /* Then a copy of our part of the CURBE entry: + */ + { + int nr_constants = c->fp->info.file_max[TGSI_FILE_CONSTANT] + 1; + int index = 0; + + /* XXX number of constants, or highest numbered constant? 
*/ + assert(nr_constants == c->fp->info.file_count[TGSI_FILE_CONSTANT]); + + c->prog_data.max_const = 4*nr_constants; + for (i = 0; i < nr_constants; i++) { + for (j = 0; j < 4; j++, index++) + c->wm_regs[TGSI_FILE_CONSTANT][i][j] = brw_vec1_grf(c->reg_index + index/8, + index%8); + } + + nr_curbe_regs = 2*((4*nr_constants+15)/16); + c->reg_index += nr_curbe_regs; + } + + /* Adjust for parameter coefficients for position, which are + * currently always provided. + */ +// c->position_coef[i] = brw_vec8_grf(c->reg_index, 0); + c->reg_index += 2; + + /* Next we receive the plane coefficients for parameter + * interpolation: + */ + assert(c->fp->info.file_max[TGSI_FILE_INPUT] == c->fp->info.num_inputs); + for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++) { + c->payload_coef[i] = brw_vec8_grf(c->reg_index, 0); + c->reg_index += 2; + } + + c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; + c->prog_data.urb_read_length = (c->fp->info.num_inputs + 1) * 2; + c->prog_data.curb_read_length = nr_curbe_regs; + + /* That's the end of the payload, now we can start allocating registers. + */ + c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); + c->reg_index++; + + c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, c->reg_index, 0); + c->reg_index += 2; + + /* Now allocate room for the interpolated inputs and staging + * registers for the outputs: + */ + /* XXX do we want to loop over the _number_ of inputs/outputs or loop + * to the highest input/output index that's used? + * Probably the same, actually. 
+ */ + assert(c->fp->info.file_max[TGSI_FILE_INPUT] + 1 == c->fp->info.num_inputs); + assert(c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1 == c->fp->info.num_outputs); + for (i = 0; i < c->fp->info.file_max[TGSI_FILE_INPUT] + 1; i++) + for (j = 0; j < 4; j++) + c->wm_regs[TGSI_FILE_INPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); + + for (i = 0; i < c->fp->info.file_max[TGSI_FILE_OUTPUT] + 1; i++) + for (j = 0; j < 4; j++) + c->wm_regs[TGSI_FILE_OUTPUT][i][j] = brw_vec8_grf( c->reg_index++, 0 ); + + /* Beyond this we should only need registers for internal temporaries: + */ + c->tmp_start = c->reg_index; +} + + + + + +/* Need to interpolate fragment program inputs in as a preamble to the + * shader. A more sophisticated compiler would do this on demand, but + * we'll do it up front: + */ +void brw_wm_emit_decls(struct brw_wm_compile *c) +{ + struct tgsi_parse_context parse; + int done = 0; + + prealloc_reg(c); + + tgsi_parse_init( &parse, c->fp->program.tokens ); + + while( !done && + !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; + unsigned first = decl->DeclarationRange.First; + unsigned last = decl->DeclarationRange.Last; + unsigned mask = decl->Declaration.UsageMask; /* ? 
*/ + unsigned i; + + if (decl->Declaration.File != TGSI_FILE_INPUT) + break; + + for( i = first; i <= last; i++ ) { + switch (decl->Declaration.Interpolate) { + case TGSI_INTERPOLATE_CONSTANT: + emit_cinterp(c, i, mask); + break; + + case TGSI_INTERPOLATE_LINEAR: + emit_linterp(c, i, mask); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + //emit_pinterp(c, i, mask); + emit_linterp(c, i, mask); + break; + } + } + break; + } + case TGSI_TOKEN_TYPE_IMMEDIATE: + case TGSI_TOKEN_TYPE_INSTRUCTION: + default: + done = 1; + break; + } + } + + tgsi_parse_free (&parse); + + release_tmps(c); +} diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c new file mode 100644 index 0000000000..ab6410aa60 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c @@ -0,0 +1,1076 @@ + +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" + + + +static int get_scalar_dst_index(struct tgsi_full_instruction *inst) +{ + struct tgsi_dst_register dst = inst->FullDstRegisters[0].DstRegister; + int i; + for (i = 0; i < 4; i++) + if (dst.WriteMask & (1<<i)) + break; + return i; +} + +static struct brw_reg alloc_tmp(struct brw_wm_compile *c) +{ + c->tmp_index++; + c->reg_index = MAX2(c->reg_index, c->tmp_index); + return brw_vec8_grf(c->tmp_start + c->tmp_index, 0); +} + +static void release_tmps(struct brw_wm_compile *c) +{ + c->tmp_index = 0; +} + + +static struct brw_reg +get_reg(struct brw_wm_compile *c, int file, int index, int component ) +{ + switch (file) { + case TGSI_FILE_NULL: + return brw_null_reg(); + + case TGSI_FILE_SAMPLER: + /* Should never get here: + */ + assert (0); + return brw_null_reg(); + + case TGSI_FILE_IMMEDIATE: + /* These need a different path: + */ + assert(0); + return brw_null_reg(); + + + case TGSI_FILE_CONSTANT: + case TGSI_FILE_INPUT: + case TGSI_FILE_OUTPUT: + case 
TGSI_FILE_TEMPORARY: + case TGSI_FILE_ADDRESS: + return c->wm_regs[file][index][component]; + + default: + assert(0); + return brw_null_reg(); + } +} + + +static struct brw_reg get_dst_reg(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, + int component) +{ + return get_reg(c, + inst->FullDstRegisters[0].DstRegister.File, + inst->FullDstRegisters[0].DstRegister.Index, + component); +} + +static int get_swz( struct tgsi_src_register src, int index ) +{ + switch (index & 3) { + case 0: return src.SwizzleX; + case 1: return src.SwizzleY; + case 2: return src.SwizzleZ; + case 3: return src.SwizzleW; + default: return 0; + } +} + +static int get_ext_swz( struct tgsi_src_register_ext_swz src, int index ) +{ + switch (index & 3) { + case 0: return src.ExtSwizzleX; + case 1: return src.ExtSwizzleY; + case 2: return src.ExtSwizzleZ; + case 3: return src.ExtSwizzleW; + default: return 0; + } +} + +static struct brw_reg get_src_reg(struct brw_wm_compile *c, + struct tgsi_full_src_register *src, + int index) +{ + struct brw_reg reg; + int component = index; + int neg = 0; + int abs = 0; + + if (src->SrcRegister.Negate) + neg = 1; + + component = get_swz(src->SrcRegister, component); + + /* Yes, there are multiple negates: + */ + switch (component & 3) { + case 0: neg ^= src->SrcRegisterExtSwz.NegateX; break; + case 1: neg ^= src->SrcRegisterExtSwz.NegateY; break; + case 2: neg ^= src->SrcRegisterExtSwz.NegateZ; break; + case 3: neg ^= src->SrcRegisterExtSwz.NegateW; break; + } + + /* And multiple swizzles, fun isn't it: + */ + component = get_ext_swz(src->SrcRegisterExtSwz, component); + + /* Not handling indirect lookups yet: + */ + assert(src->SrcRegister.Indirect == 0); + + /* Don't know what dimension means: + */ + assert(src->SrcRegister.Dimension == 0); + + /* Will never handle any of this stuff: + */ + assert(src->SrcRegisterExtMod.Complement == 0); + assert(src->SrcRegisterExtMod.Bias == 0); + assert(src->SrcRegisterExtMod.Scale2X == 0); + + if 
(src->SrcRegisterExtMod.Absolute) + abs = 1; + + /* Another negate! This is a post-absolute negate, which we + * can't do. Need to clean the crap out of tgsi somehow. + */ + assert(src->SrcRegisterExtMod.Negate == 0); + + switch( component ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + reg = get_reg(c, + src->SrcRegister.File, + src->SrcRegister.Index, + component ); + + if (neg) + reg = negate(reg); + + if (abs) + reg = brw_abs(reg); + + break; + + /* XXX: this won't really work in the general case, but we know + * that the extended swizzle is only allowed in the SWZ + * instruction (right??), in which case using an immediate + * directly will work. + */ + case TGSI_EXTSWIZZLE_ZERO: + reg = brw_imm_f(0); + break; + + case TGSI_EXTSWIZZLE_ONE: + if (neg && !abs) + reg = brw_imm_f(-1.0); + else + reg = brw_imm_f(1.0); + break; + + default: + assert(0); + break; + } + + + return reg; +} + +static void emit_abs( struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + + int i; + struct brw_compile *p = &c->func; + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + struct brw_reg src, dst; + dst = get_dst_reg(c, inst, i); + src = get_src_reg(c, &inst->FullSrcRegisters[0], i); + brw_MOV(p, dst, brw_abs(src)); /* NOTE */ + } + } + brw_set_saturate(p, 0); +} + + +static void emit_xpd(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + int i; + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + for (i = 0; i < 4; i++) { + unsigned i2 = (i+2)%3; + unsigned i1 = (i+1)%3; + if (mask & (1<<i)) { + struct brw_reg src0, src1, dst; + dst = get_dst_reg(c, inst, i); + src0 = negate(get_src_reg(c, &inst->FullSrcRegisters[0], i2)); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i1); + brw_MUL(p, 
brw_null_reg(), src0, src1); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i1); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i2); + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + brw_MAC(p, dst, src0, src1); + brw_set_saturate(p, 0); + } + } + brw_set_saturate(p, 0); +} + +static void emit_dp3(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_reg src0[3], src1[3], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 3; i++) { + src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); + } + + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, 0); +} + +static void emit_dp4(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); + } + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, brw_null_reg(), src0[2], src1[2]); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 
1 : 0); + brw_MAC(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); +} + +static void emit_dph(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_reg src0[4], src1[4], dst; + int i; + struct brw_compile *p = &c->func; + for (i = 0; i < 4; i++) { + src0[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1[i] = get_src_reg(c, &inst->FullSrcRegisters[1], i); + } + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MUL(p, brw_null_reg(), src0[0], src1[0]); + brw_MAC(p, brw_null_reg(), src0[1], src1[1]); + brw_MAC(p, dst, src0[2], src1[2]); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_ADD(p, dst, src0[3], src1[3]); + brw_set_saturate(p, 0); +} + +static void emit_math1(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, unsigned func) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + brw_MOV(p, brw_message_reg(2), src0); + brw_math(p, + dst, + func, + ((inst->Instruction.Saturate != TGSI_SAT_NONE) + ? BRW_MATH_SATURATE_SATURATE + : BRW_MATH_SATURATE_NONE), + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} + + +static void emit_alu2(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, + unsigned opcode) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, src1, dst; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + int i; + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 
1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_alu2(p, opcode, dst, src0, src1); + } + } + brw_set_saturate(p, 0); +} + + +static void emit_alu1(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, + unsigned opcode) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, dst; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + int i; + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + for (i = 0 ; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + brw_alu1(p, opcode, dst, src0); + } + } + if (inst->Instruction.Saturate != TGSI_SAT_NONE) + brw_set_saturate(p, 0); +} + + +static void emit_max(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg src0, src1, dst; + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src0, src1); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 
1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } + brw_pop_insn_state(p); +} + +static void emit_min(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg src0, src1, dst; + int i; + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_MOV(p, dst, src0); + brw_set_saturate(p, 0); + + brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, src1, src0); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 1 : 0); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, src1); + brw_set_saturate(p, 0); + brw_set_predicate_control_flag_value(p, 0xff); + } + } + brw_pop_insn_state(p); +} + +static void emit_pow(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst, src0, src1; + dst = get_dst_reg(c, inst, get_scalar_dst_index(inst)); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], 0); + + brw_MOV(p, brw_message_reg(2), src0); + brw_MOV(p, brw_message_reg(3), src1); + + brw_math(p, + dst, + BRW_MATH_FUNCTION_POW, + (inst->Instruction.Saturate != TGSI_SAT_NONE + ? 
BRW_MATH_SATURATE_SATURATE + : BRW_MATH_SATURATE_NONE), + 2, + brw_null_reg(), + BRW_MATH_DATA_VECTOR, + BRW_MATH_PRECISION_FULL); +} + +static void emit_lrp(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg dst, tmp1, tmp2, src0, src1, src2; + int i; + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + + if (src1.nr == dst.nr) { + tmp1 = alloc_tmp(c); + brw_MOV(p, tmp1, src1); + } else + tmp1 = src1; + + src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i); + if (src2.nr == dst.nr) { + tmp2 = alloc_tmp(c); + brw_MOV(p, tmp2, src2); + } else + tmp2 = src2; + + brw_ADD(p, dst, negate(src0), brw_imm_f(1.0)); + brw_MUL(p, brw_null_reg(), dst, tmp2); + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 
1 : 0); + brw_MAC(p, dst, src0, tmp1); + brw_set_saturate(p, 0); + } + release_tmps(c); + } +} + +static void emit_kil(struct brw_wm_compile *c) +{ + struct brw_compile *p = &c->func; + struct brw_reg depth = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); //IMASK + brw_AND(p, depth, c->emit_mask_reg, depth); + brw_pop_insn_state(p); +} + +static void emit_mad(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg dst, src0, src1, src2; + int i; + + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + src2 = get_src_reg(c, &inst->FullSrcRegisters[2], i); + brw_MUL(p, dst, src0, src1); + + brw_set_saturate(p, (inst->Instruction.Saturate != TGSI_SAT_NONE) ? 
1 : 0); + brw_ADD(p, dst, dst, src2); + brw_set_saturate(p, 0); + } + } +} + +static void emit_sop(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst, unsigned cond) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg dst, src0, src1; + int i; + + brw_push_insn_state(p); + for (i = 0; i < 4; i++) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], i); + src1 = get_src_reg(c, &inst->FullSrcRegisters[1], i); + brw_CMP(p, brw_null_reg(), cond, src0, src1); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_MOV(p, dst, brw_imm_f(0.0)); + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + brw_MOV(p, dst, brw_imm_f(1.0)); + } + } + brw_pop_insn_state(p); +} + + +static void emit_ddx(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst; + struct brw_reg src0, w; + unsigned nr, i; + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + w = get_src_reg(c, &inst->FullSrcRegisters[1], 3); + nr = src0.nr; + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV(p, dst, interp[i]); + brw_MUL(p, dst, dst, w); + } + } + brw_set_saturate(p, 0); +} + +static void emit_ddy(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + unsigned mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + struct brw_reg interp[4]; + struct brw_reg dst; + struct brw_reg src0, w; + unsigned nr, i; + + src0 = get_src_reg(c, &inst->FullSrcRegisters[0], 0); + nr = src0.nr; + w = get_src_reg(c, 
&inst->FullSrcRegisters[1], 3); + interp[0] = brw_vec1_grf(nr, 0); + interp[1] = brw_vec1_grf(nr, 4); + interp[2] = brw_vec1_grf(nr+1, 0); + interp[3] = brw_vec1_grf(nr+1, 4); + brw_set_saturate(p, inst->Instruction.Saturate != TGSI_SAT_NONE); + for(i = 0; i < 4; i++ ) { + if (mask & (1<<i)) { + dst = get_dst_reg(c, inst, i); + brw_MOV(p, dst, suboffset(interp[i], 1)); + brw_MUL(p, dst, dst, w); + } + } + brw_set_saturate(p, 0); +} + +/* TODO + BIAS on SIMD8 not workind yet... +*/ +static void emit_txb(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_reg payload_reg = c->payload_depth[0]; + struct brw_reg dst[4], src[4]; + unsigned i; + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + +#if 0 + switch (inst->TexSrcTarget) { + case TEXTURE_1D_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); + break; + default: + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), src[2]); + break; + } +#else + brw_MOV(p, brw_message_reg(2), src[0]); + brw_MOV(p, brw_message_reg(3), src[1]); + brw_MOV(p, brw_message_reg(4), brw_imm_f(0)); +#endif + + brw_MOV(p, brw_message_reg(5), src[3]); + brw_MOV(p, brw_message_reg(6), brw_imm_f(0)); + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(payload_reg, BRW_REGISTER_TYPE_UW), + inst->TexSrcUnit + 1, /* surface */ + inst->TexSrcUnit, /* sampler */ + inst->FullDstRegisters[0].DstRegister.WriteMask, + BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS, + 4, + 4, + 0); +#endif +} + +static void emit_tex(struct 
brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ +#if 0 + struct brw_compile *p = &c->func; + struct brw_reg payload_reg = c->payload_depth[0]; + struct brw_reg dst[4], src[4]; + unsigned msg_len; + unsigned i, nr; + unsigned emit; + boolean shadow = (c->key.shadowtex_mask & (1<<inst->TexSrcUnit)) ? 1 : 0; + + for (i = 0; i < 4; i++) + dst[i] = get_dst_reg(c, inst, i); + for (i = 0; i < 4; i++) + src[i] = get_src_reg(c, &inst->FullSrcRegisters[0], i); + +#if 0 + switch (inst->TexSrcTarget) { + case TEXTURE_1D_INDEX: + emit = WRITEMASK_X; + nr = 1; + break; + case TEXTURE_2D_INDEX: + case TEXTURE_RECT_INDEX: + emit = WRITEMASK_XY; + nr = 2; + break; + default: + emit = WRITEMASK_XYZ; + nr = 3; + break; + } +#else + emit = WRITEMASK_XY; + nr = 2; +#endif + + msg_len = 1; + + for (i = 0; i < nr; i++) { + static const unsigned swz[4] = {0,1,2,2}; + if (emit & (1<<i)) + brw_MOV(p, brw_message_reg(msg_len+1), src[swz[i]]); + else + brw_MOV(p, brw_message_reg(msg_len+1), brw_imm_f(0)); + msg_len += 1; + } + + if (shadow) { + brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); + brw_MOV(p, brw_message_reg(6), src[2]); + } + + brw_SAMPLE(p, + retype(vec8(dst[0]), BRW_REGISTER_TYPE_UW), + 1, + retype(payload_reg, BRW_REGISTER_TYPE_UW), + inst->TexSrcUnit + 1, /* surface */ + inst->TexSrcUnit, /* sampler */ + inst->FullDstRegisters[0].DstRegister.WriteMask, + BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE, + 4, + shadow ? 6 : 4, + 0); + + if (shadow) + brw_MOV(p, dst[3], brw_imm_f(1.0)); +#endif +} + + + + + + + + +static void emit_fb_write(struct brw_wm_compile *c, + struct tgsi_full_instruction *inst) +{ + struct brw_compile *p = &c->func; + int nr = 2; + int channel; + int base_reg = 0; + + // src0 = output color + // src1 = payload_depth[0] + // src2 = output depth + // dst = ??? 
+ + + + /* Reserve a space for AA - may not be needed: + */ + if (c->key.aa_dest_stencil_reg) + nr += 1; + + { + brw_push_insn_state(p); + for (channel = 0; channel < 4; channel++) { + struct brw_reg src0 = c->wm_regs[TGSI_FILE_OUTPUT][0][channel]; + + /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */ + /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */ + brw_MOV(p, brw_message_reg(nr + channel), src0); + } + /* skip over the regs populated above: */ + nr += 8; + brw_pop_insn_state(p); + } + + + /* Pass through control information: + */ + /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ + { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ + brw_MOV(p, + brw_message_reg(base_reg + 1), + brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + } + + /* Send framebuffer write message: */ + brw_fb_WRITE(p, + retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), + base_reg, + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + 0, /* render surface always 0 */ + nr, + 0, + 1); + +} + + +static void brw_wm_emit_instruction( struct brw_wm_compile *c, + struct tgsi_full_instruction *inst ) +{ + struct brw_compile *p = &c->func; + +#if 0 + if (inst->CondUpdate) + brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); + else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); +#else + brw_set_conditionalmod(p, BRW_CONDITIONAL_NONE); +#endif + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + emit_abs(c, inst); + break; + case TGSI_OPCODE_ADD: + emit_alu2(c, inst, BRW_OPCODE_ADD); + break; + case TGSI_OPCODE_SUB: + assert(0); +// emit_alu2(c, inst, BRW_OPCODE_SUB); + break; + case TGSI_OPCODE_FRC: + emit_alu1(c, inst, BRW_OPCODE_FRC); + break; + case TGSI_OPCODE_FLR: + assert(0); +// emit_alu1(c, inst, BRW_OPCODE_FLR); + break; + case TGSI_OPCODE_LRP: + emit_lrp(c, inst); + break; + case TGSI_OPCODE_INT: + emit_alu1(c, inst, BRW_OPCODE_RNDD); + break; + case TGSI_OPCODE_MOV: + emit_alu1(c, inst, BRW_OPCODE_MOV); + break; + case 
TGSI_OPCODE_DP3: + emit_dp3(c, inst); + break; + case TGSI_OPCODE_DP4: + emit_dp4(c, inst); + break; + case TGSI_OPCODE_XPD: + emit_xpd(c, inst); + break; + case TGSI_OPCODE_DPH: + emit_dph(c, inst); + break; + case TGSI_OPCODE_RCP: + emit_math1(c, inst, BRW_MATH_FUNCTION_INV); + break; + case TGSI_OPCODE_RSQ: + emit_math1(c, inst, BRW_MATH_FUNCTION_RSQ); + break; + case TGSI_OPCODE_SIN: + emit_math1(c, inst, BRW_MATH_FUNCTION_SIN); + break; + case TGSI_OPCODE_COS: + emit_math1(c, inst, BRW_MATH_FUNCTION_COS); + break; + case TGSI_OPCODE_EX2: + emit_math1(c, inst, BRW_MATH_FUNCTION_EXP); + break; + case TGSI_OPCODE_LG2: + emit_math1(c, inst, BRW_MATH_FUNCTION_LOG); + break; + case TGSI_OPCODE_MAX: + emit_max(c, inst); + break; + case TGSI_OPCODE_MIN: + emit_min(c, inst); + break; + case TGSI_OPCODE_DDX: + emit_ddx(c, inst); + break; + case TGSI_OPCODE_DDY: + emit_ddy(c, inst); + break; + case TGSI_OPCODE_SLT: + emit_sop(c, inst, BRW_CONDITIONAL_L); + break; + case TGSI_OPCODE_SLE: + emit_sop(c, inst, BRW_CONDITIONAL_LE); + break; + case TGSI_OPCODE_SGT: + emit_sop(c, inst, BRW_CONDITIONAL_G); + break; + case TGSI_OPCODE_SGE: + emit_sop(c, inst, BRW_CONDITIONAL_GE); + break; + case TGSI_OPCODE_SEQ: + emit_sop(c, inst, BRW_CONDITIONAL_EQ); + break; + case TGSI_OPCODE_SNE: + emit_sop(c, inst, BRW_CONDITIONAL_NEQ); + break; + case TGSI_OPCODE_MUL: + emit_alu2(c, inst, BRW_OPCODE_MUL); + break; + case TGSI_OPCODE_POW: + emit_pow(c, inst); + break; + case TGSI_OPCODE_MAD: + emit_mad(c, inst); + break; + case TGSI_OPCODE_TEX: + emit_tex(c, inst); + break; + case TGSI_OPCODE_TXB: + emit_txb(c, inst); + break; + case TGSI_OPCODE_TEXKILL: + emit_kil(c); + break; + case TGSI_OPCODE_IF: + assert(c->if_insn < MAX_IFSN); + c->if_inst[c->if_insn++] = brw_IF(p, BRW_EXECUTE_8); + break; + case TGSI_OPCODE_ELSE: + c->if_inst[c->if_insn-1] = brw_ELSE(p, c->if_inst[c->if_insn-1]); + break; + case TGSI_OPCODE_ENDIF: + assert(c->if_insn > 0); + brw_ENDIF(p, c->if_inst[--c->if_insn]); + 
break; + case TGSI_OPCODE_BGNSUB: + case TGSI_OPCODE_ENDSUB: + break; + case TGSI_OPCODE_CAL: + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_ADD(p, deref_1ud(c->stack_index, 0), brw_ip_reg(), brw_imm_d(3*16)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_ADD(p, + get_addr_reg(c->stack_index), + get_addr_reg(c->stack_index), brw_imm_d(4)); +// orig_inst = inst->Data; +// orig_inst->Data = &p->store[p->nr_insn]; + assert(0); + brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16)); + brw_pop_insn_state(p); + break; + + case TGSI_OPCODE_RET: +#if 0 + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_ADD(p, + get_addr_reg(c->stack_index), + get_addr_reg(c->stack_index), brw_imm_d(-4)); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_MOV(p, brw_ip_reg(), deref_1ud(c->stack_index, 0)); + brw_set_access_mode(p, BRW_ALIGN_16); + brw_pop_insn_state(p); +#else + emit_fb_write(c, inst); +#endif + + break; + case TGSI_OPCODE_LOOP: + c->loop_inst[c->loop_insn++] = brw_DO(p, BRW_EXECUTE_8); + break; + case TGSI_OPCODE_BRK: + brw_BREAK(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case TGSI_OPCODE_CONT: + brw_CONT(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + break; + case TGSI_OPCODE_ENDLOOP: + c->loop_insn--; + c->inst0 = c->inst1 = brw_WHILE(p, c->loop_inst[c->loop_insn]); + /* patch all the BREAK instructions from + last BEGINLOOP */ + while (c->inst0 > c->loop_inst[c->loop_insn]) { + c->inst0--; + if (c->inst0->header.opcode == BRW_OPCODE_BREAK) { + c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0 + 1; + c->inst0->bits3.if_else.pop_count = 0; + } else if (c->inst0->header.opcode == BRW_OPCODE_CONTINUE) { + c->inst0->bits3.if_else.jump_count = c->inst1 - c->inst0; + c->inst0->bits3.if_else.pop_count = 0; + } + } + break; + case TGSI_OPCODE_END: + emit_fb_write(c, inst); + break; + + default: + debug_printf("unsupported IR in fragment shader 
%d\n", + inst->Instruction.Opcode); + } +#if 0 + if (inst->CondUpdate) + brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); + else + brw_set_predicate_control(p, BRW_PREDICATE_NONE); +#endif +} + + + + + + +void brw_wm_glsl_emit(struct brw_wm_compile *c) +{ + struct tgsi_parse_context parse; + struct brw_compile *p = &c->func; + + brw_init_compile(&c->func); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + + c->reg_index = 0; + c->if_insn = 0; + c->loop_insn = 0; + c->stack_index = brw_indirect(0,0); + + /* Do static register allocation and parameter interpolation: + */ + brw_wm_emit_decls( c ); + + /* Emit the actual program. All done with very direct translation, + * hopefully we can improve on this shortly... + */ + brw_MOV(p, get_addr_reg(c->stack_index), brw_address(c->stack)); + + tgsi_parse_init( &parse, c->fp->program.tokens ); + + while( !tgsi_parse_end_of_tokens( &parse ) ) + { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* already done */ + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* not handled yet */ + assert(0); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + brw_wm_emit_instruction(c, &parse.FullToken.FullInstruction); + break; + + default: + assert( 0 ); + } + } + + tgsi_parse_free (&parse); + + /* Fix up call targets: + */ +#if 0 + { + unsigned nr_insns = c->fp->program.Base.NumInstructions; + unsigned insn, target_insn; + struct tgsi_full_instruction *inst1, *inst2; + struct brw_instruction *brw_inst1, *brw_inst2; + int offset; + for (insn = 0; insn < nr_insns; insn++) { + inst1 = &c->fp->program.Base.Instructions[insn]; + brw_inst1 = inst1->Data; + switch (inst1->Opcode) { + case TGSI_OPCODE_CAL: + target_insn = inst1->BranchTarget; + inst2 = &c->fp->program.Base.Instructions[target_insn]; + brw_inst2 = inst2->Data; + offset = brw_inst2 - brw_inst1; + brw_set_src1(brw_inst1, brw_imm_d(offset*16)); + break; + default: + break; + } + } + } +#endif + + 
c->prog_data.total_grf = c->reg_index; + c->prog_data.total_scratch = 0; +} diff --git a/src/gallium/drivers/i965simple/brw_wm_iz.c b/src/gallium/drivers/i965simple/brw_wm_iz.c new file mode 100644 index 0000000000..6c5f25bf39 --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_iz.c @@ -0,0 +1,214 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_wm.h" + + +#undef P /* prompted depth */ +#undef C /* computed */ +#undef N /* non-promoted? 
*/

/* Single-letter codes for the `mode` column of wm_iz_table below. */
#define P 0
#define C 1
#define N 2

/* Lookup table indexed by the IZ_* bit combination passed to
 * brw_wm_lookup_iz() as `lookup`.  Each initializer lists, in order:
 *
 *   mode        - P, C or N (see the macros above)
 *   sd_present  - a source-depth register is reserved in the payload
 *   sd_to_rt    - sets key->source_depth_to_render_target
 *   dd_present  - a destination-depth register is reserved
 *   ds_present  - a destination-stencil (AA) register is reserved
 *
 * NOTE(review): the "sd"/"dd"/"ds" expansions are taken from the key
 * fields they feed in brw_wm_lookup_iz(); `mode` is not read in this
 * file - confirm its consumers elsewhere.
 */
const struct {
   unsigned mode:2;
   unsigned sd_present:1;
   unsigned sd_to_rt:1;
   unsigned dd_present:1;
   unsigned ds_present:1;
} wm_iz_table[IZ_BIT_MAX] =
{
   { P, 0, 0, 0, 0 },
   { P, 0, 0, 0, 0 },
   { C, 0, 1, 0, 0 },
   { C, 0, 1, 0, 0 },
   { C, 1, 1, 0, 0 },
   { C, 1, 1, 0, 0 },
   { C, 0, 1, 0, 0 },
   { C, 0, 1, 0, 0 },
   { C, 1, 1, 1, 0 },
   { C, 1, 1, 1, 0 },
   { C, 0, 1, 1, 0 },
   { C, 0, 1, 1, 0 },
   { C, 1, 1, 1, 0 },
   { C, 1, 1, 1, 0 },
   { C, 0, 1, 1, 0 },
   { C, 0, 1, 1, 0 },
   { P, 0, 0, 0, 0 },
   { P, 0, 0, 0, 0 },
   { C, 0, 1, 0, 0 },
   { C, 0, 1, 0, 0 },
   { C, 1, 1, 0, 0 },
   { C, 1, 1, 0, 0 },
   { C, 0, 1, 0, 0 },
   { C, 0, 1, 0, 0 },
   { C, 1, 1, 1, 0 },
   { C, 1, 1, 1, 0 },
   { C, 0, 1, 1, 0 },
   { C, 0, 1, 1, 0 },
   { C, 1, 1, 1, 0 },
   { C, 1, 1, 1, 0 },
   { C, 0, 1, 1, 0 },
   { C, 0, 1, 1, 0 },
   { C, 0, 0, 0, 1 },
   { C, 0, 0, 0, 1 },
   { C, 0, 1, 0, 1 },
   { C, 0, 1, 0, 1 },
   { C, 1, 1, 0, 1 },
   { C, 1, 1, 0, 1 },
   { C, 0, 1, 0, 1 },
   { C, 0, 1, 0, 1 },
   { C, 1, 1, 1, 1 },
   { C, 1, 1, 1, 1 },
   { C, 0, 1, 1, 1 },
   { C, 0, 1, 1, 1 },
   { C, 1, 1, 1, 1 },
   { C, 1, 1, 1, 1 },
   { C, 0, 1, 1, 1 },
   { C, 0, 1, 1, 1 },
   { C, 0, 0, 0, 1 },
   { C, 0, 0, 0, 1 },
   { C, 0, 1, 0, 1 },
   { C, 0, 1, 0, 1 },
   { C, 1, 1, 0, 1 },
   { C, 1, 1, 0, 1 },
   { C, 0, 1, 0, 1 },
   { C, 0, 1, 0, 1 },
   { C, 1, 1, 1, 1 },
   { C, 1, 1, 1, 1 },
   { C, 0, 1, 1, 1 },
   { C, 0, 1, 1, 1 },
   { C, 1, 1, 1, 1 },
   { C, 1, 1, 1, 1 },
   { C, 0, 1, 1, 1 },
   { C, 0, 1, 1, 1 },
   { P, 0, 0, 0, 0 },
   { P, 0, 0, 0, 0 },
   { P, 0, 0, 0, 0 },
   { P, 0, 0, 0, 0 },
   { P, 0, 0, 0, 0 },
   { N, 1, 1, 0, 0 },
   { N, 0, 1, 0, 0 },
   { N, 0, 1, 0, 0 },
   { P, 0, 0, 0, 0 },
   { P, 0, 0, 0, 0 },
   { C, 0, 1, 1, 0 },
   { C, 0, 1, 1, 0 },
   { P, 0, 0, 0, 0 },
   { N, 1, 1, 0, 0 },
   { C, 0, 1, 1, 0 },
   { C, 0, 1, 1, 0 },
   { P, 0, 0, 0, 0 },
   { P, 0, 0, 0, 0 },
   { P, 0, 0, 0, 0 },
   { P, 0, 0, 0, 0 },
   { P, 0, 0, 0, 0 },
   { N, 1, 1, 0, 0 },
   { N, 0, 1, 0, 0 },
   { N, 0, 1, 0, 0 },
   { P, 0, 0, 0, 0 },
   { P, 0, 0, 0, 0 },
   { C, 0, 1, 1, 0 },
   { C, 0, 1, 1, 0 },
   { P, 0, 0, 0, 0 },
   { N, 1, 1, 0, 0 },
   { C, 0, 1, 1, 0 },
   { C, 0, 1, 1, 0 },
   { P, 0, 0, 0, 0 },
   { P, 0, 0, 0, 0 },
   { P, 0, 0, 0, 0 },
   { P, 0, 0, 0, 0 },
   { P, 0, 0, 0, 0 },
   { N, 1, 1, 0, 1 },
   { N, 0, 1, 0, 1 },
   { N, 0, 1, 0, 1 },
   { P, 0, 0, 0, 0 },
   { P, 0, 0, 0, 0 },
   { C, 0, 1, 1, 1 },
   { C, 0, 1, 1, 1 },
   { P, 0, 0, 0, 0 },
   { N, 1, 1, 0, 1 },
   { C, 0, 1, 1, 1 },
   { C, 0, 1, 1, 1 },
   { P, 0, 0, 0, 0 },
   { C, 0, 0, 0, 1 },
   { P, 0, 0, 0, 0 },
   { C, 0, 1, 0, 1 },
   { P, 0, 0, 0, 0 },
   { C, 1, 1, 0, 1 },
   { C, 0, 1, 0, 1 },
   { C, 0, 1, 0, 1 },
   { P, 0, 0, 0, 0 },
   { C, 1, 1, 1, 1 },
   { C, 0, 1, 1, 1 },
   { C, 0, 1, 1, 1 },
   { P, 0, 0, 0, 0 },
   { C, 1, 1, 1, 1 },
   { C, 0, 1, 1, 1 },
   { C, 0, 1, 1, 1 }
};

/* Fill in the depth/stencil related fields of a WM program key.
 *
 * line_aa - one of the AA_* values (AA_NEVER / AA_SOMETIMES are the ones
 *           tested here).
 * lookup  - index into wm_iz_table; must be below IZ_BIT_MAX.
 * key     - program key to update.  Fields are only ever set, never
 *           cleared, so the caller is expected to pass a key whose
 *           depth fields start out zeroed (TODO confirm at call sites).
 *
 * Register numbers are handed out in a fixed order starting at 2:
 * source depth (2 regs), AA/dest stencil (1 reg), dest depth (2 regs).
 */
void brw_wm_lookup_iz( unsigned line_aa,
                       unsigned lookup,
                       struct brw_wm_prog_key *key )
{
   /* First register available for the optional depth payload. */
   unsigned reg = 2;

   assert (lookup < IZ_BIT_MAX);

   if (lookup & IZ_PS_COMPUTES_DEPTH_BIT)
      key->computes_depth = 1;

   if (wm_iz_table[lookup].sd_present) {
      key->source_depth_reg = reg;
      reg += 2;
   }

   if (wm_iz_table[lookup].sd_to_rt)
      key->source_depth_to_render_target = 1;

   /* The AA/dest-stencil register is needed either because the table
    * says so or because line antialiasing may be active.
    */
   if (wm_iz_table[lookup].ds_present || line_aa != AA_NEVER) {
      key->aa_dest_stencil_reg = reg;
      /* Only defer the decision to run time when the register is there
       * purely for "sometimes" line AA.
       */
      key->runtime_check_aads_emit = (!wm_iz_table[lookup].ds_present &&
                                      line_aa == AA_SOMETIMES);
      reg++;
   }

   if (wm_iz_table[lookup].dd_present) {
      key->dest_depth_reg = reg;
      reg+=2;
   }

   /* Round the highest register number used up to a whole pair. */
   key->nr_depth_regs = (reg+1)/2;
}

diff --git a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c
new file mode 100644
index 0000000000..52b2909a65
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c
@@ -0,0 +1,275 @@
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +#include "util/u_math.h" +#include "util/u_memory.h" + + +#define COMPAREFUNC_ALWAYS 0 +#define COMPAREFUNC_NEVER 0x1 +#define COMPAREFUNC_LESS 0x2 +#define COMPAREFUNC_EQUAL 0x3 +#define COMPAREFUNC_LEQUAL 0x4 +#define COMPAREFUNC_GREATER 0x5 +#define COMPAREFUNC_NOTEQUAL 0x6 +#define COMPAREFUNC_GEQUAL 0x7 + +/* Samplers aren't strictly wm state from the hardware's perspective, + * but that is the only situation in which we use them in this driver. 
+ */ + +static int intel_translate_shadow_compare_func(unsigned func) +{ + switch(func) { + case PIPE_FUNC_NEVER: + return COMPAREFUNC_ALWAYS; + case PIPE_FUNC_LESS: + return COMPAREFUNC_LEQUAL; + case PIPE_FUNC_LEQUAL: + return COMPAREFUNC_LESS; + case PIPE_FUNC_GREATER: + return COMPAREFUNC_GEQUAL; + case PIPE_FUNC_GEQUAL: + return COMPAREFUNC_GREATER; + case PIPE_FUNC_NOTEQUAL: + return COMPAREFUNC_EQUAL; + case PIPE_FUNC_EQUAL: + return COMPAREFUNC_NOTEQUAL; + case PIPE_FUNC_ALWAYS: + return COMPAREFUNC_NEVER; + } + + debug_printf("Unknown value in %s: %x\n", __FUNCTION__, func); + return COMPAREFUNC_NEVER; +} + +/* The brw (and related graphics cores) do not support GL_CLAMP. The + * Intel drivers for "other operating systems" implement GL_CLAMP as + * GL_CLAMP_TO_EDGE, so the same is done here. + */ +static unsigned translate_wrap_mode( int wrap ) +{ + switch( wrap ) { + case PIPE_TEX_WRAP_REPEAT: + return BRW_TEXCOORDMODE_WRAP; + case PIPE_TEX_WRAP_CLAMP: + return BRW_TEXCOORDMODE_CLAMP; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return BRW_TEXCOORDMODE_CLAMP; /* conform likes it this way */ + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return BRW_TEXCOORDMODE_CLAMP_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return BRW_TEXCOORDMODE_MIRROR; + default: + return BRW_TEXCOORDMODE_WRAP; + } +} + + +static unsigned U_FIXED(float value, unsigned frac_bits) +{ + value *= (1<<frac_bits); + return value < 0 ? 
0 : value; +} + +static int S_FIXED(float value, unsigned frac_bits) +{ + return value * (1<<frac_bits); +} + + +static unsigned upload_default_color( struct brw_context *brw, + const float *color ) +{ + struct brw_sampler_default_color sdc; + + COPY_4V(sdc.color, color); + + return brw_cache_data( &brw->cache[BRW_SAMPLER_DEFAULT_COLOR], &sdc ); +} + + +/* + */ +static void brw_update_sampler_state( const struct pipe_sampler_state *pipe_sampler, + unsigned sdc_gs_offset, + struct brw_sampler_state *sampler) +{ + memset(sampler, 0, sizeof(*sampler)); + + switch (pipe_sampler->min_mip_filter) { + case PIPE_TEX_FILTER_NEAREST: + sampler->ss0.min_filter = BRW_MAPFILTER_NEAREST; + break; + case PIPE_TEX_FILTER_LINEAR: + sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR; + break; + case PIPE_TEX_FILTER_ANISO: + sampler->ss0.min_filter = BRW_MAPFILTER_ANISOTROPIC; + break; + default: + break; + } + + switch (pipe_sampler->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + sampler->ss0.mip_filter = BRW_MIPFILTER_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + sampler->ss0.mip_filter = BRW_MIPFILTER_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + sampler->ss0.mip_filter = BRW_MIPFILTER_NONE; + break; + default: + break; + } + /* Set Anisotropy: + */ + switch (pipe_sampler->mag_img_filter) { + case PIPE_TEX_FILTER_NEAREST: + sampler->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + break; + case PIPE_TEX_FILTER_LINEAR: + sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + break; + case PIPE_TEX_FILTER_ANISO: + sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + break; + default: + break; + } + + if (pipe_sampler->max_anisotropy > 2.0) { + sampler->ss3.max_aniso = MAX2((pipe_sampler->max_anisotropy - 2) / 2, + BRW_ANISORATIO_16); + } + + sampler->ss1.s_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_s); + sampler->ss1.r_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_r); + sampler->ss1.t_wrap_mode = translate_wrap_mode(pipe_sampler->wrap_t); + + /* Fulsim complains if I 
don't do this. Hardware doesn't mind: + */ +#if 0 + if (texObj->Target == GL_TEXTURE_CUBE_MAP_ARB) { + sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CUBE; + sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CUBE; + sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CUBE; + } +#endif + + /* Set shadow function: + */ + if (pipe_sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + /* Shadowing is "enabled" by emitting a particular sampler + * message (sample_c). So need to recompile WM program when + * shadow comparison is enabled on each/any texture unit. + */ + sampler->ss0.shadow_function = intel_translate_shadow_compare_func(pipe_sampler->compare_func); + } + + /* Set LOD bias: + */ + sampler->ss0.lod_bias = S_FIXED(CLAMP(pipe_sampler->lod_bias, -16, 15), 6); + + sampler->ss0.lod_preclamp = 1; /* OpenGL mode */ + sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */ + + /* Set BaseMipLevel, MaxLOD, MinLOD: + * + * XXX: I don't think that using firstLevel, lastLevel works, + * because we always setup the surface state as if firstLevel == + * level zero. Probably have to subtract firstLevel from each of + * these: + */ + sampler->ss0.base_level = U_FIXED(0, 1); + + sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(pipe_sampler->max_lod, 0), 13), 6); + sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(pipe_sampler->min_lod, 0), 13), 6); + + sampler->ss2.default_color_pointer = sdc_gs_offset >> 5; +} + + + +/* All samplers must be uploaded in a single contiguous array, which + * complicates various things. However, this is still too confusing - + * FIXME: simplify all the different new texture state flags. 
+ */ +static void upload_wm_samplers(struct brw_context *brw) +{ + unsigned unit; + unsigned sampler_count = 0; + + /* BRW_NEW_SAMPLER */ + for (unit = 0; unit < brw->num_textures && unit < brw->num_samplers; + unit++) { + /* determine unit enable/disable by looking for a bound texture */ + if (brw->attribs.Texture[unit]) { + const struct pipe_sampler_state *sampler = brw->attribs.Samplers[unit]; + unsigned sdc_gs_offset = upload_default_color(brw, sampler->border_color); + + brw_update_sampler_state(sampler, + sdc_gs_offset, + &brw->wm.sampler[unit]); + + sampler_count = unit + 1; + } + } + + if (brw->wm.sampler_count != sampler_count) { + brw->wm.sampler_count = sampler_count; + brw->state.dirty.cache |= CACHE_NEW_SAMPLER; + } + + brw->wm.sampler_gs_offset = 0; + + if (brw->wm.sampler_count) + brw->wm.sampler_gs_offset = + brw_cache_data_sz(&brw->cache[BRW_SAMPLER], + brw->wm.sampler, + sizeof(struct brw_sampler_state) * brw->wm.sampler_count); +} + +const struct brw_tracked_state brw_wm_samplers = { + .dirty = { + .brw = BRW_NEW_SAMPLER, + .cache = 0 + }, + .update = upload_wm_samplers +}; + diff --git a/src/gallium/drivers/i965simple/brw_wm_state.c b/src/gallium/drivers/i965simple/brw_wm_state.c new file mode 100644 index 0000000000..37a9bf919c --- /dev/null +++ b/src/gallium/drivers/i965simple/brw_wm_state.c @@ -0,0 +1,195 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
**********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keith@tungstengraphics.com>
  */


#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"
#include "util/u_math.h"
#include "util/u_memory.h"

/***********************************************************************
 * WM unit - fragment programs and rasterization
 *
 * Assemble a brw_wm_unit_state on the stack from the current program
 * data, surface/sampler counts and rasterizer state, then hand it to
 * the BRW_WM_UNIT cache; the returned offset is stored in
 * brw->wm.state_gs_offset.
 *
 * The comments of the form CACHE_NEW_x / BRW_NEW_x name the dirty flag
 * that the following reads depend on.
 */
static void upload_wm_unit(struct brw_context *brw )
{
   struct brw_wm_unit_state wm;
   unsigned max_threads;
   unsigned per_thread;

   /* Debug switch: program a max_threads field of 0 instead of 31. */
   if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
      max_threads = 0;
   else
      max_threads = 31;


   memset(&wm, 0, sizeof(wm));

   /* CACHE_NEW_WM_PROG */
   /* Register count is programmed in blocks of 16, minus one. */
   wm.thread0.grf_reg_count = align(brw->wm.prog_data->total_grf, 16) / 16 - 1;
   wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6;
   wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
   wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length;
   wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length;

   wm.wm5.max_threads = max_threads;

   /* Scratch requirement rounded up to 1KB; only checked, not used,
    * while the scratch path below is disabled.
    */
   per_thread = align(brw->wm.prog_data->total_scratch, 1024);
   assert(per_thread <= 12 * 1024);

#if 0
   if (brw->wm.prog_data->total_scratch) {
      unsigned total = per_thread * (max_threads + 1);

      /* Scratch space -- just have to make sure there is sufficient
       * allocated for the active program and current number of threads.
       */
      brw->wm.scratch_buffer_size = total;
      if (brw->wm.scratch_buffer &&
          brw->wm.scratch_buffer_size > brw->wm.scratch_buffer->size) {
         dri_bo_unreference(brw->wm.scratch_buffer);
         brw->wm.scratch_buffer = NULL;
      }
      if (!brw->wm.scratch_buffer) {
         brw->wm.scratch_buffer = dri_bo_alloc(intel->intelScreen->bufmgr,
                                               "wm scratch",
                                               brw->wm.scratch_buffer_size,
                                               4096, DRM_BO_FLAG_MEM_TT);
      }
   }
   /* XXX: Scratch buffers are not implemented correctly.
    *
    * The scratch offset to be programmed into wm is relative to the general
    * state base address.  However, using dri_bo_alloc/dri_bo_emit_reloc (or
    * the previous bmGenBuffers scheme), we get an offset relative to the
    * start of framebuffer.  Even before then, it was broken in other ways,
    * so just fail for now if we hit that path.
    */
   assert(brw->wm.prog_data->total_scratch == 0);
#endif

   /* CACHE_NEW_SURFACE */
   wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces;

   /* BRW_NEW_CURBE_OFFSETS */
   wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2;

   wm.thread3.urb_entry_read_offset = 0;
   wm.thread1.depth_coef_urb_read_offset = 1;
   wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;

   /* CACHE_NEW_SAMPLER */
   /* NOTE(review): (count + 1) / 4 yields 0 for counts 1-2; presumably
    * the field is in units of four samplers - confirm the rounding.
    */
   wm.wm4.sampler_count = (brw->wm.sampler_count + 1) / 4;
   wm.wm4.sampler_state_pointer = brw->wm.sampler_gs_offset >> 5;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   {
      const struct brw_fragment_program *fp = brw->attribs.FragmentProgram;

      if (fp->UsesDepth)
         wm.wm5.program_uses_depth = 1; /* as far as we can tell */

      if (fp->info.writes_z)
         wm.wm5.program_computes_depth = 1;

      /* BRW_NEW_ALPHA_TEST */
      /* Alpha test is treated like a shader kill. */
      if (fp->info.uses_kill ||
          brw->attribs.DepthStencil->alpha.enabled)
         wm.wm5.program_uses_killpixel = 1;

      wm.wm5.enable_8_pix = 1;
   }

   wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */
   wm.wm5.legacy_line_rast = 0;
   wm.wm5.legacy_global_depth_bias = 0;
   wm.wm5.early_depth_test = 1; /* never need to disable */
   wm.wm5.line_aa_region_width = 0;
   wm.wm5.line_endcap_aa_region_width = 1;

   /* BRW_NEW_RASTERIZER */
   if (brw->attribs.Raster->poly_stipple_enable)
      wm.wm5.polygon_stipple = 1;

#if 0
   if (brw->attribs.Polygon->OffsetFill) {
      wm.wm5.depth_offset = 1;
      /* Something weird going on with legacy_global_depth_bias,
       * offset_constant, scaling and MRD.  This value passes glean
       * but gives some odd results elsewhere (eg. the
       * quad-offset-units test).
       */
      wm.global_depth_offset_constant = brw->attribs.Polygon->OffsetUnits * 2;

      /* This is the only value that passes glean:
       */
      wm.global_depth_offset_scale = brw->attribs.Polygon->OffsetFactor;
   }
#endif

   if (brw->attribs.Raster->line_stipple_enable) {
      wm.wm5.line_stipple = 1;
   }

   if (BRW_DEBUG & DEBUG_STATS)
      wm.wm4.stats_enable = 1;

   brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm );

   /* NOTE(review): `wm` has already been handed to the cache above, so
    * the store in the else branch only touches the local copy (which
    * memset() had zeroed anyway); the scratch relocation itself is
    * commented out.
    */
   if (brw->wm.prog_data->total_scratch) {
      /*
      dri_emit_reloc(brw->cache[BRW_WM_UNIT].pool->buffer,
                     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE,
                     (per_thread / 1024) - 1,
                     brw->wm.state_gs_offset +
                     ((char *)&wm.thread2 - (char *)&wm),
                     brw->wm.scratch_buffer);
      */
   } else {
      wm.thread2.scratch_space_base_pointer = 0;
   }
}

/* State atom: re-run upload_wm_unit() when any of these flags is set. */
const struct brw_tracked_state brw_wm_unit = {
   .dirty = {
      .brw = (BRW_NEW_RASTERIZER |
              BRW_NEW_ALPHA_TEST |
              BRW_NEW_FS |
              BRW_NEW_CURBE_OFFSETS),

      .cache = (CACHE_NEW_SURFACE |
                CACHE_NEW_WM_PROG |
                CACHE_NEW_SAMPLER)
   },
   .update = upload_wm_unit
};

diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c
new file mode 100644
index 0000000000..1a326f9918
--- /dev/null
+++ b/src/gallium/drivers/i965simple/brw_wm_surface_state.c
@@ -0,0 +1,304 @@
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "brw_context.h" +#include "brw_state.h" +#include "brw_defines.h" + +static unsigned translate_tex_target( enum pipe_texture_target target ) +{ + switch (target) { + case PIPE_TEXTURE_1D: + return BRW_SURFACE_1D; + + case PIPE_TEXTURE_2D: + return BRW_SURFACE_2D; + + case PIPE_TEXTURE_3D: + return BRW_SURFACE_3D; + + case PIPE_TEXTURE_CUBE: + return BRW_SURFACE_CUBE; + + default: + assert(0); + return 0; + } +} + +static unsigned translate_tex_format( enum pipe_format pipe_format ) +{ + switch( pipe_format ) { + case PIPE_FORMAT_L8_UNORM: + return BRW_SURFACEFORMAT_L8_UNORM; + + case PIPE_FORMAT_I8_UNORM: + return BRW_SURFACEFORMAT_I8_UNORM; + + case PIPE_FORMAT_A8_UNORM: + return BRW_SURFACEFORMAT_A8_UNORM; + + case PIPE_FORMAT_A8L8_UNORM: + return BRW_SURFACEFORMAT_L8A8_UNORM; + + case PIPE_FORMAT_R8G8B8_UNORM: + assert(0); /* not supported for sampling */ + return BRW_SURFACEFORMAT_R8G8B8_UNORM; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + + case PIPE_FORMAT_R8G8B8A8_UNORM: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R5G6B5_UNORM: + return BRW_SURFACEFORMAT_B5G6R5_UNORM; + + case PIPE_FORMAT_A1R5G5B5_UNORM: + return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; + + case PIPE_FORMAT_A4R4G4B4_UNORM: + return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; + + case PIPE_FORMAT_YCBCR_REV: + return BRW_SURFACEFORMAT_YCRCB_NORMAL; + + case PIPE_FORMAT_YCBCR: + return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; +#if 0 + case PIPE_FORMAT_RGB_FXT1: + case PIPE_FORMAT_RGBA_FXT1: + return BRW_SURFACEFORMAT_FXT1; +#endif + + case PIPE_FORMAT_Z16_UNORM: + return BRW_SURFACEFORMAT_I16_UNORM; +#if 0 + case PIPE_FORMAT_RGB_DXT1: + return BRW_SURFACEFORMAT_DXT1_RGB; + + case PIPE_FORMAT_RGBA_DXT1: + return BRW_SURFACEFORMAT_BC1_UNORM; + + case PIPE_FORMAT_RGBA_DXT3: + return BRW_SURFACEFORMAT_BC2_UNORM; 
+ + case PIPE_FORMAT_RGBA_DXT5: + return BRW_SURFACEFORMAT_BC3_UNORM; + + case PIPE_FORMAT_SRGBA8: + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB; + case PIPE_FORMAT_SRGB_DXT1: + return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; +#endif + + default: + assert(0); + return 0; + } +} + +static unsigned brw_buffer_offset(struct brw_context *brw, + struct pipe_buffer *buffer) +{ + return brw->winsys->get_buffer_offset(brw->winsys, + buffer, + 0); +} + +static +void brw_update_texture_surface( struct brw_context *brw, + unsigned unit ) +{ + const struct brw_texture *tObj = brw->attribs.Texture[unit]; + struct brw_surface_state surf; + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf.ss0.surface_type = translate_tex_target(tObj->base.target); + surf.ss0.surface_format = translate_tex_format(tObj->base.format); + + /* This is ok for all textures with channel width 8bit or less: + */ +/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ + + /* Updated in emit_reloc */ + surf.ss1.base_addr = brw_buffer_offset( brw, tObj->buffer ); + + surf.ss2.mip_count = tObj->base.last_level; + surf.ss2.width = tObj->base.width[0] - 1; + surf.ss2.height = tObj->base.height[0] - 1; + + surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + surf.ss3.tiled_surface = 0; /* always zero */ + surf.ss3.pitch = tObj->stride - 1; + surf.ss3.depth = tObj->base.depth[0] - 1; + + surf.ss4.min_lod = 0; + + if (tObj->base.target == PIPE_TEXTURE_CUBE) { + surf.ss0.cube_pos_x = 1; + surf.ss0.cube_pos_y = 1; + surf.ss0.cube_pos_z = 1; + surf.ss0.cube_neg_x = 1; + surf.ss0.cube_neg_y = 1; + surf.ss0.cube_neg_z = 1; + } + + brw->wm.bind.surf_ss_offset[unit + 1] = + brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); +} + + + +#define OFFSET(TYPE, FIELD) ( (unsigned)&(((TYPE *)0)->FIELD) ) + + +static void upload_wm_surfaces(struct brw_context *brw ) +{ + unsigned i; + + { + struct brw_surface_state surf; + + /* BRW_NEW_FRAMEBUFFER + */ + struct pipe_surface 
*pipe_surface = brw->attribs.FrameBuffer.cbufs[0];/*fixme*/ + + memset(&surf, 0, sizeof(surf)); + + if (pipe_surface != NULL) { + if (pipe_surface->block.size == 4) + surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + else + surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; + + surf.ss0.surface_type = BRW_SURFACE_2D; + + surf.ss1.base_addr = brw_buffer_offset( brw, pipe_surface->buffer ); + + surf.ss2.width = pipe_surface->width - 1; + surf.ss2.height = pipe_surface->height - 1; + surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; + surf.ss3.tiled_surface = 0; + surf.ss3.pitch = pipe_surface->stride - 1; + } else { + surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + surf.ss0.surface_type = BRW_SURFACE_NULL; + } + + /* BRW_NEW_BLEND */ + surf.ss0.color_blend = (!brw->attribs.Blend->logicop_enable && + brw->attribs.Blend->blend_enable); + + + surf.ss0.writedisable_red = !(brw->attribs.Blend->colormask & PIPE_MASK_R); + surf.ss0.writedisable_green = !(brw->attribs.Blend->colormask & PIPE_MASK_G); + surf.ss0.writedisable_blue = !(brw->attribs.Blend->colormask & PIPE_MASK_B); + surf.ss0.writedisable_alpha = !(brw->attribs.Blend->colormask & PIPE_MASK_A); + + + + + brw->wm.bind.surf_ss_offset[0] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); + + brw->wm.nr_surfaces = 1; + } + + + /* BRW_NEW_TEXTURE + */ + for (i = 0; i < brw->num_textures && i < brw->num_samplers; i++) { + const struct brw_texture *texUnit = brw->attribs.Texture[i]; + + if (texUnit && + texUnit->base.refcount/*(texUnit->refcount > 0) == really used */) { + + brw_update_texture_surface(brw, i); + + brw->wm.nr_surfaces = i+2; + } + else { + brw->wm.bind.surf_ss_offset[i+1] = 0; + } + } + + brw->wm.bind_ss_offset = brw_cache_data( &brw->cache[BRW_SS_SURF_BIND], + &brw->wm.bind ); +} + + +/* KW: Will find a different way to acheive this, see for example the + * state caches with relocs in the i915 swz driver. 
+ */ +#if 0 +static void emit_reloc_wm_surfaces(struct brw_context *brw) +{ + int unit; + + if (brw->state.draw_region != NULL) { + /* Emit framebuffer relocation */ + dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE), + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, + 0, + brw->wm.bind.surf_ss_offset[0] + + offsetof(struct brw_surface_state, ss1), + brw->state.draw_region->buffer); + } + + /* Emit relocations for texture buffers */ + for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { + struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit]; + struct gl_texture_object *tObj = texUnit->_Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + + if (texUnit->_ReallyEnabled && intelObj->mt != NULL) { + dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE), + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 0, + brw->wm.bind.surf_ss_offset[unit + 1] + + offsetof(struct brw_surface_state, ss1), + intelObj->mt->region->buffer); + } + } +} +#endif + +const struct brw_tracked_state brw_wm_surfaces = { + .dirty = { + .brw = (BRW_NEW_FRAMEBUFFER | + BRW_NEW_BLEND | + BRW_NEW_TEXTURE), + .cache = 0 + }, + .update = upload_wm_surfaces, +}; diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile new file mode 100644 index 0000000000..120bdfd9dd --- /dev/null +++ b/src/gallium/drivers/softpipe/Makefile @@ -0,0 +1,47 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = softpipe + +C_SOURCES = \ + sp_fs_exec.c \ + sp_fs_sse.c \ + sp_fs_llvm.c \ + sp_clear.c \ + sp_flush.c \ + sp_query.c \ + sp_context.c \ + sp_draw_arrays.c \ + sp_prim_setup.c \ + sp_prim_vbuf.c \ + sp_quad.c \ + sp_quad_alpha_test.c \ + sp_quad_blend.c \ + sp_quad_colormask.c \ + sp_quad_coverage.c \ + sp_quad_depth_test.c \ + sp_quad_earlyz.c \ + sp_quad_fs.c \ + sp_quad_occlusion.c \ + sp_quad_output.c \ + sp_quad_stencil.c \ + sp_quad_stipple.c \ + sp_screen.c \ + sp_setup.c \ + sp_state_blend.c \ + sp_state_clip.c \ + sp_state_derived.c \ + sp_state_fs.c \ + sp_state_sampler.c \ + sp_state_rasterizer.c \ + sp_state_surface.c \ + sp_state_vertex.c \ + sp_texture.c \ + sp_tex_sample.c \ + sp_tile_cache.c \ + sp_surface.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript new file mode 100644 index 0000000000..c1f7daa8ab --- /dev/null +++ b/src/gallium/drivers/softpipe/SConscript @@ -0,0 +1,46 @@ +Import('*') + +env = env.Clone() + +softpipe = env.ConvenienceLibrary( + target = 'softpipe', + source = [ + 'sp_fs_exec.c', + 'sp_fs_sse.c', + 'sp_fs_llvm.c', + 'sp_clear.c', + 'sp_context.c', + 'sp_draw_arrays.c', + 'sp_flush.c', + 'sp_prim_setup.c', + 'sp_prim_vbuf.c', + 'sp_setup.c', + 'sp_quad_alpha_test.c', + 'sp_quad_blend.c', + 'sp_quad.c', + 'sp_quad_colormask.c', + 'sp_quad_coverage.c', + 'sp_quad_depth_test.c', + 'sp_quad_earlyz.c', + 'sp_quad_fs.c', + 'sp_quad_occlusion.c', + 'sp_quad_output.c', + 'sp_quad_stencil.c', + 'sp_quad_stipple.c', + 'sp_query.c', + 'sp_screen.c', + 'sp_state_blend.c', + 'sp_state_clip.c', + 'sp_state_derived.c', + 'sp_state_fs.c', + 'sp_state_rasterizer.c', + 'sp_state_sampler.c', + 'sp_state_surface.c', + 'sp_state_vertex.c', + 'sp_surface.c', + 'sp_tex_sample.c', + 'sp_texture.c', + 'sp_tile_cache.c', + ]) + +Export('softpipe')
\ No newline at end of file diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c new file mode 100644 index 0000000000..dfa46c9fb7 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -0,0 +1,107 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "util/u_pack_color.h" +#include "sp_clear.h" +#include "sp_context.h" +#include "sp_surface.h" +#include "sp_state.h" +#include "sp_tile_cache.h" + + +/** + * Convert packed pixel from one format to another. 
+ */ +static unsigned +convert_color(enum pipe_format srcFormat, unsigned srcColor, + enum pipe_format dstFormat) +{ + ubyte r, g, b, a; + unsigned dstColor; + + util_unpack_color_ub(srcFormat, &srcColor, &r, &g, &b, &a); + util_pack_color_ub(r, g, b, a, dstFormat, &dstColor); + + return dstColor; +} + + + +/** + * Clear the given surface to the specified value. + * No masking, no scissor (clear entire buffer). + * Note: when clearing a color buffer, the clearValue is always + * encoded as PIPE_FORMAT_A8R8G8B8_UNORM. + */ +void +softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint i; + + if (softpipe->no_rast) + return; + +#if 0 + softpipe_update_derived(softpipe); /* not needed?? */ +#endif + + if (ps == sp_tile_cache_get_surface(softpipe->zsbuf_cache)) { + sp_tile_cache_clear(softpipe->zsbuf_cache, clearValue); + softpipe->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_CLEAR; +#if TILE_CLEAR_OPTIMIZATION + return; +#endif + } + + for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) { + if (ps == sp_tile_cache_get_surface(softpipe->cbuf_cache[i])) { + unsigned cv; + if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) { + cv = convert_color(PIPE_FORMAT_A8R8G8B8_UNORM, clearValue, + ps->format); + } + else { + cv = clearValue; + } + sp_tile_cache_clear(softpipe->cbuf_cache[i], cv); + softpipe->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_CLEAR; + } + } + +#if !TILE_CLEAR_OPTIMIZATION + /* non-cached surface */ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +#endif +} diff --git a/src/gallium/drivers/softpipe/sp_clear.h b/src/gallium/drivers/softpipe/sp_clear.h new file mode 100644 index 0000000000..a8ed1c4ecc --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_clear.h @@ -0,0 +1,43 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 
+ * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Brian Paul + */ + +#ifndef SP_CLEAR_H +#define SP_CLEAR_H + +#include "pipe/p_state.h" +struct pipe_context; + +extern void +softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + + +#endif /* SP_CLEAR_H */ diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c new file mode 100644 index 0000000000..cd1e6663d8 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -0,0 +1,264 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "sp_clear.h" +#include "sp_context.h" +#include "sp_flush.h" +#include "sp_prim_setup.h" +#include "sp_prim_vbuf.h" +#include "sp_state.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" +#include "sp_texture.h" +#include "sp_winsys.h" +#include "sp_query.h" + + + +/** + * Map any drawing surfaces which aren't already mapped + */ +void +softpipe_map_surfaces(struct softpipe_context *sp) +{ + unsigned i; + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) { + sp_tile_cache_map_surfaces(sp->cbuf_cache[i]); + } + + sp_tile_cache_map_surfaces(sp->zsbuf_cache); +} + + +/** + * Unmap any mapped drawing surfaces + */ +void +softpipe_unmap_surfaces(struct softpipe_context *sp) +{ + uint i; + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) + sp_flush_tile_cache(sp, sp->cbuf_cache[i]); + sp_flush_tile_cache(sp, sp->zsbuf_cache); + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) { + sp_tile_cache_unmap_surfaces(sp->cbuf_cache[i]); + } + sp_tile_cache_unmap_surfaces(sp->zsbuf_cache); +} + + +static void softpipe_destroy( struct pipe_context *pipe ) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + struct pipe_winsys *ws = pipe->winsys; + uint i; + + if (softpipe->draw) + draw_destroy( softpipe->draw ); + + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + softpipe->quad[i].polygon_stipple->destroy( softpipe->quad[i].polygon_stipple ); + softpipe->quad[i].earlyz->destroy( softpipe->quad[i].earlyz ); + softpipe->quad[i].shade->destroy( softpipe->quad[i].shade ); + softpipe->quad[i].alpha_test->destroy( softpipe->quad[i].alpha_test ); + softpipe->quad[i].depth_test->destroy( softpipe->quad[i].depth_test ); + softpipe->quad[i].stencil_test->destroy( 
softpipe->quad[i].stencil_test ); + softpipe->quad[i].occlusion->destroy( softpipe->quad[i].occlusion ); + softpipe->quad[i].coverage->destroy( softpipe->quad[i].coverage ); + softpipe->quad[i].blend->destroy( softpipe->quad[i].blend ); + softpipe->quad[i].colormask->destroy( softpipe->quad[i].colormask ); + softpipe->quad[i].output->destroy( softpipe->quad[i].output ); + } + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + sp_destroy_tile_cache(softpipe->cbuf_cache[i]); + sp_destroy_tile_cache(softpipe->zsbuf_cache); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + sp_destroy_tile_cache(softpipe->tex_cache[i]); + + for (i = 0; i < Elements(softpipe->constants); i++) { + if (softpipe->constants[i].buffer) { + winsys_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); + } + } + + FREE( softpipe ); +} + + +struct pipe_context * +softpipe_create( struct pipe_screen *screen, + struct pipe_winsys *pipe_winsys, + void *unused ) +{ + struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); + uint i; + + util_init_math(); + +#ifdef PIPE_ARCH_X86 + softpipe->use_sse = !debug_get_bool_option( "GALLIUM_NOSSE", FALSE ); +#else + softpipe->use_sse = FALSE; +#endif + + softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE ); + + softpipe->pipe.winsys = pipe_winsys; + softpipe->pipe.screen = screen; + softpipe->pipe.destroy = softpipe_destroy; + + /* state setters */ + softpipe->pipe.create_blend_state = softpipe_create_blend_state; + softpipe->pipe.bind_blend_state = softpipe_bind_blend_state; + softpipe->pipe.delete_blend_state = softpipe_delete_blend_state; + + softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; + softpipe->pipe.bind_sampler_states = softpipe_bind_sampler_states; + softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; + + softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; + softpipe->pipe.bind_depth_stencil_alpha_state = softpipe_bind_depth_stencil_state; + 
softpipe->pipe.delete_depth_stencil_alpha_state = softpipe_delete_depth_stencil_state; + + softpipe->pipe.create_rasterizer_state = softpipe_create_rasterizer_state; + softpipe->pipe.bind_rasterizer_state = softpipe_bind_rasterizer_state; + softpipe->pipe.delete_rasterizer_state = softpipe_delete_rasterizer_state; + + softpipe->pipe.create_fs_state = softpipe_create_fs_state; + softpipe->pipe.bind_fs_state = softpipe_bind_fs_state; + softpipe->pipe.delete_fs_state = softpipe_delete_fs_state; + + softpipe->pipe.create_vs_state = softpipe_create_vs_state; + softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; + softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; + + softpipe->pipe.set_blend_color = softpipe_set_blend_color; + softpipe->pipe.set_clip_state = softpipe_set_clip_state; + softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; + softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state; + softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple; + softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; + softpipe->pipe.set_sampler_textures = softpipe_set_sampler_textures; + softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; + + softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers; + softpipe->pipe.set_vertex_elements = softpipe_set_vertex_elements; + + softpipe->pipe.draw_arrays = softpipe_draw_arrays; + softpipe->pipe.draw_elements = softpipe_draw_elements; + softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; + softpipe->pipe.set_edgeflags = softpipe_set_edgeflags; + + + softpipe->pipe.clear = softpipe_clear; + softpipe->pipe.flush = softpipe_flush; + + softpipe_init_query_funcs( softpipe ); + softpipe_init_texture_funcs( softpipe ); + + /* + * Alloc caches for accessing drawing surfaces and textures. + * Must be before quad stage setup! 
+ */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + softpipe->cbuf_cache[i] = sp_create_tile_cache( screen ); + softpipe->zsbuf_cache = sp_create_tile_cache( screen ); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + softpipe->tex_cache[i] = sp_create_tile_cache( screen ); + + + /* setup quad rendering stages */ + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + softpipe->quad[i].polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); + softpipe->quad[i].earlyz = sp_quad_earlyz_stage(softpipe); + softpipe->quad[i].shade = sp_quad_shade_stage(softpipe); + softpipe->quad[i].alpha_test = sp_quad_alpha_test_stage(softpipe); + softpipe->quad[i].depth_test = sp_quad_depth_test_stage(softpipe); + softpipe->quad[i].stencil_test = sp_quad_stencil_test_stage(softpipe); + softpipe->quad[i].occlusion = sp_quad_occlusion_stage(softpipe); + softpipe->quad[i].coverage = sp_quad_coverage_stage(softpipe); + softpipe->quad[i].blend = sp_quad_blend_stage(softpipe); + softpipe->quad[i].colormask = sp_quad_colormask_stage(softpipe); + softpipe->quad[i].output = sp_quad_output_stage(softpipe); + } + + /* + * Create drawing context and plug our rendering stage into it. + */ + softpipe->draw = draw_create(); + if (!softpipe->draw) + goto fail; + + softpipe->setup = sp_draw_render_stage(softpipe); + if (!softpipe->setup) + goto fail; + + if (debug_get_bool_option( "SP_NO_RAST", FALSE )) + softpipe->no_rast = TRUE; + + if (debug_get_bool_option( "SP_NO_VBUF", FALSE )) { + /* Deprecated path -- vbuf is the intended interface to the draw module: + */ + draw_set_rasterize_stage(softpipe->draw, softpipe->setup); + } + else { + sp_init_vbuf(softpipe); + } + + /* plug in AA line/point stages */ + draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); + draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); + +#if USE_DRAW_STAGE_PSTIPPLE + /* Do polygon stipple w/ texture map + frag prog? 
*/ + draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe); +#endif + + sp_init_surface_functions(softpipe); + + return &softpipe->pipe; + + fail: + softpipe_destroy(&softpipe->pipe); + return NULL; +} + diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h new file mode 100644 index 0000000000..2b9a2a8ee5 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -0,0 +1,165 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_CONTEXT_H +#define SP_CONTEXT_H + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" + +#include "draw/draw_vertex.h" + +#include "sp_quad.h" + + +/** + * This is a temporary variable for testing draw-stage polygon stipple. + * If zero, do stipple in sp_quad_stipple.c + */ +#define USE_DRAW_STAGE_PSTIPPLE 1 + +/* Number of threads working on individual quads. + * Setting to 1 disables this feature. + */ +#define SP_NUM_QUAD_THREADS 1 + +struct softpipe_winsys; +struct softpipe_vbuf_render; +struct draw_context; +struct draw_stage; +struct softpipe_tile_cache; +struct sp_fragment_shader; +struct sp_vertex_shader; + + +struct softpipe_context { + struct pipe_context pipe; /**< base class */ + + /* The most recent drawing state as set by the driver: + */ + const struct pipe_blend_state *blend; + const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct pipe_depth_stencil_alpha_state *depth_stencil; + const struct pipe_rasterizer_state *rasterizer; + const struct sp_fragment_shader *fs; + const struct sp_vertex_shader *vs; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + unsigned dirty; + + unsigned num_samplers; + unsigned num_textures; + unsigned num_vertex_elements; + unsigned num_vertex_buffers; + + boolean no_rast; + + /* Counter for occlusion queries. Note this supports overlapping + * queries. 
+ */ + uint64 occlusion_count; + + /* + * Mapped vertex buffers + */ + ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS]; + + /** Mapped constant buffers */ + void *mapped_constants[PIPE_SHADER_TYPES]; + + /** Vertex format */ + struct vertex_info vertex_info; + struct vertex_info vertex_info_vbuf; + + int psize_slot; + + unsigned reduced_api_prim; /**< PIPE_PRIM_POINTS, _LINES or _TRIANGLES */ + +#if 0 + /* Stipple derived state: + */ + ubyte stipple_masks[16][16]; +#endif + + /** Derived from scissor and surface bounds: */ + struct pipe_scissor_state cliprect; + + unsigned line_stipple_counter; + + /** Software quad rendering pipeline */ + struct { + struct quad_stage *polygon_stipple; + struct quad_stage *earlyz; + struct quad_stage *shade; + struct quad_stage *alpha_test; + struct quad_stage *stencil_test; + struct quad_stage *depth_test; + struct quad_stage *occlusion; + struct quad_stage *coverage; + struct quad_stage *blend; + struct quad_stage *colormask; + struct quad_stage *output; + + struct quad_stage *first; /**< points to one of the above stages */ + } quad[SP_NUM_QUAD_THREADS]; + + /** The primitive drawing context */ + struct draw_context *draw; + struct draw_stage *setup; + struct draw_stage *vbuf; + struct softpipe_vbuf_render *vbuf_render; + + struct softpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS]; + struct softpipe_tile_cache *zsbuf_cache; + + struct softpipe_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; + + int use_sse : 1; + int dump_fs : 1; +}; + + +static INLINE struct softpipe_context * +softpipe_context( struct pipe_context *pipe ) +{ + return (struct softpipe_context *)pipe; +} + +#endif /* SP_CONTEXT_H */ + diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c new file mode 100644 index 0000000000..424bd56846 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -0,0 +1,202 @@ +/************************************************************************** + * + * Copyright 2007 
Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Author: + * Brian Paul + * Keith Whitwell + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" + +#include "sp_context.h" +#include "sp_state.h" + +#include "draw/draw_context.h" + + + +static void +softpipe_map_constant_buffers(struct softpipe_context *sp) +{ + struct pipe_winsys *ws = sp->pipe.winsys; + uint i; + for (i = 0; i < PIPE_SHADER_TYPES; i++) { + if (sp->constants[i].size) + sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + } + + draw_set_mapped_constant_buffer(sp->draw, + sp->mapped_constants[PIPE_SHADER_VERTEX], + sp->constants[PIPE_SHADER_VERTEX].size); +} + +static void +softpipe_unmap_constant_buffers(struct softpipe_context *sp) +{ + struct pipe_winsys *ws = sp->pipe.winsys; + uint i; + + /* really need to flush all prims since the vert/frag shaders const buffers + * are going away now. + */ + draw_flush(sp->draw); + + draw_set_mapped_constant_buffer(sp->draw, NULL, 0); + + for (i = 0; i < 2; i++) { + if (sp->constants[i].size) + ws->buffer_unmap(ws, sp->constants[i].buffer); + sp->mapped_constants[i] = NULL; + } +} + + +static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES +}; + + +boolean +softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count) +{ + return softpipe_draw_elements(pipe, NULL, 0, mode, start, count); +} + + + +/** + * Draw vertex arrays, with optional indexing. + * Basically, map the vertex buffers (and drawing surfaces), then hand off + * the drawing to the 'draw' module. + * + * XXX should the element buffer be specified/bound with a separate function? 
+ */ + +boolean +softpipe_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count) +{ + struct softpipe_context *sp = softpipe_context(pipe); + struct draw_context *draw = sp->draw; + unsigned i; + + sp->reduced_api_prim = reduced_prim[mode]; + + if (sp->dirty) + softpipe_update_derived( sp ); + + softpipe_map_surfaces(sp); + softpipe_map_constant_buffers(sp); + + /* + * Map vertex buffers + */ + for (i = 0; i < sp->num_vertex_buffers; i++) { + void *buf + = pipe_buffer_map(pipe->screen, + sp->vertex_buffer[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes + = pipe_buffer_map(pipe->screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer_range(draw, indexSize, + min_index, + max_index, + mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); + } + + + /* draw! 
*/ + draw_arrays(draw, mode, start, count); + + /* + * unmap vertex/index buffers - will cause draw module to flush + */ + for (i = 0; i < sp->num_vertex_buffers; i++) { + draw_set_mapped_vertex_buffer(draw, i, NULL); + pipe_buffer_unmap(pipe->screen, sp->vertex_buffer[i].buffer); + } + if (indexBuffer) { + draw_set_mapped_element_buffer(draw, 0, NULL); + pipe_buffer_unmap(pipe->screen, indexBuffer); + } + + + /* Note: leave drawing surfaces mapped */ + softpipe_unmap_constant_buffers(sp); + + return TRUE; +} + +boolean +softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + return softpipe_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); +} + + + +void +softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) +{ + struct softpipe_context *sp = softpipe_context(pipe); + draw_set_edgeflags(sp->draw, edgeflags); +} + diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c new file mode 100644 index 0000000000..401764bb43 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -0,0 +1,92 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "pipe/p_defines.h" +#include "draw/draw_context.h" +#include "sp_flush.h" +#include "sp_context.h" +#include "sp_surface.h" +#include "sp_state.h" +#include "sp_tile_cache.h" +#include "sp_winsys.h" + + +void +softpipe_flush( struct pipe_context *pipe, + unsigned flags, + struct pipe_fence_handle **fence ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint i; + + draw_flush(softpipe->draw); + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + for (i = 0; i < softpipe->num_textures; i++) { + sp_flush_tile_cache(softpipe, softpipe->tex_cache[i]); + } + } + + if (flags & PIPE_FLUSH_RENDER_CACHE) { + for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) + if (softpipe->cbuf_cache[i]) + sp_flush_tile_cache(softpipe, softpipe->cbuf_cache[i]); + + if (softpipe->zsbuf_cache) + sp_flush_tile_cache(softpipe, softpipe->zsbuf_cache); + + /* Need this call for hardware buffers before swapbuffers. + * + * there should probably be another/different flush-type function + * that's called before swapbuffers because we don't always want + * to unmap surfaces when flushing. 
+ */ + softpipe_unmap_surfaces(softpipe); + } + + /* Enable to dump BMPs of the color/depth buffers each frame */ +#if 0 + if(flags & PIPE_FLUSH_FRAME) { + static unsigned frame_no = 1; + static char filename[256]; + util_snprintf(filename, sizeof(filename), "cbuf_%u.bmp", frame_no); + debug_dump_surface_bmp(filename, softpipe->framebuffer.cbufs[0]); + util_snprintf(filename, sizeof(filename), "zsbuf_%u.bmp", frame_no); + debug_dump_surface_bmp(filename, softpipe->framebuffer.zsbuf); + ++frame_no; + } +#endif + + if (fence) + *fence = NULL; +} + diff --git a/src/gallium/drivers/softpipe/sp_flush.h b/src/gallium/drivers/softpipe/sp_flush.h new file mode 100644 index 0000000000..68d9b5fa83 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_flush.h @@ -0,0 +1,37 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SP_FLUSH_H +#define SP_FLUSH_H + +struct pipe_context; +struct pipe_fence_handle; + +void softpipe_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence); + +#endif diff --git a/src/gallium/drivers/softpipe/sp_fs.h b/src/gallium/drivers/softpipe/sp_fs.h new file mode 100644 index 0000000000..4792ace3a3 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_fs.h @@ -0,0 +1,54 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_FS_H +#define SP_FS_H + +struct sp_fragment_shader * +softpipe_create_fs_exec(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ); + +struct sp_fragment_shader * +softpipe_create_fs_sse(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ); + +struct sp_fragment_shader * +softpipe_create_fs_llvm(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ); + +struct tgsi_interp_coef; +struct tgsi_exec_vector; + +void sp_setup_pos_vector(const struct tgsi_interp_coef *coef, + float x, float y, + struct tgsi_exec_vector *quadpos); + + +#endif diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c new file mode 100644 index 0000000000..f472dd0ed2 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -0,0 +1,164 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_fs.h" +#include "sp_headers.h" + + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_parse.h" + +struct sp_exec_fragment_shader +{ + struct sp_fragment_shader base; +}; + + +/** cast wrapper */ +static INLINE struct sp_exec_fragment_shader * +sp_exec_fragment_shader(const struct sp_fragment_shader *base) +{ + return (struct sp_exec_fragment_shader *) base; +} + + +/** + * Compute quad X,Y,Z,W for the four fragments in a quad. + * + * This should really be part of the compiled shader. 
+ */ +void +sp_setup_pos_vector(const struct tgsi_interp_coef *coef, + float x, float y, + struct tgsi_exec_vector *quadpos) +{ + uint chan; + /* do X */ + quadpos->xyzw[0].f[0] = x; + quadpos->xyzw[0].f[1] = x + 1; + quadpos->xyzw[0].f[2] = x; + quadpos->xyzw[0].f[3] = x + 1; + + /* do Y */ + quadpos->xyzw[1].f[0] = y; + quadpos->xyzw[1].f[1] = y; + quadpos->xyzw[1].f[2] = y + 1; + quadpos->xyzw[1].f[3] = y + 1; + + /* do Z and W for all fragments in the quad */ + for (chan = 2; chan < 4; chan++) { + const float dadx = coef->dadx[chan]; + const float dady = coef->dady[chan]; + const float a0 = coef->a0[chan] + dadx * x + dady * y; + quadpos->xyzw[chan].f[0] = a0; + quadpos->xyzw[chan].f[1] = a0 + dadx; + quadpos->xyzw[chan].f[2] = a0 + dady; + quadpos->xyzw[chan].f[3] = a0 + dadx + dady; + } +} + + +static void +exec_prepare( const struct sp_fragment_shader *base, + struct tgsi_exec_machine *machine, + struct tgsi_sampler *samplers ) +{ + /* + * Bind tokens/shader to the interpreter's machine state. + * Avoid redundant binding. 
+ */ + if (machine->Tokens != base->shader.tokens) { + tgsi_exec_machine_bind_shader( machine, + base->shader.tokens, + PIPE_MAX_SAMPLERS, + samplers ); + } +} + + + + +/* TODO: hide the machine struct in here somewhere, remove from this + * interface: + */ +static unsigned +exec_run( const struct sp_fragment_shader *base, + struct tgsi_exec_machine *machine, + struct quad_header *quad ) +{ + + /* Compute X, Y, Z, W vals for this quad */ + sp_setup_pos_vector(quad->posCoef, + (float)quad->input.x0, (float)quad->input.y0, + &machine->QuadPos); + + return tgsi_exec_machine_run( machine ); +} + + + +static void +exec_delete( struct sp_fragment_shader *base ) +{ + FREE((void *) base->shader.tokens); + FREE(base); +} + + + + + +struct sp_fragment_shader * +softpipe_create_fs_exec(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) +{ + struct sp_exec_fragment_shader *shader; + + /* Decide whether we'll be codegenerating this shader and if so do + * that now. + */ + + shader = CALLOC_STRUCT(sp_exec_fragment_shader); + if (!shader) + return NULL; + + /* we need to keep a local copy of the tokens */ + shader->base.shader.tokens = tgsi_dup_tokens(templ->tokens); + shader->base.prepare = exec_prepare; + shader->base.run = exec_run; + shader->base.delete = exec_delete; + + return &shader->base; +} + diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c new file mode 100644 index 0000000000..34adac5226 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c @@ -0,0 +1,200 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: + * Zack Rusin + */ + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_fs.h" + + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "tgsi/tgsi_sse2.h" + +#if 0 + +struct sp_llvm_fragment_shader { + struct sp_fragment_shader base; + struct gallivm_prog *llvm_prog; +}; + +static void +shade_quad_llvm(struct quad_stage *qs, + struct quad_header *quad) +{ + struct quad_shade_stage *qss = quad_shade_stage(qs); + struct softpipe_context *softpipe = qs->softpipe; + float dests[4][16][4] ALIGN16_ATTRIB; + float inputs[4][16][4] ALIGN16_ATTRIB; + const float fx = (float) quad->x0; + const float fy = (float) quad->y0; + struct gallivm_prog *llvm = qss->llvm_prog; + + inputs[0][0][0] = fx; + inputs[1][0][0] = fx + 1.0f; + inputs[2][0][0] = fx; + inputs[3][0][0] = fx + 1.0f; + + inputs[0][0][1] = fy; + inputs[1][0][1] = fy; + inputs[2][0][1] = fy + 1.0f; + inputs[3][0][1] = fy + 1.0f; + + + gallivm_prog_inputs_interpolate(llvm, inputs, quad->coef); + +#if DLLVM + debug_printf("MASK = %d\n", quad->mask); + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 2; ++j) { + debug_printf("IN(%d,%d) [%f %f %f %f]\n", i, j, + inputs[i][j][0], inputs[i][j][1], inputs[i][j][2], inputs[i][j][3]); + } + } +#endif + + quad->mask &= + gallivm_fragment_shader_exec(llvm, fx, fy, dests, inputs, + softpipe->mapped_constants[PIPE_SHADER_FRAGMENT], + qss->samplers); +#if DLLVM + debug_printf("OUT LLVM = 1[%f %f %f %f], 2[%f %f %f %f]\n", + dests[0][0][0], dests[0][0][1], dests[0][0][2], dests[0][0][3], + dests[0][1][0], dests[0][1][1], dests[0][1][2], dests[0][1][3]); +#endif + + /* store result color */ + if (qss->colorOutSlot >= 0) { + unsigned i; + /* XXX need to handle multiple color outputs someday */ + allvmrt(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] + == TGSI_SEMANTIC_COLOR); + for 
(i = 0; i < QUAD_SIZE; ++i) { + quad->outputs.color[0][0][i] = dests[i][qss->colorOutSlot][0]; + quad->outputs.color[0][1][i] = dests[i][qss->colorOutSlot][1]; + quad->outputs.color[0][2][i] = dests[i][qss->colorOutSlot][2]; + quad->outputs.color[0][3][i] = dests[i][qss->colorOutSlot][3]; + } + } +#if DLLVM + for (int i = 0; i < QUAD_SIZE; ++i) { + debug_printf("QLLVM%d(%d) [%f, %f, %f, %f]\n", i, qss->colorOutSlot, + quad->outputs.color[0][0][i], + quad->outputs.color[0][1][i], + quad->outputs.color[0][2][i], + quad->outputs.color[0][3][i]); + } +#endif + + /* store result Z */ + if (qss->depthOutSlot >= 0) { + /* output[slot] is new Z */ + uint i; + for (i = 0; i < 4; i++) { + quad->outputs.depth[i] = dests[i][0][2]; + } + } + else { + /* copy input Z (which was interpolated by the executor) to output Z */ + uint i; + for (i = 0; i < 4; i++) { + quad->outputs.depth[i] = inputs[i][0][2]; + } + } +#if DLLVM + debug_printf("D [%f, %f, %f, %f] mask = %d\n", + quad->outputs.depth[0], + quad->outputs.depth[1], + quad->outputs.depth[2], + quad->outputs.depth[3], quad->mask); +#endif + + /* shader may cull fragments */ + if( quad->mask ) { + qs->next->run( qs->next, quad ); + } +} + + +unsigned +run_llvm_fs( const struct sp_fragment_shader *base, + struct foo *machine ) +{ +} + + +void +delete_llvm_fs( struct sp_fragment_shader *base ) +{ + FREE(base); +} + + +struct sp_fragment_shader * +softpipe_create_fs_llvm(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) +{ + struct sp_llvm_fragment_shader *shader = NULL; + + /* LLVM fragment shaders currently disabled: + */ + state = CALLOC_STRUCT(sp_llvm_shader_state); + if (!state) + return NULL; + + state->llvm_prog = 0; + + if (!gallivm_global_cpu_engine()) { + gallivm_cpu_engine_create(state->llvm_prog); + } + else + gallivm_cpu_jit_compile(gallivm_global_cpu_engine(), state->llvm_prog); + + if (shader) { + shader->base.run = run_llvm_fs; + shader->base.delete = delete_llvm_fs; + } + + return 
shader; +} + + +#else + +struct sp_fragment_shader * +softpipe_create_fs_llvm(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) +{ + return NULL; +} + +#endif diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c new file mode 100644 index 0000000000..0111469405 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -0,0 +1,168 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_fs.h" +#include "sp_headers.h" + + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_sse2.h" + + +#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE) + +#include "rtasm/rtasm_x86sse.h" + +/* Surely this should be defined somewhere in a tgsi header: + */ +typedef void (PIPE_CDECL *codegen_function)( + const struct tgsi_exec_vector *input, + struct tgsi_exec_vector *output, + const float (*constant)[4], + struct tgsi_exec_vector *temporary, + const struct tgsi_interp_coef *coef, + float (*immediates)[4] + //, const struct tgsi_exec_vector *quadPos + ); + + +struct sp_sse_fragment_shader { + struct sp_fragment_shader base; + struct x86_function sse2_program; + codegen_function func; + float immediates[TGSI_EXEC_NUM_IMMEDIATES][4]; +}; + + + +static void +fs_sse_prepare( const struct sp_fragment_shader *base, + struct tgsi_exec_machine *machine, + struct tgsi_sampler *samplers ) +{ +} + + +/* TODO: codegenerate the whole run function, skip this wrapper. 
+ * TODO: break dependency on tgsi_exec_machine struct + * TODO: push Position calculation into the generated shader + * TODO: process >1 quad at a time + */ +static unsigned +fs_sse_run( const struct sp_fragment_shader *base, + struct tgsi_exec_machine *machine, + struct quad_header *quad ) +{ + struct sp_sse_fragment_shader *shader = (struct sp_sse_fragment_shader *) base; + + /* Compute X, Y, Z, W vals for this quad -- place in temp[0] for now */ + sp_setup_pos_vector(quad->posCoef, + (float)quad->input.x0, (float)quad->input.y0, + machine->Temps); + + /* init kill mask */ + machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] = 0x0; + + shader->func( machine->Inputs, + machine->Outputs, + machine->Consts, + machine->Temps, + machine->InterpCoefs, + shader->immediates + // , &machine->QuadPos + ); + + return ~(machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0]); +} + + +static void +fs_sse_delete( struct sp_fragment_shader *base ) +{ + struct sp_sse_fragment_shader *shader = (struct sp_sse_fragment_shader *) base; + + x86_release_func( &shader->sse2_program ); + FREE(shader); +} + + +struct sp_fragment_shader * +softpipe_create_fs_sse(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) +{ + struct sp_sse_fragment_shader *shader; + + if (!softpipe->use_sse) + return NULL; + + shader = CALLOC_STRUCT(sp_sse_fragment_shader); + if (!shader) + return NULL; + + x86_init_func( &shader->sse2_program ); + + if (!tgsi_emit_sse2( templ->tokens, &shader->sse2_program, + shader->immediates, FALSE )) { + FREE(shader); + return NULL; + } + + shader->func = (codegen_function) x86_get_func( &shader->sse2_program ); + if (!shader->func) { + x86_release_func( &shader->sse2_program ); + FREE(shader); + return NULL; + } + + shader->base.shader = *templ; + shader->base.prepare = fs_sse_prepare; + shader->base.run = fs_sse_run; + shader->base.delete = fs_sse_delete; + + return &shader->base; +} + + +#else + +/* 
Maybe put this varient in the header file. + */ +struct sp_fragment_shader * +softpipe_create_fs_sse(struct softpipe_context *softpipe, + const struct pipe_shader_state *templ) +{ + return NULL; +} + +#endif diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h new file mode 100644 index 0000000000..4a42cb3c19 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_headers.h @@ -0,0 +1,95 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_HEADERS_H +#define SP_HEADERS_H + +#include "pipe/p_state.h" +#include "tgsi/tgsi_exec.h" + +#define PRIM_POINT 1 +#define PRIM_LINE 2 +#define PRIM_TRI 3 + + +/* The rasterizer generates 2x2 quads of fragment and feeds them to + * the current fp_machine (see below). + * Remember that Y=0=top with Y increasing down the window. + */ +#define QUAD_TOP_LEFT 0 +#define QUAD_TOP_RIGHT 1 +#define QUAD_BOTTOM_LEFT 2 +#define QUAD_BOTTOM_RIGHT 3 + +#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT) +#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT) +#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT) +#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT) +#define MASK_ALL 0xf + + +/** + * Encodes everything we need to know about a 2x2 pixel block. Uses + * "Channel-Serial" or "SoA" layout. + */ +struct quad_header_input +{ + int x0; + int y0; + float coverage[QUAD_SIZE]; /** fragment coverage for antialiasing */ + unsigned facing:1; /**< Front (0) or back (1) facing? 
*/ + unsigned prim:2; /**< PRIM_POINT, LINE, TRI */ +}; + +struct quad_header_inout +{ + unsigned mask:4; +}; + +struct quad_header_output +{ + /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ + float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; + float depth[QUAD_SIZE]; +}; + +struct quad_header { + struct quad_header_input input; + struct quad_header_inout inout; + struct quad_header_output output; + + const struct tgsi_interp_coef *coef; + const struct tgsi_interp_coef *posCoef; + + unsigned nr_attrs; +}; + +#endif /* SP_HEADERS_H */ + diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c new file mode 100644 index 0000000000..038ff04d4f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -0,0 +1,190 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief A draw stage that drives our triangle setup routines from + * within the draw pipeline. One of two ways to drive setup, the + * other being in sp_prim_vbuf.c. + * + * \author Keith Whitwell <keith@tungstengraphics.com> + * \author Brian Paul + */ + + +#include "sp_context.h" +#include "sp_setup.h" +#include "sp_state.h" +#include "sp_prim_setup.h" +#include "draw/draw_pipe.h" +#include "draw/draw_vertex.h" +#include "util/u_memory.h" + +/** + * Triangle setup info (derived from draw_stage). + * Also used for line drawing (taking some liberties). + */ +struct setup_stage { + struct draw_stage stage; /**< This must be first (base class) */ + + struct setup_context *setup; +}; + + + +/** + * Basically a cast wrapper. 
+ */ +static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) +{ + return (struct setup_stage *)stage; +} + + +typedef const float (*cptrf4)[4]; + +static void +do_tri(struct draw_stage *stage, struct prim_header *prim) +{ + struct setup_stage *setup = setup_stage( stage ); + + setup_tri( setup->setup, + (cptrf4)prim->v[0]->data, + (cptrf4)prim->v[1]->data, + (cptrf4)prim->v[2]->data ); +} + +static void +do_line(struct draw_stage *stage, struct prim_header *prim) +{ + struct setup_stage *setup = setup_stage( stage ); + + setup_line( setup->setup, + (cptrf4)prim->v[0]->data, + (cptrf4)prim->v[1]->data ); +} + +static void +do_point(struct draw_stage *stage, struct prim_header *prim) +{ + struct setup_stage *setup = setup_stage( stage ); + + setup_point( setup->setup, + (cptrf4)prim->v[0]->data ); +} + + + + +static void setup_begin( struct draw_stage *stage ) +{ + struct setup_stage *setup = setup_stage(stage); + + setup_prepare( setup->setup ); + + stage->point = do_point; + stage->line = do_line; + stage->tri = do_tri; +} + + +static void setup_first_point( struct draw_stage *stage, + struct prim_header *header ) +{ + setup_begin(stage); + stage->point( stage, header ); +} + +static void setup_first_line( struct draw_stage *stage, + struct prim_header *header ) +{ + setup_begin(stage); + stage->line( stage, header ); +} + + +static void setup_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + setup_begin(stage); + stage->tri( stage, header ); +} + + + +static void setup_flush( struct draw_stage *stage, + unsigned flags ) +{ + stage->point = setup_first_point; + stage->line = setup_first_line; + stage->tri = setup_first_tri; +} + + +static void reset_stipple_counter( struct draw_stage *stage ) +{ +} + + +static void render_destroy( struct draw_stage *stage ) +{ + struct setup_stage *ssetup = setup_stage(stage); + setup_destroy_context(ssetup->setup); + FREE( stage ); +} + + +/** + * Create a new primitive setup/render stage. 
+ */ +struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe ) +{ + struct setup_stage *sstage = CALLOC_STRUCT(setup_stage); + + sstage->setup = setup_create_context(softpipe); + sstage->stage.draw = softpipe->draw; + sstage->stage.point = setup_first_point; + sstage->stage.line = setup_first_line; + sstage->stage.tri = setup_first_tri; + sstage->stage.flush = setup_flush; + sstage->stage.reset_stipple_counter = reset_stipple_counter; + sstage->stage.destroy = render_destroy; + + return (struct draw_stage *)sstage; +} + +struct setup_context * +sp_draw_setup_context( struct draw_stage *stage ) +{ + struct setup_stage *ssetup = setup_stage(stage); + return ssetup->setup; +} + +void +sp_draw_flush( struct draw_stage *stage ) +{ + stage->flush( stage, 0 ); +} diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.h b/src/gallium/drivers/softpipe/sp_prim_setup.h new file mode 100644 index 0000000000..49bdd98ed8 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_setup.h @@ -0,0 +1,85 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SP_PRIM_SETUP_H +#define SP_PRIM_SETUP_H + + +/** + * vbuf is a special stage to gather the stream of triangles, lines, points + * together and reconstruct vertex buffers for hardware upload. + * + * First attempt, work in progress. + * + * TODO: + * - separate out vertex buffer building and primitive emit, ie >1 draw per vb. + * - tell vbuf stage how to build hw vertices directly + * - pass vbuf stage a buffer pointer for direct emit to agp/vram. + * + * + * + * Vertices are just an array of floats, with all the attributes + * packed. We currently assume a layout like: + * + * attr[0][0..3] - window position + * attr[1..n][0..3] - remaining attributes. + * + * Attributes are assumed to be 4 floats wide but are packed so that + * all the enabled attributes run contiguously. 
+ */ + + +struct draw_stage; +struct softpipe_context; + + +typedef void (*vbuf_draw_func)( struct pipe_context *pipe, + unsigned prim, + const ushort *elements, + unsigned nr_elements, + const void *vertex_buffer, + unsigned nr_vertices ); + + +extern struct draw_stage * +sp_draw_render_stage( struct softpipe_context *softpipe ); + +extern struct setup_context * +sp_draw_setup_context( struct draw_stage * ); + +extern void +sp_draw_flush( struct draw_stage * ); + + +extern struct draw_stage * +sp_draw_vbuf_stage( struct draw_context *draw_context, + struct pipe_context *pipe, + vbuf_draw_func draw ); + + +#endif /* SP_PRIM_SETUP_H */ diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c new file mode 100644 index 0000000000..425e13cd28 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -0,0 +1,405 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Post-transform vertex buffering. This is an optional part of the + * softpipe rendering pipeline. + * Probably not desired in general, but useful for testing/debuggin. + * Enabled/Disabled with SP_VBUF env var. + * + * Authors + * Brian Paul + */ + + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_prim_vbuf.h" +#include "sp_prim_setup.h" +#include "sp_setup.h" +#include "draw/draw_context.h" +#include "draw/draw_vbuf.h" +#include "util/u_memory.h" + + +#define SP_MAX_VBUF_INDEXES 1024 +#define SP_MAX_VBUF_SIZE 4096 + +typedef const float (*cptrf4)[4]; + +/** + * Subclass of vbuf_render. + */ +struct softpipe_vbuf_render +{ + struct vbuf_render base; + struct softpipe_context *softpipe; + uint prim; + uint vertex_size; + void *vertex_buffer; +}; + + +/** cast wrapper */ +static struct softpipe_vbuf_render * +softpipe_vbuf_render(struct vbuf_render *vbr) +{ + return (struct softpipe_vbuf_render *) vbr; +} + + +static const struct vertex_info * +sp_vbuf_get_vertex_info(struct vbuf_render *vbr) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + return softpipe_get_vbuf_vertex_info(cvbr->softpipe); +} + + +static void * +sp_vbuf_allocate_vertices(struct vbuf_render *vbr, + ushort vertex_size, ushort nr_vertices) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + assert(!cvbr->vertex_buffer); + cvbr->vertex_buffer = align_malloc(vertex_size * nr_vertices, 16); + cvbr->vertex_size = vertex_size; + return cvbr->vertex_buffer; +} + + +static void +sp_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, + unsigned vertex_size, unsigned 
vertices_used) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + align_free(vertices); + assert(vertices == cvbr->vertex_buffer); + cvbr->vertex_buffer = NULL; +} + + +static boolean +sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + + /* XXX: break this dependency - make setup_context live under + * softpipe, rename the old "setup" draw stage to something else. + */ + struct setup_context *setup_ctx = sp_draw_setup_context(cvbr->softpipe->setup); + + setup_prepare( setup_ctx ); + + + + cvbr->prim = prim; + return TRUE; + +} + + +static INLINE cptrf4 get_vert( const void *vertex_buffer, + int index, + int stride ) +{ + return (cptrf4)((char *)vertex_buffer + index * stride); +} + + +static void +sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + struct softpipe_context *softpipe = cvbr->softpipe; + unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); + unsigned i; + const void *vertex_buffer = cvbr->vertex_buffer; + + /* XXX: break this dependency - make setup_context live under + * softpipe, rename the old "setup" draw stage to something else. 
+ */ + struct draw_stage *setup = softpipe->setup; + struct setup_context *setup_ctx = sp_draw_setup_context(softpipe->setup); + + + switch (cvbr->prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < nr; i++) { + setup_point( setup_ctx, + get_vert(vertex_buffer, indices[i-0], stride) ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 1; i < nr; i += 2) { + setup_line( setup_ctx, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + break; + + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < nr; i ++) { + setup_line( setup_ctx, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + break; + + case PIPE_PRIM_LINE_LOOP: + for (i = 1; i < nr; i ++) { + setup_line( setup_ctx, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride) ); + } + if (nr) { + setup_line( setup_ctx, + get_vert(vertex_buffer, indices[nr-1], stride), + get_vert(vertex_buffer, indices[0], stride) ); + } + break; + + + case PIPE_PRIM_TRIANGLES: + for (i = 2; i < nr; i += 3) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + for (i = 2; i < nr; i += 1) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i+(i&1)-2], stride), + get_vert(vertex_buffer, indices[i-(i&1)-1], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + for (i = 2; i < nr; i += 1) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[0], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + } + break; + case PIPE_PRIM_QUADS: + for (i = 3; i < nr; i += 4) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + 
get_vert(vertex_buffer, indices[i-0], stride)); + + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + } + break; + case PIPE_PRIM_QUAD_STRIP: + for (i = 3; i < nr; i += 2) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-2], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + + setup_tri( setup_ctx, + get_vert(vertex_buffer, indices[i-1], stride), + get_vert(vertex_buffer, indices[i-3], stride), + get_vert(vertex_buffer, indices[i-0], stride)); + } + break; + default: + assert(0); + } + + /* XXX: why are we calling this??? If we had to call something, it + * would be a function in sp_setup.c: + */ + sp_draw_flush( setup ); +} + + +/** + * This function is hit when the draw module is working in pass-through mode. + * It's up to us to convert the vertex array into point/line/tri prims. + */ +static void +sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + struct softpipe_context *softpipe = cvbr->softpipe; + struct draw_stage *setup = softpipe->setup; + const void *vertex_buffer = NULL; + const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); + unsigned i; + struct setup_context *setup_ctx = sp_draw_setup_context(setup); + + vertex_buffer = (void *)get_vert(cvbr->vertex_buffer, start, stride); + + switch (cvbr->prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < nr; i++) { + setup_point( setup_ctx, + get_vert(vertex_buffer, i-0, stride) ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 1; i < nr; i += 2) { + setup_line( setup_ctx, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + break; + + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < nr; i ++) { + setup_line( setup_ctx, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, 
i-0, stride) ); + } + break; + + case PIPE_PRIM_LINE_LOOP: + for (i = 1; i < nr; i ++) { + setup_line( setup_ctx, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride) ); + } + if (nr) { + setup_line( setup_ctx, + get_vert(vertex_buffer, nr-1, stride), + get_vert(vertex_buffer, 0, stride) ); + } + break; + + + case PIPE_PRIM_TRIANGLES: + for (i = 2; i < nr; i += 3) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride)); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + for (i = 2; i < nr; i += 1) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, i+(i&1)-2, stride), + get_vert(vertex_buffer, i-(i&1)-1, stride), + get_vert(vertex_buffer, i-0, stride)); + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + for (i = 2; i < nr; i += 1) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, 0, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride)); + } + break; + case PIPE_PRIM_QUADS: + for (i = 3; i < nr; i += 4) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride)); + + setup_tri( setup_ctx, + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-0, stride)); + } + break; + case PIPE_PRIM_QUAD_STRIP: + for (i = 3; i < nr; i += 2) { + setup_tri( setup_ctx, + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-2, stride), + get_vert(vertex_buffer, i-0, stride)); + + setup_tri( setup_ctx, + get_vert(vertex_buffer, i-1, stride), + get_vert(vertex_buffer, i-3, stride), + get_vert(vertex_buffer, i-0, stride)); + } + break; + default: + assert(0); + } +} + + + +static void +sp_vbuf_destroy(struct vbuf_render *vbr) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + cvbr->softpipe->vbuf_render = NULL; + FREE(cvbr); +} + + +/** + * 
Initialize the post-transform vertex buffer information for the given + * context. + */ +void +sp_init_vbuf(struct softpipe_context *sp) +{ + assert(sp->draw); + + sp->vbuf_render = CALLOC_STRUCT(softpipe_vbuf_render); + + sp->vbuf_render->base.max_indices = SP_MAX_VBUF_INDEXES; + sp->vbuf_render->base.max_vertex_buffer_bytes = SP_MAX_VBUF_SIZE; + + sp->vbuf_render->base.get_vertex_info = sp_vbuf_get_vertex_info; + sp->vbuf_render->base.allocate_vertices = sp_vbuf_allocate_vertices; + sp->vbuf_render->base.set_primitive = sp_vbuf_set_primitive; + sp->vbuf_render->base.draw = sp_vbuf_draw; + sp->vbuf_render->base.draw_arrays = sp_vbuf_draw_arrays; + sp->vbuf_render->base.release_vertices = sp_vbuf_release_vertices; + sp->vbuf_render->base.destroy = sp_vbuf_destroy; + + sp->vbuf_render->softpipe = sp; + + sp->vbuf = draw_vbuf_stage(sp->draw, &sp->vbuf_render->base); + + draw_set_rasterize_stage(sp->draw, sp->vbuf); + + draw_set_render(sp->draw, &sp->vbuf_render->base); +} diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.h b/src/gallium/drivers/softpipe/sp_prim_vbuf.h new file mode 100644 index 0000000000..1de9cc2a89 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SP_VBUF_H +#define SP_VBUF_H + + +struct softpipe_context; + +extern void +sp_init_vbuf(struct softpipe_context *softpipe); + + +#endif /* SP_VBUF_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c new file mode 100644 index 0000000000..892ef87ee9 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -0,0 +1,118 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "sp_context.h" +#include "sp_state.h" +#include "pipe/p_shader_tokens.h" + +static void +sp_push_quad_first( + struct softpipe_context *sp, + struct quad_stage *quad, + uint i ) +{ + quad->next = sp->quad[i].first; + sp->quad[i].first = quad; +} + +static void +sp_build_depth_stencil( + struct softpipe_context *sp, + uint i ) +{ + if (sp->depth_stencil->stencil[0].enabled || + sp->depth_stencil->stencil[1].enabled) { + sp_push_quad_first( sp, sp->quad[i].stencil_test, i ); + } + else if (sp->depth_stencil->depth.enabled && + sp->framebuffer.zsbuf) { + sp_push_quad_first( sp, sp->quad[i].depth_test, i ); + } +} + +void +sp_build_quad_pipeline(struct softpipe_context *sp) +{ + uint i; + + boolean early_depth_test = + sp->depth_stencil->depth.enabled && + sp->framebuffer.zsbuf && + !sp->depth_stencil->alpha.enabled && + !sp->fs->info.uses_kill && + !sp->fs->info.writes_z; + + /* build up the pipeline in reverse order... */ + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + sp->quad[i].first = sp->quad[i].output; + + if (sp->blend->colormask != 0xf) { + sp_push_quad_first( sp, sp->quad[i].colormask, i ); + } + + if (sp->blend->blend_enable || + sp->blend->logicop_enable) { + sp_push_quad_first( sp, sp->quad[i].blend, i ); + } + + if (sp->depth_stencil->depth.occlusion_count) { + sp_push_quad_first( sp, sp->quad[i].occlusion, i ); + } + + if (sp->rasterizer->poly_smooth || + sp->rasterizer->line_smooth || + sp->rasterizer->point_smooth) { + sp_push_quad_first( sp, sp->quad[i].coverage, i ); + } + + if (!early_depth_test) { + sp_build_depth_stencil( sp, i ); + } + + if (sp->depth_stencil->alpha.enabled) { + sp_push_quad_first( sp, sp->quad[i].alpha_test, i ); + } + + /* XXX always enable shader? 
*/ + if (1) { + sp_push_quad_first( sp, sp->quad[i].shade, i ); + } + + if (early_depth_test) { + sp_build_depth_stencil( sp, i ); + sp_push_quad_first( sp, sp->quad[i].earlyz, i ); + } + +#if !USE_DRAW_STAGE_PSTIPPLE + if (sp->rasterizer->poly_stipple_enable) { + sp_push_quad_first( sp, sp->quad[i].polygon_stipple, i ); + } +#endif + } +} + diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h new file mode 100644 index 0000000000..08513cb95f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad.h @@ -0,0 +1,69 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_QUAD_H +#define SP_QUAD_H + + +struct softpipe_context; +struct quad_header; + + /** One stage of a softpipe quad pipeline. Stages are chained through 'next' and each stage's callbacks receive the stage itself as 'qs'. */ +struct quad_stage { + struct softpipe_context *softpipe; /**< context this stage was created for; stored by the stage constructor */ + + struct quad_stage *next; /**< following stage in the chain; linked when the pipeline is built by sp_build_quad_pipeline() */ + + void (*begin)(struct quad_stage *qs); /**< begin hook; the alpha-test stage just forwards it to next->begin (NOTE(review): other stages not visible here) */ + + /** the stage action: process one quad; a stage passes the quad on by calling next->run */ + void (*run)(struct quad_stage *qs, struct quad_header *quad); + + void (*destroy)(struct quad_stage *qs); /**< release the stage; the alpha-test stage FREE()s the stage object */ +}; + + /* Stage constructors: each takes the softpipe context and returns a new quad_stage. */ +struct quad_stage *sp_quad_polygon_stipple_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_earlyz_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_alpha_test_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ); + /* Relink the stages of every quad thread according to the current state (see sp_quad.c). */ +void sp_build_quad_pipeline(struct softpipe_context *sp); + +void sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad); + +#endif /* SP_QUAD_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c new file mode 100644 index 0000000000..5bebd141e9 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c @@ -0,0 +1,108 @@ + +/** + * quad alpha test: quad pipeline stage that masks out pixels failing the alpha comparison + */ + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" + + 
+static void +alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + const float ref = softpipe->depth_stencil->alpha.ref; + unsigned passMask = 0x0, j; + const uint cbuf = 0; /* only output[0].alpha is tested */ + const float *aaaa = quad->output.color[cbuf][3]; + + switch (softpipe->depth_stencil->alpha.func) { + case PIPE_FUNC_NEVER: + break; + case PIPE_FUNC_LESS: + /* + * If mask were an array [4] we could do this SIMD-style: + * passMask = (quad->outputs.color[0][3] <= vec4(ref)); + */ + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] < ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_EQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] == ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_LEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] <= ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GREATER: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] > ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_NOTEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] != ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (aaaa[j] >= ref) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_ALWAYS: + passMask = MASK_ALL; + break; + default: + assert(0); + } + + quad->inout.mask &= passMask; + + if (quad->inout.mask) + qs->next->run(qs->next, quad); +} + + +static void alpha_test_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void alpha_test_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage * +sp_quad_alpha_test_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = alpha_test_begin; + stage->run = alpha_test_quad; + stage->destroy = alpha_test_destroy; + + return stage; +} diff --git 
a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c new file mode 100644 index 0000000000..6f64c6e584 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -0,0 +1,759 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * quad blending + * \author Brian Paul + */ + +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" +#include "sp_quad.h" + + +#define VEC4_COPY(DST, SRC) \ +do { \ + DST[0] = SRC[0]; \ + DST[1] = SRC[1]; \ + DST[2] = SRC[2]; \ + DST[3] = SRC[3]; \ +} while(0) + +#define VEC4_SCALAR(DST, SRC) \ +do { \ + DST[0] = SRC; \ + DST[1] = SRC; \ + DST[2] = SRC; \ + DST[3] = SRC; \ +} while(0) + +#define VEC4_ADD(R, A, B) \ +do { \ + R[0] = A[0] + B[0]; \ + R[1] = A[1] + B[1]; \ + R[2] = A[2] + B[2]; \ + R[3] = A[3] + B[3]; \ +} while (0) + +#define VEC4_SUB(R, A, B) \ +do { \ + R[0] = A[0] - B[0]; \ + R[1] = A[1] - B[1]; \ + R[2] = A[2] - B[2]; \ + R[3] = A[3] - B[3]; \ +} while (0) + +#define VEC4_MUL(R, A, B) \ +do { \ + R[0] = A[0] * B[0]; \ + R[1] = A[1] * B[1]; \ + R[2] = A[2] * B[2]; \ + R[3] = A[3] * B[3]; \ +} while (0) + +#define VEC4_MIN(R, A, B) \ +do { \ + R[0] = (A[0] < B[0]) ? A[0] : B[0]; \ + R[1] = (A[1] < B[1]) ? A[1] : B[1]; \ + R[2] = (A[2] < B[2]) ? A[2] : B[2]; \ + R[3] = (A[3] < B[3]) ? A[3] : B[3]; \ +} while (0) + +#define VEC4_MAX(R, A, B) \ +do { \ + R[0] = (A[0] > B[0]) ? A[0] : B[0]; \ + R[1] = (A[1] > B[1]) ? A[1] : B[1]; \ + R[2] = (A[2] > B[2]) ? A[2] : B[2]; \ + R[3] = (A[3] > B[3]) ? 
A[3] : B[3]; \ +} while (0) + + + +static void +logicop_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + float dest[4][QUAD_SIZE]; + ubyte src[4][4], dst[4][4], res[4][4]; + uint *src4 = (uint *) src; + uint *dst4 = (uint *) dst; + uint *res4 = (uint *) res; + struct softpipe_cached_tile * + tile = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[cbuf], + quad->input.x0, quad->input.y0); + float (*quadColor)[4] = quad->output.color[cbuf]; + uint i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } + } + + /* convert to ubyte */ + for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ + dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */ + dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */ + dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */ + dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */ + + src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */ + src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */ + src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */ + src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */ + } + + switch (softpipe->blend->logicop_func) { + case PIPE_LOGICOP_CLEAR: + for (j = 0; j < 4; j++) + res4[j] = 0; + break; + case PIPE_LOGICOP_NOR: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] | dst4[j]); + break; + case PIPE_LOGICOP_AND_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_COPY_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j]; + break; + case PIPE_LOGICOP_AND_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & ~dst4[j]; + break; + case PIPE_LOGICOP_INVERT: + for (j = 0; j < 4; j++) + res4[j] = 
~dst4[j]; + break; + case PIPE_LOGICOP_XOR: + for (j = 0; j < 4; j++) + res4[j] = dst4[j] ^ src4[j]; + break; + case PIPE_LOGICOP_NAND: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] & dst4[j]); + break; + case PIPE_LOGICOP_AND: + for (j = 0; j < 4; j++) + res4[j] = src4[j] & dst4[j]; + break; + case PIPE_LOGICOP_EQUIV: + for (j = 0; j < 4; j++) + res4[j] = ~(src4[j] ^ dst4[j]); + break; + case PIPE_LOGICOP_NOOP: + for (j = 0; j < 4; j++) + res4[j] = dst4[j]; + break; + case PIPE_LOGICOP_OR_INVERTED: + for (j = 0; j < 4; j++) + res4[j] = ~src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_COPY: + for (j = 0; j < 4; j++) + res4[j] = src4[j]; + break; + case PIPE_LOGICOP_OR_REVERSE: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | ~dst4[j]; + break; + case PIPE_LOGICOP_OR: + for (j = 0; j < 4; j++) + res4[j] = src4[j] | dst4[j]; + break; + case PIPE_LOGICOP_SET: + for (j = 0; j < 4; j++) + res4[j] = ~0; + break; + default: + assert(0); + } + + for (j = 0; j < 4; j++) { + quadColor[j][0] = ubyte_to_float(res[j][0]); + quadColor[j][1] = ubyte_to_float(res[j][1]); + quadColor[j][2] = ubyte_to_float(res[j][2]); + quadColor[j][3] = ubyte_to_float(res[j][3]); + } + } + + /* pass quad to next stage */ + qs->next->run(qs->next, quad); +} + + + + +static void +blend_quad(struct quad_stage *qs, struct quad_header *quad) +{ + static const float zero[4] = { 0, 0, 0, 0 }; + static const float one[4] = { 1, 1, 1, 1 }; + + struct softpipe_context *softpipe = qs->softpipe; + uint cbuf; + + if (softpipe->blend->logicop_enable) { + logicop_quad(qs, quad); + return; + } + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + float source[4][QUAD_SIZE], dest[4][QUAD_SIZE]; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[cbuf], + quad->input.x0, quad->input.y0); + float (*quadColor)[4] = quad->output.color[cbuf]; + uint i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { 
+ int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1); + int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1); + for (i = 0; i < 4; i++) { + dest[i][j] = tile->data.color[y][x][i]; + } + } + + /* + * Compute src/first term RGB + */ + switch (softpipe->blend->rgb_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[0], quadColor[0]); /* R */ + VEC4_COPY(source[1], quadColor[1]); /* G */ + VEC4_COPY(source[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + { + const float *alpha = quadColor[3]; + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */ + VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */ + VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + { + const float *alpha = dest[3]; + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + { + const float *alpha = quadColor[3]; + float diff[4], temp[4]; + VEC4_SUB(diff, one, dest[3]); + VEC4_MIN(temp, alpha, diff); + VEC4_MUL(source[0], quadColor[0], temp); /* R */ + VEC4_MUL(source[1], quadColor[1], temp); /* G */ + VEC4_MUL(source[2], quadColor[2], temp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], comp); /* R */ + VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], comp); /* G */ + 
VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float alpha[4]; + VEC4_SCALAR(alpha, softpipe->blend_color.color[3]); + VEC4_MUL(source[0], quadColor[0], alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_SRC1_ALPHA: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[0], zero); /* R */ + VEC4_COPY(source[1], zero); /* G */ + VEC4_COPY(source[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, quadColor[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, dest[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[0]); /* R */ + VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, dest[1]); /* G */ + VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, dest[2]); /* B */ + VEC4_MUL(source[2], quadColor[2], 
inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + { + float inv_comp[4]; + /* R */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); + VEC4_MUL(source[0], quadColor[0], inv_comp); + /* G */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); + VEC4_MUL(source[1], quadColor[1], inv_comp); + /* B */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); + VEC4_MUL(source[2], quadColor[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_alpha[4]; + VEC4_SCALAR(inv_alpha, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */ + VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */ + VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + assert(0); /* to do */ + break; + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + assert(0); /* to do */ + break; + default: + assert(0); + } + + /* + * Compute src/first term A + */ + switch (softpipe->blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ONE: + VEC4_COPY(source[3], quadColor[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: + { + const float *alpha = quadColor[3]; + VEC4_MUL(source[3], quadColor[3], alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + /* multiply alpha by 1.0 */ + VEC4_COPY(source[3], quadColor[3]); /* A */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(source[3], quadColor[3], comp); /* A */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(source[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case 
PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, quadColor[3]); + VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_alpha[4]; + VEC4_SUB(inv_alpha, one, dest[3]); + VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + /* A */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(source[3], quadColor[3], inv_comp); + } + break; + default: + assert(0); + } + + + /* + * Compute dest/second term RGB + */ + switch (softpipe->blend->rgb_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + VEC4_MUL(dest[0], dest[0], quadColor[0]); /* R */ + VEC4_MUL(dest[1], dest[1], quadColor[1]); /* G */ + VEC4_MUL(dest[2], dest[2], quadColor[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[0], dest[0], quadColor[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], quadColor[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], quadColor[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[0], dest[0], dest[3]); /* R * A */ + VEC4_MUL(dest[1], dest[1], dest[3]); /* G * A */ + VEC4_MUL(dest[2], dest[2], dest[3]); /* B * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + VEC4_MUL(dest[0], dest[0], dest[0]); /* R */ + VEC4_MUL(dest[1], dest[1], dest[1]); /* G */ + VEC4_MUL(dest[2], dest[2], dest[2]); /* B */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[0]); /* R */ + VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_SCALAR(comp, softpipe->blend_color.color[1]); /* G */ + VEC4_MUL(dest[1], dest[1], 
comp); /* G */ + VEC4_SCALAR(comp, softpipe->blend_color.color[2]); /* B */ + VEC4_MUL(dest[2], dest[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(dest[0], dest[0], comp); /* R */ + VEC4_MUL(dest[1], dest[1], comp); /* G */ + VEC4_MUL(dest[2], dest[2], comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[0], zero); /* R */ + VEC4_COPY(dest[1], zero); /* G */ + VEC4_COPY(dest[2], zero); /* B */ + break; + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + /* XXX what are these? */ + assert(0); + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, quadColor[0]); /* R */ + VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_SUB(inv_comp, one, quadColor[1]); /* G */ + VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_SUB(inv_comp, one, quadColor[2]); /* B */ + VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float one_minus_alpha[QUAD_SIZE]; + VEC4_SUB(one_minus_alpha, one, quadColor[3]); + VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */ + VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */ + VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[3]); /* A */ + VEC4_MUL(dest[0], inv_comp, dest[0]); /* R */ + VEC4_MUL(dest[1], inv_comp, dest[1]); /* G */ + VEC4_MUL(dest[2], inv_comp, dest[2]); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[0]); /* R */ + VEC4_MUL(dest[0], dest[0], inv_comp); /* R */ + VEC4_SUB(inv_comp, one, dest[1]); /* G */ + VEC4_MUL(dest[1], dest[1], inv_comp); /* G */ + VEC4_SUB(inv_comp, one, dest[2]); /* B */ + VEC4_MUL(dest[2], dest[2], inv_comp); /* B */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + 
{ + float inv_comp[4]; + /* R */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[0]); + VEC4_MUL(dest[0], dest[0], inv_comp); + /* G */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[1]); + VEC4_MUL(dest[1], dest[1], inv_comp); + /* B */ + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[2]); + VEC4_MUL(dest[2], dest[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(dest[0], dest[0], inv_comp); + VEC4_MUL(dest[1], dest[1], inv_comp); + VEC4_MUL(dest[2], dest[2], inv_comp); + } + break; + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + /* XXX what are these? */ + assert(0); + break; + default: + assert(0); + } + + /* + * Compute dest/second term A + */ + switch (softpipe->blend->alpha_dst_factor) { + case PIPE_BLENDFACTOR_ONE: + /* dest = dest * 1 NO-OP, leave dest as-is */ + break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_SRC_ALPHA: + VEC4_MUL(dest[3], dest[3], quadColor[3]); /* A * A */ + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + VEC4_MUL(dest[3], dest[3], dest[3]); /* A */ + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + assert(0); /* illegal */ + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + { + float comp[4]; + VEC4_SCALAR(comp, softpipe->blend_color.color[3]); /* A */ + VEC4_MUL(dest[3], dest[3], comp); /* A */ + } + break; + case PIPE_BLENDFACTOR_ZERO: + VEC4_COPY(dest[3], zero); /* A */ + break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + { + float one_minus_alpha[QUAD_SIZE]; + VEC4_SUB(one_minus_alpha, one, quadColor[3]); + VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* fall-through */ + case 
PIPE_BLENDFACTOR_INV_DST_ALPHA: + { + float inv_comp[4]; + VEC4_SUB(inv_comp, one, dest[3]); /* A */ + VEC4_MUL(dest[3], inv_comp, dest[3]); /* A */ + } + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + { + float inv_comp[4]; + VEC4_SCALAR(inv_comp, 1.0f - softpipe->blend_color.color[3]); + VEC4_MUL(dest[3], dest[3], inv_comp); + } + break; + default: + assert(0); + } + + /* + * Combine RGB terms + */ + switch (softpipe->blend->rgb_func) { + case PIPE_BLEND_ADD: + VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */ + VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */ + VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB(quadColor[0], source[0], dest[0]); /* R */ + VEC4_SUB(quadColor[1], source[1], dest[1]); /* G */ + VEC4_SUB(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB(quadColor[0], dest[0], source[0]); /* R */ + VEC4_SUB(quadColor[1], dest[1], source[1]); /* G */ + VEC4_SUB(quadColor[2], dest[2], source[2]); /* B */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MIN(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MIN(quadColor[2], source[2], dest[2]); /* B */ + break; + case PIPE_BLEND_MAX: + VEC4_MAX(quadColor[0], source[0], dest[0]); /* R */ + VEC4_MAX(quadColor[1], source[1], dest[1]); /* G */ + VEC4_MAX(quadColor[2], source[2], dest[2]); /* B */ + break; + default: + assert(0); + } + + /* + * Combine A terms + */ + switch (softpipe->blend->alpha_func) { + case PIPE_BLEND_ADD: + VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_SUBTRACT: + VEC4_SUB(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_REVERSE_SUBTRACT: + VEC4_SUB(quadColor[3], dest[3], source[3]); /* A */ + break; + case PIPE_BLEND_MIN: + VEC4_MIN(quadColor[3], source[3], dest[3]); /* A */ + break; + case PIPE_BLEND_MAX: + 
VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ + break; + default: + assert(0); + } + + } /* cbuf loop */ + + /* pass blended quad to next stage */ + qs->next->run(qs->next, quad); +} + + +static void blend_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void blend_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = blend_begin; + stage->run = blend_quad; + stage->destroy = blend_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_bufloop.c b/src/gallium/drivers/softpipe/sp_quad_bufloop.c new file mode 100644 index 0000000000..92e9af09c1 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_bufloop.c @@ -0,0 +1,74 @@ + +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" + + +/** + * Loop over colorbuffers, passing quad to next stage each time. + */ +static void +cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + float tmp[PIPE_MAX_COLOR_BUFS][4][QUAD_SIZE]; + unsigned i; + + assert(sizeof(quad->outputs.color) == sizeof(tmp)); + assert(softpipe->framebuffer.num_cbufs <= PIPE_MAX_COLOR_BUFS); + + /* make copy of original colors since they can get modified + * by blending and masking. + * XXX we won't have to do this if the fragment program actually emits + * N separate colors and we're drawing to N color buffers (MRT). + * But if we emitted one color and glDrawBuffer(GL_FRONT_AND_BACK) is + * in effect, we need to save/restore colors like this. 
+ */ + memcpy(tmp, quad->outputs.color, sizeof(tmp)); + + for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) { + /* set current cbuffer */ +#if 0 /* obsolete & going away */ + softpipe->current_cbuf = i; +#endif + + /* pass blended quad to next stage */ + qs->next->run(qs->next, quad); + + /* restore quad's colors for next buffer */ + memcpy(quad->outputs.color, tmp, sizeof(tmp)); + } +} + + +static void cbuf_loop_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void cbuf_loop_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +/** + * Create the colorbuffer loop stage. + * This is used to implement multiple render targets and GL_FRONT_AND_BACK + * rendering. + */ +struct quad_stage *sp_quad_bufloop_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = cbuf_loop_begin; + stage->run = cbuf_loop_quad; + stage->destroy = cbuf_loop_destroy; + + return stage; +} + diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c new file mode 100644 index 0000000000..f32bdfab78 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c @@ -0,0 +1,116 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief quad colormask stage + * \author Brian Paul + */ + +#include "pipe/p_defines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" +#include "sp_tile_cache.h" + + + +/** + * XXX colormask could be rolled into blending... 
+ */
+/*
+ * For each bound color buffer, fetch the four destination pixels that the
+ * quad covers from the cached tile.  Every channel whose bit is clear in
+ * blend->colormask then has its quad values overwritten with the destination
+ * values (via COPY_4V).  The quad is handed to the next stage afterwards.
+ */
+static void
+colormask_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+   struct softpipe_context *softpipe = qs->softpipe;
+   uint cbuf;
+
+   /* loop over colorbuffer outputs */
+   for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+      float dest[4][QUAD_SIZE];
+      struct softpipe_cached_tile *tile
+         = sp_get_cached_tile(softpipe,
+                              softpipe->cbuf_cache[cbuf],
+                              quad->input.x0, quad->input.y0);
+      float (*quadColor)[4] = quad->output.color[cbuf];
+      uint i, j;
+
+      /* get/swizzle dest colors:
+       * tile storage is indexed [y][x][channel], dest[] is [channel][pixel].
+       * Pixel j sits at offset (j & 1, j >> 1) from the quad origin.
+       * NOTE(review): the "& (TILE_SIZE-1)" wrap presumably relies on
+       * TILE_SIZE being a power of two -- confirm against its definition.
+       */
+      for (j = 0; j < QUAD_SIZE; j++) {
+         int x = (quad->input.x0 & (TILE_SIZE-1)) + (j & 1);
+         int y = (quad->input.y0 & (TILE_SIZE-1)) + (j >> 1);
+         for (i = 0; i < 4; i++) {
+            dest[i][j] = tile->data.color[y][x][i];
+         }
+      }
+
+      /* R */
+      if (!(softpipe->blend->colormask & PIPE_MASK_R))
+         COPY_4V(quadColor[0], dest[0]);
+
+      /* G */
+      if (!(softpipe->blend->colormask & PIPE_MASK_G))
+         COPY_4V(quadColor[1], dest[1]);
+
+      /* B */
+      if (!(softpipe->blend->colormask & PIPE_MASK_B))
+         COPY_4V(quadColor[2], dest[2]);
+
+      /* A */
+      if (!(softpipe->blend->colormask & PIPE_MASK_A))
+         COPY_4V(quadColor[3], dest[3]);
+   }
+
+   /* pass quad to next stage */
+   qs->next->run(qs->next, quad);
+}
+
+
+/* Nothing to set up here; just forward begin() down the pipeline. */
+static void colormask_begin(struct quad_stage *qs)
+{
+   qs->next->begin(qs->next);
+}
+
+
+/* Release the stage object allocated by sp_quad_colormask_stage(). */
+static void colormask_destroy(struct quad_stage *qs)
+{
+   FREE( qs );
+}
+
+
+/**
+ * Create the colormask quad stage.
+ * \param softpipe  context the stage belongs to
+ * \return the new stage, or NULL if the allocation failed
+ */
+struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe )
+{
+   struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
+
+   /* don't write through a failed allocation */
+   if (!stage)
+      return NULL;
+
+   stage->softpipe = softpipe;
+   stage->begin = colormask_begin;
+   stage->run = colormask_quad;
+   stage->destroy = colormask_destroy;
+
+   return stage;
+}
diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c
new file mode 100644
index 0000000000..ee29aa7dfe
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c
@@ -0,0 +1,93 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * \brief Apply AA coverage to quad alpha values + * \author Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" + + +/** + * Multiply quad's alpha values by the fragment coverage. 
+ */
+static void
+coverage_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+   struct softpipe_context *softpipe = qs->softpipe;
+
+   /* Coverage only applies when smoothing is enabled for the kind of
+    * primitive this quad came from; otherwise the quad passes through
+    * untouched.
+    */
+   if ((softpipe->rasterizer->poly_smooth && quad->input.prim == PRIM_TRI) ||
+       (softpipe->rasterizer->line_smooth && quad->input.prim == PRIM_LINE) ||
+       (softpipe->rasterizer->point_smooth && quad->input.prim == PRIM_POINT)) {
+      uint cbuf;
+
+      /* loop over colorbuffer outputs */
+      for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) {
+         float (*quadColor)[4] = quad->output.color[cbuf];
+         unsigned j;
+         for (j = 0; j < QUAD_SIZE; j++) {
+            assert(quad->input.coverage[j] >= 0.0);
+            assert(quad->input.coverage[j] <= 1.0);
+            /* quadColor[3] is the alpha row; scale each pixel's alpha */
+            quadColor[3][j] *= quad->input.coverage[j];
+         }
+      }
+   }
+
+   /* pass quad to next stage */
+   qs->next->run(qs->next, quad);
+}
+
+
+/* Nothing to set up here; just forward begin() down the pipeline. */
+static void coverage_begin(struct quad_stage *qs)
+{
+   qs->next->begin(qs->next);
+}
+
+
+/* Release the stage object allocated by sp_quad_coverage_stage(). */
+static void coverage_destroy(struct quad_stage *qs)
+{
+   FREE( qs );
+}
+
+
+/**
+ * Create the AA-coverage quad stage.
+ * \param softpipe  context the stage belongs to
+ * \return the new stage, or NULL if the allocation failed
+ */
+struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe )
+{
+   struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
+
+   /* don't write through a failed allocation */
+   if (!stage)
+      return NULL;
+
+   stage->softpipe = softpipe;
+   stage->begin = coverage_begin;
+   stage->run = coverage_quad;
+   stage->destroy = coverage_destroy;
+
+   return stage;
+}
diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
new file mode 100644
index 0000000000..523bd3e080
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c
@@ -0,0 +1,290 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Quad depth testing + */ + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" +#include "sp_tile_cache.h" + + +/** + * Do depth testing for a quad. + * Not static since it's used by the stencil code. + */ + +/* + * To increase efficiency, we should probably have multiple versions + * of this function that are specifically for Z16, Z32 and FP Z buffers. + * Try to effectively do that with codegen... 
+ */
+
+void
+sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+   struct softpipe_context *softpipe = qs->softpipe;
+   struct pipe_surface *ps = softpipe->framebuffer.zsbuf;
+   enum pipe_format format;
+   unsigned bzzzz[QUAD_SIZE];  /**< Z values fetched from depth buffer */
+   unsigned qzzzz[QUAD_SIZE];  /**< Z values from the quad */
+   unsigned zmask = 0;
+   unsigned j;
+   struct softpipe_cached_tile *tile;
+
+   /* Validate the surface _before_ touching it: reading ps->format in an
+    * initializer ahead of this assert would dereference a NULL zsbuf
+    * before the check could fire.
+    */
+   assert(ps); /* shouldn't get here if there's no zbuffer */
+   format = ps->format;
+
+   tile = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache,
+                             quad->input.x0, quad->input.y0);
+
+   /*
+    * Convert quad's float depth values to int depth values (qzzzz).
+    * If the Z buffer stores integer values, we _have_ to do the depth
+    * compares with integers (not floats).  Otherwise, the float->int->float
+    * conversion of Z values (which isn't an identity function) will cause
+    * Z-fighting errors.
+    *
+    * Also, get the zbuffer values (bzzzz) from the cached tile.
+    */
+   switch (format) {
+   case PIPE_FORMAT_Z16_UNORM:
+      {
+         float scale = 65535.0;
+
+         for (j = 0; j < QUAD_SIZE; j++) {
+            qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
+         }
+
+         for (j = 0; j < QUAD_SIZE; j++) {
+            int x = quad->input.x0 % TILE_SIZE + (j & 1);
+            int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+            bzzzz[j] = tile->data.depth16[y][x];
+         }
+      }
+      break;
+   case PIPE_FORMAT_Z32_UNORM:
+      {
+         double scale = (double) (uint) ~0UL;
+
+         for (j = 0; j < QUAD_SIZE; j++) {
+            qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
+         }
+
+         for (j = 0; j < QUAD_SIZE; j++) {
+            int x = quad->input.x0 % TILE_SIZE + (j & 1);
+            int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+            bzzzz[j] = tile->data.depth32[y][x];
+         }
+      }
+      break;
+   case PIPE_FORMAT_X8Z24_UNORM:
+      /* fall-through */
+   case PIPE_FORMAT_S8Z24_UNORM:
+      {
+         float scale = (float) ((1 << 24) - 1);
+
+         for (j = 0; j < QUAD_SIZE; j++) {
+            qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
+         }
+
+         /* Z lives in the low 24 bits of the 32-bit word */
+         for (j = 0; j < QUAD_SIZE; j++) {
+            int x = quad->input.x0 % TILE_SIZE + (j & 1);
+            int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+            bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;
+         }
+      }
+      break;
+   case PIPE_FORMAT_Z24X8_UNORM:
+      /* fall-through */
+   case PIPE_FORMAT_Z24S8_UNORM:
+      {
+         float scale = (float) ((1 << 24) - 1);
+
+         for (j = 0; j < QUAD_SIZE; j++) {
+            qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
+         }
+
+         /* Z lives in the high 24 bits of the 32-bit word */
+         for (j = 0; j < QUAD_SIZE; j++) {
+            int x = quad->input.x0 % TILE_SIZE + (j & 1);
+            int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+            bzzzz[j] = tile->data.depth32[y][x] >> 8;
+         }
+      }
+      break;
+   default:
+      assert(0);
+   }
+
+   /* Build zmask: bit j is set when pixel j passes the depth function. */
+   switch (softpipe->depth_stencil->depth.func) {
+   case PIPE_FUNC_NEVER:
+      /* zmask = 0 */
+      break;
+   case PIPE_FUNC_LESS:
+      /* Note this is pretty much a single sse or cell instruction.
+       * Like this:  quad->inout.mask &= (quad->output.depth < bzzzz);
+       */
+      for (j = 0; j < QUAD_SIZE; j++) {
+         if (qzzzz[j] < bzzzz[j])
+            zmask |= 1 << j;
+      }
+      break;
+   case PIPE_FUNC_EQUAL:
+      for (j = 0; j < QUAD_SIZE; j++) {
+         if (qzzzz[j] == bzzzz[j])
+            zmask |= 1 << j;
+      }
+      break;
+   case PIPE_FUNC_LEQUAL:
+      for (j = 0; j < QUAD_SIZE; j++) {
+         if (qzzzz[j] <= bzzzz[j])
+            zmask |= (1 << j);
+      }
+      break;
+   case PIPE_FUNC_GREATER:
+      for (j = 0; j < QUAD_SIZE; j++) {
+         if (qzzzz[j] > bzzzz[j])
+            zmask |= (1 << j);
+      }
+      break;
+   case PIPE_FUNC_NOTEQUAL:
+      for (j = 0; j < QUAD_SIZE; j++) {
+         if (qzzzz[j] != bzzzz[j])
+            zmask |= (1 << j);
+      }
+      break;
+   case PIPE_FUNC_GEQUAL:
+      for (j = 0; j < QUAD_SIZE; j++) {
+         if (qzzzz[j] >= bzzzz[j])
+            zmask |= (1 << j);
+      }
+      break;
+   case PIPE_FUNC_ALWAYS:
+      zmask = MASK_ALL;
+      break;
+   default:
+      assert(0);
+   }
+
+   /* kill the pixels that failed the test */
+   quad->inout.mask &= zmask;
+
+   if (softpipe->depth_stencil->depth.writemask) {
+
+      /* This is also efficient with sse / spe instructions:
+       */
+      for (j = 0; j < QUAD_SIZE; j++) {
+         if (quad->inout.mask & (1 << j)) {
+            bzzzz[j] = qzzzz[j];
+         }
+      }
+
+      /* put updated Z values back into cached tile */
+      switch (format) {
+      case PIPE_FORMAT_Z16_UNORM:
+         for (j = 0; j < QUAD_SIZE; j++) {
+            int x = quad->input.x0 % TILE_SIZE + (j & 1);
+            int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+            tile->data.depth16[y][x] = (ushort) bzzzz[j];
+         }
+         break;
+      case PIPE_FORMAT_X8Z24_UNORM:
+         /* fall-through */
+         /* (yes, this falls through to a different case than above) */
+      case PIPE_FORMAT_Z32_UNORM:
+         for (j = 0; j < QUAD_SIZE; j++) {
+            int x = quad->input.x0 % TILE_SIZE + (j & 1);
+            int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+            tile->data.depth32[y][x] = bzzzz[j];
+         }
+         break;
+      case PIPE_FORMAT_S8Z24_UNORM:
+         /* keep the top 8 bits of the existing word, replace the low 24 */
+         for (j = 0; j < QUAD_SIZE; j++) {
+            int x = quad->input.x0 % TILE_SIZE + (j & 1);
+            int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+            uint s8z24 = tile->data.depth32[y][x];
+            s8z24 = (s8z24 & 0xff000000) | bzzzz[j];
+            tile->data.depth32[y][x] = s8z24;
+         }
+         break;
+      case PIPE_FORMAT_Z24S8_UNORM:
+         /* keep the low 8 bits of the existing word, replace the top 24 */
+         for (j = 0; j < QUAD_SIZE; j++) {
+            int x = quad->input.x0 % TILE_SIZE + (j & 1);
+            int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+            uint z24s8 = tile->data.depth32[y][x];
+            z24s8 = (z24s8 & 0xff) | (bzzzz[j] << 8);
+            tile->data.depth32[y][x] = z24s8;
+         }
+         break;
+      case PIPE_FORMAT_Z24X8_UNORM:
+         for (j = 0; j < QUAD_SIZE; j++) {
+            int x = quad->input.x0 % TILE_SIZE + (j & 1);
+            int y = quad->input.y0 % TILE_SIZE + (j >> 1);
+            tile->data.depth32[y][x] = bzzzz[j] << 8;
+         }
+         break;
+      default:
+         assert(0);
+      }
+   }
+}
+
+
+/* Stage entry point: run the depth test and forward the quad only if at
+ * least one of its pixels is still alive.
+ */
+static void
+depth_test_quad(struct quad_stage *qs, struct quad_header *quad)
+{
+   sp_depth_test_quad(qs, quad);
+
+   if (quad->inout.mask)
+      qs->next->run(qs->next, quad);
+}
+
+
+/* Nothing to set up here; just forward begin() down the pipeline. */
+static void depth_test_begin(struct quad_stage *qs)
+{
+   qs->next->begin(qs->next);
+}
+
+
+/* Release the stage object allocated by sp_quad_depth_test_stage(). */
+static void depth_test_destroy(struct quad_stage *qs)
+{
+   FREE( qs );
+}
+
+
+/**
+ * Create the depth-test quad stage.
+ * \param softpipe  context the stage belongs to
+ * \return the new stage, or NULL if the allocation failed
+ */
+struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe )
+{
+   struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
+
+   /* don't write through a failed allocation */
+   if (!stage)
+      return NULL;
+
+   stage->softpipe = softpipe;
+   stage->begin = depth_test_begin;
+   stage->run = depth_test_quad;
+   stage->destroy = depth_test_destroy;
+
+   return stage;
+}
diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c
new file mode 100644
index 0000000000..6e2dde304e
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_quad_earlyz.c
@@ -0,0 +1,88 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * \brief  Quad early-z testing
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "sp_headers.h"
+#include "sp_quad.h"
+
+
+/**
+ * All this stage does is compute the quad's Z values (which is normally
+ * done by the shading stage).
+ * The next stage will do the actual depth test. + */ +static void +earlyz_quad( + struct quad_stage *qs, + struct quad_header *quad ) +{ + const float fx = (float) quad->input.x0; + const float fy = (float) quad->input.y0; + const float dzdx = quad->posCoef->dadx[2]; + const float dzdy = quad->posCoef->dady[2]; + const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; + + quad->output.depth[0] = z0; + quad->output.depth[1] = z0 + dzdx; + quad->output.depth[2] = z0 + dzdy; + quad->output.depth[3] = z0 + dzdx + dzdy; + + qs->next->run( qs->next, quad ); +} + +static void +earlyz_begin( + struct quad_stage *qs ) +{ + qs->next->begin( qs->next ); +} + +static void +earlyz_destroy( + struct quad_stage *qs ) +{ + FREE( qs ); +} + +struct quad_stage * +sp_quad_earlyz_stage( + struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT( quad_stage ); + + stage->softpipe = softpipe; + stage->begin = earlyz_begin; + stage->run = earlyz_quad; + stage->destroy = earlyz_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c new file mode 100644 index 0000000000..1f0cb3e035 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -0,0 +1,205 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Vertices are just an array of floats, with all the attributes + * packed. We currently assume a layout like: + * + * attr[0][0..3] - window position + * attr[1..n][0..3] - remaining attributes. + * + * Attributes are assumed to be 4 floats wide but are packed so that + * all the enabled attributes run contiguously. 
+ */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "sp_texture.h" +#include "sp_tex_sample.h" + + +struct quad_shade_stage +{ + struct quad_stage stage; + struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS]; + struct tgsi_exec_machine machine; + struct tgsi_exec_vector *inputs, *outputs; +}; + + +/** cast wrapper */ +static INLINE struct quad_shade_stage * +quad_shade_stage(struct quad_stage *qs) +{ + return (struct quad_shade_stage *) qs; +} + + + +/** + * Execute fragment shader for the four fragments in the quad. + */ +static void +shade_quad( + struct quad_stage *qs, + struct quad_header *quad ) +{ + struct quad_shade_stage *qss = quad_shade_stage( qs ); + struct softpipe_context *softpipe = qs->softpipe; + struct tgsi_exec_machine *machine = &qss->machine; + boolean z_written; + + /* Consts do not require 16 byte alignment. 
*/ + machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; + + machine->InterpCoefs = quad->coef; + + /* run shader */ + quad->inout.mask &= softpipe->fs->run( softpipe->fs, + &qss->machine, + quad ); + + /* store outputs */ + z_written = FALSE; + { + const ubyte *sem_name = softpipe->fs->info.output_semantic_name; + const ubyte *sem_index = softpipe->fs->info.output_semantic_index; + const uint n = qss->stage.softpipe->fs->info.num_outputs; + uint i; + for (i = 0; i < n; i++) { + switch (sem_name[i]) { + case TGSI_SEMANTIC_COLOR: + { + uint cbuf = sem_index[i]; + memcpy(quad->output.color[cbuf], + &machine->Outputs[i].xyzw[0].f[0], + sizeof(quad->output.color[0]) ); + } + break; + case TGSI_SEMANTIC_POSITION: + { + uint j; + for (j = 0; j < 4; j++) { + quad->output.depth[j] = machine->Outputs[0].xyzw[2].f[j]; + } + z_written = TRUE; + } + break; + } + } + } + + if (!z_written) { + /* compute Z values now, as in the quad earlyz stage */ + /* XXX we should really only do this if the earlyz stage is not used */ + const float fx = (float) quad->input.x0; + const float fy = (float) quad->input.y0; + const float dzdx = quad->posCoef->dadx[2]; + const float dzdy = quad->posCoef->dady[2]; + const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy; + + quad->output.depth[0] = z0; + quad->output.depth[1] = z0 + dzdx; + quad->output.depth[2] = z0 + dzdy; + quad->output.depth[3] = z0 + dzdx + dzdy; + } + + /* shader may cull fragments */ + if( quad->inout.mask ) { + qs->next->run( qs->next, quad ); + } +} + +/** + * Per-primitive (or per-begin?) 
setup + */ +static void shade_begin(struct quad_stage *qs) +{ + struct quad_shade_stage *qss = quad_shade_stage(qs); + struct softpipe_context *softpipe = qs->softpipe; + unsigned i; + unsigned num = MAX2(softpipe->num_textures, softpipe->num_samplers); + + /* set TGSI sampler state that varies */ + for (i = 0; i < num; i++) { + qss->samplers[i].state = softpipe->sampler[i]; + qss->samplers[i].texture = softpipe->texture[i]; + } + + softpipe->fs->prepare( softpipe->fs, + &qss->machine, + qss->samplers ); + + qs->next->begin(qs->next); +} + + +static void shade_destroy(struct quad_stage *qs) +{ + struct quad_shade_stage *qss = (struct quad_shade_stage *) qs; + + tgsi_exec_machine_free_data(&qss->machine); + FREE( qss->inputs ); + FREE( qss->outputs ); + FREE( qs ); +} + + +struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) +{ + struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage); + uint i; + + /* allocate storage for program inputs/outputs, aligned to 16 bytes */ + qss->inputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->inputs) + 16); + qss->outputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->outputs) + 16); + qss->machine.Inputs = align16(qss->inputs); + qss->machine.Outputs = align16(qss->outputs); + + qss->stage.softpipe = softpipe; + qss->stage.begin = shade_begin; + qss->stage.run = shade_quad; + qss->stage.destroy = shade_destroy; + + /* set TGSI sampler state that's constant */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + assert(softpipe->tex_cache[i]); + qss->samplers[i].get_samples = sp_get_samples; + qss->samplers[i].pipe = &softpipe->pipe; + qss->samplers[i].cache = softpipe->tex_cache[i]; + } + + tgsi_exec_machine_init( &qss->machine ); + + return &qss->stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c new file mode 100644 index 0000000000..169bd82876 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_occlusion.c @@ -0,0 +1,85 @@ 
+/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +/** + * \brief Quad occlusion counter stage + * \author Brian Paul + */ + + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" + +static unsigned count_bits( unsigned val ) +{ + unsigned i; + + for (i = 0; val ; val >>= 1) + i += (val & 1); + + return i; +} + +static void +occlusion_count_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + + softpipe->occlusion_count += count_bits(quad->inout.mask); + + qs->next->run(qs->next, quad); +} + + +static void occlusion_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void occlusion_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = occlusion_begin; + stage->run = occlusion_count_quad; + stage->destroy = occlusion_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c new file mode 100644 index 0000000000..d05e12d1d9 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_output.c @@ -0,0 +1,95 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_quad.h" +#include "sp_tile_cache.h" + + +/** + * Last step of quad processing: write quad colors to the framebuffer, + * taking mask into account. 
+ */ +static void +output_quad(struct quad_stage *qs, struct quad_header *quad) +{ + /* in-tile pos: */ + const int itx = quad->input.x0 % TILE_SIZE; + const int ity = quad->input.y0 % TILE_SIZE; + + struct softpipe_context *softpipe = qs->softpipe; + uint cbuf; + + /* loop over colorbuffer outputs */ + for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, + softpipe->cbuf_cache[cbuf], + quad->input.x0, quad->input.y0); + float (*quadColor)[4] = quad->output.color[cbuf]; + int i, j; + + /* get/swizzle dest colors */ + for (j = 0; j < QUAD_SIZE; j++) { + if (quad->inout.mask & (1 << j)) { + int x = itx + (j & 1); + int y = ity + (j >> 1); + for (i = 0; i < 4; i++) { /* loop over color chans */ + tile->data.color[y][x][i] = quadColor[i][j]; + } + } + } + } +} + + +static void output_begin(struct quad_stage *qs) +{ + assert(qs->next == NULL); +} + + +static void output_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = output_begin; + stage->run = output_quad; + stage->destroy = output_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c new file mode 100644 index 0000000000..abb5487748 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c @@ -0,0 +1,352 @@ + +/** + * \brief Quad stencil testing + */ + + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" +#include "sp_quad.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" + + +/** Only 8-bit stencil supported */ +#define STENCIL_MAX 0xff + + +/** + * Do the basic stencil test (compare stencil buffer values against the + * reference value. 
+ * + * \param stencilVals the stencil values from the stencil buffer + * \param func the stencil func (PIPE_FUNC_x) + * \param ref the stencil reference value + * \param valMask the stencil value mask indicating which bits of the stencil + * values and ref value are to be used. + * \return mask indicating which pixels passed the stencil test + */ +static unsigned +do_stencil_test(const ubyte stencilVals[QUAD_SIZE], unsigned func, + unsigned ref, unsigned valMask) +{ + unsigned passMask = 0x0; + unsigned j; + + ref &= valMask; + + switch (func) { + case PIPE_FUNC_NEVER: + /* passMask = 0x0 */ + break; + case PIPE_FUNC_LESS: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref < (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_EQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref == (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_LEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref <= (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GREATER: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref > (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_NOTEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref != (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_GEQUAL: + for (j = 0; j < QUAD_SIZE; j++) { + if (ref >= (stencilVals[j] & valMask)) { + passMask |= (1 << j); + } + } + break; + case PIPE_FUNC_ALWAYS: + passMask = MASK_ALL; + break; + default: + assert(0); + } + + return passMask; +} + + +/** + * Apply the stencil operator to stencil values. 
+ * + * \param stencilVals the stencil buffer values (read and written) + * \param mask indicates which pixels to update + * \param op the stencil operator (PIPE_STENCIL_OP_x) + * \param ref the stencil reference value + * \param wrtMask writemask controlling which bits are changed in the + * stencil values + */ +static void +apply_stencil_op(ubyte stencilVals[QUAD_SIZE], + unsigned mask, unsigned op, ubyte ref, ubyte wrtMask) +{ + unsigned j; + ubyte newstencil[QUAD_SIZE]; + + for (j = 0; j < QUAD_SIZE; j++) { + newstencil[j] = stencilVals[j]; + } + + switch (op) { + case PIPE_STENCIL_OP_KEEP: + /* no-op */ + break; + case PIPE_STENCIL_OP_ZERO: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = 0; + } + } + break; + case PIPE_STENCIL_OP_REPLACE: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = ref; + } + } + break; + case PIPE_STENCIL_OP_INCR: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + if (stencilVals[j] < STENCIL_MAX) { + newstencil[j] = stencilVals[j] + 1; + } + } + } + break; + case PIPE_STENCIL_OP_DECR: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + if (stencilVals[j] > 0) { + newstencil[j] = stencilVals[j] - 1; + } + } + } + break; + case PIPE_STENCIL_OP_INCR_WRAP: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = stencilVals[j] + 1; + } + } + break; + case PIPE_STENCIL_OP_DECR_WRAP: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = stencilVals[j] - 1; + } + } + break; + case PIPE_STENCIL_OP_INVERT: + for (j = 0; j < QUAD_SIZE; j++) { + if (mask & (1 << j)) { + newstencil[j] = ~stencilVals[j]; + } + } + break; + default: + assert(0); + } + + /* + * update the stencil values + */ + if (wrtMask != STENCIL_MAX) { + /* apply bit-wise stencil buffer writemask */ + for (j = 0; j < QUAD_SIZE; j++) { + stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & stencilVals[j]); + } + } + else { + for (j = 0; j < 
QUAD_SIZE; j++) { + stencilVals[j] = newstencil[j]; + } + } +} + + +/** + * Do stencil (and depth) testing. Stenciling depends on the outcome of + * depth testing. + */ +static void +stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) +{ + struct softpipe_context *softpipe = qs->softpipe; + struct pipe_surface *ps = softpipe->framebuffer.zsbuf; + unsigned func, zFailOp, zPassOp, failOp; + ubyte ref, wrtMask, valMask; + ubyte stencilVals[QUAD_SIZE]; + struct softpipe_cached_tile *tile + = sp_get_cached_tile(softpipe, softpipe->zsbuf_cache, quad->input.x0, quad->input.y0); + uint j; + uint face = quad->input.facing; + + if (!softpipe->depth_stencil->stencil[1].enabled) { + /* single-sided stencil test, use front (face=0) state */ + face = 0; + } + + /* choose front or back face function, operator, etc */ + /* XXX we could do these initializations once per primitive */ + func = softpipe->depth_stencil->stencil[face].func; + failOp = softpipe->depth_stencil->stencil[face].fail_op; + zFailOp = softpipe->depth_stencil->stencil[face].zfail_op; + zPassOp = softpipe->depth_stencil->stencil[face].zpass_op; + ref = softpipe->depth_stencil->stencil[face].ref_value; + wrtMask = softpipe->depth_stencil->stencil[face].write_mask; + valMask = softpipe->depth_stencil->stencil[face].value_mask; + + assert(ps); /* shouldn't get here if there's no stencil buffer */ + + /* get stencil values from cached tile */ + switch (ps->format) { + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + stencilVals[j] = tile->data.depth32[y][x] >> 24; + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + stencilVals[j] = tile->data.depth32[y][x] & 0xff; + } + break; + case PIPE_FORMAT_S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = 
quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + stencilVals[j] = tile->data.stencil8[y][x]; + } + break; + default: + assert(0); + } + + /* do the stencil test first */ + { + unsigned passMask, failMask; + passMask = do_stencil_test(stencilVals, func, ref, valMask); + failMask = quad->inout.mask & ~passMask; + quad->inout.mask &= passMask; + + if (failOp != PIPE_STENCIL_OP_KEEP) { + apply_stencil_op(stencilVals, failMask, failOp, ref, wrtMask); + } + } + + if (quad->inout.mask) { + /* now the pixels that passed the stencil test are depth tested */ + if (softpipe->depth_stencil->depth.enabled) { + const unsigned origMask = quad->inout.mask; + + sp_depth_test_quad(qs, quad); /* quad->mask is updated */ + + /* update stencil buffer values according to z pass/fail result */ + if (zFailOp != PIPE_STENCIL_OP_KEEP) { + const unsigned failMask = origMask & ~quad->inout.mask; + apply_stencil_op(stencilVals, failMask, zFailOp, ref, wrtMask); + } + + if (zPassOp != PIPE_STENCIL_OP_KEEP) { + const unsigned passMask = origMask & quad->inout.mask; + apply_stencil_op(stencilVals, passMask, zPassOp, ref, wrtMask); + } + } + else { + /* no depth test, apply Zpass operator to stencil buffer values */ + apply_stencil_op(stencilVals, quad->inout.mask, zPassOp, ref, wrtMask); + } + + } + + /* put new stencil values into cached tile */ + switch (ps->format) { + case PIPE_FORMAT_S8Z24_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + uint s8z24 = tile->data.depth32[y][x]; + s8z24 = (stencilVals[j] << 24) | (s8z24 & 0xffffff); + tile->data.depth32[y][x] = s8z24; + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + uint z24s8 = tile->data.depth32[y][x]; + z24s8 = (z24s8 & 0xffffff00) | stencilVals[j]; + tile->data.depth32[y][x] = 
z24s8; + } + break; + case PIPE_FORMAT_S8_UNORM: + for (j = 0; j < QUAD_SIZE; j++) { + int x = quad->input.x0 % TILE_SIZE + (j & 1); + int y = quad->input.y0 % TILE_SIZE + (j >> 1); + tile->data.stencil8[y][x] = stencilVals[j]; + } + break; + default: + assert(0); + } + + if (quad->inout.mask) + qs->next->run(qs->next, quad); +} + + +static void stencil_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void stencil_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = stencil_begin; + stage->run = stencil_test_quad; + stage->destroy = stencil_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c new file mode 100644 index 0000000000..ccf37f6be5 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -0,0 +1,94 @@ + +/** + * quad polygon stipple stage + */ + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" + + +/** + * Apply polygon stipple to quads produced by triangle rasterization + */ +static void +stipple_quad(struct quad_stage *qs, struct quad_header *quad) +{ + static const uint bit31 = 1 << 31; + static const uint bit30 = 1 << 30; + + if (quad->input.prim == PRIM_TRI) { + struct softpipe_context *softpipe = qs->softpipe; + /* need to invert Y to index into OpenGL's stipple pattern */ + int y0, y1; + uint stipple0, stipple1; + if (softpipe->rasterizer->origin_lower_left) { + y0 = softpipe->framebuffer.height - 1 - quad->input.y0; + y1 = y0 - 1; + } + else { + y0 = quad->input.y0; + y1 = y0 + 1; + } + stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; + stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; + +#if 1 + { + const int col0 = quad->input.x0 % 32; + if 
((stipple0 & (bit31 >> col0)) == 0) + quad->inout.mask &= ~MASK_TOP_LEFT; + + if ((stipple0 & (bit30 >> col0)) == 0) + quad->inout.mask &= ~MASK_TOP_RIGHT; + + if ((stipple1 & (bit31 >> col0)) == 0) + quad->inout.mask &= ~MASK_BOTTOM_LEFT; + + if ((stipple1 & (bit30 >> col0)) == 0) + quad->inout.mask &= ~MASK_BOTTOM_RIGHT; + } +#else + /* We'd like to use this code, but we'd need to redefine + * MASK_TOP_LEFT to be (1 << 1) and MASK_TOP_RIGHT to be (1 << 0), + * and similarly for the BOTTOM bits. But that may have undesirable + * side effects elsewhere. + */ + const int col0 = 30 - (quad->input.x0 % 32); + quad->inout.mask &= (((stipple0 >> col0) & 0x3) | + (((stipple1 >> col0) & 0x3) << 2)); +#endif + if (!quad->inout.mask) + return; + } + + qs->next->run(qs->next, quad); +} + + +static void stipple_begin(struct quad_stage *qs) +{ + qs->next->begin(qs->next); +} + + +static void stipple_destroy(struct quad_stage *qs) +{ + FREE( qs ); +} + + +struct quad_stage * +sp_quad_polygon_stipple_stage( struct softpipe_context *softpipe ) +{ + struct quad_stage *stage = CALLOC_STRUCT(quad_stage); + + stage->softpipe = softpipe; + stage->begin = stipple_begin; + stage->run = stipple_quad; + stage->destroy = stipple_destroy; + + return stage; +} diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c new file mode 100644 index 0000000000..2106ee1d23 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -0,0 +1,107 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Author: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_query.h" + +struct softpipe_query { + uint64 start; + uint64 end; +}; + + +static struct softpipe_query *softpipe_query( struct pipe_query *p ) +{ + return (struct softpipe_query *)p; +} + +static struct pipe_query * +softpipe_create_query(struct pipe_context *pipe, + unsigned type) +{ + assert(type == PIPE_QUERY_OCCLUSION_COUNTER); + return (struct pipe_query *)CALLOC_STRUCT( softpipe_query ); +} + + +static void +softpipe_destroy_query(struct pipe_context *pipe, struct pipe_query *q) +{ + FREE(q); +} + + +static void +softpipe_begin_query(struct pipe_context *pipe, struct pipe_query *q) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + struct softpipe_query *sq = softpipe_query(q); + + sq->start = softpipe->occlusion_count; +} + + +static void +softpipe_end_query(struct pipe_context *pipe, struct pipe_query *q) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + struct softpipe_query *sq = softpipe_query(q); + + sq->end = softpipe->occlusion_count; +} + + +static boolean +softpipe_get_query_result(struct pipe_context *pipe, + struct pipe_query *q, + boolean wait, + uint64 *result ) +{ + struct softpipe_query *sq = softpipe_query(q); + *result = sq->end - sq->start; + return TRUE; +} + + +void softpipe_init_query_funcs(struct softpipe_context *softpipe ) +{ + softpipe->pipe.create_query = softpipe_create_query; + softpipe->pipe.destroy_query = softpipe_destroy_query; + softpipe->pipe.begin_query = softpipe_begin_query; + softpipe->pipe.end_query = softpipe_end_query; + softpipe->pipe.get_query_result = softpipe_get_query_result; +} + + diff --git a/src/gallium/drivers/softpipe/sp_query.h 
b/src/gallium/drivers/softpipe/sp_query.h new file mode 100644 index 0000000000..05060a4575 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_query.h @@ -0,0 +1,39 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Author: + * Keith Whitwell + */ + +#ifndef SP_QUERY_H +#define SP_QUERY_H + +struct softpipe_context; +extern void softpipe_init_query_funcs(struct softpipe_context * ); + + +#endif /* SP_QUERY_H */ diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c new file mode 100644 index 0000000000..9644dbd168 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -0,0 +1,177 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "util/u_memory.h" +#include "pipe/p_winsys.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" + +#include "sp_texture.h" +#include "sp_winsys.h" +#include "sp_screen.h" + + +static const char * +softpipe_get_vendor(struct pipe_screen *screen) +{ + return "Tungsten Graphics, Inc."; +} + + +static const char * +softpipe_get_name(struct pipe_screen *screen) +{ + return "softpipe"; +} + + +static int +softpipe_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 8; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 1; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return PIPE_MAX_COLOR_BUFS; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + return 1; + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 12; /* max 2Kx2K */ + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 8; /* max 128x128x128 */ + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 12; /* max 2Kx2K */ + default: + return 0; + } +} + + +static float +softpipe_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 255.0; /* arbitrary */ + case PIPE_CAP_MAX_POINT_WIDTH: + /* fall-through */ + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 255.0; /* arbitrary */ + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 0.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; /* arbitrary */ + default: + return 0; + } +} + + +/** + * Query format support for creating a texture, drawing surface, etc. 
+ * \param format the format to test + * \param type one of PIPE_TEXTURE, PIPE_SURFACE + */ +static boolean +softpipe_is_format_supported( struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) +{ + /* Only 'format' is examined here; screen, target, tex_usage and + * geom_flags do not influence the answer. + */ + switch(format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + /* the DXT compressed formats are rejected */ + return FALSE; + default: + return TRUE; + } +} + + +/** + * Destroy the screen: call the winsys destroy hook if one is set, + * then free the screen object itself. + */ +static void +softpipe_destroy_screen( struct pipe_screen *screen ) +{ + struct pipe_winsys *winsys = screen->winsys; + + if(winsys->destroy) + winsys->destroy(winsys); + + FREE(screen); +} + + + +/** + * Create a new pipe_screen object. + * The object is allocated as a softpipe_screen and a pointer to its + * embedded 'base' pipe_screen is returned. + * \param winsys stored in the screen; softpipe_destroy_screen() calls its + * destroy hook + * \return the new screen, or NULL if the allocation fails + */ +struct pipe_screen * +softpipe_create_screen(struct pipe_winsys *winsys) +{ + struct softpipe_screen *screen = CALLOC_STRUCT(softpipe_screen); + + if (!screen) + return NULL; + + screen->base.winsys = winsys; + + screen->base.destroy = softpipe_destroy_screen; + + screen->base.get_name = softpipe_get_name; + screen->base.get_vendor = softpipe_get_vendor; + screen->base.get_param = softpipe_get_param; + screen->base.get_paramf = softpipe_get_paramf; + screen->base.is_format_supported = softpipe_is_format_supported; + + softpipe_init_screen_texture_funcs(&screen->base); + + return &screen->base; +} diff --git a/src/gallium/drivers/softpipe/sp_screen.h b/src/gallium/drivers/softpipe/sp_screen.h new file mode 100644 index 0000000000..3d4bfd3e84 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_screen.h @@ -0,0 +1,58 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_SCREEN_H +#define SP_SCREEN_H + +#include "pipe/p_screen.h" +#include "pipe/p_defines.h" + + + +struct softpipe_screen { + struct pipe_screen base; + + /* Increments whenever textures are modified. Contexts can track + * this. 
+ */ + unsigned timestamp; +}; + + + + +static INLINE struct softpipe_screen * +softpipe_screen( struct pipe_screen *pipe ) +{ + return (struct softpipe_screen *)pipe; +} + + +#endif /* SP_SCREEN_H */ diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c new file mode 100644 index 0000000000..13d8017393 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -0,0 +1,1569 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * \brief Primitive rasterization/rendering (points, lines, triangles) + * + * \author Keith Whitwell <keith@tungstengraphics.com> + * \author Brian Paul + */ + +#include "sp_setup.h" + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "sp_state.h" +#include "sp_prim_setup.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_thread.h" +#include "util/u_math.h" +#include "util/u_memory.h" + + +#define DEBUG_VERTS 0 +#define DEBUG_FRAGS 0 + +/** + * Triangle edge info + */ +struct edge { + float dx; /**< X(v1) - X(v0), used only during setup */ + float dy; /**< Y(v1) - Y(v0), used only during setup */ + float dxdy; /**< dx/dy */ + float sx, sy; /**< first sample point coord */ + int lines; /**< number of lines on this edge */ +}; + +#if SP_NUM_QUAD_THREADS > 1 + +/* Set to 1 if you want other threads to be instantly + * notified of pending jobs. + */ +#define INSTANT_NOTEMPTY_NOTIFY 0 + +struct thread_info +{ + struct setup_context *setup; + uint id; + pipe_thread handle; +}; + +struct quad_job; + +typedef void (* quad_job_routine)( struct setup_context *setup, uint thread, struct quad_job *job ); + +struct quad_job +{ + struct quad_header_input input; + struct quad_header_inout inout; + quad_job_routine routine; +}; + +#define NUM_QUAD_JOBS 64 + +struct quad_job_que +{ + struct quad_job jobs[NUM_QUAD_JOBS]; + uint first; + uint last; + pipe_mutex que_mutex; + pipe_condvar que_notfull_condvar; + pipe_condvar que_notempty_condvar; + uint jobs_added; + uint jobs_done; + pipe_condvar que_done_condvar; +}; + +static void +add_quad_job( struct quad_job_que *que, struct quad_header *quad, quad_job_routine routine ) +{ +#if INSTANT_NOTEMPTY_NOTIFY + boolean empty; +#endif + + /* Wait for empty slot, see if the que is empty. 
+ */ + pipe_mutex_lock( que->que_mutex ); + while ((que->last + 1) % NUM_QUAD_JOBS == que->first) { +#if !INSTANT_NOTEMPTY_NOTIFY + pipe_condvar_broadcast( que->que_notempty_condvar ); +#endif + pipe_condvar_wait( que->que_notfull_condvar, que->que_mutex ); + } +#if INSTANT_NOTEMPTY_NOTIFY + empty = que->last == que->first; +#endif + que->jobs_added++; + pipe_mutex_unlock( que->que_mutex ); + + /* Submit new job. + */ + que->jobs[que->last].input = quad->input; + que->jobs[que->last].inout = quad->inout; + que->jobs[que->last].routine = routine; + que->last = (que->last + 1) % NUM_QUAD_JOBS; + +#if INSTANT_NOTEMPTY_NOTIFY + /* If the que was empty, notify consumers there's a job to be done. + */ + if (empty) { + pipe_mutex_lock( que->que_mutex ); + pipe_condvar_broadcast( que->que_notempty_condvar ); + pipe_mutex_unlock( que->que_mutex ); + } +#endif +} + +#endif + +/** + * Triangle setup info (derived from draw_stage). + * Also used for line drawing (taking some liberties). + */ +struct setup_context { + struct softpipe_context *softpipe; + + /* Vertices are just an array of floats making up each attribute in + * turn. Currently fixed at 4 floats, but should change in time. + * Codegen will help cope with this. 
+ */ + const float (*vmax)[4]; + const float (*vmid)[4]; + const float (*vmin)[4]; + const float (*vprovoke)[4]; + + struct edge ebot; + struct edge etop; + struct edge emaj; + + float oneoverarea; + + struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; + struct tgsi_interp_coef posCoef; /* For Z, W */ + struct quad_header quad; + +#if SP_NUM_QUAD_THREADS > 1 + struct quad_job_que que; + struct thread_info threads[SP_NUM_QUAD_THREADS]; +#endif + + struct { + int left[2]; /**< [0] = row0, [1] = row1 */ + int right[2]; + int y; + unsigned y_flags; + unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ + } span; + +#if DEBUG_FRAGS + uint numFragsEmitted; /**< per primitive */ + uint numFragsWritten; /**< per primitive */ +#endif + + unsigned winding; /* which winding to cull */ +}; + +#if SP_NUM_QUAD_THREADS > 1 + +static PIPE_THREAD_ROUTINE( quad_thread, param ) +{ + struct thread_info *info = (struct thread_info *) param; + struct quad_job_que *que = &info->setup->que; + + for (;;) { + struct quad_job job; + boolean full; + + /* Wait for an available job. + */ + pipe_mutex_lock( que->que_mutex ); + while (que->last == que->first) + pipe_condvar_wait( que->que_notempty_condvar, que->que_mutex ); + + /* See if the que is full. + */ + full = (que->last + 1) % NUM_QUAD_JOBS == que->first; + + /* Take a job and remove it from que. + */ + job = que->jobs[que->first]; + que->first = (que->first + 1) % NUM_QUAD_JOBS; + + /* Notify the producer if the que is not full. + */ + if (full) + pipe_condvar_signal( que->que_notfull_condvar ); + pipe_mutex_unlock( que->que_mutex ); + + job.routine( info->setup, info->id, &job ); + + /* Notify the producer if that's the last finished job. 
+ */ + pipe_mutex_lock( que->que_mutex ); + que->jobs_done++; + if (que->jobs_added == que->jobs_done) + pipe_condvar_signal( que->que_done_condvar ); + pipe_mutex_unlock( que->que_mutex ); + } + + return NULL; +} + +#define WAIT_FOR_COMPLETION(setup) \ + do {\ + pipe_mutex_lock( setup->que.que_mutex );\ + if (!INSTANT_NOTEMPTY_NOTIFY)\ + pipe_condvar_broadcast( setup->que.que_notempty_condvar );\ + while (setup->que.jobs_added != setup->que.jobs_done)\ + pipe_condvar_wait( setup->que.que_done_condvar, setup->que.que_mutex );\ + pipe_mutex_unlock( setup->que.que_mutex );\ + } while (0) + +#else + +#define WAIT_FOR_COMPLETION(setup) ((void) 0) + +#endif + +/** + * Test if x is NaN or +/- infinity. + */ +static INLINE boolean +is_inf_or_nan(float x) +{ + union fi tmp; + tmp.f = x; + return !(int)((unsigned int)((tmp.i & 0x7fffffff)-0x7f800000) >> 31); +} + + +static boolean cull_tri( struct setup_context *setup, + float det ) +{ + if (det != 0) + { + /* if (det < 0 then Z points toward camera and triangle is + * counter-clockwise winding. + */ + unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW; + + if ((winding & setup->winding) == 0) + return FALSE; + } + + /* Culled: + */ + return TRUE; +} + + + +/** + * Clip setup->quad against the scissor/surface bounds. 
+ */ +static INLINE void +quad_clip( struct setup_context *setup, struct quad_header *quad ) +{ + const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; + const int minx = (int) cliprect->minx; + const int maxx = (int) cliprect->maxx; + const int miny = (int) cliprect->miny; + const int maxy = (int) cliprect->maxy; + + if (quad->input.x0 >= maxx || + quad->input.y0 >= maxy || + quad->input.x0 + 1 < minx || + quad->input.y0 + 1 < miny) { + /* totally clipped */ + quad->inout.mask = 0x0; + return; + } + if (quad->input.x0 < minx) + quad->inout.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + if (quad->input.y0 < miny) + quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + if (quad->input.x0 == maxx - 1) + quad->inout.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + if (quad->input.y0 == maxy - 1) + quad->inout.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); +} + + +/** + * Emit a quad (pass to next stage) with clipping. + */ +static INLINE void +clip_emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) +{ + quad_clip( setup, quad ); + if (quad->inout.mask) { + struct softpipe_context *sp = setup->softpipe; + + sp->quad[thread].first->run( sp->quad[thread].first, quad ); + } +} + +#if SP_NUM_QUAD_THREADS > 1 + +static void +clip_emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) +{ + struct quad_header quad; + + quad.input = job->input; + quad.inout = job->inout; + quad.coef = setup->quad.coef; + quad.posCoef = setup->quad.posCoef; + quad.nr_attrs = setup->quad.nr_attrs; + clip_emit_quad( setup, &quad, thread ); +} + +#define CLIP_EMIT_QUAD(setup) add_quad_job( &setup->que, &setup->quad, clip_emit_quad_job ) + +#else + +#define CLIP_EMIT_QUAD(setup) clip_emit_quad( setup, &setup->quad, 0 ) + +#endif + +/** + * Emit a quad (pass to next stage). No clipping is done. 
+ */ +static INLINE void +emit_quad( struct setup_context *setup, struct quad_header *quad, uint thread ) +{ + struct softpipe_context *sp = setup->softpipe; +#if DEBUG_FRAGS + uint mask = quad->inout.mask; +#endif + +#if DEBUG_FRAGS + if (mask & 1) setup->numFragsEmitted++; + if (mask & 2) setup->numFragsEmitted++; + if (mask & 4) setup->numFragsEmitted++; + if (mask & 8) setup->numFragsEmitted++; +#endif + sp->quad[thread].first->run( sp->quad[thread].first, quad ); +#if DEBUG_FRAGS + mask = quad->inout.mask; + if (mask & 1) setup->numFragsWritten++; + if (mask & 2) setup->numFragsWritten++; + if (mask & 4) setup->numFragsWritten++; + if (mask & 8) setup->numFragsWritten++; +#endif +} + +#if SP_NUM_QUAD_THREADS > 1 + +static void +emit_quad_job( struct setup_context *setup, uint thread, struct quad_job *job ) +{ + struct quad_header quad; + + quad.input = job->input; + quad.inout = job->inout; + quad.coef = setup->quad.coef; + quad.posCoef = setup->quad.posCoef; + quad.nr_attrs = setup->quad.nr_attrs; + emit_quad( setup, &quad, thread ); +} + +#define EMIT_QUAD(setup,x,y,mask) do {\ + setup->quad.input.x0 = x;\ + setup->quad.input.y0 = y;\ + setup->quad.inout.mask = mask;\ + add_quad_job( &setup->que, &setup->quad, emit_quad_job );\ + } while (0) + +#else + +#define EMIT_QUAD(setup,x,y,mask) do {\ + setup->quad.input.x0 = x;\ + setup->quad.input.y0 = y;\ + setup->quad.inout.mask = mask;\ + emit_quad( setup, &setup->quad, 0 );\ + } while (0) + +#endif + +/** + * Given an X or Y coordinate, return the block/quad coordinate that it + * belongs to. 
+ */ +static INLINE int block( int x ) +{ + /* clear bit 0, i.e. round down to an even coordinate */ + return x & ~1; +} + + +/** + * Render a horizontal span of quads. + * Emits one quad per 2-pixel column for the span rows recorded in + * setup->span (y_flags bit 0 = even/top row, bit 1 = odd/bottom row), + * then resets the span state. Does nothing if no row has been recorded. + * NOTE(review): each loop runs to x <= maxright, so when the right bound + * is even the last quad is emitted with an empty mask. + */ +static void flush_spans( struct setup_context *setup ) +{ + const int xleft0 = setup->span.left[0]; + const int xleft1 = setup->span.left[1]; + const int xright0 = setup->span.right[0]; + const int xright1 = setup->span.right[1]; + int minleft, maxright; + int x; + + switch (setup->span.y_flags) { + case 0x3: + /* both odd and even lines written (both quad rows) */ + minleft = block(MIN2(xleft0, xleft1)); + maxright = block(MAX2(xright0, xright1)); + for (x = minleft; x <= maxright; x += 2) { + /* determine which of the four pixels is inside the span bounds */ + uint mask = 0x0; + if (x >= xleft0 && x < xright0) + mask |= MASK_TOP_LEFT; + if (x >= xleft1 && x < xright1) + mask |= MASK_BOTTOM_LEFT; + if (x+1 >= xleft0 && x+1 < xright0) + mask |= MASK_TOP_RIGHT; + if (x+1 >= xleft1 && x+1 < xright1) + mask |= MASK_BOTTOM_RIGHT; + EMIT_QUAD( setup, x, setup->span.y, mask ); + } + break; + + case 0x1: + /* only even line written (quad top row) */ + minleft = block(xleft0); + maxright = block(xright0); + for (x = minleft; x <= maxright; x += 2) { + uint mask = 0x0; + if (x >= xleft0 && x < xright0) + mask |= MASK_TOP_LEFT; + if (x+1 >= xleft0 && x+1 < xright0) + mask |= MASK_TOP_RIGHT; + EMIT_QUAD( setup, x, setup->span.y, mask ); + } + break; + + case 0x2: + /* only odd line written (quad bottom row) */ + minleft = block(xleft1); + maxright = block(xright1); + for (x = minleft; x <= maxright; x += 2) { + uint mask = 0x0; + if (x >= xleft1 && x < xright1) + mask |= MASK_BOTTOM_LEFT; + if (x+1 >= xleft1 && x+1 < xright1) + mask |= MASK_BOTTOM_RIGHT; + EMIT_QUAD( setup, x, setup->span.y, mask ); + } + break; + + default: + return; + } + + setup->span.y = 0; + setup->span.y_flags = 0; + setup->span.right[0] = 0; + setup->span.right[1] = 0; +} + + +#if DEBUG_VERTS +/** + * Debug helper: print every attribute (4 floats each) of a vertex. + */ +static void print_vertex(const struct setup_context *setup, + const float (*v)[4]) +{ + int i; + 
debug_printf(" Vertex: (%p)\n", v); + for (i = 0; i < setup->quad.nr_attrs; i++) { + debug_printf(" %d: %f %f %f %f\n", i, + v[i][0], v[i][1], v[i][2], v[i][3]); + } +} +#endif + +/** + * Sort the vertices by Y into vmin/vmid/vmax, compute the edge deltas, + * 1/area and the facing flag. + * \return FALSE if 1/area is inf/nan (cull the tri), TRUE otherwise + */ +static boolean setup_sort_vertices( struct setup_context *setup, + float det, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + setup->vprovoke = v2; + + /* determine bottom to top order of vertices */ + { + float y0 = v0[0][1]; + float y1 = v1[0][1]; + float y2 = v2[0][1]; + if (y0 <= y1) { + if (y1 <= y2) { + /* y0<=y1<=y2 */ + setup->vmin = v0; + setup->vmid = v1; + setup->vmax = v2; + } + else if (y2 <= y0) { + /* y2<=y0<=y1 */ + setup->vmin = v2; + setup->vmid = v0; + setup->vmax = v1; + } + else { + /* y0<=y2<=y1 */ + setup->vmin = v0; + setup->vmid = v2; + setup->vmax = v1; + } + } + else { + if (y0 <= y2) { + /* y1<=y0<=y2 */ + setup->vmin = v1; + setup->vmid = v0; + setup->vmax = v2; + } + else if (y2 <= y1) { + /* y2<=y1<=y0 */ + setup->vmin = v2; + setup->vmid = v1; + setup->vmax = v0; + } + else { + /* y1<=y2<=y0 */ + setup->vmin = v1; + setup->vmid = v2; + setup->vmax = v0; + } + } + } + + setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0]; + setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1]; + setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; + setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; + setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0]; + setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1]; + + /* + * Compute triangle's area. Use 1/area to compute partial + * derivatives of attributes later. + * + * The area will be the same as det, but the sign may be + * different depending on how the vertices get sorted above. + * + * To determine whether the primitive is front or back facing we + * use the det value because its sign is correct. 
+ */ + { + const float area = (setup->emaj.dx * setup->ebot.dy - + setup->ebot.dx * setup->emaj.dy); + + setup->oneoverarea = 1.0f / area; + + /* + debug_printf("%s one-over-area %f area %f det %f\n", + __FUNCTION__, setup->oneoverarea, area, det ); + */ + if (is_inf_or_nan(setup->oneoverarea)) + return FALSE; + } + + /* We need to know if this is a front or back-facing triangle for: + * - the GLSL gl_FrontFacing fragment attribute (bool) + * - two-sided stencil test + */ + setup->quad.input.facing = (det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW); + + return TRUE; +} + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + * The value comes from the provoking vertex: vprovoke[vertSlot][i]. + * The result is put into coef->a0[i]; dadx and dady are set to zero. + * \param coef the coefficient set to fill in + * \param vertSlot which vertex attribute slot to read + * \param i which component of the slot (0..3) + */ +static void const_coeff( struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + assert(i <= 3); + + coef->dadx[i] = 0; + coef->dady[i] = 0; + + /* need provoking vertex info! + */ + coef->a0[i] = setup->vprovoke[vertSlot][i]; +} + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. + */ +static void tri_linear_coeff( struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i]; + float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; + float a = setup->ebot.dy * majda - botda * setup->emaj.dy; + float b = setup->emaj.dx * botda - majda * setup->ebot.dx; + float dadx = a * setup->oneoverarea; + float dady = b * setup->oneoverarea; + + assert(i <= 3); + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). 
+ * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up losing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + coef->a0[i] = (setup->vmin[vertSlot][i] - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); + + /* + debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", + slot, "xyzw"[i], + setup->coef[slot].a0[i], + setup->coef[slot].dadx[i], + setup->coef[slot].dady[i]); + */ +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. 
+ */ +static void tri_persp_coeff( struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + /* premultiply by 1/w (v[0][3] is always W): + */ + float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3]; + float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3]; + float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3]; + float botda = mida - mina; + float majda = maxa - mina; + float a = setup->ebot.dy * majda - botda * setup->emaj.dy; + float b = setup->emaj.dx * botda - majda * setup->ebot.dx; + float dadx = a * setup->oneoverarea; + float dady = b * setup->oneoverarea; + + /* + debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, + setup->vmin[vertSlot][i], + setup->vmid[vertSlot][i], + setup->vmax[vertSlot][i] + ); + */ + assert(i <= 3); + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (mina - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial, though Y has to be inverted for OpenGL. + * Z and W are copied from posCoef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. 
+ */ +static void +setup_fragcoord_coeff(struct setup_context *setup, uint slot) +{ + /*X*/ + setup->coef[slot].a0[0] = 0; + setup->coef[slot].dadx[0] = 1.0; + setup->coef[slot].dady[0] = 0.0; + /*Y*/ + if (setup->softpipe->rasterizer->origin_lower_left) { + /* y=0=bottom */ + const int winHeight = setup->softpipe->framebuffer.height; + setup->coef[slot].a0[1] = (float) (winHeight - 1); + setup->coef[slot].dady[1] = -1.0; + } + else { + /* y=0=top */ + setup->coef[slot].a0[1] = 0.0; + setup->coef[slot].dady[1] = 1.0; + } + setup->coef[slot].dadx[1] = 0.0; + /*Z*/ + setup->coef[slot].a0[2] = setup->posCoef.a0[2]; + setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; + setup->coef[slot].dady[2] = setup->posCoef.dady[2]; + /*W*/ + setup->coef[slot].a0[3] = setup->posCoef.a0[3]; + setup->coef[slot].dadx[3] = setup->posCoef.dadx[3]; + setup->coef[slot].dady[3] = setup->posCoef.dady[3]; +} + + + +/** + * Compute the setup->coef[] array dadx, dady, a0 values. + * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. 
+ */ +static void setup_tri_coefficients( struct setup_context *setup ) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct sp_fragment_shader *spfs = softpipe->fs; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + + /* z and w are done by linear interpolation: + */ + tri_linear_coeff(setup, &setup->posCoef, 0, 2); + tri_linear_coeff(setup, &setup->posCoef, 0, 3); + + /* setup interpolation for all the remaining attributes: + */ + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + const uint vertSlot = vinfo->attrib[fragSlot].src_index; + uint j; + + switch (vinfo->attrib[fragSlot].interp_mode) { + case INTERP_CONSTANT: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + setup_fragcoord_coeff(setup, fragSlot); + break; + default: + assert(0); + } + + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } +} + + + +/** + * For each triangle edge (emaj, etop, ebot) compute the first scanline + * (sy), the number of scanlines (lines), the slope dx/dy (dxdy) and the + * x coordinate at the first scanline (sx). + * Uses the edge dx/dy values stored by setup_sort_vertices(). + */ +static void setup_tri_edges( struct setup_context *setup ) +{ + float vmin_x = setup->vmin[0][0] + 0.5f; + float vmid_x = setup->vmid[0][0] + 0.5f; + + float vmin_y = setup->vmin[0][1] - 0.5f; + float vmid_y = setup->vmid[0][1] - 0.5f; + float vmax_y = setup->vmax[0][1] - 0.5f; + + setup->emaj.sy = ceilf(vmin_y); + setup->emaj.lines = (int) ceilf(vmax_y - setup->emaj.sy); + setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; + setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; + + setup->etop.sy = ceilf(vmid_y); 
+ setup->etop.lines = (int) ceilf(vmax_y - setup->etop.sy); + setup->etop.dxdy = setup->etop.dx / setup->etop.dy; + setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; + + setup->ebot.sy = ceilf(vmin_y); + setup->ebot.lines = (int) ceilf(vmid_y - setup->ebot.sy); + setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; + setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; +} + + +/** + * Render the upper or lower half of a triangle. + * Scissoring/cliprect is applied here too. + * \param eleft edge bounding the spans on the left + * \param eright edge bounding the spans on the right + * \param lines number of scanlines to cover, starting at eleft->sy + */ +static void subtriangle( struct setup_context *setup, + struct edge *eleft, + struct edge *eright, + unsigned lines ) +{ + const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; + const int minx = (int) cliprect->minx; + const int maxx = (int) cliprect->maxx; + const int miny = (int) cliprect->miny; + const int maxy = (int) cliprect->maxy; + int y, start_y, finish_y; + int sy = (int)eleft->sy; + + assert((int)eleft->sy == (int) eright->sy); + + /* clip top/bottom */ + start_y = sy; + finish_y = sy + lines; + + if (start_y < miny) + start_y = miny; + + if (finish_y > maxy) + finish_y = maxy; + + start_y -= sy; + finish_y -= sy; + + /* + debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); + */ + + for (y = start_y; y < finish_y; y++) { + + /* avoid accumulating adds as floats don't have the precision to + * accurately iterate large triangle edges that way. luckily we + * can just multiply these days. + * + * this is all drowned out by the attribute interpolation anyway. 
+ */ + int left = (int)(eleft->sx + y * eleft->dxdy); + int right = (int)(eright->sx + y * eright->dxdy); + + /* clip left/right */ + if (left < minx) + left = minx; + if (right > maxx) + right = maxx; + + if (left < right) { + int _y = sy + y; + if (block(_y) != setup->span.y) { + flush_spans(setup); + setup->span.y = block(_y); + } + + setup->span.left[_y&1] = left; + setup->span.right[_y&1] = right; + setup->span.y_flags |= 1<<(_y&1); + } + } + + + /* save the values so that emaj can be restarted: + */ + eleft->sx += lines * eleft->dxdy; + eright->sx += lines * eright->dxdy; + eleft->sy += lines; + eright->sy += lines; +} + + +/** + * Recalculate prim's determinant. This is needed because we don't + * get this information through the vbuf_render interface, so we must + * calculate it here. + */ +static float +calc_det( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0][0] - v2[0][0]; + const float ey = v0[0][1] - v2[0][1]; + const float fx = v1[0][0] - v2[0][0]; + const float fy = v1[0][1] - v2[0][1]; + + /* det = cross(e,f).z */ + return ex * fy - ey * fx; +} + + +/** + * Do setup for triangle rasterization, then render the triangle. 
+ */ +void setup_tri( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + float det; + +#if DEBUG_VERTS + debug_printf("Setup triangle:\n"); + print_vertex(setup, v0); + print_vertex(setup, v1); + print_vertex(setup, v2); +#endif + + if (setup->softpipe->no_rast) + return; + + det = calc_det(v0, v1, v2); + /* + debug_printf("%s\n", __FUNCTION__ ); + */ + +#if DEBUG_FRAGS + setup->numFragsEmitted = 0; + setup->numFragsWritten = 0; +#endif + + if (cull_tri( setup, det )) + return; + + if (!setup_sort_vertices( setup, det, v0, v1, v2 )) + return; + setup_tri_coefficients( setup ); + setup_tri_edges( setup ); + + setup->quad.input.prim = PRIM_TRI; + + setup->span.y = 0; + setup->span.y_flags = 0; + setup->span.right[0] = 0; + setup->span.right[1] = 0; + /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ + + /* init_constant_attribs( setup ); */ + + if (setup->oneoverarea < 0.0) { + /* emaj on left: + */ + subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); + subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); + } + else { + /* emaj on right: + */ + subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); + subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); + } + + flush_spans( setup ); + + WAIT_FOR_COMPLETION(setup); + +#if DEBUG_FRAGS + printf("Tri: %u frags emitted, %u written\n", + setup->numFragsEmitted, + setup->numFragsWritten); +#endif +} + + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a line. 
+ */ +static void +line_linear_coeff(struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; + const float dadx = da * setup->emaj.dx * setup->oneoverarea; + const float dady = da * setup->emaj.dy * setup->oneoverarea; + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (setup->vmin[vertSlot][i] - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a line. + */ +static void +line_persp_coeff(struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + /* XXX double-check/verify this arithmetic */ + const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3]; + const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3]; + const float da = a1 - a0; + const float dadx = da * setup->emaj.dx * setup->oneoverarea; + const float dady = da * setup->emaj.dy * setup->oneoverarea; + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (setup->vmin[vertSlot][i] - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); +} + + +/** + * Compute the setup->coef[] array dadx, dady, a0 values. + * Must be called after setup->vmin,vmax are initialized. 
+ */ +static INLINE boolean +setup_line_coefficients(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct sp_fragment_shader *spfs = softpipe->fs; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + float area; + + /* use setup->vmin, vmax to point to vertices */ + setup->vprovoke = v1; + setup->vmin = v0; + setup->vmax = v1; + + setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; + setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; + + /* NOTE: this is not really area but something proportional to it */ + area = setup->emaj.dx * setup->emaj.dx + setup->emaj.dy * setup->emaj.dy; + if (area == 0.0f || is_inf_or_nan(area)) + return FALSE; + setup->oneoverarea = 1.0f / area; + + /* z and w are done by linear interpolation: + */ + line_linear_coeff(setup, &setup->posCoef, 0, 2); + line_linear_coeff(setup, &setup->posCoef, 0, 3); + + /* setup interpolation for all the remaining attributes: + */ + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + const uint vertSlot = vinfo->attrib[fragSlot].src_index; + uint j; + + switch (vinfo->attrib[fragSlot].interp_mode) { + case INTERP_CONSTANT: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + setup_fragcoord_coeff(setup, fragSlot); + break; + default: + assert(0); + } + + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + 
} + } + return TRUE; +} + + +/** + * Plot a pixel in a line segment. + */ +static INLINE void +plot(struct setup_context *setup, int x, int y) +{ + const int iy = y & 1; + const int ix = x & 1; + const int quadX = x - ix; + const int quadY = y - iy; + const int mask = (1 << ix) << (2 * iy); + + if (quadX != setup->quad.input.x0 || + quadY != setup->quad.input.y0) + { + /* flush prev quad, start new quad */ + + if (setup->quad.input.x0 != -1) + CLIP_EMIT_QUAD(setup); + + setup->quad.input.x0 = quadX; + setup->quad.input.y0 = quadY; + setup->quad.inout.mask = 0x0; + } + + setup->quad.inout.mask |= mask; +} + + +/** + * Do setup for line rasterization, then render the line. + * Single-pixel width, no stipple, etc. We rely on the 'draw' module + * to handle stippling and wide lines. + */ +void +setup_line(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]) +{ + int x0 = (int) v0[0][0]; + int x1 = (int) v1[0][0]; + int y0 = (int) v0[0][1]; + int y1 = (int) v1[0][1]; + int dx = x1 - x0; + int dy = y1 - y0; + int xstep, ystep; + +#if DEBUG_VERTS + debug_printf("Setup line:\n"); + print_vertex(setup, v0); + print_vertex(setup, v1); +#endif + + if (setup->softpipe->no_rast) + return; + + if (dx == 0 && dy == 0) + return; + + if (!setup_line_coefficients(setup, v0, v1)) + return; + + assert(v0[0][0] < 1.0e9); + assert(v0[0][1] < 1.0e9); + assert(v1[0][0] < 1.0e9); + assert(v1[0][1] < 1.0e9); + + if (dx < 0) { + dx = -dx; /* make positive */ + xstep = -1; + } + else { + xstep = 1; + } + + if (dy < 0) { + dy = -dy; /* make positive */ + ystep = -1; + } + else { + ystep = 1; + } + + assert(dx >= 0); + assert(dy >= 0); + + setup->quad.input.x0 = setup->quad.input.y0 = -1; + setup->quad.inout.mask = 0x0; + setup->quad.input.prim = PRIM_LINE; + /* XXX temporary: set coverage to 1.0 so the line appears + * if AA mode happens to be enabled. 
+ */ + setup->quad.input.coverage[0] = + setup->quad.input.coverage[1] = + setup->quad.input.coverage[2] = + setup->quad.input.coverage[3] = 1.0; + + if (dx > dy) { + /*** X-major line ***/ + int i; + const int errorInc = dy + dy; + int error = errorInc - dx; + const int errorDec = error - dx; + + for (i = 0; i < dx; i++) { + plot(setup, x0, y0); + + x0 += xstep; + if (error < 0) { + error += errorInc; + } + else { + error += errorDec; + y0 += ystep; + } + } + } + else { + /*** Y-major line ***/ + int i; + const int errorInc = dx + dx; + int error = errorInc - dy; + const int errorDec = error - dy; + + for (i = 0; i < dy; i++) { + plot(setup, x0, y0); + + y0 += ystep; + if (error < 0) { + error += errorInc; + } + else { + error += errorDec; + x0 += xstep; + } + } + } + + /* draw final quad */ + if (setup->quad.inout.mask) { + CLIP_EMIT_QUAD(setup); + } + + WAIT_FOR_COMPLETION(setup); +} + + +static void +point_persp_coeff(struct setup_context *setup, + const float (*vert)[4], + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + assert(i <= 3); + coef->dadx[i] = 0.0F; + coef->dady[i] = 0.0F; + coef->a0[i] = vert[vertSlot][i] * vert[0][3]; +} + + +/** + * Do setup for point rasterization, then render the point. + * Round or square points... + * XXX could optimize a lot for 1-pixel points. + */ +void +setup_point( struct setup_context *setup, + const float (*v0)[4] ) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct sp_fragment_shader *spfs = softpipe->fs; + const int sizeAttr = setup->softpipe->psize_slot; + const float size + = sizeAttr > 0 ? 
v0[sizeAttr][0] + : setup->softpipe->rasterizer->point_size; + const float halfSize = 0.5F * size; + const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth; + const float x = v0[0][0]; /* Note: data[0] is always position */ + const float y = v0[0][1]; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + +#if DEBUG_VERTS + debug_printf("Setup point:\n"); + print_vertex(setup, v0); +#endif + + if (softpipe->no_rast) + return; + + /* For points, all interpolants are constant-valued. + * However, for point sprites, we'll need to setup texcoords appropriately. + * XXX: which coefficients are the texcoords??? + * We may do point sprites as textured quads... + * + * KW: We don't know which coefficients are texcoords - ultimately + * the choice of what interpolation mode to use for each attribute + * should be determined by the fragment program, using + * per-attribute declaration statements that include interpolation + * mode as a parameter. So either the fragment program will have + * to be adjusted for pointsprite vs normal point behaviour, or + * otherwise a special interpolation mode will have to be defined + * which matches the required behaviour for point sprites. But - + * the latter is not a feature of normal hardware, and as such + * probably should be ruled out on that basis. 
+ */ + setup->vprovoke = v0; + + /* setup Z, W */ + const_coeff(setup, &setup->posCoef, 0, 2); + const_coeff(setup, &setup->posCoef, 0, 3); + + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + const uint vertSlot = vinfo->attrib[fragSlot].src_index; + uint j; + + switch (vinfo->attrib[fragSlot].interp_mode) { + case INTERP_CONSTANT: + /* fall-through */ + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + point_persp_coeff(setup, setup->vprovoke, + &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + setup_fragcoord_coeff(setup, fragSlot); + break; + default: + assert(0); + } + + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.input.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } + + setup->quad.input.prim = PRIM_POINT; + + if (halfSize <= 0.5 && !round) { + /* special case for 1-pixel points */ + const int ix = ((int) x) & 1; + const int iy = ((int) y) & 1; + setup->quad.input.x0 = (int) x - ix; + setup->quad.input.y0 = (int) y - iy; + setup->quad.inout.mask = (1 << ix) << (2 * iy); + CLIP_EMIT_QUAD(setup); + } + else { + if (round) { + /* rounded points */ + const int ixmin = block((int) (x - halfSize)); + const int ixmax = block((int) (x + halfSize)); + const int iymin = block((int) (y - halfSize)); + const int iymax = block((int) (y + halfSize)); + const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */ + const float rmax = halfSize + 0.7071F; + const float rmin2 = MAX2(0.0F, rmin * rmin); + const float rmax2 = rmax * rmax; + const float cscale = 1.0F / (rmax2 - rmin2); + int ix, iy; + + for (iy = iymin; iy <= iymax; iy += 2) { + for (ix = ixmin; ix <= ixmax; ix += 2) { + float dx, dy, dist2, cover; + + 
setup->quad.inout.mask = 0x0; + + dx = (ix + 0.5f) - x; + dy = (iy + 0.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.input.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); + setup->quad.inout.mask |= MASK_TOP_LEFT; + } + + dx = (ix + 1.5f) - x; + dy = (iy + 0.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.input.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); + setup->quad.inout.mask |= MASK_TOP_RIGHT; + } + + dx = (ix + 0.5f) - x; + dy = (iy + 1.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.input.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); + setup->quad.inout.mask |= MASK_BOTTOM_LEFT; + } + + dx = (ix + 1.5f) - x; + dy = (iy + 1.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.input.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); + setup->quad.inout.mask |= MASK_BOTTOM_RIGHT; + } + + if (setup->quad.inout.mask) { + setup->quad.input.x0 = ix; + setup->quad.input.y0 = iy; + CLIP_EMIT_QUAD(setup); + } + } + } + } + else { + /* square points */ + const int xmin = (int) (x + 0.75 - halfSize); + const int ymin = (int) (y + 0.25 - halfSize); + const int xmax = xmin + (int) size; + const int ymax = ymin + (int) size; + /* XXX could apply scissor to xmin,ymin,xmax,ymax now */ + const int ixmin = block(xmin); + const int ixmax = block(xmax - 1); + const int iymin = block(ymin); + const int iymax = block(ymax - 1); + int ix, iy; + + /* + debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); + */ + for (iy = iymin; iy <= iymax; iy += 2) { + uint rowMask = 0xf; + if (iy < ymin) { + /* above the top edge */ + rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + } + if (iy + 1 >= ymax) { + /* below the bottom edge */ + rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); + } + + for 
(ix = ixmin; ix <= ixmax; ix += 2) { + uint mask = rowMask; + + if (ix < xmin) { + /* fragment is past left edge of point, turn off left bits */ + mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + } + if (ix + 1 >= xmax) { + /* past the right edge */ + mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + } + + setup->quad.inout.mask = mask; + setup->quad.input.x0 = ix; + setup->quad.input.y0 = iy; + CLIP_EMIT_QUAD(setup); + } + } + } + } + + WAIT_FOR_COMPLETION(setup); +} + +void setup_prepare( struct setup_context *setup ) +{ + struct softpipe_context *sp = setup->softpipe; + unsigned i; + + if (sp->dirty) { + softpipe_update_derived(sp); + } + + /* Mark surfaces as defined now */ + for (i = 0; i < sp->framebuffer.num_cbufs; i++){ + if (sp->framebuffer.cbufs[i]) { + sp->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; + } + } + if (sp->framebuffer.zsbuf) { + sp->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + } + + /* Note: nr_attrs is only used for debugging (vertex printing) */ + setup->quad.nr_attrs = draw_num_vs_outputs(sp->draw); + + for (i = 0; i < SP_NUM_QUAD_THREADS; i++) { + sp->quad[i].first->begin( sp->quad[i].first ); + } + + if (sp->reduced_api_prim == PIPE_PRIM_TRIANGLES && + sp->rasterizer->fill_cw == PIPE_POLYGON_MODE_FILL && + sp->rasterizer->fill_ccw == PIPE_POLYGON_MODE_FILL) { + /* we'll do culling */ + setup->winding = sp->rasterizer->cull_mode; + } + else { + /* 'draw' will do culling */ + setup->winding = PIPE_WINDING_NONE; + } +} + + + +void setup_destroy_context( struct setup_context *setup ) +{ + FREE( setup ); +} + + +/** + * Create a new primitive setup/render stage. 
+ */
+struct setup_context *setup_create_context( struct softpipe_context *softpipe )
+{
+   struct setup_context *setup = CALLOC_STRUCT(setup_context);
+#if SP_NUM_QUAD_THREADS > 1
+   uint i;
+#endif
+
+   /* The allocation can fail; report that to the caller as NULL instead
+    * of writing through a null pointer below.
+    */
+   if (!setup)
+      return NULL;
+
+   setup->softpipe = softpipe;
+
+   /* point the quad at this context's coefficient storage */
+   setup->quad.coef = setup->coef;
+   setup->quad.posCoef = &setup->posCoef;
+
+#if SP_NUM_QUAD_THREADS > 1
+   /* reset the job queue bookkeeping, then create one quad_thread per slot */
+   setup->que.first = 0;
+   setup->que.last = 0;
+   pipe_mutex_init( setup->que.que_mutex );
+   pipe_condvar_init( setup->que.que_notfull_condvar );
+   pipe_condvar_init( setup->que.que_notempty_condvar );
+   setup->que.jobs_added = 0;
+   setup->que.jobs_done = 0;
+   pipe_condvar_init( setup->que.que_done_condvar );
+   for (i = 0; i < SP_NUM_QUAD_THREADS; i++) {
+      setup->threads[i].setup = setup;
+      setup->threads[i].id = i;
+      setup->threads[i].handle = pipe_thread_create( quad_thread, &setup->threads[i] );
+   }
+#endif
+
+   return setup;
+}
+
diff --git a/src/gallium/drivers/softpipe/sp_setup.h b/src/gallium/drivers/softpipe/sp_setup.h
new file mode 100644
index 0000000000..d54f334428
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_setup.h
@@ -0,0 +1,53 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +#ifndef SP_SETUP_H +#define SP_SETUP_H + +struct setup_context; +struct softpipe_context; + +void +setup_tri( struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ); + +void +setup_line(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]); + +void +setup_point( struct setup_context *setup, + const float (*v0)[4] ); + + +struct setup_context *setup_create_context( struct softpipe_context *softpipe ); +void setup_prepare( struct setup_context *setup ); +void setup_destroy_context( struct setup_context *setup ); + +#endif diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h new file mode 100644 index 0000000000..476ef3dc8f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -0,0 +1,206 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* Authors: Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef SP_STATE_H
+#define SP_STATE_H
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+
+
+/* Dirty-state bits.  State setters OR these into softpipe->dirty;
+ * softpipe_update_derived() tests them and then clears the field.
+ */
+#define SP_NEW_VIEWPORT            0x1
+#define SP_NEW_RASTERIZER          0x2
+#define SP_NEW_FS                  0x4
+#define SP_NEW_BLEND               0x8
+#define SP_NEW_CLIP                0x10
+#define SP_NEW_SCISSOR             0x20
+#define SP_NEW_STIPPLE             0x40
+#define SP_NEW_FRAMEBUFFER         0x80
+#define SP_NEW_DEPTH_STENCIL_ALPHA 0x100
+#define SP_NEW_CONSTANTS           0x200
+#define SP_NEW_SAMPLER             0x400
+#define SP_NEW_TEXTURE             0x800
+#define SP_NEW_VERTEX              0x1000
+#define SP_NEW_VS                  0x2000
+#define SP_NEW_QUERY               0x4000
+
+
+/* Forward declarations for types that this header only uses through
+ * pointers.  Declaring them at file scope keeps a struct tag that first
+ * appears inside a prototype from being scoped to that prototype alone.
+ */
+struct tgsi_sampler;
+struct tgsi_exec_machine;
+struct vertex_info;
+struct quad_header;
+struct softpipe_context;
+
+
+/**
+ * Subclass of pipe_shader_state (though it doesn't really need to be).
+ * + * This is starting to look an awful lot like a quad pipeline stage... + */ +struct sp_fragment_shader { + struct pipe_shader_state shader; + + struct tgsi_shader_info info; + + void (*prepare)( const struct sp_fragment_shader *shader, + struct tgsi_exec_machine *machine, + struct tgsi_sampler *samplers); + + /* Run the shader - this interface will get cleaned up in the + * future: + */ + unsigned (*run)( const struct sp_fragment_shader *shader, + struct tgsi_exec_machine *machine, + struct quad_header *quad ); + + + void (*delete)( struct sp_fragment_shader * ); +}; + + +/** Subclass of pipe_shader_state */ +struct sp_vertex_shader { + struct pipe_shader_state shader; /* Note: this field not actually used */ + struct draw_vertex_shader *draw_data; +}; + + + +void * +softpipe_create_blend_state(struct pipe_context *, + const struct pipe_blend_state *); +void softpipe_bind_blend_state(struct pipe_context *, + void *); +void softpipe_delete_blend_state(struct pipe_context *, + void *); + +void * +softpipe_create_sampler_state(struct pipe_context *, + const struct pipe_sampler_state *); +void softpipe_bind_sampler_states(struct pipe_context *, unsigned, void **); +void softpipe_delete_sampler_state(struct pipe_context *, void *); + +void * +softpipe_create_depth_stencil_state(struct pipe_context *, + const struct pipe_depth_stencil_alpha_state *); +void softpipe_bind_depth_stencil_state(struct pipe_context *, void *); +void softpipe_delete_depth_stencil_state(struct pipe_context *, void *); + +void * +softpipe_create_rasterizer_state(struct pipe_context *, + const struct pipe_rasterizer_state *); +void softpipe_bind_rasterizer_state(struct pipe_context *, void *); +void softpipe_delete_rasterizer_state(struct pipe_context *, void *); + +void softpipe_set_framebuffer_state( struct pipe_context *, + const struct pipe_framebuffer_state * ); + +void softpipe_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ); + +void 
softpipe_set_clip_state( struct pipe_context *, + const struct pipe_clip_state * ); + +void softpipe_set_constant_buffer(struct pipe_context *, + uint shader, uint index, + const struct pipe_constant_buffer *buf); + +void *softpipe_create_fs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_fs_state(struct pipe_context *, void *); +void softpipe_delete_fs_state(struct pipe_context *, void *); +void *softpipe_create_vs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_vs_state(struct pipe_context *, void *); +void softpipe_delete_vs_state(struct pipe_context *, void *); + +void softpipe_set_polygon_stipple( struct pipe_context *, + const struct pipe_poly_stipple * ); + +void softpipe_set_scissor_state( struct pipe_context *, + const struct pipe_scissor_state * ); + +void softpipe_set_sampler_textures( struct pipe_context *, + unsigned num, + struct pipe_texture ** ); + +void softpipe_set_viewport_state( struct pipe_context *, + const struct pipe_viewport_state * ); + +void softpipe_set_vertex_elements(struct pipe_context *, + unsigned count, + const struct pipe_vertex_element *); + +void softpipe_set_vertex_buffers(struct pipe_context *, + unsigned count, + const struct pipe_vertex_buffer *); + + +void softpipe_update_derived( struct softpipe_context *softpipe ); + + +boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count); + +boolean softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); +boolean +softpipe_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count); + +void +softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); + + +void +softpipe_map_surfaces(struct softpipe_context 
*sp); + +void +softpipe_unmap_surfaces(struct softpipe_context *sp); + +void +softpipe_map_texture_surfaces(struct softpipe_context *sp); + +void +softpipe_unmap_texture_surfaces(struct softpipe_context *sp); + + +struct vertex_info * +softpipe_get_vertex_info(struct softpipe_context *softpipe); + +struct vertex_info * +softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); + + +#endif diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c new file mode 100644 index 0000000000..384fe559af --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_blend.c @@ -0,0 +1,98 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_state.h" + + +void * +softpipe_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + return mem_dup(blend, sizeof(*blend)); +} + +void softpipe_bind_blend_state( struct pipe_context *pipe, + void *blend ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->blend = (const struct pipe_blend_state *)blend; + + softpipe->dirty |= SP_NEW_BLEND; +} + +void softpipe_delete_blend_state(struct pipe_context *pipe, + void *blend) +{ + FREE( blend ); +} + + +void softpipe_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->blend_color = *blend_color; + + softpipe->dirty |= SP_NEW_BLEND; +} + + +/** XXX move someday? Or consolidate all these simple state setters + * into one file. 
+ */ + + +void * +softpipe_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + return mem_dup(depth_stencil, sizeof(*depth_stencil)); +} + +void +softpipe_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->depth_stencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + + softpipe->dirty |= SP_NEW_DEPTH_STENCIL_ALPHA; +} + +void +softpipe_delete_depth_stencil_state(struct pipe_context *pipe, void *depth) +{ + FREE( depth ); +} diff --git a/src/gallium/drivers/softpipe/sp_state_clip.c b/src/gallium/drivers/softpipe/sp_state_clip.c new file mode 100644 index 0000000000..4946c776e3 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_clip.c @@ -0,0 +1,79 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ +#include "sp_context.h" +#include "sp_state.h" +#include "draw/draw_context.h" + + +void softpipe_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + /* pass the clip state to the draw module */ + draw_set_clip_state(softpipe->draw, clip); +} + + +void softpipe_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + /* pass the viewport info to the draw module */ + draw_set_viewport_state(softpipe->draw, viewport); + + softpipe->viewport = *viewport; /* struct copy */ + softpipe->dirty |= SP_NEW_VIEWPORT; +} + + +void softpipe_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + draw_flush(softpipe->draw); + + softpipe->scissor = *scissor; /* struct copy */ + softpipe->dirty |= SP_NEW_SCISSOR; +} + + +void softpipe_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + draw_flush(softpipe->draw); + + softpipe->poly_stipple = *stipple; /* struct copy */ + softpipe->dirty |= SP_NEW_STIPPLE; +} diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c new file mode 100644 index 0000000000..6b6a4c3ff3 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -0,0 +1,210 @@ 
+/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_private.h" +#include "sp_context.h" +#include "sp_state.h" + + +/** + * Mark the current vertex layout as "invalid". + * We'll validate the vertex layout later, when we start to actually + * render a point or line or tri. 
+ */ +static void +invalidate_vertex_layout(struct softpipe_context *softpipe) +{ + softpipe->vertex_info.num_attribs = 0; +} + + +/** + * The vertex info describes how to convert the post-transformed vertices + * (simple float[][4]) used by the 'draw' module into vertices for + * rasterization. + * + * This function validates the vertex layout and returns a pointer to a + * vertex_info object. + */ +struct vertex_info * +softpipe_get_vertex_info(struct softpipe_context *softpipe) +{ + struct vertex_info *vinfo = &softpipe->vertex_info; + + if (vinfo->num_attribs == 0) { + /* compute vertex layout now */ + const struct sp_fragment_shader *spfs = softpipe->fs; + const enum interp_mode colorInterp + = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; + uint i; + + if (softpipe->vbuf) { + /* if using the post-transform vertex buffer, tell draw_vbuf to + * simply emit the whole post-xform vertex as-is: + */ + struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; + const uint num = draw_num_vs_outputs(softpipe->draw); + uint i; + + /* No longer any need to try and emit draw vertex_header info. + */ + vinfo_vbuf->num_attribs = 0; + for (i = 0; i < num; i++) { + draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); + } + draw_compute_vertex_size(vinfo_vbuf); + } + + /* + * Loop over fragment shader inputs, searching for the matching output + * from the vertex shader. 
+ */ + vinfo->num_attribs = 0; + for (i = 0; i < spfs->info.num_inputs; i++) { + int src; + switch (spfs->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + src = draw_find_vs_output(softpipe->draw, + TGSI_SEMANTIC_POSITION, 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); + break; + + case TGSI_SEMANTIC_COLOR: + src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_COLOR, + spfs->info.input_semantic_index[i]); + draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + break; + + case TGSI_SEMANTIC_FOG: + src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_FOG, 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + break; + + case TGSI_SEMANTIC_GENERIC: + /* this includes texcoords and varying vars */ + src = draw_find_vs_output(softpipe->draw, TGSI_SEMANTIC_GENERIC, + spfs->info.input_semantic_index[i]); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + break; + + default: + assert(0); + } + } + + softpipe->psize_slot = draw_find_vs_output(softpipe->draw, + TGSI_SEMANTIC_PSIZE, 0); + if (softpipe->psize_slot > 0) { + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, + softpipe->psize_slot); + } + + draw_compute_vertex_size(vinfo); + } + + return vinfo; +} + + +/** + * Called from vbuf module. + * + * Note that there's actually two different vertex layouts in softpipe. + * + * The normal one is computed in softpipe_get_vertex_info() above and is + * used by the point/line/tri "setup" code. + * + * The other one (this one) is only used by the vbuf module (which is + * not normally used by default but used in testing). For the vbuf module, + * we basically want to pass-through the draw module's vertex layout as-is. + * When the softpipe vbuf code begins drawing, the normal vertex layout + * will come into play again. 
+ */
+struct vertex_info *
+softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe)
+{
+   /* called for its side effect: validating the normal layout also fills
+    * in vertex_info_vbuf when softpipe->vbuf is set
+    */
+   (void) softpipe_get_vertex_info(softpipe);
+   return &softpipe->vertex_info_vbuf;
+}
+
+
+/**
+ * Recompute cliprect from scissor bounds, scissor enable and surface size.
+ *
+ * Inputs read: sp->framebuffer.width/height, sp->rasterizer->scissor and
+ * sp->scissor.  Any state change that touches one of these must trigger
+ * a call (see softpipe_update_derived()).
+ */
+static void
+compute_cliprect(struct softpipe_context *sp)
+{
+   uint surfWidth = sp->framebuffer.width;
+   uint surfHeight = sp->framebuffer.height;
+
+   if (sp->rasterizer->scissor) {
+      /* clip to scissor rect */
+      sp->cliprect.minx = MAX2(sp->scissor.minx, 0);
+      sp->cliprect.miny = MAX2(sp->scissor.miny, 0);
+      sp->cliprect.maxx = MIN2(sp->scissor.maxx, surfWidth);
+      sp->cliprect.maxy = MIN2(sp->scissor.maxy, surfHeight);
+   }
+   else {
+      /* clip to surface bounds */
+      sp->cliprect.minx = 0;
+      sp->cliprect.miny = 0;
+      sp->cliprect.maxx = surfWidth;
+      sp->cliprect.maxy = surfHeight;
+   }
+}
+
+
+/* Hopefully this will remain quite simple, otherwise need to pull in
+ * something like the state tracker mechanism.
+ *
+ * Re-derives whatever depends on the state groups flagged in
+ * softpipe->dirty, then clears the dirty mask.
+ */
+void softpipe_update_derived( struct softpipe_context *softpipe )
+{
+   if (softpipe->dirty & (SP_NEW_RASTERIZER |
+                          SP_NEW_FS |
+                          SP_NEW_VS))
+      invalidate_vertex_layout( softpipe );
+
+   /* The cliprect depends on the scissor rect, the scissor enable bit
+    * (which lives in the rasterizer state) and the framebuffer size, so
+    * a rasterizer change must recompute it too.  Depth/stencil/alpha
+    * state is not an input of compute_cliprect().
+    */
+   if (softpipe->dirty & (SP_NEW_SCISSOR |
+                          SP_NEW_RASTERIZER |
+                          SP_NEW_FRAMEBUFFER))
+      compute_cliprect(softpipe);
+
+   if (softpipe->dirty & (SP_NEW_BLEND |
+                          SP_NEW_DEPTH_STENCIL_ALPHA |
+                          SP_NEW_FRAMEBUFFER |
+                          SP_NEW_RASTERIZER |
+                          SP_NEW_FS |
+                          SP_NEW_QUERY))
+      sp_build_quad_pipeline(softpipe);
+
+   softpipe->dirty = 0;
+}
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
new file mode 100644
index 0000000000..e5b609cf6c
--- /dev/null
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -0,0 +1,161 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_fs.h" + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "pipe/p_shader_tokens.h" +#include "draw/draw_context.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_scan.h" + + +void * +softpipe_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_fragment_shader *state; + + /* debug */ + if (softpipe->dump_fs) + tgsi_dump(templ->tokens, 0); + + /* codegen */ + state = softpipe_create_fs_llvm( softpipe, templ ); + if (!state) { + state = softpipe_create_fs_sse( softpipe, templ ); + if (!state) { + state = softpipe_create_fs_exec( softpipe, templ ); + } + } + + assert(state); + + /* get/save the summary info for this shader */ + tgsi_scan_shader(templ->tokens, &state->info); + + return state; +} + + +void +softpipe_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->fs = (struct sp_fragment_shader *) fs; + + softpipe->dirty |= SP_NEW_FS; +} + + +void +softpipe_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct sp_fragment_shader *state = fs; + + assert(fs != softpipe_context(pipe)->fs); + + state->delete( state ); +} + + +void * +softpipe_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_vertex_shader *state; + + state = CALLOC_STRUCT(sp_vertex_shader); + if (state == NULL ) { + return NULL; + } + + state->draw_data = draw_create_vertex_shader(softpipe->draw, templ); + if (state->draw_data == NULL) { + FREE( state ); + return NULL; + } + + return state; +} + + +void +softpipe_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct 
softpipe_context *softpipe = softpipe_context(pipe); + + softpipe->vs = (const struct sp_vertex_shader *)vs; + + draw_bind_vertex_shader(softpipe->draw, + (softpipe->vs ? softpipe->vs->draw_data : NULL)); + + softpipe->dirty |= SP_NEW_VS; +} + + +void +softpipe_delete_vs_state(struct pipe_context *pipe, void *vs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + struct sp_vertex_shader *state = + (struct sp_vertex_shader *)vs; + + draw_delete_vertex_shader(softpipe->draw, state->draw_data); + FREE( state ); +} + + + +void +softpipe_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + /* note: reference counting */ + winsys_buffer_reference(ws, + &softpipe->constants[shader].buffer, + buf ? buf->buffer : NULL); + softpipe->constants[shader].size = buf ? buf->size : 0; + + softpipe->dirty |= SP_NEW_CONSTANTS; +} diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c new file mode 100644 index 0000000000..87b7219683 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -0,0 +1,62 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "pipe/p_defines.h" +#include "util/u_memory.h" +#include "sp_context.h" +#include "sp_state.h" +#include "draw/draw_context.h" + + + +void * +softpipe_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rast) +{ + return mem_dup(rast, sizeof(*rast)); +} + +void softpipe_bind_rasterizer_state(struct pipe_context *pipe, + void *setup) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + /* pass-through to draw module */ + draw_set_rasterizer_state(softpipe->draw, setup); + + softpipe->rasterizer = (struct pipe_rasterizer_state *)setup; + + softpipe->dirty |= SP_NEW_RASTERIZER; +} + +void softpipe_delete_rasterizer_state(struct pipe_context *pipe, + void *rasterizer) +{ + FREE( rasterizer ); +} + + diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c new file mode 100644 index 0000000000..99a28c0d7e --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -0,0 +1,118 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: + * Brian Paul + */ + +#include "util/u_memory.h" +#include "pipe/p_inlines.h" + +#include "draw/draw_context.h" + +#include "sp_context.h" +#include "sp_context.h" +#include "sp_state.h" +#include "sp_texture.h" +#include "sp_tile_cache.h" +#include "draw/draw_context.h" + + + +void * +softpipe_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + return mem_dup(sampler, sizeof(*sampler)); +} + + +void +softpipe_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + unsigned i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == softpipe->num_samplers && + !memcmp(softpipe->sampler, sampler, num * sizeof(void *))) + return; + + draw_flush(softpipe->draw); + + for (i = 0; i < num; ++i) + softpipe->sampler[i] = sampler[i]; + for (i = num; i < PIPE_MAX_SAMPLERS; ++i) + softpipe->sampler[i] = NULL; + + softpipe->num_samplers = num; + + softpipe->dirty |= SP_NEW_SAMPLER; +} + + +void +softpipe_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == softpipe->num_textures && + !memcmp(softpipe->texture, texture, num * sizeof(struct 
pipe_texture *))) + return; + + draw_flush(softpipe->draw); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + struct pipe_texture *tex = i < num ? texture[i] : NULL; + + pipe_texture_reference(&softpipe->texture[i], tex); + sp_tile_cache_set_texture(pipe, softpipe->tex_cache[i], tex); + } + + softpipe->num_textures = num; + + softpipe->dirty |= SP_NEW_TEXTURE; +} + + +void +softpipe_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE( sampler ); +} + + + diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c new file mode 100644 index 0000000000..ba8c9eece7 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -0,0 +1,108 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ +#include "pipe/p_inlines.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" + + +/** + * XXX this might get moved someday + * Set the framebuffer surface info: color buffers, zbuffer, stencil buffer. + * Here, we flush the old surfaces and update the tile cache to point to the new + * surfaces. + */ +void +softpipe_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct softpipe_context *sp = softpipe_context(pipe); + uint i; + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + /* check if changing cbuf */ + if (sp->framebuffer.cbufs[i] != fb->cbufs[i]) { + /* flush old */ + sp_flush_tile_cache(sp, sp->cbuf_cache[i]); + + /* assign new */ + sp->framebuffer.cbufs[i] = fb->cbufs[i]; + + /* update cache */ + sp_tile_cache_set_surface(sp->cbuf_cache[i], fb->cbufs[i]); + } + } + + sp->framebuffer.num_cbufs = fb->num_cbufs; + + /* zbuf changing? */ + if (sp->framebuffer.zsbuf != fb->zsbuf) { + /* flush old */ + sp_flush_tile_cache(sp, sp->zsbuf_cache); + + /* assign new */ + sp->framebuffer.zsbuf = fb->zsbuf; + + /* update cache */ + sp_tile_cache_set_surface(sp->zsbuf_cache, fb->zsbuf); + } + +#if 0 + /* XXX combined depth/stencil here */ + + /* sbuf changing? 
*/ + if (sp->framebuffer.sbuf != fb->sbuf) { + /* flush old */ + sp_flush_tile_cache(sp, sp->sbuf_cache_sep); + + /* assign new */ + sp->framebuffer.sbuf = fb->sbuf; + + /* update cache */ + if (fb->sbuf != fb->zbuf) { + /* separate stencil buf */ + sp->sbuf_cache = sp->sbuf_cache_sep; + sp_tile_cache_set_surface(sp->sbuf_cache, fb->sbuf); + } + else { + /* combined depth/stencil */ + sp->sbuf_cache = sp->zbuf_cache; + sp_tile_cache_set_surface(sp->sbuf_cache, fb->sbuf); + } + } +#endif + + sp->framebuffer.width = fb->width; + sp->framebuffer.height = fb->height; + + sp->dirty |= SP_NEW_FRAMEBUFFER; +} diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c new file mode 100644 index 0000000000..46b6991195 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -0,0 +1,73 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_surface.h" + +#include "draw/draw_context.h" + + +void +softpipe_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *attribs) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + assert(count <= PIPE_MAX_ATTRIBS); + + memcpy(softpipe->vertex_element, attribs, + count * sizeof(struct pipe_vertex_element)); + softpipe->num_vertex_elements = count; + + softpipe->dirty |= SP_NEW_VERTEX; + + draw_set_vertex_elements(softpipe->draw, count, attribs); +} + + +void +softpipe_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + + assert(count <= PIPE_MAX_ATTRIBS); + + memcpy(softpipe->vertex_buffer, buffers, count * sizeof(buffers[0])); + softpipe->num_vertex_buffers = count; + + softpipe->dirty |= SP_NEW_VERTEX; + + draw_set_vertex_buffers(softpipe->draw, count, buffers); +} diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c new file mode 100644 index 0000000000..6ade732698 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_rect.h" +#include "sp_context.h" + + + +void +sp_init_surface_functions(struct softpipe_context *sp) +{ + sp->pipe.surface_copy = util_surface_copy; + sp->pipe.surface_fill = util_surface_fill; +} diff --git a/src/gallium/drivers/softpipe/sp_surface.h b/src/gallium/drivers/softpipe/sp_surface.h new file mode 100644 index 0000000000..22de3ba43f --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_surface.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_SURFACE_H +#define SP_SURFACE_H + + +struct softpipe_context; + + +extern void +sp_init_surface_functions(struct softpipe_context *sp); + + +#endif /* SP_SURFACE_H */ diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c new file mode 100644 index 0000000000..49250ec084 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -0,0 +1,1086 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Texture sampling + * + * Authors: + * Brian Paul + */ + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_surface.h" +#include "sp_tex_sample.h" +#include "sp_tile_cache.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "tgsi/tgsi_exec.h" +#include "util/u_math.h" +#include "util/u_memory.h" + + +/* + * Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes + * see 1-pixel bands of improperly weighted linear-filtered textures. + * The tests/texwrap.c demo is a good test. + * Also note, FRAC(x) doesn't truly return the fractional part of x for x < 0. + * Instead, if x < 0 then FRAC(x) = 1 - true_frac(x). 
+ */ +#define FRAC(f) ((f) - util_ifloor(f)) + + +/** + * Linear interpolation macro + */ +#define LERP(T, A, B) ( (A) + (T) * ((B) - (A)) ) + + +/** + * Do 2D/biliner interpolation of float values. + * v00, v10, v01 and v11 are typically four texture samples in a square/box. + * a and b are the horizontal and vertical interpolants. + * It's important that this function is inlined when compiled with + * optimization! If we find that's not true on some systems, convert + * to a macro. + */ +static INLINE float +lerp_2d(float a, float b, + float v00, float v10, float v01, float v11) +{ + const float temp0 = LERP(a, v00, v10); + const float temp1 = LERP(a, v01, v11); + return LERP(b, temp0, temp1); +} + + +/** + * If A is a signed integer, A % B doesn't give the right value for A < 0 + * (in terms of texture repeat). Just casting to unsigned fixes that. + */ +#define REMAINDER(A, B) ((unsigned) (A) % (unsigned) (B)) + + +/** + * Apply texture coord wrapping mode and return integer texture index. 
+ * \param wrapMode PIPE_TEX_WRAP_x + * \param s the texcoord + * \param size the texture image size + * \return integer texture index + */ +static INLINE int +nearest_texcoord(unsigned wrapMode, float s, unsigned size) +{ + int i; + switch (wrapMode) { + case PIPE_TEX_WRAP_REPEAT: + /* s limited to [0,1) */ + /* i limited to [0,size-1] */ + i = util_ifloor(s * size); + i = REMAINDER(i, size); + return i; + case PIPE_TEX_WRAP_CLAMP: + /* s limited to [0,1] */ + /* i limited to [0,size-1] */ + if (s <= 0.0F) + i = 0; + else if (s >= 1.0F) + i = size - 1; + else + i = util_ifloor(s * size); + return i; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + { + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + if (s < min) + i = 0; + else if (s > max) + i = size - 1; + else + i = util_ifloor(s * size); + } + return i; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + { + /* s limited to [min,max] */ + /* i limited to [-1, size] */ + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + if (s <= min) + i = -1; + else if (s >= max) + i = size; + else + i = util_ifloor(s * size); + } + return i; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + { + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + const int flr = util_ifloor(s); + float u; + if (flr & 1) + u = 1.0F - (s - (float) flr); + else + u = s - (float) flr; + if (u < min) + i = 0; + else if (u > max) + i = size - 1; + else + i = util_ifloor(u * size); + } + return i; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + { + /* s limited to [0,1] */ + /* i limited to [0,size-1] */ + const float u = fabsf(s); + if (u <= 0.0F) + i = 0; + else if (u >= 1.0F) + i = size - 1; + else + i = util_ifloor(u * size); + } + return i; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + { + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = 1.0F / (2.0F * size); + const float max = 1.0F - min; + const float u = fabsf(s); + if (u 
< min) + i = 0; + else if (u > max) + i = size - 1; + else + i = util_ifloor(u * size); + } + return i; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + { + /* s limited to [min,max] */ + /* i limited to [0, size-1] */ + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + const float u = fabsf(s); + if (u < min) + i = -1; + else if (u > max) + i = size; + else + i = util_ifloor(u * size); + } + return i; + default: + assert(0); + return 0; + } +} + + +/** + * Used to compute texel locations for linear sampling. + * \param wrapMode PIPE_TEX_WRAP_x + * \param s the texcoord + * \param size the texture image size + * \param i0 returns first texture index + * \param i1 returns second texture index (usually *i0 + 1) + * \param a returns blend factor/weight between texture indexes + */ +static INLINE void +linear_texcoord(unsigned wrapMode, float s, unsigned size, + int *i0, int *i1, float *a) +{ + float u; + switch (wrapMode) { + case PIPE_TEX_WRAP_REPEAT: + u = s * size - 0.5F; + *i0 = REMAINDER(util_ifloor(u), size); + *i1 = REMAINDER(*i0 + 1, size); + break; + case PIPE_TEX_WRAP_CLAMP: + if (s <= 0.0F) + u = 0.0F; + else if (s >= 1.0F) + u = (float) size; + else + u = s * size; + u -= 0.5F; + *i0 = util_ifloor(u); + *i1 = *i0 + 1; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + if (s <= 0.0F) + u = 0.0F; + else if (s >= 1.0F) + u = (float) size; + else + u = s * size; + u -= 0.5F; + *i0 = util_ifloor(u); + *i1 = *i0 + 1; + if (*i0 < 0) + *i0 = 0; + if (*i1 >= (int) size) + *i1 = size - 1; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + { + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + if (s <= min) + u = min * size; + else if (s >= max) + u = max * size; + else + u = s * size; + u -= 0.5F; + *i0 = util_ifloor(u); + *i1 = *i0 + 1; + } + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + { + const int flr = util_ifloor(s); + if (flr & 1) + u = 1.0F - (s - (float) flr); + else + u = s - (float) flr; + u = (u * size) - 0.5F; + 
*i0 = util_ifloor(u); + *i1 = *i0 + 1; + if (*i0 < 0) + *i0 = 0; + if (*i1 >= (int) size) + *i1 = size - 1; + } + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + u = fabsf(s); + if (u >= 1.0F) + u = (float) size; + else + u *= size; + u -= 0.5F; + *i0 = util_ifloor(u); + *i1 = *i0 + 1; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + u = fabsf(s); + if (u >= 1.0F) + u = (float) size; + else + u *= size; + u -= 0.5F; + *i0 = util_ifloor(u); + *i1 = *i0 + 1; + if (*i0 < 0) + *i0 = 0; + if (*i1 >= (int) size) + *i1 = size - 1; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + { + const float min = -1.0F / (2.0F * size); + const float max = 1.0F - min; + u = fabsf(s); + if (u <= min) + u = min * size; + else if (u >= max) + u = max * size; + else + u *= size; + u -= 0.5F; + *i0 = util_ifloor(u); + *i1 = *i0 + 1; + } + break; + default: + assert(0); + } + *a = FRAC(u); +} + + +/** + * For RECT textures / unnormalized texcoords + * Only a subset of wrap modes supported. + */ +static INLINE int +nearest_texcoord_unnorm(unsigned wrapMode, float s, unsigned size) +{ + int i; + switch (wrapMode) { + case PIPE_TEX_WRAP_CLAMP: + i = util_ifloor(s); + return CLAMP(i, 0, (int) size-1); + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + /* fall-through */ + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return util_ifloor( CLAMP(s, 0.5F, (float) size - 0.5F) ); + default: + assert(0); + return 0; + } +} + + +/** + * For RECT textures / unnormalized texcoords. + * Only a subset of wrap modes supported. 
+ */ +static INLINE void +linear_texcoord_unnorm(unsigned wrapMode, float s, unsigned size, + int *i0, int *i1, float *a) +{ + switch (wrapMode) { + case PIPE_TEX_WRAP_CLAMP: + /* Not exactly what the spec says, but it matches NVIDIA output */ + s = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f); + *i0 = util_ifloor(s); + *i1 = *i0 + 1; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + /* fall-through */ + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + s = CLAMP(s, 0.5F, (float) size - 0.5F); + s -= 0.5F; + *i0 = util_ifloor(s); + *i1 = *i0 + 1; + if (*i1 > (int) size - 1) + *i1 = size - 1; + break; + default: + assert(0); + } + *a = FRAC(s); +} + + +static unsigned +choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) +{ + /* + major axis + direction target sc tc ma + ---------- ------------------------------- --- --- --- + +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx + -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx + +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry + -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry + +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz + -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz + */ + const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz); + unsigned face; + float sc, tc, ma; + + if (arx > ary && arx > arz) { + if (rx >= 0.0F) { + face = PIPE_TEX_FACE_POS_X; + sc = -rz; + tc = -ry; + ma = arx; + } + else { + face = PIPE_TEX_FACE_NEG_X; + sc = rz; + tc = -ry; + ma = arx; + } + } + else if (ary > arx && ary > arz) { + if (ry >= 0.0F) { + face = PIPE_TEX_FACE_POS_Y; + sc = rx; + tc = rz; + ma = ary; + } + else { + face = PIPE_TEX_FACE_NEG_Y; + sc = rx; + tc = -rz; + ma = ary; + } + } + else { + if (rz > 0.0F) { + face = PIPE_TEX_FACE_POS_Z; + sc = rx; + tc = -ry; + ma = arz; + } + else { + face = PIPE_TEX_FACE_NEG_Z; + sc = -rx; + tc = -ry; + ma = arz; + } + } + + *newS = ( sc / ma + 1.0F ) * 0.5F; + *newT = ( tc / ma + 1.0F ) * 0.5F; + + return face; +} + + +/** + * Examine the quad's texture coordinates to compute the 
partial + * derivatives w.r.t X and Y, then compute lambda (level of detail). + * + * This is only done for fragment shaders, not vertex shaders. + */ +static float +compute_lambda(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias) +{ + float rho, lambda; + + assert(sampler->state->normalized_coords); + + assert(s); + { + float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]; + float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; + dsdx = fabsf(dsdx); + dsdy = fabsf(dsdy); + rho = MAX2(dsdx, dsdy) * sampler->texture->width[0]; + } + if (t) { + float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; + float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]; + float max; + dtdx = fabsf(dtdx); + dtdy = fabsf(dtdy); + max = MAX2(dtdx, dtdy) * sampler->texture->height[0]; + rho = MAX2(rho, max); + } + if (p) { + float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]; + float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]; + float max; + dpdx = fabsf(dpdx); + dpdy = fabsf(dpdy); + max = MAX2(dpdx, dpdy) * sampler->texture->depth[0]; + rho = MAX2(rho, max); + } + + lambda = util_fast_log2(rho); + lambda += lodbias + sampler->state->lod_bias; + lambda = CLAMP(lambda, sampler->state->min_lod, sampler->state->max_lod); + + return lambda; +} + + +/** + * Do several things here: + * 1. Compute lambda from the texcoords, if needed + * 2. Determine if we're minifying or magnifying + * 3. If minifying, choose mipmap levels + * 4. 
Return image filter to use within mipmap images + */ +static void +choose_mipmap_levels(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + unsigned *level0, unsigned *level1, float *levelBlend, + unsigned *imgFilter) +{ + if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { + /* no mipmap selection needed */ + *level0 = *level1 = CLAMP((int) sampler->state->min_lod, + 0, (int) sampler->texture->last_level); + + if (sampler->state->min_img_filter != sampler->state->mag_img_filter) { + /* non-mipmapped texture, but still need to determine if doing + * minification or magnification. + */ + float lambda = compute_lambda(sampler, s, t, p, lodbias); + if (lambda <= 0.0) { + *imgFilter = sampler->state->mag_img_filter; + } + else { + *imgFilter = sampler->state->min_img_filter; + } + } + else { + *imgFilter = sampler->state->mag_img_filter; + } + } + else { + float lambda; + + if (1) + /* fragment shader */ + lambda = compute_lambda(sampler, s, t, p, lodbias); + else + /* vertex shader */ + lambda = lodbias; /* not really a bias, but absolute LOD */ + + if (lambda <= 0.0) { /* XXX threshold depends on the filter */ + /* magnifying */ + *imgFilter = sampler->state->mag_img_filter; + *level0 = *level1 = 0; + } + else { + /* minifying */ + *imgFilter = sampler->state->min_img_filter; + + /* choose mipmap level(s) and compute the blend factor between them */ + if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { + /* Nearest mipmap level */ + const int lvl = (int) (lambda + 0.5); + *level0 = + *level1 = CLAMP(lvl, 0, (int) sampler->texture->last_level); + } + else { + /* Linear interpolation between mipmap levels */ + const int lvl = (int) lambda; + *level0 = CLAMP(lvl, 0, (int) sampler->texture->last_level); + *level1 = CLAMP(lvl + 1, 0, (int) sampler->texture->last_level); + *levelBlend = FRAC(lambda); /* blending weight between levels */ + } + } + } +} + + +/** + * 
Get a texel from a texture, using the texture tile cache. + * + * \param face the cube face in 0..5 + * \param level the mipmap level + * \param x the x coord of texel within 2D image + * \param y the y coord of texel within 2D image + * \param z which slice of a 3D texture + * \param rgba the quad to put the texel/color into + * \param j which element of the rgba quad to write to + * + * XXX maybe move this into sp_tile_cache.c and merge with the + * sp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... + */ +static void +get_texel(struct tgsi_sampler *sampler, + unsigned face, unsigned level, int x, int y, int z, + float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) +{ + if (x < 0 || x >= (int) sampler->texture->width[level] || + y < 0 || y >= (int) sampler->texture->height[level] || + z < 0 || z >= (int) sampler->texture->depth[level]) { + rgba[0][j] = sampler->state->border_color[0]; + rgba[1][j] = sampler->state->border_color[1]; + rgba[2][j] = sampler->state->border_color[2]; + rgba[3][j] = sampler->state->border_color[3]; + } + else { + const int tx = x % TILE_SIZE; + const int ty = y % TILE_SIZE; + const struct softpipe_cached_tile *tile + = sp_get_cached_tile_tex(sampler->pipe, sampler->cache, + x, y, z, face, level); + rgba[0][j] = tile->data.color[ty][tx][0]; + rgba[1][j] = tile->data.color[ty][tx][1]; + rgba[2][j] = tile->data.color[ty][tx][2]; + rgba[3][j] = tile->data.color[ty][tx][3]; + if (0) + { + debug_printf("Get texel %f %f %f %f from %s\n", + rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], + pf_name(sampler->texture->format)); + } + } +} + + +/** + * Compare texcoord 'p' (aka R) against texture value 'rgba[0]' + * When we sampled the depth texture, the depth value was put into all + * RGBA channels. We look at the red channel here. 
+ */ +static INLINE void +shadow_compare(uint compare_func, + float rgba[NUM_CHANNELS][QUAD_SIZE], + const float p[QUAD_SIZE], + uint j) +{ + int k; + switch (compare_func) { + case PIPE_FUNC_LESS: + k = p[j] < rgba[0][j]; + break; + case PIPE_FUNC_LEQUAL: + k = p[j] <= rgba[0][j]; + break; + case PIPE_FUNC_GREATER: + k = p[j] > rgba[0][j]; + break; + case PIPE_FUNC_GEQUAL: + k = p[j] >= rgba[0][j]; + break; + case PIPE_FUNC_EQUAL: + k = p[j] == rgba[0][j]; + break; + case PIPE_FUNC_NOTEQUAL: + k = p[j] != rgba[0][j]; + break; + case PIPE_FUNC_ALWAYS: + k = 1; + break; + case PIPE_FUNC_NEVER: + k = 0; + break; + default: + k = 0; + assert(0); + break; + } + + rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k; +} + + +/** + * Common code for sampling 1D/2D/cube textures. + * Could probably extend for 3D... + */ +static void +sp_get_samples_2d_common(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE], + const unsigned faces[4]) +{ + const uint compare_func = sampler->state->compare_func; + unsigned level0, level1, j, imgFilter; + int width, height; + float levelBlend; + + choose_mipmap_levels(sampler, s, t, p, lodbias, + &level0, &level1, &levelBlend, &imgFilter); + + assert(sampler->state->normalized_coords); + + width = sampler->texture->width[level0]; + height = sampler->texture->height[level0]; + + assert(width > 0); + + switch (imgFilter) { + case PIPE_TEX_FILTER_NEAREST: + for (j = 0; j < QUAD_SIZE; j++) { + int x = nearest_texcoord(sampler->state->wrap_s, s[j], width); + int y = nearest_texcoord(sampler->state->wrap_t, t[j], height); + get_texel(sampler, faces[j], level0, x, y, 0, rgba, j); + if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, rgba, p, j); + } + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + unsigned c; + x = x / 2; + y = y / 
2; + get_texel(sampler, faces[j], level1, x, y, 0, rgba2, j); + if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ + shadow_compare(compare_func, rgba2, p, j); + } + + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = LERP(levelBlend, rgba[c][j], rgba2[c][j]); + } + } + } + break; + case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: + for (j = 0; j < QUAD_SIZE; j++) { + float tx[4][4], a, b; + int x0, y0, x1, y1, c; + linear_texcoord(sampler->state->wrap_s, s[j], width, &x0, &x1, &a); + linear_texcoord(sampler->state->wrap_t, t[j], height, &y0, &y1, &b); + get_texel(sampler, faces[j], level0, x0, y0, 0, tx, 0); + get_texel(sampler, faces[j], level0, x1, y0, 0, tx, 1); + get_texel(sampler, faces[j], level0, x0, y1, 0, tx, 2); + get_texel(sampler, faces[j], level0, x1, y1, 0, tx, 3); + if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, tx, p, 0); + shadow_compare(compare_func, tx, p, 1); + shadow_compare(compare_func, tx, p, 2); + shadow_compare(compare_func, tx, p, 3); + } + + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(a, b, tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + } + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + x0 = x0 / 2; + y0 = y0 / 2; + x1 = x1 / 2; + y1 = y1 / 2; + get_texel(sampler, faces[j], level1, x0, y0, 0, tx, 0); + get_texel(sampler, faces[j], level1, x1, y0, 0, tx, 1); + get_texel(sampler, faces[j], level1, x0, y1, 0, tx, 2); + get_texel(sampler, faces[j], level1, x1, y1, 0, tx, 3); + if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ + shadow_compare(compare_func, tx, p, 0); + shadow_compare(compare_func, tx, p, 1); + shadow_compare(compare_func, tx, p, 2); + shadow_compare(compare_func, tx, p, 3); + } + + for (c = 0; c < 4; c++) { + rgba2[c][j] = lerp_2d(a, b, + tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + } + + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = LERP(levelBlend, rgba[c][j], 
rgba2[c][j]); + } + } + } + break; + default: + assert(0); + } +} + + +static void +sp_get_samples_1d(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + static const unsigned faces[4] = {0, 0, 0, 0}; + static const float tzero[4] = {0, 0, 0, 0}; + sp_get_samples_2d_common(sampler, s, tzero, NULL, lodbias, rgba, faces); +} + + +static void +sp_get_samples_2d(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + static const unsigned faces[4] = {0, 0, 0, 0}; + sp_get_samples_2d_common(sampler, s, t, p, lodbias, rgba, faces); +} + + +static void +sp_get_samples_3d(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + /* get/map pipe_surfaces corresponding to 3D tex slices */ + unsigned level0, level1, j, imgFilter; + int width, height, depth; + float levelBlend; + const uint face = 0; + + choose_mipmap_levels(sampler, s, t, p, lodbias, + &level0, &level1, &levelBlend, &imgFilter); + + assert(sampler->state->normalized_coords); + + width = sampler->texture->width[level0]; + height = sampler->texture->height[level0]; + depth = sampler->texture->depth[level0]; + + assert(width > 0); + assert(height > 0); + assert(depth > 0); + + switch (imgFilter) { + case PIPE_TEX_FILTER_NEAREST: + for (j = 0; j < QUAD_SIZE; j++) { + int x = nearest_texcoord(sampler->state->wrap_s, s[j], width); + int y = nearest_texcoord(sampler->state->wrap_t, t[j], height); + int z = nearest_texcoord(sampler->state->wrap_r, p[j], depth); + get_texel(sampler, face, level0, x, y, z, rgba, j); + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + unsigned c; + x /= 2; + y /= 2; + z /= 2; + 
get_texel(sampler, face, level1, x, y, z, rgba2, j); + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = LERP(levelBlend, rgba2[c][j], rgba[c][j]); + } + } + } + break; + case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: + for (j = 0; j < QUAD_SIZE; j++) { + float texel0[4][4], texel1[4][4]; + float xw, yw, zw; /* interpolation weights */ + int x0, x1, y0, y1, z0, z1, c; + linear_texcoord(sampler->state->wrap_s, s[j], width, &x0, &x1, &xw); + linear_texcoord(sampler->state->wrap_t, t[j], height, &y0, &y1, &yw); + linear_texcoord(sampler->state->wrap_r, p[j], depth, &z0, &z1, &zw); + get_texel(sampler, face, level0, x0, y0, z0, texel0, 0); + get_texel(sampler, face, level0, x1, y0, z0, texel0, 1); + get_texel(sampler, face, level0, x0, y1, z0, texel0, 2); + get_texel(sampler, face, level0, x1, y1, z0, texel0, 3); + get_texel(sampler, face, level0, x0, y0, z1, texel1, 0); + get_texel(sampler, face, level0, x1, y0, z1, texel1, 1); + get_texel(sampler, face, level0, x0, y1, z1, texel1, 2); + get_texel(sampler, face, level0, x1, y1, z1, texel1, 3); + + /* 3D lerp */ + for (c = 0; c < 4; c++) { + float ctemp0[4][4], ctemp1[4][4]; + ctemp0[c][j] = lerp_2d(xw, yw, + texel0[c][0], texel0[c][1], + texel0[c][2], texel0[c][3]); + ctemp1[c][j] = lerp_2d(xw, yw, + texel1[c][0], texel1[c][1], + texel1[c][2], texel1[c][3]); + rgba[c][j] = LERP(zw, ctemp0[c][j], ctemp1[c][j]); + } + + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + x0 /= 2; + y0 /= 2; + z0 /= 2; + x1 /= 2; + y1 /= 2; + z1 /= 2; + get_texel(sampler, face, level1, x0, y0, z0, texel0, 0); + get_texel(sampler, face, level1, x1, y0, z0, texel0, 1); + get_texel(sampler, face, level1, x0, y1, z0, texel0, 2); + get_texel(sampler, face, level1, x1, y1, z0, texel0, 3); + get_texel(sampler, face, level1, x0, y0, z1, texel1, 0); + get_texel(sampler, face, level1, x1, y0, z1, texel1, 1); + get_texel(sampler, face, level1, x0, y1, z1, texel1, 2); + 
get_texel(sampler, face, level1, x1, y1, z1, texel1, 3); + + /* 3D lerp */ + for (c = 0; c < 4; c++) { + float ctemp0[4][4], ctemp1[4][4]; + ctemp0[c][j] = lerp_2d(xw, yw, + texel0[c][0], texel0[c][1], + texel0[c][2], texel0[c][3]); + ctemp1[c][j] = lerp_2d(xw, yw, + texel1[c][0], texel1[c][1], + texel1[c][2], texel1[c][3]); + rgba2[c][j] = LERP(zw, ctemp0[c][j], ctemp1[c][j]); + } + + /* blend mipmap levels */ + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = LERP(levelBlend, rgba[c][j], rgba2[c][j]); + } + } + } + break; + default: + assert(0); + } +} + + +static void +sp_get_samples_cube(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + unsigned faces[QUAD_SIZE], j; + float ssss[4], tttt[4]; + for (j = 0; j < QUAD_SIZE; j++) { + faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j); + } + sp_get_samples_2d_common(sampler, ssss, tttt, NULL, lodbias, rgba, faces); +} + + +static void +sp_get_samples_rect(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + //sp_get_samples_2d_common(sampler, s, t, p, lodbias, rgba, faces); + static const uint face = 0; + const uint compare_func = sampler->state->compare_func; + unsigned level0, level1, j, imgFilter; + int width, height; + float levelBlend; + + choose_mipmap_levels(sampler, s, t, p, lodbias, + &level0, &level1, &levelBlend, &imgFilter); + + /* texture RECTS cannot be mipmapped */ + assert(level0 == level1); + + width = sampler->texture->width[level0]; + height = sampler->texture->height[level0]; + + assert(width > 0); + + switch (imgFilter) { + case PIPE_TEX_FILTER_NEAREST: + for (j = 0; j < QUAD_SIZE; j++) { + int x = nearest_texcoord_unnorm(sampler->state->wrap_s, s[j], width); + int y = nearest_texcoord_unnorm(sampler->state->wrap_t, t[j], height); + 
get_texel(sampler, face, level0, x, y, 0, rgba, j); + if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, rgba, p, j); + } + } + break; + case PIPE_TEX_FILTER_LINEAR: + case PIPE_TEX_FILTER_ANISO: + for (j = 0; j < QUAD_SIZE; j++) { + float tx[4][4], a, b; + int x0, y0, x1, y1, c; + linear_texcoord_unnorm(sampler->state->wrap_s, s[j], width, &x0, &x1, &a); + linear_texcoord_unnorm(sampler->state->wrap_t, t[j], height, &y0, &y1, &b); + get_texel(sampler, face, level0, x0, y0, 0, tx, 0); + get_texel(sampler, face, level0, x1, y0, 0, tx, 1); + get_texel(sampler, face, level0, x0, y1, 0, tx, 2); + get_texel(sampler, face, level0, x1, y1, 0, tx, 3); + if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, tx, p, 0); + shadow_compare(compare_func, tx, p, 1); + shadow_compare(compare_func, tx, p, 2); + shadow_compare(compare_func, tx, p, 3); + } + + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(a, b, tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + } + } + break; + default: + assert(0); + } +} + + + + +/** + * Called via tgsi_sampler::get_samples() + * Use the sampler's state setting to get a filtered RGBA value + * from the sampler's texture. + * + * XXX we can implement many versions of this function, each + * tightly coded for a specific combination of sampler state + * (nearest + repeat), (bilinear mipmap + clamp), etc. + * + * The update_samplers() function in st_atom_sampler.c could create + * a new tgsi_sampler object for each state combo it finds.... 
+ */ +void +sp_get_samples(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + if (!sampler->texture) + return; + + switch (sampler->texture->target) { + case PIPE_TEXTURE_1D: + assert(sampler->state->normalized_coords); + sp_get_samples_1d(sampler, s, t, p, lodbias, rgba); + break; + case PIPE_TEXTURE_2D: + if (sampler->state->normalized_coords) + sp_get_samples_2d(sampler, s, t, p, lodbias, rgba); + else + sp_get_samples_rect(sampler, s, t, p, lodbias, rgba); + break; + case PIPE_TEXTURE_3D: + assert(sampler->state->normalized_coords); + sp_get_samples_3d(sampler, s, t, p, lodbias, rgba); + break; + case PIPE_TEXTURE_CUBE: + assert(sampler->state->normalized_coords); + sp_get_samples_cube(sampler, s, t, p, lodbias, rgba); + break; + default: + assert(0); + } + +#if 0 /* DEBUG */ + { + int i; + printf("Sampled at %f, %f, %f:\n", s[0], t[0], p[0]); + for (i = 0; i < 4; i++) { + printf("Frag %d: %f %f %f %f\n", i, + rgba[0][i], + rgba[1][i], + rgba[2][i], + rgba[3][i]); + } + } +#endif +} + diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h new file mode 100644 index 0000000000..404bfd0c36 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -0,0 +1,17 @@ +#ifndef SP_TEX_SAMPLE_H +#define SP_TEX_SAMPLE_H + + +struct tgsi_sampler; + + +extern void +sp_get_samples(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); + + +#endif /* SP_TEX_SAMPLE_H */ diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c new file mode 100644 index 0000000000..cb48035771 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -0,0 +1,356 @@ +/************************************************************************** + * + * 
Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Michel Dänzer <michel@tungstengraphics.com> + */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_texture.h" +#include "sp_tile_cache.h" +#include "sp_screen.h" + + +/* Simple, maximally packed layout. 
+ */ + +static unsigned minify( unsigned d ) +{ + return MAX2(1, d>>1); +} + + +/* Conventional allocation path for non-display textures: + */ +static boolean +softpipe_texture_layout(struct pipe_screen *screen, + struct softpipe_texture * spt) +{ + struct pipe_winsys *ws = screen->winsys; + struct pipe_texture *pt = &spt->base; + unsigned level; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + + unsigned buffer_size = 0; + + for (level = 0; level <= pt->last_level; level++) { + pt->width[level] = width; + pt->height[level] = height; + pt->depth[level] = depth; + pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); + spt->stride[level] = pt->nblocksx[level]*pt->block.size; + + spt->level_offset[level] = buffer_size; + + buffer_size += (pt->nblocksy[level] * + ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * + spt->stride[level]); + + width = minify(width); + height = minify(height); + depth = minify(depth); + } + + spt->buffer = ws->buffer_create(ws, 32, + PIPE_BUFFER_USAGE_PIXEL, + buffer_size); + + return spt->buffer != NULL; +} + + + +/* Hack it up to use the old winsys->surface_alloc_storage() + * method for now: + */ +static boolean +softpipe_displaytarget_layout(struct pipe_screen *screen, + struct softpipe_texture * spt) +{ + struct pipe_winsys *ws = screen->winsys; + struct pipe_surface surf; + unsigned flags = (PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE); + + + memset(&surf, 0, sizeof(surf)); + + ws->surface_alloc_storage( ws, + &surf, + spt->base.width[0], + spt->base.height[0], + spt->base.format, + flags, + spt->base.tex_usage); + + /* Now extract the goodies: + */ + spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]); + spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); + spt->stride[0] = surf.stride; 
+ spt->buffer = surf.buffer; + + return spt->buffer != NULL; +} + + + + + +static struct pipe_texture * +softpipe_texture_create(struct pipe_screen *screen, + const struct pipe_texture *templat) +{ + struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); + if (!spt) + return NULL; + + spt->base = *templat; + spt->base.refcount = 1; + spt->base.screen = screen; + + if (spt->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) { + if (!softpipe_displaytarget_layout(screen, spt)) + goto fail; + } + else { + if (!softpipe_texture_layout(screen, spt)) + goto fail; + } + + assert(spt->base.refcount == 1); + return &spt->base; + + fail: + FREE(spt); + return NULL; +} + + +static struct pipe_texture * +softpipe_texture_blanket(struct pipe_screen * screen, + const struct pipe_texture *base, + const unsigned *stride, + struct pipe_buffer *buffer) +{ + struct softpipe_texture *spt; + assert(screen); + + /* Only supports one type */ + if (base->target != PIPE_TEXTURE_2D || + base->last_level != 0 || + base->depth[0] != 1) { + return NULL; + } + + spt = CALLOC_STRUCT(softpipe_texture); + if (!spt) + return NULL; + + spt->base = *base; + spt->base.refcount = 1; + spt->base.screen = screen; + spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]); + spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); + spt->stride[0] = stride[0]; + + pipe_buffer_reference(screen, &spt->buffer, buffer); + + return &spt->base; +} + + +static void +softpipe_texture_release(struct pipe_screen *screen, + struct pipe_texture **pt) +{ + if (!*pt) + return; + + if (--(*pt)->refcount <= 0) { + struct softpipe_texture *spt = softpipe_texture(*pt); + + pipe_buffer_reference(screen, &spt->buffer, NULL); + FREE(spt); + } + *pt = NULL; +} + + +static struct pipe_surface * +softpipe_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned usage) +{ + struct pipe_winsys *ws = 
screen->winsys; + struct softpipe_texture *spt = softpipe_texture(pt); + struct pipe_surface *ps; + + assert(level <= pt->last_level); + + ps = ws->surface_alloc(ws); + if (ps) { + assert(ps->refcount); + assert(ps->winsys); + pipe_buffer_reference(screen, &ps->buffer, spt->buffer); + ps->format = pt->format; + ps->block = pt->block; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = spt->stride[level]; + ps->offset = spt->level_offset[level]; + ps->usage = usage; + + /* Because we are softpipe, anything that the state tracker + * thought was going to be done with the GPU will actually get + * done with the CPU. Let's adjust the flags to take that into + * account. + */ + if (ps->usage & PIPE_BUFFER_USAGE_GPU_WRITE) { + /* GPU_WRITE means "render" and that can involve reads (blending) */ + ps->usage |= PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_CPU_READ; + } + + if (ps->usage & PIPE_BUFFER_USAGE_GPU_READ) + ps->usage |= PIPE_BUFFER_USAGE_CPU_READ; + + if (ps->usage & (PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_WRITE)) { + /* Mark the surface as dirty. The tile cache will look for this. */ + spt->modified = TRUE; + } + + pipe_texture_reference(&ps->texture, pt); + ps->face = face; + ps->level = level; + ps->zslice = zslice; + + if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { + ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * + ps->nblocksy * + ps->stride; + } + else { + assert(face == 0); + assert(zslice == 0); + } + } + return ps; +} + + +static void +softpipe_tex_surface_release(struct pipe_screen *screen, + struct pipe_surface **s) +{ + /* Effectively do the texture_update work here - if texture images + * needed post-processing to put them into hardware layout, this is + * where it would happen. For softpipe, nothing to do. 
+ */ + assert ((*s)->texture); + pipe_texture_reference(&(*s)->texture, NULL); + + screen->winsys->surface_release(screen->winsys, s); +} + + +static void * +softpipe_surface_map( struct pipe_screen *screen, + struct pipe_surface *surface, + unsigned flags ) +{ + ubyte *map; + + if (flags & ~surface->usage) { + assert(0); + return NULL; + } + + map = pipe_buffer_map( screen, surface->buffer, flags ); + if (map == NULL) + return NULL; + + /* May want to different things here depending on read/write nature + * of the map: + */ + if (surface->texture && + (flags & PIPE_BUFFER_USAGE_CPU_WRITE)) + { + /* Do something to notify sharing contexts of a texture change. + * In softpipe, that would mean flushing the texture cache. + */ + softpipe_screen(screen)->timestamp++; + } + + return map + surface->offset; +} + + +static void +softpipe_surface_unmap(struct pipe_screen *screen, + struct pipe_surface *surface) +{ + pipe_buffer_unmap( screen, surface->buffer ); +} + + +void +softpipe_init_texture_funcs(struct softpipe_context *sp) +{ +} + + +void +softpipe_init_screen_texture_funcs(struct pipe_screen *screen) +{ + screen->texture_create = softpipe_texture_create; + screen->texture_blanket = softpipe_texture_blanket; + screen->texture_release = softpipe_texture_release; + + screen->get_tex_surface = softpipe_get_tex_surface; + screen->tex_surface_release = softpipe_tex_surface_release; + + screen->surface_map = softpipe_surface_map; + screen->surface_unmap = softpipe_surface_unmap; +} diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h new file mode 100644 index 0000000000..bf437a7c61 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -0,0 +1,70 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef SP_TEXTURE_H +#define SP_TEXTURE_H + + +#include "pipe/p_state.h" + + +struct pipe_context; +struct pipe_screen; +struct softpipe_context; + + +struct softpipe_texture +{ + struct pipe_texture base; + + unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long stride[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; + + boolean modified; +}; + + +/** cast wrapper */ +static INLINE struct softpipe_texture * +softpipe_texture(struct pipe_texture *pt) +{ + return (struct softpipe_texture *) pt; +} + + +extern void +softpipe_init_texture_funcs( struct softpipe_context *softpipe ); + +extern void +softpipe_init_screen_texture_funcs(struct pipe_screen *screen); + + +#endif /* SP_TEXTURE */ diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c new file mode 100644 index 0000000000..b50c984513 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -0,0 +1,614 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Framebuffer/surface tile caching. + * + * Author: + * Brian Paul + */ + +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "util/u_tile.h" +#include "sp_context.h" +#include "sp_surface.h" +#include "sp_texture.h" +#include "sp_tile_cache.h" + +#define NUM_ENTRIES 32 + + +/** XXX move these */ +#define MAX_WIDTH 2048 +#define MAX_HEIGHT 2048 + + +struct softpipe_tile_cache +{ + struct pipe_screen *screen; + struct pipe_surface *surface; /**< the surface we're caching */ + void *surface_map; + struct pipe_texture *texture; /**< if caching a texture */ + struct softpipe_cached_tile entries[NUM_ENTRIES]; + uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32]; + float clear_color[4]; + uint clear_val; + boolean depth_stencil; /** Is the surface a depth/stencil format? */ + + struct pipe_surface *tex_surf; + void *tex_surf_map; + int tex_face, tex_level, tex_z; + + struct softpipe_cached_tile tile; /**< scratch tile for clears */ +}; + + +/** + * Return the position in the cache for the tile that contains win pos (x,y). + * We currently use a direct mapped cache so this is like a hack key. + * At some point we should investige something more sophisticated, like + * a LRU replacement policy. + */ +#define CACHE_POS(x, y) \ + (((x) / TILE_SIZE + ((y) / TILE_SIZE) * 5) % NUM_ENTRIES) + + + +/** + * Is the tile at (x,y) in cleared state? 
+ */ +static INLINE uint +is_clear_flag_set(const uint *bitvec, int x, int y) +{ + int pos, bit; + x /= TILE_SIZE; + y /= TILE_SIZE; + pos = y * (MAX_WIDTH / TILE_SIZE) + x; + assert(pos / 32 < (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32); + bit = bitvec[pos / 32] & (1 << (pos & 31)); + return bit; +} + + +/** + * Mark the tile at (x,y) as not cleared. + */ +static INLINE void +clear_clear_flag(uint *bitvec, int x, int y) +{ + int pos; + x /= TILE_SIZE; + y /= TILE_SIZE; + pos = y * (MAX_WIDTH / TILE_SIZE) + x; + assert(pos / 32 < (MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32); + bitvec[pos / 32] &= ~(1 << (pos & 31)); +} + + +struct softpipe_tile_cache * +sp_create_tile_cache( struct pipe_screen *screen ) +{ + struct softpipe_tile_cache *tc; + uint pos; + + tc = CALLOC_STRUCT( softpipe_tile_cache ); + if (tc) { + tc->screen = screen; + for (pos = 0; pos < NUM_ENTRIES; pos++) { + tc->entries[pos].x = + tc->entries[pos].y = -1; + } + } + return tc; +} + + +void +sp_destroy_tile_cache(struct softpipe_tile_cache *tc) +{ + uint pos; + + for (pos = 0; pos < NUM_ENTRIES; pos++) { + /*assert(tc->entries[pos].x < 0);*/ + } + if (tc->surface) { + pipe_surface_reference(&tc->surface, NULL); + } + if (tc->tex_surf) { + pipe_surface_reference(&tc->tex_surf, NULL); + } + + FREE( tc ); +} + + +/** + * Specify the surface to cache. + */ +void +sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, + struct pipe_surface *ps) +{ + assert(!tc->texture); + + if (tc->surface_map) { + tc->screen->surface_unmap(tc->screen, tc->surface); + tc->surface_map = NULL; + } + + pipe_surface_reference(&tc->surface, ps); + + if (tc->surface) { + if (tc->surface_map) /* XXX: this is always NULL!? 
*/ + tc->surface_map = tc->screen->surface_map(tc->screen, tc->surface, + PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE); + + tc->depth_stencil = (ps->format == PIPE_FORMAT_S8Z24_UNORM || + ps->format == PIPE_FORMAT_X8Z24_UNORM || + ps->format == PIPE_FORMAT_Z24S8_UNORM || + ps->format == PIPE_FORMAT_Z24X8_UNORM || + ps->format == PIPE_FORMAT_Z16_UNORM || + ps->format == PIPE_FORMAT_Z32_UNORM || + ps->format == PIPE_FORMAT_S8_UNORM); + } +} + + +/** + * Return the surface being cached. + */ +struct pipe_surface * +sp_tile_cache_get_surface(struct softpipe_tile_cache *tc) +{ + return tc->surface; +} + + +void +sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc) +{ + if (tc->surface && !tc->surface_map) + tc->surface_map = tc->screen->surface_map(tc->screen, tc->surface, + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_CPU_READ); + + if (tc->tex_surf && !tc->tex_surf_map) + tc->tex_surf_map = tc->screen->surface_map(tc->screen, tc->tex_surf, + PIPE_BUFFER_USAGE_CPU_READ); +} + + +void +sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc) +{ + if (tc->surface_map) { + tc->screen->surface_unmap(tc->screen, tc->surface); + tc->surface_map = NULL; + } + + if (tc->tex_surf_map) { + tc->screen->surface_unmap(tc->screen, tc->tex_surf); + tc->tex_surf_map = NULL; + } +} + + +/** + * Specify the texture to cache. 
+ */ +void +sp_tile_cache_set_texture(struct pipe_context *pipe, + struct softpipe_tile_cache *tc, + struct pipe_texture *texture) +{ + uint i; + + assert(!tc->surface); + + pipe_texture_reference(&tc->texture, texture); + + if (tc->tex_surf_map) { + tc->screen->surface_unmap(tc->screen, tc->tex_surf); + tc->tex_surf_map = NULL; + } + pipe_surface_reference(&tc->tex_surf, NULL); + + /* mark as entries as invalid/empty */ + /* XXX we should try to avoid this when the teximage hasn't changed */ + for (i = 0; i < NUM_ENTRIES; i++) { + tc->entries[i].x = -1; + } + + tc->tex_face = -1; /* any invalid value here */ +} + + +/** + * Set pixels in a tile to the given clear color/value, float. + */ +static void +clear_tile_rgba(struct softpipe_cached_tile *tile, + enum pipe_format format, + const float clear_value[4]) +{ + if (clear_value[0] == 0.0 && + clear_value[1] == 0.0 && + clear_value[2] == 0.0 && + clear_value[3] == 0.0) { + memset(tile->data.color, 0, sizeof(tile->data.color)); + } + else { + uint i, j; + for (i = 0; i < TILE_SIZE; i++) { + for (j = 0; j < TILE_SIZE; j++) { + tile->data.color[i][j][0] = clear_value[0]; + tile->data.color[i][j][1] = clear_value[1]; + tile->data.color[i][j][2] = clear_value[2]; + tile->data.color[i][j][3] = clear_value[3]; + } + } + } +} + + +/** + * Set a tile to a solid value/color. 
+ */ +static void +clear_tile(struct softpipe_cached_tile *tile, + enum pipe_format format, + uint clear_value) +{ + uint i, j; + + switch (pf_get_size(format)) { + case 1: + memset(tile->data.any, 0, TILE_SIZE * TILE_SIZE); + break; + case 2: + if (clear_value == 0) { + memset(tile->data.any, 0, 2 * TILE_SIZE * TILE_SIZE); + } + else { + for (i = 0; i < TILE_SIZE; i++) { + for (j = 0; j < TILE_SIZE; j++) { + tile->data.depth16[i][j] = (ushort) clear_value; + } + } + } + break; + case 4: + if (clear_value == 0) { + memset(tile->data.any, 0, 4 * TILE_SIZE * TILE_SIZE); + } + else { + for (i = 0; i < TILE_SIZE; i++) { + for (j = 0; j < TILE_SIZE; j++) { + tile->data.color32[i][j] = clear_value; + } + } + } + break; + default: + assert(0); + } +} + + +/** + * Actually clear the tiles which were flagged as being in a clear state. + */ +static void +sp_tile_cache_flush_clear(struct pipe_context *pipe, + struct softpipe_tile_cache *tc) +{ + struct pipe_surface *ps = tc->surface; + const uint w = tc->surface->width; + const uint h = tc->surface->height; + uint x, y; + uint numCleared = 0; + + /* clear the scratch tile to the clear value */ + clear_tile(&tc->tile, ps->format, tc->clear_val); + + /* push the tile to all positions marked as clear */ + for (y = 0; y < h; y += TILE_SIZE) { + for (x = 0; x < w; x += TILE_SIZE) { + if (is_clear_flag_set(tc->clear_flags, x, y)) { + pipe_put_tile_raw(ps, + x, y, TILE_SIZE, TILE_SIZE, + tc->tile.data.color32, 0/*STRIDE*/); + + /* do this? */ + clear_clear_flag(tc->clear_flags, x, y); + + numCleared++; + } + } + } +#if 0 + debug_printf("num cleared: %u\n", numCleared); +#endif +} + + +/** + * Flush the tile cache: write all dirty tiles back to the surface. + * any tiles "flagged" as cleared will be "really" cleared. 
+ */ +void +sp_flush_tile_cache(struct softpipe_context *softpipe, + struct softpipe_tile_cache *tc) +{ + struct pipe_surface *ps = tc->surface; + int inuse = 0, pos; + + if (ps && ps->buffer) { + /* caching a drawing surface */ + for (pos = 0; pos < NUM_ENTRIES; pos++) { + struct softpipe_cached_tile *tile = tc->entries + pos; + if (tile->x >= 0) { + if (tc->depth_stencil) { + pipe_put_tile_raw(ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->data.depth32, 0/*STRIDE*/); + } + else { + pipe_put_tile_rgba(ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + } + tile->x = tile->y = -1; /* mark as empty */ + inuse++; + } + } + +#if TILE_CLEAR_OPTIMIZATION + sp_tile_cache_flush_clear(&softpipe->pipe, tc); +#endif + } + else if (tc->texture) { + /* caching a texture, mark all entries as empty */ + for (pos = 0; pos < NUM_ENTRIES; pos++) { + tc->entries[pos].x = -1; + } + tc->tex_face = -1; + } + +#if 0 + debug_printf("flushed tiles in use: %d\n", inuse); +#endif +} + + +/** + * Get a tile from the cache. 
+ * \param x, y position of tile, in pixels + */ +struct softpipe_cached_tile * +sp_get_cached_tile(struct softpipe_context *softpipe, + struct softpipe_tile_cache *tc, int x, int y) +{ + struct pipe_surface *ps = tc->surface; + + /* tile pos in framebuffer: */ + const int tile_x = x & ~(TILE_SIZE - 1); + const int tile_y = y & ~(TILE_SIZE - 1); + + /* cache pos/entry: */ + const int pos = CACHE_POS(x, y); + struct softpipe_cached_tile *tile = tc->entries + pos; + + if (tile_x != tile->x || + tile_y != tile->y) { + + if (tile->x != -1) { + /* put dirty tile back in framebuffer */ + if (tc->depth_stencil) { + pipe_put_tile_raw(ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->data.depth32, 0/*STRIDE*/); + } + else { + pipe_put_tile_rgba(ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + } + } + + tile->x = tile_x; + tile->y = tile_y; + + if (is_clear_flag_set(tc->clear_flags, x, y)) { + /* don't get tile from framebuffer, just clear it */ + if (tc->depth_stencil) { + clear_tile(tile, ps->format, tc->clear_val); + } + else { + clear_tile_rgba(tile, ps->format, tc->clear_color); + } + clear_clear_flag(tc->clear_flags, x, y); + } + else { + /* get new tile data from surface */ + if (tc->depth_stencil) { + pipe_get_tile_raw(ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + tile->data.depth32, 0/*STRIDE*/); + } + else { + pipe_get_tile_rgba(ps, + tile->x, tile->y, TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + } + } + } + + return tile; +} + + +/** + * Given the texture face, level, zslice, x and y values, compute + * the cache entry position/index where we'd hope to find the + * cached texture tile. + * This is basically a direct-map cache. + * XXX There's probably lots of ways in which we can improve this. + */ +static INLINE uint +tex_cache_pos(int x, int y, int z, int face, int level) +{ + uint entry = x + y * 5 + z * 4 + face + level; + return entry % NUM_ENTRIES; +} + + +/** + * Similar to sp_get_cached_tile() but for textures. 
+ * Tiles are read-only and indexed with more params. + */ +const struct softpipe_cached_tile * +sp_get_cached_tile_tex(struct pipe_context *pipe, + struct softpipe_tile_cache *tc, int x, int y, int z, + int face, int level) +{ + struct pipe_screen *screen = pipe->screen; + /* tile pos in framebuffer: */ + const int tile_x = x & ~(TILE_SIZE - 1); + const int tile_y = y & ~(TILE_SIZE - 1); + /* cache pos/entry: */ + const uint pos = tex_cache_pos(x / TILE_SIZE, y / TILE_SIZE, z, + face, level); + struct softpipe_cached_tile *tile = tc->entries + pos; + + if (tc->texture) { + struct softpipe_texture *spt = softpipe_texture(tc->texture); + if (spt->modified) { + /* texture was modified, force a cache reload */ + tile->x = -1; + spt->modified = FALSE; + } + } + + if (tile_x != tile->x || + tile_y != tile->y || + z != tile->z || + face != tile->face || + level != tile->level) { + /* cache miss */ + + /* check if we need to get a new surface */ + if (!tc->tex_surf || + tc->tex_face != face || + tc->tex_level != level || + tc->tex_z != z) { + /* get new surface (view into texture) */ + + if (tc->tex_surf_map) + tc->screen->surface_unmap(tc->screen, tc->tex_surf); + + tc->tex_surf = screen->get_tex_surface(screen, tc->texture, face, level, z, + PIPE_BUFFER_USAGE_CPU_READ); + tc->tex_surf_map = screen->surface_map(screen, tc->tex_surf, + PIPE_BUFFER_USAGE_CPU_READ); + + tc->tex_face = face; + tc->tex_level = level; + tc->tex_z = z; + } + + /* get tile from the surface (view into texture) */ + pipe_get_tile_rgba(tc->tex_surf, + tile_x, tile_y, TILE_SIZE, TILE_SIZE, + (float *) tile->data.color); + tile->x = tile_x; + tile->y = tile_y; + tile->z = z; + tile->face = face; + tile->level = level; + } + + return tile; +} + + +/** + * When a whole surface is being cleared to a value we can avoid + * fetching tiles above. + * Save the color and set a 'clearflag' for each tile of the screen. 
+ */ +void +sp_tile_cache_clear(struct softpipe_tile_cache *tc, uint clearValue) +{ + uint r, g, b, a; + uint pos; + + tc->clear_val = clearValue; + + switch (tc->surface->format) { + case PIPE_FORMAT_R8G8B8A8_UNORM: + r = (clearValue >> 24) & 0xff; + g = (clearValue >> 16) & 0xff; + b = (clearValue >> 8) & 0xff; + a = (clearValue ) & 0xff; + break; + case PIPE_FORMAT_A8R8G8B8_UNORM: + r = (clearValue >> 16) & 0xff; + g = (clearValue >> 8) & 0xff; + b = (clearValue ) & 0xff; + a = (clearValue >> 24) & 0xff; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + r = (clearValue >> 8) & 0xff; + g = (clearValue >> 16) & 0xff; + b = (clearValue >> 24) & 0xff; + a = (clearValue ) & 0xff; + break; + default: + r = g = b = a = 0; + } + + tc->clear_color[0] = r / 255.0f; + tc->clear_color[1] = g / 255.0f; + tc->clear_color[2] = b / 255.0f; + tc->clear_color[3] = a / 255.0f; + +#if TILE_CLEAR_OPTIMIZATION + /* set flags to indicate all the tiles are cleared */ + memset(tc->clear_flags, 255, sizeof(tc->clear_flags)); +#else + /* disable the optimization */ + memset(tc->clear_flags, 0, sizeof(tc->clear_flags)); +#endif + + for (pos = 0; pos < NUM_ENTRIES; pos++) { + struct softpipe_cached_tile *tile = tc->entries + pos; + tile->x = tile->y = -1; + } +} diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h new file mode 100644 index 0000000000..bc96c941f6 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -0,0 +1,105 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef SP_TILE_CACHE_H +#define SP_TILE_CACHE_H + +#define TILE_CLEAR_OPTIMIZATION 1 + + +#include "pipe/p_compiler.h" + + +struct softpipe_context; +struct softpipe_tile_cache; + + +/** + * Cache tile size (width and height). This needs to be a power of two. 
+ */ +#define TILE_SIZE 64 + + + +struct softpipe_cached_tile +{ + int x, y; /**< pos of tile in window coords */ + int z, face, level; /**< Extra texture indexes */ + union { + float color[TILE_SIZE][TILE_SIZE][4]; + uint color32[TILE_SIZE][TILE_SIZE]; + uint depth32[TILE_SIZE][TILE_SIZE]; + ushort depth16[TILE_SIZE][TILE_SIZE]; + ubyte stencil8[TILE_SIZE][TILE_SIZE]; + ubyte any[1]; + } data; +}; + + +extern struct softpipe_tile_cache * +sp_create_tile_cache( struct pipe_screen *screen ); + +extern void +sp_destroy_tile_cache(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, + struct pipe_surface *sps); + +extern struct pipe_surface * +sp_tile_cache_get_surface(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_set_texture(struct pipe_context *pipe, + struct softpipe_tile_cache *tc, + struct pipe_texture *texture); + +extern void +sp_flush_tile_cache(struct softpipe_context *softpipe, + struct softpipe_tile_cache *tc); + +extern void +sp_tile_cache_clear(struct softpipe_tile_cache *tc, uint clearValue); + +extern struct softpipe_cached_tile * +sp_get_cached_tile(struct softpipe_context *softpipe, + struct softpipe_tile_cache *tc, int x, int y); + +extern const struct softpipe_cached_tile * +sp_get_cached_tile_tex(struct pipe_context *pipe, + struct softpipe_tile_cache *tc, int x, int y, int z, + int face, int level); + + +#endif /* SP_TILE_CACHE_H */ + diff --git a/src/gallium/drivers/softpipe/sp_winsys.h b/src/gallium/drivers/softpipe/sp_winsys.h new file mode 100644 index 0000000000..4ab666486c --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_winsys.h @@ -0,0 +1,73 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* This is the interface that softpipe requires any window system + * hosting it to implement. This is the only include file in softpipe + * which is public. 
+ */ + + +#ifndef SP_WINSYS_H +#define SP_WINSYS_H + + +#include "pipe/p_compiler.h" /* for boolean */ + + +#ifdef __cplusplus +extern "C" { +#endif + + +enum pipe_format; + +struct softpipe_winsys { + /** test if the given format is supported for front/back color bufs */ + boolean (*is_format_supported)( struct softpipe_winsys *sws, + enum pipe_format format ); + +}; + +struct pipe_screen; +struct pipe_winsys; +struct pipe_context; + + +struct pipe_context *softpipe_create( struct pipe_screen *, + struct pipe_winsys *, + void *unused ); + + +struct pipe_screen * +softpipe_create_screen(struct pipe_winsys *); + + +#ifdef __cplusplus +} +#endif + +#endif /* SP_WINSYS_H */ diff --git a/src/gallium/drivers/trace/README b/src/gallium/drivers/trace/README new file mode 100644 index 0000000000..f0e1cd596d --- /dev/null +++ b/src/gallium/drivers/trace/README @@ -0,0 +1,64 @@ + TRACE PIPE DRIVER + + += About = + +This directory contains a Gallium3D pipe driver which traces all incoming calls. + + += Build Instructions = + +To build, invoke scons on the top dir as + + scons statetrackers=mesa drivers=softpipe,i965simple,trace winsys=xlib + + += Usage = + +To use do + + ln -s libGL.so build/linux-x86-debug/gallium/winsys/xlib/libGL.so.1 + export LD_LIBRARY_PATH=$PWD/build/linux-x86-debug/gallium/winsys/xlib + +ensure the right libGL.so is being picked by doing + + ldd progs/trivial/tri + +and then try running + + GALLIUM_TRACE=tri.trace progs/trivial/tri + +which should create a tri.trace file, which is an XML file. You can view it by +copying trace.xsl to the same directory and opening it with an XSLT-capable +browser such as Firefox or Internet Explorer. + + += Integrating = + +You can integrate the trace pipe driver either inside the state tracker or the +winsys. The procedure in both cases is the same. 
Let's assume you have a +pipe_screen and a pipe_context pair obtained by the usual means (variable and +function names are just for illustration purposes): + + real_screen = real_screen_create(...); + + real_context = real_context_create(...); + +The trace screen and pipe_context are then created by doing + + trace_screen = trace_screen_create(real_screen); + + trace_context = trace_context_create(trace_screen, real_context); + +You can then simply use trace_screen and trace_context instead of real_screen +and real_context. + +Do not call trace_winsys_create. Simply pass trace_screen->winsys or +trace_context->winsys in places where you would pass winsys. + +You can create as many contexts as you wish. Just ensure that you don't mistake +trace_screen for real_screen when creating them. + + +-- +Jose Fonseca <jrfonseca@tungstengraphics.com> diff --git a/src/gallium/drivers/trace/SConscript b/src/gallium/drivers/trace/SConscript new file mode 100644 index 0000000000..0a6bfb8f4c --- /dev/null +++ b/src/gallium/drivers/trace/SConscript @@ -0,0 +1,16 @@ +Import('*') + +env = env.Clone() + +trace = env.ConvenienceLibrary( + target = 'trace', + source = [ + 'tr_context.c', + 'tr_dump.c', + 'tr_screen.c', + 'tr_state.c', + 'tr_texture.c', + 'tr_winsys.c', + ]) + +Export('trace')
\ No newline at end of file diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c new file mode 100644 index 0000000000..1dd7719379 --- /dev/null +++ b/src/gallium/drivers/trace/tr_context.c @@ -0,0 +1,1072 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_memory.h" +#include "pipe/p_screen.h" + +#include "tr_dump.h" +#include "tr_state.h" +#include "tr_screen.h" +#include "tr_texture.h" +#include "tr_winsys.h" +#include "tr_context.h" + + +static INLINE struct pipe_texture * +trace_texture_unwrap(struct trace_context *tr_ctx, + struct pipe_texture *texture) +{ + struct trace_screen *tr_scr = trace_screen(tr_ctx->base.screen); + struct trace_texture *tr_tex; + + if(!texture) + return NULL; + + tr_tex = trace_texture(tr_scr, texture); + + assert(tr_tex->texture); + assert(tr_tex->texture->screen == tr_scr->screen); + return tr_tex->texture; +} + + +static INLINE struct pipe_surface * +trace_surface_unwrap(struct trace_context *tr_ctx, + struct pipe_surface *surface) +{ + struct trace_screen *tr_scr = trace_screen(tr_ctx->base.screen); + struct trace_texture *tr_tex; + struct trace_surface *tr_surf; + + if(!surface) + return NULL; + + assert(surface->texture); + if(!surface->texture) + return surface; + + tr_tex = trace_texture(tr_scr, surface->texture); + tr_surf = trace_surface(tr_tex, surface); + + assert(tr_surf->surface); + assert(tr_surf->surface->texture->screen == tr_scr->screen); + return tr_surf->surface; +} + + +static INLINE void +trace_context_set_edgeflags(struct pipe_context *_pipe, + const unsigned *bitfield) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "set_edgeflags"); + + trace_dump_arg(ptr, pipe); + /* FIXME: we don't know how big this array is */ + trace_dump_arg(ptr, bitfield); + + pipe->set_edgeflags(pipe, bitfield);; + + trace_dump_call_end(); +} + + +static INLINE boolean +trace_context_draw_arrays(struct pipe_context *_pipe, + unsigned mode, unsigned start, unsigned count) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + boolean result; + + 
trace_dump_call_begin("pipe_context", "draw_arrays"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, mode); + trace_dump_arg(uint, start); + trace_dump_arg(uint, count); + + result = pipe->draw_arrays(pipe, mode, start, count);; + + trace_dump_ret(bool, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE boolean +trace_context_draw_elements(struct pipe_context *_pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + boolean result; + + trace_winsys_user_buffer_update(_pipe->winsys, indexBuffer); + + trace_dump_call_begin("pipe_context", "draw_elements"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, indexBuffer); + trace_dump_arg(uint, indexSize); + trace_dump_arg(uint, mode); + trace_dump_arg(uint, start); + trace_dump_arg(uint, count); + + result = pipe->draw_elements(pipe, indexBuffer, indexSize, mode, start, count);; + + trace_dump_ret(bool, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE boolean +trace_context_draw_range_elements(struct pipe_context *_pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + boolean result; + + trace_winsys_user_buffer_update(_pipe->winsys, indexBuffer); + + trace_dump_call_begin("pipe_context", "draw_range_elements"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, indexBuffer); + trace_dump_arg(uint, indexSize); + trace_dump_arg(uint, minIndex); + trace_dump_arg(uint, maxIndex); + trace_dump_arg(uint, mode); + trace_dump_arg(uint, start); + trace_dump_arg(uint, count); + + result = pipe->draw_range_elements(pipe, + indexBuffer, + indexSize, minIndex, maxIndex, + mode, start, count); + + 
trace_dump_ret(bool, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE struct pipe_query * +trace_context_create_query(struct pipe_context *_pipe, + unsigned query_type) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + struct pipe_query *result; + + trace_dump_call_begin("pipe_context", "create_query"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, query_type); + + result = pipe->create_query(pipe, query_type);; + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE void +trace_context_destroy_query(struct pipe_context *_pipe, + struct pipe_query *query) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "destroy_query"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, query); + + pipe->destroy_query(pipe, query);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_begin_query(struct pipe_context *_pipe, + struct pipe_query *query) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "begin_query"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, query); + + pipe->begin_query(pipe, query);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_end_query(struct pipe_context *_pipe, + struct pipe_query *query) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "end_query"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, query); + + pipe->end_query(pipe, query); + + trace_dump_call_end(); +} + + +static INLINE boolean +trace_context_get_query_result(struct pipe_context *_pipe, + struct pipe_query *query, + boolean wait, + uint64 *presult) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct 
pipe_context *pipe = tr_ctx->pipe; + uint64 result; + boolean _result; + + trace_dump_call_begin("pipe_context", "get_query_result"); + + trace_dump_arg(ptr, pipe); + + _result = pipe->get_query_result(pipe, query, wait, presult);; + result = *presult; + + trace_dump_arg(uint, result); + trace_dump_ret(bool, _result); + + trace_dump_call_end(); + + return _result; +} + + +static INLINE void * +trace_context_create_blend_state(struct pipe_context *_pipe, + const struct pipe_blend_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin("pipe_context", "create_blend_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(blend_state, state); + + result = pipe->create_blend_state(pipe, state);; + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE void +trace_context_bind_blend_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "bind_blend_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->bind_blend_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_delete_blend_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "delete_blend_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->delete_blend_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void * +trace_context_create_sampler_state(struct pipe_context *_pipe, + const struct pipe_sampler_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin("pipe_context", 
"create_sampler_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(sampler_state, state); + + result = pipe->create_sampler_state(pipe, state);; + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE void +trace_context_bind_sampler_states(struct pipe_context *_pipe, + unsigned num_states, void **states) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "bind_sampler_states"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_states); + trace_dump_arg_array(ptr, states, num_states); + + pipe->bind_sampler_states(pipe, num_states, states);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_delete_sampler_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "delete_sampler_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->delete_sampler_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void * +trace_context_create_rasterizer_state(struct pipe_context *_pipe, + const struct pipe_rasterizer_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin("pipe_context", "create_rasterizer_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(rasterizer_state, state); + + result = pipe->create_rasterizer_state(pipe, state);; + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE void +trace_context_bind_rasterizer_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "bind_rasterizer_state"); + + trace_dump_arg(ptr, pipe); + 
trace_dump_arg(ptr, state); + + pipe->bind_rasterizer_state(pipe, state);; + + trace_dump_call_end(); +} + + +/** + * Trace wrapper: dump the call and its arguments, then forward + * delete_rasterizer_state to the wrapped context. + */ +static INLINE void +trace_context_delete_rasterizer_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "delete_rasterizer_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->delete_rasterizer_state(pipe, state);; + + trace_dump_call_end(); +} + + +/** + * Trace wrapper: dump the call and its arguments, forward + * create_depth_stencil_alpha_state to the wrapped context and dump the + * returned state handle. + * \return the handle returned by the wrapped context + */ +static INLINE void * +trace_context_create_depth_stencil_alpha_state(struct pipe_context *_pipe, + const struct pipe_depth_stencil_alpha_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin("pipe_context", "create_depth_stencil_alpha_state"); + + /* dump the arguments before making the call, as the other create_* + * wrappers in this file do + */ + trace_dump_arg(ptr, pipe); + trace_dump_arg(depth_stencil_alpha_state, state); + + result = pipe->create_depth_stencil_alpha_state(pipe, state);; + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + return result; +} + + +/** + * Trace wrapper: dump the call and its arguments, then forward + * bind_depth_stencil_alpha_state to the wrapped context. + */ +static INLINE void +trace_context_bind_depth_stencil_alpha_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "bind_depth_stencil_alpha_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->bind_depth_stencil_alpha_state(pipe, state);; + + trace_dump_call_end(); +} + + +/** + * Trace wrapper: dump the call and its arguments, then forward + * delete_depth_stencil_alpha_state to the wrapped context. + */ +static INLINE void +trace_context_delete_depth_stencil_alpha_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "delete_depth_stencil_alpha_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->delete_depth_stencil_alpha_state(pipe, state);; + + trace_dump_call_end(); +} + + 
+static INLINE void * +trace_context_create_fs_state(struct pipe_context *_pipe, + const struct pipe_shader_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin("pipe_context", "create_fs_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(shader_state, state); + + result = pipe->create_fs_state(pipe, state);; + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE void +trace_context_bind_fs_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "bind_fs_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->bind_fs_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_delete_fs_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "delete_fs_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->delete_fs_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void * +trace_context_create_vs_state(struct pipe_context *_pipe, + const struct pipe_shader_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + void * result; + + trace_dump_call_begin("pipe_context", "create_vs_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(shader_state, state); + + result = pipe->create_vs_state(pipe, state);; + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + return result; +} + + +static INLINE void +trace_context_bind_vs_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + 
trace_dump_call_begin("pipe_context", "bind_vs_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->bind_vs_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_delete_vs_state(struct pipe_context *_pipe, + void *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "delete_vs_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, state); + + pipe->delete_vs_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_blend_color(struct pipe_context *_pipe, + const struct pipe_blend_color *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "set_blend_color"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(blend_color, state); + + pipe->set_blend_color(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_clip_state(struct pipe_context *_pipe, + const struct pipe_clip_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "set_clip_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(clip_state, state); + + pipe->set_clip_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_constant_buffer(struct pipe_context *_pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buffer) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_winsys_user_buffer_update(_pipe->winsys, (struct pipe_buffer *)buffer); + + trace_dump_call_begin("pipe_context", "set_constant_buffer"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, shader); + trace_dump_arg(uint, index); + trace_dump_arg(constant_buffer, buffer); + + 
pipe->set_constant_buffer(pipe, shader, index, buffer);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_framebuffer_state(struct pipe_context *_pipe, + const struct pipe_framebuffer_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + struct pipe_framebuffer_state unwrapped_state; + unsigned i; + + /* Unwrap the input state */ + memcpy(&unwrapped_state, state, sizeof(unwrapped_state)); + for(i = 0; i < state->num_cbufs; ++i) + unwrapped_state.cbufs[i] = trace_surface_unwrap(tr_ctx, state->cbufs[i]); + for(i = state->num_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i) + unwrapped_state.cbufs[i] = NULL; + unwrapped_state.zsbuf = trace_surface_unwrap(tr_ctx, state->zsbuf); + state = &unwrapped_state; + + trace_dump_call_begin("pipe_context", "set_framebuffer_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(framebuffer_state, state); + + pipe->set_framebuffer_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_polygon_stipple(struct pipe_context *_pipe, + const struct pipe_poly_stipple *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "set_polygon_stipple"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(poly_stipple, state); + + pipe->set_polygon_stipple(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_scissor_state(struct pipe_context *_pipe, + const struct pipe_scissor_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "set_scissor_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(scissor_state, state); + + pipe->set_scissor_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_viewport_state(struct pipe_context *_pipe, + const struct 
pipe_viewport_state *state) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "set_viewport_state"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(viewport_state, state); + + pipe->set_viewport_state(pipe, state);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_sampler_textures(struct pipe_context *_pipe, + unsigned num_textures, + struct pipe_texture **textures) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + struct pipe_texture *unwrapped_textures[PIPE_MAX_SAMPLERS]; + unsigned i; + + for(i = 0; i < num_textures; ++i) + unwrapped_textures[i] = trace_texture_unwrap(tr_ctx, textures[i]); + textures = unwrapped_textures; + + trace_dump_call_begin("pipe_context", "set_sampler_textures"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_textures); + trace_dump_arg_array(ptr, textures, num_textures); + + pipe->set_sampler_textures(pipe, num_textures, textures);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_vertex_buffers(struct pipe_context *_pipe, + unsigned num_buffers, + const struct pipe_vertex_buffer *buffers) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + unsigned i; + + for(i = 0; i < num_buffers; ++i) + trace_winsys_user_buffer_update(_pipe->winsys, buffers[i].buffer); + + trace_dump_call_begin("pipe_context", "set_vertex_buffers"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_buffers); + + trace_dump_arg_begin("buffers"); + trace_dump_struct_array(vertex_buffer, buffers, num_buffers); + trace_dump_arg_end(); + + pipe->set_vertex_buffers(pipe, num_buffers, buffers);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_set_vertex_elements(struct pipe_context *_pipe, + unsigned num_elements, + const struct pipe_vertex_element *elements) +{ + struct trace_context 
*tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "set_vertex_elements"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, num_elements); + + trace_dump_arg_begin("elements"); + trace_dump_struct_array(vertex_element, elements, num_elements); + trace_dump_arg_end(); + + pipe->set_vertex_elements(pipe, num_elements, elements);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_surface_copy(struct pipe_context *_pipe, + boolean do_flip, + struct pipe_surface *dest, + unsigned destx, unsigned desty, + struct pipe_surface *src, + unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + dest = trace_surface_unwrap(tr_ctx, dest); + src = trace_surface_unwrap(tr_ctx, src); + + trace_dump_call_begin("pipe_context", "surface_copy"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(bool, do_flip); + trace_dump_arg(ptr, dest); + trace_dump_arg(uint, destx); + trace_dump_arg(uint, desty); + trace_dump_arg(ptr, src); + trace_dump_arg(uint, srcx); + trace_dump_arg(uint, srcy); + trace_dump_arg(uint, width); + trace_dump_arg(uint, height); + + pipe->surface_copy(pipe, do_flip, + dest, destx, desty, + src, srcx, srcy, width, height); + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_surface_fill(struct pipe_context *_pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, + unsigned value) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + dst = trace_surface_unwrap(tr_ctx, dst); + + trace_dump_call_begin("pipe_context", "surface_fill"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, dst); + trace_dump_arg(uint, dstx); + trace_dump_arg(uint, dsty); + trace_dump_arg(uint, width); + trace_dump_arg(uint, height); + + pipe->surface_fill(pipe, dst, dstx, dsty, 
width, height, value);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_clear(struct pipe_context *_pipe, + struct pipe_surface *surface, + unsigned clearValue) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + surface = trace_surface_unwrap(tr_ctx, surface); + + trace_dump_call_begin("pipe_context", "clear"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(ptr, surface); + trace_dump_arg(uint, clearValue); + + pipe->clear(pipe, surface, clearValue);; + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_flush(struct pipe_context *_pipe, + unsigned flags, + struct pipe_fence_handle **fence) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "flush"); + + trace_dump_arg(ptr, pipe); + trace_dump_arg(uint, flags); + + pipe->flush(pipe, flags, fence);; + + if(fence) + trace_dump_ret(ptr, *fence); + + trace_dump_call_end(); +} + + +static INLINE void +trace_context_destroy(struct pipe_context *_pipe) +{ + struct trace_context *tr_ctx = trace_context(_pipe); + struct pipe_context *pipe = tr_ctx->pipe; + + trace_dump_call_begin("pipe_context", "destroy"); + + trace_dump_arg(ptr, pipe); + + pipe->destroy(pipe); + + trace_dump_call_end(); + + FREE(tr_ctx); +} + + +struct pipe_context * +trace_context_create(struct pipe_screen *screen, + struct pipe_context *pipe) +{ + struct trace_context *tr_ctx; + + if(!pipe) + goto error1; + + if(!trace_dump_enabled()) + goto error1; + + tr_ctx = CALLOC_STRUCT(trace_context); + if(!tr_ctx) + goto error1; + + tr_ctx->base.winsys = screen->winsys; + tr_ctx->base.screen = screen; + tr_ctx->base.destroy = trace_context_destroy; + tr_ctx->base.set_edgeflags = trace_context_set_edgeflags; + tr_ctx->base.draw_arrays = trace_context_draw_arrays; + tr_ctx->base.draw_elements = trace_context_draw_elements; + tr_ctx->base.draw_range_elements = 
trace_context_draw_range_elements; + tr_ctx->base.create_query = trace_context_create_query; + tr_ctx->base.destroy_query = trace_context_destroy_query; + tr_ctx->base.begin_query = trace_context_begin_query; + tr_ctx->base.end_query = trace_context_end_query; + tr_ctx->base.get_query_result = trace_context_get_query_result; + tr_ctx->base.create_blend_state = trace_context_create_blend_state; + tr_ctx->base.bind_blend_state = trace_context_bind_blend_state; + tr_ctx->base.delete_blend_state = trace_context_delete_blend_state; + tr_ctx->base.create_sampler_state = trace_context_create_sampler_state; + tr_ctx->base.bind_sampler_states = trace_context_bind_sampler_states; + tr_ctx->base.delete_sampler_state = trace_context_delete_sampler_state; + tr_ctx->base.create_rasterizer_state = trace_context_create_rasterizer_state; + tr_ctx->base.bind_rasterizer_state = trace_context_bind_rasterizer_state; + tr_ctx->base.delete_rasterizer_state = trace_context_delete_rasterizer_state; + tr_ctx->base.create_depth_stencil_alpha_state = trace_context_create_depth_stencil_alpha_state; + tr_ctx->base.bind_depth_stencil_alpha_state = trace_context_bind_depth_stencil_alpha_state; + tr_ctx->base.delete_depth_stencil_alpha_state = trace_context_delete_depth_stencil_alpha_state; + tr_ctx->base.create_fs_state = trace_context_create_fs_state; + tr_ctx->base.bind_fs_state = trace_context_bind_fs_state; + tr_ctx->base.delete_fs_state = trace_context_delete_fs_state; + tr_ctx->base.create_vs_state = trace_context_create_vs_state; + tr_ctx->base.bind_vs_state = trace_context_bind_vs_state; + tr_ctx->base.delete_vs_state = trace_context_delete_vs_state; + tr_ctx->base.set_blend_color = trace_context_set_blend_color; + tr_ctx->base.set_clip_state = trace_context_set_clip_state; + tr_ctx->base.set_constant_buffer = trace_context_set_constant_buffer; + tr_ctx->base.set_framebuffer_state = trace_context_set_framebuffer_state; + tr_ctx->base.set_polygon_stipple = 
trace_context_set_polygon_stipple; + tr_ctx->base.set_scissor_state = trace_context_set_scissor_state; + tr_ctx->base.set_viewport_state = trace_context_set_viewport_state; + tr_ctx->base.set_sampler_textures = trace_context_set_sampler_textures; + tr_ctx->base.set_vertex_buffers = trace_context_set_vertex_buffers; + tr_ctx->base.set_vertex_elements = trace_context_set_vertex_elements; + tr_ctx->base.surface_copy = trace_context_surface_copy; + tr_ctx->base.surface_fill = trace_context_surface_fill; + tr_ctx->base.clear = trace_context_clear; + tr_ctx->base.flush = trace_context_flush; + + tr_ctx->pipe = pipe; + + trace_dump_call_begin("", "pipe_context_create"); + trace_dump_arg_begin("screen"); + trace_dump_ptr(pipe->screen); + trace_dump_arg_end(); + trace_dump_ret(ptr, pipe); + trace_dump_call_end(); + + return &tr_ctx->base; + +error1: + return pipe; +} diff --git a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h new file mode 100644 index 0000000000..7831900ec2 --- /dev/null +++ b/src/gallium/drivers/trace/tr_context.h @@ -0,0 +1,68 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TR_CONTEXT_H_ +#define TR_CONTEXT_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_context.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct trace_context +{ + struct pipe_context base; + + struct pipe_context *pipe; +}; + + +static INLINE struct trace_context * +trace_context(struct pipe_context *pipe) +{ + assert(pipe); + return (struct trace_context *)pipe; +} + + + +struct pipe_context * +trace_context_create(struct pipe_screen *screen, + struct pipe_context *pipe); + + +#ifdef __cplusplus +} +#endif + +#endif /* TR_CONTEXT_H_ */ diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c new file mode 100644 index 0000000000..a0ead0ded3 --- /dev/null +++ b/src/gallium/drivers/trace/tr_dump.c @@ -0,0 +1,404 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Trace dumping functions. + * + * For now we just use standard XML for dumping the trace calls, as this is + * simple to write, parse, and visually inspect, but the actual representation + * is abstracted out of this file, so that we can switch to a binary + * representation if/when it becomes justified. 
+ * + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + +#include "pipe/p_config.h" + +#if defined(PIPE_OS_LINUX) +#include <stdlib.h> +#endif + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "util/u_memory.h" +#include "util/u_string.h" +#include "util/u_stream.h" + +#include "tr_dump.h" + + +static struct util_stream *stream = NULL; +static unsigned refcount = 0; + + +static INLINE void +trace_dump_write(const char *buf, size_t size) +{ + if(stream) + util_stream_write(stream, buf, size); +} + + +static INLINE void +trace_dump_writes(const char *s) +{ + trace_dump_write(s, strlen(s)); +} + + +static INLINE void +trace_dump_writef(const char *format, ...) +{ + static char buf[1024]; + unsigned len; + va_list ap; + va_start(ap, format); + len = util_vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + trace_dump_write(buf, len); +} + + +static INLINE void +trace_dump_escape(const char *str) +{ + const unsigned char *p = (const unsigned char *)str; + unsigned char c; + while((c = *p++) != 0) { + if(c == '<') + trace_dump_writes("<"); + else if(c == '>') + trace_dump_writes(">"); + else if(c == '&') + trace_dump_writes("&"); + else if(c == '\'') + trace_dump_writes("'"); + else if(c == '\"') + trace_dump_writes("""); + else if(c >= 0x20 && c <= 0x7e) + trace_dump_writef("%c", c); + else + trace_dump_writef("&#%u;", c); + } +} + + +static INLINE void +trace_dump_indent(unsigned level) +{ + unsigned i; + for(i = 0; i < level; ++i) + trace_dump_writes("\t"); +} + + +static INLINE void +trace_dump_newline(void) +{ + trace_dump_writes("\n"); +} + + +static INLINE void +trace_dump_tag(const char *name) +{ + trace_dump_writes("<"); + trace_dump_writes(name); + trace_dump_writes("/>"); +} + + +static INLINE void +trace_dump_tag_begin(const char *name) +{ + trace_dump_writes("<"); + trace_dump_writes(name); + trace_dump_writes(">"); +} + +static INLINE void +trace_dump_tag_begin1(const char *name, + const char *attr1, const char *value1) 
+{ + trace_dump_writes("<"); + trace_dump_writes(name); + trace_dump_writes(" "); + trace_dump_writes(attr1); + trace_dump_writes("='"); + trace_dump_escape(value1); + trace_dump_writes("'>"); +} + + +static INLINE void +trace_dump_tag_begin2(const char *name, + const char *attr1, const char *value1, + const char *attr2, const char *value2) +{ + trace_dump_writes("<"); + trace_dump_writes(name); + trace_dump_writes(" "); + trace_dump_writes(attr1); + trace_dump_writes("=\'"); + trace_dump_escape(value1); + trace_dump_writes("\' "); + trace_dump_writes(attr2); + trace_dump_writes("=\'"); + trace_dump_escape(value2); + trace_dump_writes("\'>"); +} + + +static INLINE void +trace_dump_tag_begin3(const char *name, + const char *attr1, const char *value1, + const char *attr2, const char *value2, + const char *attr3, const char *value3) +{ + trace_dump_writes("<"); + trace_dump_writes(name); + trace_dump_writes(" "); + trace_dump_writes(attr1); + trace_dump_writes("=\'"); + trace_dump_escape(value1); + trace_dump_writes("\' "); + trace_dump_writes(attr2); + trace_dump_writes("=\'"); + trace_dump_escape(value2); + trace_dump_writes("\' "); + trace_dump_writes(attr3); + trace_dump_writes("=\'"); + trace_dump_escape(value3); + trace_dump_writes("\'>"); +} + + +static INLINE void +trace_dump_tag_end(const char *name) +{ + trace_dump_writes("</"); + trace_dump_writes(name); + trace_dump_writes(">"); +} + +static void +trace_dump_trace_close(void) +{ + if(stream) { + trace_dump_writes("</trace>\n"); + util_stream_close(stream); + stream = NULL; + refcount = 0; + } +} + +boolean trace_dump_trace_begin() +{ + const char *filename; + + filename = debug_get_option("GALLIUM_TRACE", NULL); + if(!filename) + return FALSE; + + if(!stream) { + + stream = util_stream_create(filename, 0); + if(!stream) + return FALSE; + + trace_dump_writes("<?xml version='1.0' encoding='UTF-8'?>\n"); + trace_dump_writes("<?xml-stylesheet type='text/xsl' href='trace.xsl'?>\n"); + trace_dump_writes("<trace 
version='0.1'>\n"); + +#if defined(PIPE_OS_LINUX) + /* Linux applications rarely cleanup GL / Gallium resources so catch + * application exit here */ + atexit(trace_dump_trace_close); +#endif + } + + ++refcount; + + return TRUE; +} + +boolean trace_dump_enabled(void) +{ + return stream ? TRUE : FALSE; +} + +void trace_dump_trace_end(void) +{ + if(stream) + if(!--refcount) + trace_dump_trace_close(); +} + +void trace_dump_call_begin(const char *klass, const char *method) +{ + trace_dump_indent(1); + trace_dump_tag_begin2("call", "class", klass, "method", method); + trace_dump_newline(); +} + +void trace_dump_call_end(void) +{ + trace_dump_indent(1); + trace_dump_tag_end("call"); + trace_dump_newline(); + util_stream_flush(stream); +} + +void trace_dump_arg_begin(const char *name) +{ + trace_dump_indent(2); + trace_dump_tag_begin1("arg", "name", name); +} + +void trace_dump_arg_end(void) +{ + trace_dump_tag_end("arg"); + trace_dump_newline(); +} + +void trace_dump_ret_begin(void) +{ + trace_dump_indent(2); + trace_dump_tag_begin("ret"); +} + +void trace_dump_ret_end(void) +{ + trace_dump_tag_end("ret"); + trace_dump_newline(); +} + +void trace_dump_bool(int value) +{ + trace_dump_writef("<bool>%c</bool>", value ? 
'1' : '0'); +} + +void trace_dump_int(long long int value) +{ + trace_dump_writef("<int>%lli</int>", value); +} + +void trace_dump_uint(long long unsigned value) +{ + trace_dump_writef("<uint>%llu</uint>", value); +} + +void trace_dump_float(double value) +{ + trace_dump_writef("<float>%g</float>", value); +} + +void trace_dump_bytes(const void *data, + long unsigned size) +{ + static const char hex_table[16] = "0123456789ABCDEF"; + const uint8_t *p = data; + long unsigned i; + trace_dump_writes("<bytes>"); + for(i = 0; i < size; ++i) { + uint8_t byte = *p++; + char hex[2]; + hex[0] = hex_table[byte >> 4]; + hex[1] = hex_table[byte & 0xf]; + trace_dump_write(hex, 2); + } + trace_dump_writes("</bytes>"); +} + +void trace_dump_string(const char *str) +{ + trace_dump_writes("<string>"); + trace_dump_escape(str); + trace_dump_writes("</string>"); +} + +void trace_dump_enum(const char *value) +{ + trace_dump_writes("<enum>"); + trace_dump_escape(value); + trace_dump_writes("</enum>"); +} + +void trace_dump_array_begin(void) +{ + trace_dump_writes("<array>"); +} + +void trace_dump_array_end(void) +{ + trace_dump_writes("</array>"); +} + +void trace_dump_elem_begin(void) +{ + trace_dump_writes("<elem>"); +} + +void trace_dump_elem_end(void) +{ + trace_dump_writes("</elem>"); +} + +void trace_dump_struct_begin(const char *name) +{ + trace_dump_writef("<struct name='%s'>", name); +} + +void trace_dump_struct_end(void) +{ + trace_dump_writes("</struct>"); +} + +void trace_dump_member_begin(const char *name) +{ + trace_dump_writef("<member name='%s'>", name); +} + +void trace_dump_member_end(void) +{ + trace_dump_writes("</member>"); +} + +void trace_dump_null(void) +{ + trace_dump_writes("<null/>"); +} + +void trace_dump_ptr(const void *value) +{ + if(value) + trace_dump_writef("<ptr>0x%08lx</ptr>", (unsigned long)(uintptr_t)value); + else + trace_dump_null(); +} diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h new file mode 100644 index 
0000000000..76a53731b3 --- /dev/null +++ b/src/gallium/drivers/trace/tr_dump.h @@ -0,0 +1,132 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Trace data dumping primitives. 
+ */ + +#ifndef TR_DUMP_H +#define TR_DUMP_H + + +#include "pipe/p_compiler.h" + + +boolean trace_dump_trace_begin(void); +boolean trace_dump_enabled(void); +void trace_dump_trace_end(void); +void trace_dump_call_begin(const char *klass, const char *method); +void trace_dump_call_end(void); +void trace_dump_arg_begin(const char *name); +void trace_dump_arg_end(void); +void trace_dump_ret_begin(void); +void trace_dump_ret_end(void); +void trace_dump_bool(int value); +void trace_dump_int(long long int value); +void trace_dump_uint(long long unsigned value); +void trace_dump_float(double value); +void trace_dump_bytes(const void *data, long unsigned size); +void trace_dump_string(const char *str); +void trace_dump_enum(const char *value); +void trace_dump_array_begin(void); +void trace_dump_array_end(void); +void trace_dump_elem_begin(void); +void trace_dump_elem_end(void); +void trace_dump_struct_begin(const char *name); +void trace_dump_struct_end(void); +void trace_dump_member_begin(const char *name); +void trace_dump_member_end(void); +void trace_dump_null(void); +void trace_dump_ptr(const void *value); + + +/* + * Code saving macros. 
+ */ + +#define trace_dump_arg(_type, _arg) \ + do { \ + trace_dump_arg_begin(#_arg); \ + trace_dump_##_type(_arg); \ + trace_dump_arg_end(); \ + } while(0) + +#define trace_dump_ret(_type, _arg) \ + do { \ + trace_dump_ret_begin(); \ + trace_dump_##_type(_arg); \ + trace_dump_ret_end(); \ + } while(0) + +#define trace_dump_array(_type, _obj, _size) \ + do { \ + unsigned long idx; \ + trace_dump_array_begin(); \ + for(idx = 0; idx < (_size); ++idx) { \ + trace_dump_elem_begin(); \ + trace_dump_##_type((_obj)[idx]); \ + trace_dump_elem_end(); \ + } \ + trace_dump_array_end(); \ + } while(0) + +#define trace_dump_struct_array(_type, _obj, _size) \ + do { \ + unsigned long idx; \ + trace_dump_array_begin(); \ + for(idx = 0; idx < (_size); ++idx) { \ + trace_dump_elem_begin(); \ + trace_dump_##_type(&(_obj)[idx]); \ + trace_dump_elem_end(); \ + } \ + trace_dump_array_end(); \ + } while(0) + +#define trace_dump_member(_type, _obj, _member) \ + do { \ + trace_dump_member_begin(#_member); \ + trace_dump_##_type((_obj)->_member); \ + trace_dump_member_end(); \ + } while(0) + +#define trace_dump_arg_array(_type, _arg, _size) \ + do { \ + trace_dump_arg_begin(#_arg); \ + trace_dump_array(_type, _arg, _size); \ + trace_dump_arg_end(); \ + } while(0) + +#define trace_dump_member_array(_type, _obj, _member) \ + do { \ + trace_dump_member_begin(#_member); \ + trace_dump_array(_type, (_obj)->_member, sizeof((_obj)->_member)/sizeof((_obj)->_member[0])); \ + trace_dump_member_end(); \ + } while(0) + + +#endif /* TR_DUMP_H */ diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c new file mode 100644 index 0000000000..8789f86b1a --- /dev/null +++ b/src/gallium/drivers/trace/tr_screen.c @@ -0,0 +1,469 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_memory.h" + +#include "tr_dump.h" +#include "tr_state.h" +#include "tr_winsys.h" +#include "tr_texture.h" +#include "tr_screen.h" + + +static const char * +trace_screen_get_name(struct pipe_screen *_screen) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + const char *result; + + trace_dump_call_begin("pipe_screen", "get_name"); + + trace_dump_arg(ptr, screen); + + result = screen->get_name(screen); + + trace_dump_ret(string, result); + + trace_dump_call_end(); + + return result; +} + + +static const char * +trace_screen_get_vendor(struct pipe_screen *_screen) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + const char *result; + + trace_dump_call_begin("pipe_screen", "get_vendor"); + + trace_dump_arg(ptr, screen); + + result = screen->get_vendor(screen); + + trace_dump_ret(string, result); + + trace_dump_call_end(); + + return result; +} + + +static int +trace_screen_get_param(struct pipe_screen *_screen, + int param) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + int result; + + trace_dump_call_begin("pipe_screen", "get_param"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(int, param); + + result = screen->get_param(screen, param); + + trace_dump_ret(int, result); + + trace_dump_call_end(); + + return result; +} + + +static float +trace_screen_get_paramf(struct pipe_screen *_screen, + int param) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + float result; + + trace_dump_call_begin("pipe_screen", "get_paramf"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(int, param); + + result = screen->get_paramf(screen, param); + + trace_dump_ret(float, result); + + trace_dump_call_end(); + + return result; +} + + +static boolean 
+trace_screen_is_format_supported(struct pipe_screen *_screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + boolean result; + + trace_dump_call_begin("pipe_screen", "is_format_supported"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(format, format); + trace_dump_arg(int, target); + trace_dump_arg(uint, tex_usage); + trace_dump_arg(uint, geom_flags); + + result = screen->is_format_supported(screen, format, target, tex_usage, geom_flags); + + trace_dump_ret(bool, result); + + trace_dump_call_end(); + + return result; +} + + +static struct pipe_texture * +trace_screen_texture_create(struct pipe_screen *_screen, + const struct pipe_texture *templat) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + struct pipe_texture *result; + + trace_dump_call_begin("pipe_screen", "texture_create"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(template, templat); + + result = screen->texture_create(screen, templat); + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + result = trace_texture_create(tr_scr, result); + + return result; +} + + +static struct pipe_texture * +trace_screen_texture_blanket(struct pipe_screen *_screen, + const struct pipe_texture *templat, + const unsigned *ppitch, + struct pipe_buffer *buffer) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + unsigned pitch = *ppitch; + struct pipe_texture *result; + + trace_dump_call_begin("pipe_screen", "texture_blanket"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(template, templat); + trace_dump_arg(uint, pitch); + trace_dump_arg(ptr, buffer); + + result = screen->texture_blanket(screen, templat, ppitch, buffer); + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + result = trace_texture_create(tr_scr, 
result); + + return result; +} + + +static void +trace_screen_texture_release(struct pipe_screen *_screen, + struct pipe_texture **ptexture) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + struct trace_texture *tr_tex; + struct pipe_texture *texture; + + assert(ptexture); + if(*ptexture) { + tr_tex = trace_texture(tr_scr, *ptexture); + texture = tr_tex->texture; + assert(texture->screen == screen); + } + else + texture = NULL; + + if (*ptexture) { + if (!--(*ptexture)->refcount) { + trace_dump_call_begin("pipe_screen", "texture_destroy"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, texture); + + trace_texture_destroy(tr_scr, *ptexture); + + trace_dump_call_end(); + } + + *ptexture = NULL; + } +} + + +static struct pipe_surface * +trace_screen_get_tex_surface(struct pipe_screen *_screen, + struct pipe_texture *texture, + unsigned face, unsigned level, + unsigned zslice, + unsigned usage) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + struct trace_texture *tr_tex; + struct pipe_surface *result; + + assert(texture); + tr_tex = trace_texture(tr_scr, texture); + texture = tr_tex->texture; + assert(texture->screen == screen); + + trace_dump_call_begin("pipe_screen", "get_tex_surface"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, texture); + trace_dump_arg(uint, face); + trace_dump_arg(uint, level); + trace_dump_arg(uint, zslice); + trace_dump_arg(uint, usage); + + result = screen->get_tex_surface(screen, texture, face, level, zslice, usage); + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + result = trace_surface_create(tr_tex, result); + + return result; +} + + +static void +trace_screen_tex_surface_release(struct pipe_screen *_screen, + struct pipe_surface **psurface) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + struct trace_texture *tr_tex; + struct 
trace_surface *tr_surf; + struct pipe_surface *surface; + + assert(psurface); + if(*psurface) { + tr_tex = trace_texture(tr_scr, (*psurface)->texture); + tr_surf = trace_surface(tr_tex, *psurface); + surface = tr_surf->surface; + } + else + surface = NULL; + + if (*psurface) { + if (!--(*psurface)->refcount) { + trace_dump_call_begin("pipe_screen", "tex_surface_destroy"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, surface); + + trace_surface_destroy(tr_tex, *psurface); + + trace_dump_call_end(); + } + + *psurface = NULL; + } +} + + +static void * +trace_screen_surface_map(struct pipe_screen *_screen, + struct pipe_surface *surface, + unsigned flags) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + struct trace_texture *tr_tex; + struct trace_surface *tr_surf; + void *map; + + tr_tex = trace_texture(tr_scr, surface->texture); + tr_surf = trace_surface(tr_tex, surface); + surface = tr_surf->surface; + + map = screen->surface_map(screen, surface, flags); + if(map) { + if(flags & PIPE_BUFFER_USAGE_CPU_WRITE) { + assert(!tr_surf->map); + tr_surf->map = map; + } + } + + return map; +} + + +static void +trace_screen_surface_unmap(struct pipe_screen *_screen, + struct pipe_surface *surface) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + struct trace_texture *tr_tex; + struct trace_surface *tr_surf; + + tr_tex = trace_texture(tr_scr, surface->texture); + tr_surf = trace_surface(tr_tex, surface); + surface = tr_surf->surface; + + if(tr_surf->map) { + size_t size = surface->nblocksy * surface->stride; + + trace_dump_call_begin("pipe_winsys", "surface_write"); + + trace_dump_arg(ptr, screen); + + trace_dump_arg(ptr, surface); + + trace_dump_arg_begin("data"); + trace_dump_bytes(tr_surf->map, size); + trace_dump_arg_end(); + + trace_dump_arg_begin("stride"); + trace_dump_uint(surface->stride); + trace_dump_arg_end(); + + trace_dump_arg_begin("size"); 
+ trace_dump_uint(size); + trace_dump_arg_end(); + + trace_dump_call_end(); + + tr_surf->map = NULL; + } + + screen->surface_unmap(screen, surface); +} + + +static void +trace_screen_destroy(struct pipe_screen *_screen) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + + trace_dump_call_begin("pipe_screen", "destroy"); + + trace_dump_arg(ptr, screen); + + screen->destroy(screen); + + trace_dump_call_end(); + + trace_dump_trace_end(); + + FREE(tr_scr); +} + + +struct pipe_screen * +trace_screen_create(struct pipe_screen *screen) +{ + struct trace_screen *tr_scr; + struct pipe_winsys *winsys; + + if(!screen) + goto error1; + + if(!trace_dump_trace_begin()) + goto error1; + + tr_scr = CALLOC_STRUCT(trace_screen); + if(!tr_scr) + goto error2; + + winsys = trace_winsys_create(screen->winsys); + if(!winsys) + goto error3; + + tr_scr->base.winsys = winsys; + tr_scr->base.destroy = trace_screen_destroy; + tr_scr->base.get_name = trace_screen_get_name; + tr_scr->base.get_vendor = trace_screen_get_vendor; + tr_scr->base.get_param = trace_screen_get_param; + tr_scr->base.get_paramf = trace_screen_get_paramf; + tr_scr->base.is_format_supported = trace_screen_is_format_supported; + tr_scr->base.texture_create = trace_screen_texture_create; + tr_scr->base.texture_blanket = trace_screen_texture_blanket; + tr_scr->base.texture_release = trace_screen_texture_release; + tr_scr->base.get_tex_surface = trace_screen_get_tex_surface; + tr_scr->base.tex_surface_release = trace_screen_tex_surface_release; + tr_scr->base.surface_map = trace_screen_surface_map; + tr_scr->base.surface_unmap = trace_screen_surface_unmap; + + tr_scr->screen = screen; + + trace_dump_call_begin("", "pipe_screen_create"); + trace_dump_arg_begin("winsys"); + trace_dump_ptr(screen->winsys); + trace_dump_arg_end(); + trace_dump_ret(ptr, screen); + trace_dump_call_end(); + + return &tr_scr->base; + +error3: + FREE(tr_scr); +error2: + trace_dump_trace_end(); 
+error1: + return screen; +} + + +struct trace_screen * +trace_screen(struct pipe_screen *screen) +{ + assert(screen); + assert(screen->destroy == trace_screen_destroy); + return (struct trace_screen *)screen; +} diff --git a/src/gallium/drivers/trace/tr_screen.h b/src/gallium/drivers/trace/tr_screen.h new file mode 100644 index 0000000000..93fefdb9a5 --- /dev/null +++ b/src/gallium/drivers/trace/tr_screen.h @@ -0,0 +1,60 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef TR_SCREEN_H_ +#define TR_SCREEN_H_ + + +#include "pipe/p_screen.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct trace_screen +{ + struct pipe_screen base; + + struct pipe_screen *screen; +}; + + +struct trace_screen * +trace_screen(struct pipe_screen *screen); + + +struct pipe_screen * +trace_screen_create(struct pipe_screen *screen); + + +#ifdef __cplusplus +} +#endif + +#endif /* TR_SCREEN_H_ */ diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c new file mode 100644 index 0000000000..986d939e0c --- /dev/null +++ b/src/gallium/drivers/trace/tr_state.c @@ -0,0 +1,464 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_compiler.h" +#include "util/u_memory.h" +#include "tgsi/tgsi_dump.h" + +#include "tr_dump.h" +#include "tr_state.h" + + +void trace_dump_format(enum pipe_format format) +{ + trace_dump_enum(pf_name(format) ); +} + + +void trace_dump_block(const struct pipe_format_block *block) +{ + trace_dump_struct_begin("pipe_format_block"); + trace_dump_member(uint, block, size); + trace_dump_member(uint, block, width); + trace_dump_member(uint, block, height); + trace_dump_struct_end(); +} + + +void trace_dump_template(const struct pipe_texture *templat) +{ + if(!templat) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_texture"); + + trace_dump_member(int, templat, target); + trace_dump_member(format, templat, format); + + trace_dump_member_begin("width"); + trace_dump_array(uint, templat->width, 1); + trace_dump_member_end(); + + trace_dump_member_begin("height"); + trace_dump_array(uint, templat->height, 1); + trace_dump_member_end(); + + trace_dump_member_begin("depth"); + trace_dump_array(uint, templat->depth, 1); + trace_dump_member_end(); + + trace_dump_member_begin("block"); + trace_dump_block(&templat->block); + trace_dump_member_end(); + + trace_dump_member(uint, templat, last_level); + trace_dump_member(uint, templat, tex_usage); + + trace_dump_struct_end(); +} + + +void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_rasterizer_state"); + + trace_dump_member(bool, state, flatshade); + trace_dump_member(bool, state, light_twoside); + trace_dump_member(uint, 
state, front_winding); + trace_dump_member(uint, state, cull_mode); + trace_dump_member(uint, state, fill_cw); + trace_dump_member(uint, state, fill_ccw); + trace_dump_member(bool, state, offset_cw); + trace_dump_member(bool, state, offset_ccw); + trace_dump_member(bool, state, scissor); + trace_dump_member(bool, state, poly_smooth); + trace_dump_member(bool, state, poly_stipple_enable); + trace_dump_member(bool, state, point_smooth); + trace_dump_member(bool, state, point_sprite); + trace_dump_member(bool, state, point_size_per_vertex); + trace_dump_member(bool, state, multisample); + trace_dump_member(bool, state, line_smooth); + trace_dump_member(bool, state, line_stipple_enable); + trace_dump_member(uint, state, line_stipple_factor); + trace_dump_member(uint, state, line_stipple_pattern); + trace_dump_member(bool, state, line_last_pixel); + trace_dump_member(bool, state, bypass_clipping); + trace_dump_member(bool, state, bypass_vs); + trace_dump_member(bool, state, origin_lower_left); + trace_dump_member(bool, state, flatshade_first); + trace_dump_member(bool, state, gl_rasterization_rules); + + trace_dump_member(float, state, line_width); + trace_dump_member(float, state, point_size); + trace_dump_member(float, state, point_size_min); + trace_dump_member(float, state, point_size_max); + trace_dump_member(float, state, offset_units); + trace_dump_member(float, state, offset_scale); + + trace_dump_member_array(uint, state, sprite_coord_mode); + + trace_dump_struct_end(); +} + + +void trace_dump_poly_stipple(const struct pipe_poly_stipple *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_poly_stipple"); + + trace_dump_member_begin("stipple"); + trace_dump_array(uint, + state->stipple, + Elements(state->stipple)); + trace_dump_member_end(); + + trace_dump_struct_end(); +} + + +void trace_dump_viewport_state(const struct pipe_viewport_state *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + 
trace_dump_struct_begin("pipe_viewport_state"); + + trace_dump_member_array(float, state, scale); + trace_dump_member_array(float, state, translate); + + trace_dump_struct_end(); +} + + +void trace_dump_scissor_state(const struct pipe_scissor_state *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_scissor_state"); + + trace_dump_member(uint, state, minx); + trace_dump_member(uint, state, miny); + trace_dump_member(uint, state, maxx); + trace_dump_member(uint, state, maxy); + + trace_dump_struct_end(); +} + + +void trace_dump_clip_state(const struct pipe_clip_state *state) +{ + unsigned i; + + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_clip_state"); + + trace_dump_member_begin("ucp"); + trace_dump_array_begin(); + for(i = 0; i < PIPE_MAX_CLIP_PLANES; ++i) { + trace_dump_elem_begin(); + trace_dump_array(float, state->ucp[i], 4); + trace_dump_elem_end(); + } + trace_dump_array_end(); + trace_dump_member_end(); + + trace_dump_member(uint, state, nr); + + trace_dump_struct_end(); +} + + +void trace_dump_constant_buffer(const struct pipe_constant_buffer *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_constant_buffer"); + + trace_dump_member(ptr, state, buffer); + trace_dump_member(uint, state, size); + + trace_dump_struct_end(); +} + + +void trace_dump_shader_state(const struct pipe_shader_state *state) +{ + static char str[8192]; + + if(!state) { + trace_dump_null(); + return; + } + + tgsi_dump_str(state->tokens, 0, str, sizeof(str)); + + trace_dump_struct_begin("pipe_shader_state"); + + trace_dump_member_begin("tokens"); + trace_dump_string(str); + trace_dump_member_end(); + + trace_dump_struct_end(); +} + + +void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state) +{ + unsigned i; + + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_depth_stencil_alpha_state"); + + 
trace_dump_member_begin("depth"); + trace_dump_struct_begin("pipe_depth_state"); + trace_dump_member(bool, &state->depth, enabled); + trace_dump_member(bool, &state->depth, writemask); + trace_dump_member(uint, &state->depth, func); + trace_dump_member(bool, &state->depth, occlusion_count); + trace_dump_struct_end(); + trace_dump_member_end(); + + trace_dump_member_begin("stencil"); + trace_dump_array_begin(); + for(i = 0; i < Elements(state->stencil); ++i) { + trace_dump_elem_begin(); + trace_dump_struct_begin("pipe_stencil_state"); + trace_dump_member(bool, &state->stencil[i], enabled); + trace_dump_member(uint, &state->stencil[i], func); + trace_dump_member(uint, &state->stencil[i], fail_op); + trace_dump_member(uint, &state->stencil[i], zpass_op); + trace_dump_member(uint, &state->stencil[i], zfail_op); + trace_dump_member(uint, &state->stencil[i], ref_value); + trace_dump_member(uint, &state->stencil[i], value_mask); + trace_dump_member(uint, &state->stencil[i], write_mask); + trace_dump_struct_end(); + trace_dump_elem_end(); + } + trace_dump_array_end(); + trace_dump_member_end(); + + trace_dump_member_begin("alpha"); + trace_dump_struct_begin("pipe_alpha_state"); + trace_dump_member(bool, &state->alpha, enabled); + trace_dump_member(uint, &state->alpha, func); + trace_dump_member(float, &state->alpha, ref); + trace_dump_struct_end(); + trace_dump_member_end(); + + trace_dump_struct_end(); +} + + +void trace_dump_blend_state(const struct pipe_blend_state *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_blend_state"); + + trace_dump_member(bool, state, blend_enable); + + trace_dump_member(uint, state, rgb_func); + trace_dump_member(uint, state, rgb_src_factor); + trace_dump_member(uint, state, rgb_dst_factor); + + trace_dump_member(uint, state, alpha_func); + trace_dump_member(uint, state, alpha_src_factor); + trace_dump_member(uint, state, alpha_dst_factor); + + trace_dump_member(bool, state, logicop_enable); + 
trace_dump_member(uint, state, logicop_func); + + trace_dump_member(uint, state, colormask); + trace_dump_member(bool, state, dither); + + trace_dump_struct_end(); +} + + +void trace_dump_blend_color(const struct pipe_blend_color *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_blend_color"); + + trace_dump_member_array(float, state, color); + + trace_dump_struct_end(); +} + + +void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state) +{ + trace_dump_struct_begin("pipe_framebuffer_state"); + + trace_dump_member(uint, state, width); + trace_dump_member(uint, state, height); + trace_dump_member(uint, state, num_cbufs); + trace_dump_member_array(ptr, state, cbufs); + trace_dump_member(ptr, state, zsbuf); + + trace_dump_struct_end(); +} + + +void trace_dump_sampler_state(const struct pipe_sampler_state *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_sampler_state"); + + trace_dump_member(uint, state, wrap_s); + trace_dump_member(uint, state, wrap_t); + trace_dump_member(uint, state, wrap_r); + trace_dump_member(uint, state, min_img_filter); + trace_dump_member(uint, state, min_mip_filter); + trace_dump_member(uint, state, mag_img_filter); + trace_dump_member(bool, state, compare_mode); + trace_dump_member(uint, state, compare_func); + trace_dump_member(bool, state, normalized_coords); + trace_dump_member(uint, state, prefilter); + trace_dump_member(float, state, shadow_ambient); + trace_dump_member(float, state, lod_bias); + trace_dump_member(float, state, min_lod); + trace_dump_member(float, state, max_lod); + trace_dump_member_array(float, state, border_color); + trace_dump_member(float, state, max_anisotropy); + + trace_dump_struct_end(); +} + + +void trace_dump_surface(const struct pipe_surface *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_surface"); + + trace_dump_member(ptr, state, buffer); + 
trace_dump_member(format, state, format); + trace_dump_member(uint, state, status); + trace_dump_member(uint, state, clear_value); + trace_dump_member(uint, state, width); + trace_dump_member(uint, state, height); + + trace_dump_member_begin("block"); + trace_dump_block(&state->block); + trace_dump_member_end(); + + trace_dump_member(uint, state, nblocksx); + trace_dump_member(uint, state, nblocksy); + trace_dump_member(uint, state, stride); + trace_dump_member(uint, state, layout); + trace_dump_member(uint, state, offset); + trace_dump_member(uint, state, refcount); + trace_dump_member(uint, state, usage); + + trace_dump_member(ptr, state, texture); + trace_dump_member(uint, state, face); + trace_dump_member(uint, state, level); + trace_dump_member(uint, state, zslice); + + trace_dump_struct_end(); +} + + +void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_vertex_buffer"); + + trace_dump_member(uint, state, pitch); + trace_dump_member(uint, state, max_index); + trace_dump_member(uint, state, buffer_offset); + trace_dump_member(ptr, state, buffer); + + trace_dump_struct_end(); +} + + +void trace_dump_vertex_element(const struct pipe_vertex_element *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_vertex_element"); + + trace_dump_member(uint, state, src_offset); + + trace_dump_member(uint, state, vertex_buffer_index); + trace_dump_member(uint, state, nr_components); + + trace_dump_member(format, state, src_format); + + trace_dump_struct_end(); +} diff --git a/src/gallium/drivers/trace/tr_state.h b/src/gallium/drivers/trace/tr_state.h new file mode 100644 index 0000000000..5ae533dc66 --- /dev/null +++ b/src/gallium/drivers/trace/tr_state.h @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. 
+ * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef TR_STATE_H +#define TR_STATE_H + +#include "pipe/p_format.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" + + +void trace_dump_format(enum pipe_format format); + +void trace_dump_block(const struct pipe_format_block *block); + +void trace_dump_template(const struct pipe_texture *templat); + + +void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state); + +void trace_dump_poly_stipple(const struct pipe_poly_stipple *state); + +void trace_dump_viewport_state(const struct pipe_viewport_state *state); + +void trace_dump_scissor_state(const struct pipe_scissor_state *state); + +void trace_dump_clip_state(const struct pipe_clip_state *state); + +void trace_dump_constant_buffer(const struct pipe_constant_buffer *state); + +void trace_dump_token(const struct tgsi_token *token); + +void trace_dump_shader_state(const struct pipe_shader_state *state); + +void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_state *state); + +void trace_dump_blend_state(const struct pipe_blend_state *state); + +void trace_dump_blend_color(const struct pipe_blend_color *state); + +void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state); + +void trace_dump_sampler_state(const struct pipe_sampler_state *state); + +void trace_dump_surface(const struct pipe_surface *state); + +void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state); + +void trace_dump_vertex_element(const struct pipe_vertex_element *state); + + +#endif /* TR_STATE_H */ diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c new file mode 100644 index 0000000000..440a78704a --- /dev/null +++ b/src/gallium/drivers/trace/tr_texture.c @@ -0,0 +1,112 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. 
+ * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "pipe/p_inlines.h" +#include "util/u_hash_table.h" +#include "util/u_memory.h" + +#include "tr_screen.h" +#include "tr_texture.h" + + +struct pipe_texture * +trace_texture_create(struct trace_screen *tr_scr, + struct pipe_texture *texture) +{ + struct trace_texture *tr_tex; + + if(!texture) + goto error; + + assert(texture->screen == tr_scr->screen); + + tr_tex = CALLOC_STRUCT(trace_texture); + if(!tr_tex) + goto error; + + memcpy(&tr_tex->base, texture, sizeof(struct pipe_texture)); + tr_tex->base.screen = &tr_scr->base; + tr_tex->texture = texture; + + return &tr_tex->base; + +error: + pipe_texture_reference(&texture, NULL); + return NULL; +} + + +void +trace_texture_destroy(struct trace_screen *tr_scr, + struct pipe_texture *texture) +{ + struct trace_texture *tr_tex = trace_texture(tr_scr, texture); + pipe_texture_reference(&tr_tex->texture, NULL); + FREE(tr_tex); +} + + +struct pipe_surface * +trace_surface_create(struct trace_texture *tr_tex, + struct pipe_surface *surface) +{ + struct trace_surface *tr_surf; + + if(!surface) + goto error; + + assert(surface->texture == tr_tex->texture); + + tr_surf = CALLOC_STRUCT(trace_surface); + if(!tr_surf) + goto error; + + memcpy(&tr_surf->base, surface, sizeof(struct pipe_surface)); + + tr_surf->base.winsys = tr_tex->base.screen->winsys; + tr_surf->base.texture = NULL; + pipe_texture_reference(&tr_surf->base.texture, &tr_tex->base); + tr_surf->surface = surface; + + return &tr_surf->base; + +error: + pipe_surface_reference(&surface, NULL); + return NULL; +} + + +void +trace_surface_destroy(struct trace_texture *tr_tex, + struct pipe_surface *surface) +{ + struct trace_surface *tr_surf = trace_surface(tr_tex, surface); + pipe_texture_reference(&tr_surf->base.texture, NULL); + pipe_surface_reference(&tr_surf->surface, NULL); + FREE(tr_surf); +} + diff --git a/src/gallium/drivers/trace/tr_texture.h 
b/src/gallium/drivers/trace/tr_texture.h new file mode 100644 index 0000000000..9e72edb8a3 --- /dev/null +++ b/src/gallium/drivers/trace/tr_texture.h @@ -0,0 +1,95 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef TR_TEXTURE_H_ +#define TR_TEXTURE_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + +#include "tr_screen.h" + + +struct trace_texture +{ + struct pipe_texture base; + + struct pipe_texture *texture; +}; + + +struct trace_surface +{ + struct pipe_surface base; + + struct pipe_surface *surface; + + void *map; +}; + + +static INLINE struct trace_texture * +trace_texture(struct trace_screen *tr_scr, + struct pipe_texture *texture) +{ + if(!texture) + return NULL; + assert(texture->screen == &tr_scr->base); + return (struct trace_texture *)texture; +} + + +static INLINE struct trace_surface * +trace_surface(struct trace_texture *tr_tex, + struct pipe_surface *surface) +{ + if(!surface) + return NULL; + assert(surface->texture == &tr_tex->base); + return (struct trace_surface *)surface; +} + + +struct pipe_texture * +trace_texture_create(struct trace_screen *tr_scr, + struct pipe_texture *texture); + +void +trace_texture_destroy(struct trace_screen *tr_scr, + struct pipe_texture *texture); + +struct pipe_surface * +trace_surface_create(struct trace_texture *tr_tex, + struct pipe_surface *surface); + +void +trace_surface_destroy(struct trace_texture *tr_tex, + struct pipe_surface *surface); + + +#endif /* TR_TEXTURE_H_ */ diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c new file mode 100644 index 0000000000..177835854e --- /dev/null +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -0,0 +1,497 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_memory.h" +#include "util/u_hash_table.h" + +#include "tr_dump.h" +#include "tr_state.h" +#include "tr_screen.h" +#include "tr_texture.h" +#include "tr_winsys.h" + + +static unsigned trace_buffer_hash(void *buffer) +{ + return (unsigned)(uintptr_t)buffer; +} + + +static int trace_buffer_compare(void *buffer1, void *buffer2) +{ + return (char *)buffer2 - (char *)buffer1; +} + + +static const char * +trace_winsys_get_name(struct pipe_winsys *_winsys) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + const char *result; + + trace_dump_call_begin("pipe_winsys", "get_name"); + + trace_dump_arg(ptr, winsys); + + result = winsys->get_name(winsys); + + trace_dump_ret(string, result); + + trace_dump_call_end(); + + return result; +} + + +static void +trace_winsys_flush_frontbuffer(struct pipe_winsys *_winsys, + struct pipe_surface *surface, + void *context_private) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + + assert(surface); + if(surface->texture) { + struct trace_screen *tr_scr = trace_screen(surface->texture->screen); + struct trace_texture *tr_tex = trace_texture(tr_scr, surface->texture); + struct trace_surface *tr_surf = trace_surface(tr_tex, surface); + surface = tr_surf->surface; + } + + trace_dump_call_begin("pipe_winsys", "flush_frontbuffer"); + + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, surface); + /* XXX: hide, as there is nothing we can do with this + trace_dump_arg(ptr, context_private); + */ + + winsys->flush_frontbuffer(winsys, surface, context_private); + + trace_dump_call_end(); +} + + +static struct pipe_surface * +trace_winsys_surface_alloc(struct pipe_winsys *_winsys) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + struct pipe_surface *result; + + 
trace_dump_call_begin("pipe_winsys", "surface_alloc"); + + trace_dump_arg(ptr, winsys); + + result = winsys->surface_alloc(winsys); + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + assert(!result || !result->texture); + + return result; +} + + +static int +trace_winsys_surface_alloc_storage(struct pipe_winsys *_winsys, + struct pipe_surface *surface, + unsigned width, unsigned height, + enum pipe_format format, + unsigned flags, + unsigned tex_usage) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + int result; + + assert(surface && !surface->texture); + + trace_dump_call_begin("pipe_winsys", "surface_alloc_storage"); + + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, surface); + trace_dump_arg(uint, width); + trace_dump_arg(uint, height); + trace_dump_arg(format, format); + trace_dump_arg(uint, flags); + trace_dump_arg(uint, tex_usage); + + result = winsys->surface_alloc_storage(winsys, + surface, + width, height, + format, + flags, + tex_usage); + + trace_dump_ret(int, result); + + trace_dump_call_end(); + + return result; +} + + +static void +trace_winsys_surface_release(struct pipe_winsys *_winsys, + struct pipe_surface **psurface) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + struct pipe_surface *surface = *psurface; + + assert(psurface && *psurface && !(*psurface)->texture); + + trace_dump_call_begin("pipe_winsys", "surface_release"); + + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, surface); + + winsys->surface_release(winsys, psurface); + + trace_dump_call_end(); +} + + +static struct pipe_buffer * +trace_winsys_buffer_create(struct pipe_winsys *_winsys, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + struct pipe_buffer *buffer; + + trace_dump_call_begin("pipe_winsys", "buffer_create"); + + 
trace_dump_arg(ptr, winsys); + trace_dump_arg(uint, alignment); + trace_dump_arg(uint, usage); + trace_dump_arg(uint, size); + + buffer = winsys->buffer_create(winsys, alignment, usage, size); + + trace_dump_ret(ptr, buffer); + + trace_dump_call_end(); + + /* Zero the buffer to avoid dumping uninitialized memory */ + if(buffer->usage & PIPE_BUFFER_USAGE_CPU_WRITE) { + void *map; + map = winsys->buffer_map(winsys, buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + if(map) { + memset(map, 0, buffer->size); + winsys->buffer_unmap(winsys, buffer); + } + } + + return buffer; +} + + +static struct pipe_buffer * +trace_winsys_user_buffer_create(struct pipe_winsys *_winsys, + void *data, + unsigned size) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + struct pipe_buffer *result; + + trace_dump_call_begin("pipe_winsys", "user_buffer_create"); + + trace_dump_arg(ptr, winsys); + trace_dump_arg_begin("data"); + trace_dump_bytes(data, size); + trace_dump_arg_end(); + trace_dump_arg(uint, size); + + result = winsys->user_buffer_create(winsys, data, size); + + trace_dump_ret(ptr, result); + + trace_dump_call_end(); + + /* XXX: Mark the user buffers. 
(we should wrap pipe_buffers, but is is + * impossible to do so while texture-less surfaces are still around */ + if(result) { + assert(!(result->usage & TRACE_BUFFER_USAGE_USER)); + result->usage |= TRACE_BUFFER_USAGE_USER; + } + + return result; +} + + +void +trace_winsys_user_buffer_update(struct pipe_winsys *_winsys, + struct pipe_buffer *buffer) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + const void *map; + + if(buffer && buffer->usage & TRACE_BUFFER_USAGE_USER) { + map = winsys->buffer_map(winsys, buffer, PIPE_BUFFER_USAGE_CPU_READ); + if(map) { + trace_dump_call_begin("pipe_winsys", "buffer_write"); + + trace_dump_arg(ptr, winsys); + + trace_dump_arg(ptr, buffer); + + trace_dump_arg_begin("data"); + trace_dump_bytes(map, buffer->size); + trace_dump_arg_end(); + + trace_dump_arg_begin("size"); + trace_dump_uint(buffer->size); + trace_dump_arg_end(); + + trace_dump_call_end(); + + winsys->buffer_unmap(winsys, buffer); + } + } +} + + +static void * +trace_winsys_buffer_map(struct pipe_winsys *_winsys, + struct pipe_buffer *buffer, + unsigned usage) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + void *map; + + map = winsys->buffer_map(winsys, buffer, usage); + if(map) { + if(usage & PIPE_BUFFER_USAGE_CPU_WRITE) { + assert(!hash_table_get(tr_ws->buffer_maps, buffer)); + hash_table_set(tr_ws->buffer_maps, buffer, map); + } + } + + return map; +} + + +static void +trace_winsys_buffer_unmap(struct pipe_winsys *_winsys, + struct pipe_buffer *buffer) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + const void *map; + + map = hash_table_get(tr_ws->buffer_maps, buffer); + if(map) { + trace_dump_call_begin("pipe_winsys", "buffer_write"); + + trace_dump_arg(ptr, winsys); + + trace_dump_arg(ptr, buffer); + + trace_dump_arg_begin("data"); + trace_dump_bytes(map, buffer->size); + trace_dump_arg_end(); + 
+ trace_dump_arg_begin("size"); + trace_dump_uint(buffer->size); + trace_dump_arg_end(); + + trace_dump_call_end(); + + hash_table_remove(tr_ws->buffer_maps, buffer); + } + + winsys->buffer_unmap(winsys, buffer); +} + + +static void +trace_winsys_buffer_destroy(struct pipe_winsys *_winsys, + struct pipe_buffer *buffer) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + + trace_dump_call_begin("pipe_winsys", "buffer_destroy"); + + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, buffer); + + winsys->buffer_destroy(winsys, buffer); + + trace_dump_call_end(); +} + + +static void +trace_winsys_fence_reference(struct pipe_winsys *_winsys, + struct pipe_fence_handle **pdst, + struct pipe_fence_handle *src) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + struct pipe_fence_handle *dst = *pdst; + + trace_dump_call_begin("pipe_winsys", "fence_reference"); + + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, dst); + trace_dump_arg(ptr, src); + + winsys->fence_reference(winsys, pdst, src); + + trace_dump_call_end(); +} + + +static int +trace_winsys_fence_signalled(struct pipe_winsys *_winsys, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + int result; + + trace_dump_call_begin("pipe_winsys", "fence_signalled"); + + trace_dump_arg(ptr, winsys); + trace_dump_arg(ptr, fence); + trace_dump_arg(uint, flag); + + result = winsys->fence_signalled(winsys, fence, flag); + + trace_dump_ret(int, result); + + trace_dump_call_end(); + + return result; +} + + +static int +trace_winsys_fence_finish(struct pipe_winsys *_winsys, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + int result; + + trace_dump_call_begin("pipe_winsys", "fence_finish"); + + trace_dump_arg(ptr, 
winsys); + trace_dump_arg(ptr, fence); + trace_dump_arg(uint, flag); + + result = winsys->fence_finish(winsys, fence, flag); + + trace_dump_ret(int, result); + + trace_dump_call_end(); + + return result; +} + + +static void +trace_winsys_destroy(struct pipe_winsys *_winsys) +{ + struct trace_winsys *tr_ws = trace_winsys(_winsys); + struct pipe_winsys *winsys = tr_ws->winsys; + + trace_dump_call_begin("pipe_winsys", "destroy"); + + trace_dump_arg(ptr, winsys); + + /* + winsys->destroy(winsys); + */ + + trace_dump_call_end(); + + hash_table_destroy(tr_ws->buffer_maps); + + FREE(tr_ws); +} + + +struct pipe_winsys * +trace_winsys_create(struct pipe_winsys *winsys) +{ + struct trace_winsys *tr_ws; + + if(!winsys) + goto error1; + + tr_ws = CALLOC_STRUCT(trace_winsys); + if(!tr_ws) + goto error1; + + tr_ws->base.destroy = trace_winsys_destroy; + tr_ws->base.get_name = trace_winsys_get_name; + tr_ws->base.flush_frontbuffer = trace_winsys_flush_frontbuffer; + tr_ws->base.surface_alloc = trace_winsys_surface_alloc; + tr_ws->base.surface_alloc_storage = trace_winsys_surface_alloc_storage; + tr_ws->base.surface_release = trace_winsys_surface_release; + tr_ws->base.buffer_create = trace_winsys_buffer_create; + tr_ws->base.user_buffer_create = trace_winsys_user_buffer_create; + tr_ws->base.buffer_map = trace_winsys_buffer_map; + tr_ws->base.buffer_unmap = trace_winsys_buffer_unmap; + tr_ws->base.buffer_destroy = trace_winsys_buffer_destroy; + tr_ws->base.fence_reference = trace_winsys_fence_reference; + tr_ws->base.fence_signalled = trace_winsys_fence_signalled; + tr_ws->base.fence_finish = trace_winsys_fence_finish; + + tr_ws->winsys = winsys; + + tr_ws->buffer_maps = hash_table_create(trace_buffer_hash, + trace_buffer_compare); + if(!tr_ws->buffer_maps) + goto error2; + + trace_dump_call_begin("", "pipe_winsys_create"); + trace_dump_ret(ptr, winsys); + trace_dump_call_end(); + + return &tr_ws->base; + +error2: + FREE(tr_ws); +error1: + return winsys; +} diff --git 
a/src/gallium/drivers/trace/tr_winsys.h b/src/gallium/drivers/trace/tr_winsys.h new file mode 100644 index 0000000000..062ddf66a0 --- /dev/null +++ b/src/gallium/drivers/trace/tr_winsys.h @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TR_WINSYS_H_ +#define TR_WINSYS_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_winsys.h" + + +/** + * It often happens that new data is written directly to the user buffers + * without mapping/unmapping. This flag marks user buffers, so that their + * contents can be dumpped before being used by the pipe context. 
+ */ +#define TRACE_BUFFER_USAGE_USER (1 << 31) + + +struct hash_table; + + +struct trace_winsys +{ + struct pipe_winsys base; + + struct pipe_winsys *winsys; + + struct hash_table *buffer_maps; +}; + + +static INLINE struct trace_winsys * +trace_winsys(struct pipe_winsys *winsys) +{ + assert(winsys); + return (struct trace_winsys *)winsys; +} + + + +struct pipe_winsys * +trace_winsys_create(struct pipe_winsys *winsys); + + +void +trace_winsys_user_buffer_update(struct pipe_winsys *winsys, + struct pipe_buffer *buffer); + + +#endif /* TR_WINSYS_H_ */ diff --git a/src/gallium/drivers/trace/trace.xsl b/src/gallium/drivers/trace/trace.xsl new file mode 100644 index 0000000000..9cd621e7ab --- /dev/null +++ b/src/gallium/drivers/trace/trace.xsl @@ -0,0 +1,185 @@ +<?xml version="1.0"?> + +<!-- + +Copyright 2008 Tungsten Graphics, Inc. + +This program is free software: you can redistribute it and/or modify it +under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
<xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

	<!--
	Renders a trace document (root element "trace") as an HTML page:
	every "call" element becomes one item of a numbered list, printed as
	class::method(name = value, ...) = return-value.
	-->

	<xsl:output method="html" />

	<xsl:strip-space elements="*" />

	<!-- Page skeleton; the style sheet lives inside <head>, as HTML requires. -->
	<xsl:template match="/trace">
		<html>
			<head>
				<title>Gallium Trace</title>
				<style>
					body {
						font-family: verdana, sans-serif;
						font-size: 11px;
						font-weight: normal;
						text-align : left;
					}

					.fun {
						font-weight: bold;
					}

					.var {
						font-style: italic;
					}

					.typ {
						display: none;
					}

					.lit {
						color: #0000ff;
					}

					.ptr {
						color: #008000;
					}
				</style>
			</head>
			<body>
				<ol class="calls">
					<xsl:apply-templates/>
				</ol>
			</body>
		</html>
	</xsl:template>

	<!-- One traced call: class::method(args) followed by the return value. -->
	<xsl:template match="call">
		<li>
			<span class="fun">
				<xsl:value-of select="@class"/>
				<xsl:text>::</xsl:text>
				<xsl:value-of select="@method"/>
			</span>
			<xsl:text>(</xsl:text>
			<xsl:apply-templates select="arg"/>
			<xsl:text>)</xsl:text>
			<xsl:apply-templates select="ret"/>
		</li>
	</xsl:template>

	<!-- Named value (call argument or struct member), comma separated. -->
	<xsl:template match="arg|member">
		<xsl:apply-templates select="@name"/>
		<xsl:text> = </xsl:text>
		<xsl:apply-templates />
		<xsl:if test="position() != last()">
			<xsl:text>, </xsl:text>
		</xsl:if>
	</xsl:template>

	<xsl:template match="ret">
		<xsl:text> = </xsl:text>
		<xsl:apply-templates />
	</xsl:template>

	<!-- Scalar literals are printed verbatim. -->
	<xsl:template match="bool|int|uint|float|enum">
		<span class="lit">
			<xsl:value-of select="text()"/>
		</span>
	</xsl:template>

	<!-- Raw byte blobs are elided. -->
	<xsl:template match="bytes">
		<span class="lit">
			<xsl:text>...</xsl:text>
		</span>
	</xsl:template>

	<!-- Strings are quoted, with line feeds turned into <br/> by "break". -->
	<xsl:template match="string">
		<span class="lit">
			<xsl:text>"</xsl:text>
			<xsl:call-template name="break">
				<xsl:with-param name="text" select="text()"/>
			</xsl:call-template>
			<xsl:text>"</xsl:text>
		</span>
	</xsl:template>

	<xsl:template match="array|struct">
		<xsl:text>{</xsl:text>
		<xsl:apply-templates />
		<xsl:text>}</xsl:text>
	</xsl:template>

	<xsl:template match="elem">
		<xsl:apply-templates />
		<xsl:if test="position() != last()">
			<xsl:text>, </xsl:text>
		</xsl:if>
	</xsl:template>

	<xsl:template match="null">
		<span class="ptr">
			<xsl:text>NULL</xsl:text>
		</span>
	</xsl:template>

	<xsl:template match="ptr">
		<span class="ptr">
			<xsl:value-of select="text()"/>
		</span>
	</xsl:template>

	<xsl:template match="@name">
		<span class="var">
			<xsl:value-of select="."/>
		</span>
	</xsl:template>

	<!--
	Recursively copies $text, emitting <br/> in place of every line feed.
	The line feed is written as the character reference &#10;: a literal
	newline inside an attribute value is normalized to a space by the XML
	parser, which would make this template split on spaces instead.
	-->
	<xsl:template name="break">
		<xsl:param name="text" select="."/>
		<xsl:choose>
			<xsl:when test="contains($text, '&#10;')">
				<xsl:value-of select="substring-before($text, '&#10;')"/>
				<br/>
				<xsl:call-template name="break">
					<xsl:with-param name="text" select="substring-after($text, '&#10;')"/>
				</xsl:call-template>
			</xsl:when>
			<xsl:otherwise>
				<xsl:value-of select="$text"/>
			</xsl:otherwise>
		</xsl:choose>
	</xsl:template>

	<!-- Recursively replaces every occurrence of $from in $text with $to. -->
	<xsl:template name="replace">
		<xsl:param name="text"/>
		<xsl:param name="from"/>
		<xsl:param name="to"/>
		<xsl:choose>
			<xsl:when test="contains($text,$from)">
				<xsl:value-of select="concat(substring-before($text,$from),$to)"/>
				<xsl:call-template name="replace">
					<xsl:with-param name="text" select="substring-after($text,$from)"/>
					<xsl:with-param name="from" select="$from"/>
					<xsl:with-param name="to" select="$to"/>
				</xsl:call-template>
			</xsl:when>
			<xsl:otherwise>
				<xsl:value-of select="$text"/>
			</xsl:otherwise>
		</xsl:choose>
	</xsl:template>

</xsl:transform>
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef P_COMPILER_H +#define P_COMPILER_H + + +#include "p_config.h" + +#ifndef XFree86Server +#include <stdlib.h> +#include <string.h> +#else +#include "xf86_ansic.h" +#include "xf86_libc.h" +#endif + + +#if defined(_WIN32) && !defined(__WIN32__) +#define __WIN32__ +#endif + +#if defined(_MSC_VER) + +/* Avoid 'expression is always true' warning */ +#pragma warning(disable: 4296) + +#endif /* _MSC_VER */ + + +#if defined(_MSC_VER) + +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +#ifndef __eglplatform_h_ +typedef __int32 int32_t; +#endif +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; + +#if defined(_WIN64) +typedef __int64 intptr_t; +typedef unsigned __int64 uintptr_t; +#else +typedef __int32 intptr_t; +typedef unsigned __int32 uintptr_t; +#endif + +#define INT64_C(__val) __val##i64 +#define UINT64_C(__val) __val##ui64 + +#ifndef __cplusplus +#define false 0 +#define true 1 +#define bool _Bool +typedef int _Bool; +#define __bool_true_false_are_defined 1 +#endif /* !__cplusplus */ + +#else +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif +#include <stdint.h> +#include <stdbool.h> +#endif + + +typedef unsigned int uint; +typedef unsigned char ubyte; +typedef unsigned short ushort; +typedef uint64_t uint64; + +#if 0 +#define boolean bool +#else +typedef unsigned char boolean; +#endif +#ifndef TRUE +#define TRUE true +#endif +#ifndef FALSE +#define FALSE false +#endif + + +/* Function inlining */ +#ifdef __cplusplus +# define INLINE inline +#elif defined(__GNUC__) +# 
define INLINE __inline__ +#elif defined(_MSC_VER) +# define INLINE __inline +#elif defined(__ICL) +# define INLINE __inline +#elif defined(__INTEL_COMPILER) +# define INLINE inline +#elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) +# define INLINE __inline +#else +# define INLINE +#endif + + +/* This should match linux gcc cdecl semantics everywhere, so that we + * just codegen one calling convention on all platforms. + */ +#ifdef _MSC_VER +#define PIPE_CDECL __cdecl +#else +#define PIPE_CDECL +#endif + + + +#if defined(__GNUC__) +#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___aligned[SIZE] __attribute__(( aligned( 16 ) )) +#define ALIGN16_ASSIGN(NAME) NAME##___aligned +#define ALIGN16_ATTRIB __attribute__(( aligned( 16 ) )) +#define ALIGN8_ATTRIB __attribute__(( aligned( 8 ) )) +#else +#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___unaligned[SIZE + 1] +#define ALIGN16_ASSIGN(NAME) align16(NAME##___unaligned) +#define ALIGN16_ATTRIB +#define ALIGN8_ATTRIB +#endif + + + +#endif /* P_COMPILER_H */ diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h new file mode 100644 index 0000000000..05cbd2fc4d --- /dev/null +++ b/src/gallium/include/pipe/p_config.h @@ -0,0 +1,148 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
/**
 * @file
 * Gallium configuration defines.
 *
 * Derives PIPE_CC_xxx, PIPE_ARCH_xxx, PIPE_OS_xxx and PIPE_SUBSYSTEM_xxx
 * macros from the compiler's pre-defined macros, so that the rest of the
 * code can test those instead of compiler-specific symbols.  Not
 * everything can be determined from the pre-defined environment alone;
 * the Windows subsystem in particular has to be supplied by the build.
 *
 * See also:
 * - http://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html
 * - echo | gcc -dM -E - | sort
 * - http://msdn.microsoft.com/en-us/library/b0084kay.aspx
 *
 * @author José Fonseca <jrfonseca@tungstengraphics.com>
 */

#ifndef P_CONFIG_H_
#define P_CONFIG_H_


/*
 * Compiler
 */

#ifdef __GNUC__
#define PIPE_CC_GCC
#endif

/*
 * _MSC_VER values: 1300 = Visual C++ .NET 2002, 1310 = Visual C++ .NET
 * 2003, 1400 = Visual C++ 2005.  __MSC__ appears to be an old macro that
 * recent MSVC versions no longer pre-define.
 */
#if defined(_MSC_VER) || defined(__MSC__)
#define PIPE_CC_MSVC
#endif

#ifdef __ICL
#define PIPE_CC_ICL
#endif


/*
 * Processor architecture
 */

#if defined(__i386__) /* gcc */ || defined(_M_IX86) /* msvc */ || defined(_X86_) || defined(__386__) || defined(i386)
#define PIPE_ARCH_X86
#endif

#if defined(__x86_64__) /* gcc */ || defined(_M_X64) /* msvc */ || defined(_M_AMD64) /* msvc */
#define PIPE_ARCH_X86_64
#endif

/*
 * SSE is enabled on x86 / x86-64 unless the compiler is GCC without
 * __SSE2__, in which case the -msse -msse2 options are missing.
 */
#if (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)) && \
    (!defined(PIPE_CC_GCC) || defined(__SSE2__))
#define PIPE_ARCH_SSE
#endif

#ifdef __PPC__
#define PIPE_ARCH_PPC
#ifdef __PPC64__
#define PIPE_ARCH_PPC_64
#endif
#endif


/*
 * Operating system family.
 *
 * See subsystem below for a more fine-grained distinction.
 */

#ifdef __linux__
#define PIPE_OS_LINUX
#endif

#if defined(_WIN32) || defined(WIN32)
#define PIPE_OS_WINDOWS
#endif


/*
 * Subsystem.
 *
 * NOTE: There is no way to auto-detect most of these.
 */

#ifdef PIPE_OS_LINUX
#define PIPE_SUBSYSTEM_DRI
#endif /* PIPE_OS_LINUX */

/*
 * On Windows one of
 *   PIPE_SUBSYSTEM_WINDOWS_DISPLAY   (Windows 2000/XP Display Driver)
 *   PIPE_SUBSYSTEM_WINDOWS_MINIPORT  (Windows 2000/XP Miniport Driver)
 *   PIPE_SUBSYSTEM_WINDOWS_USER      (Windows User-space Library)
 *   PIPE_SUBSYSTEM_WINDOWS_CE        (Windows CE 5.0/6.0)
 * is expected from the build.  When none is given, Windows CE is picked
 * if _WIN32_WCE is defined; anything else is a configuration error.
 */
#if defined(PIPE_OS_WINDOWS) && \
    !defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && \
    !defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) && \
    !defined(PIPE_SUBSYSTEM_WINDOWS_USER) && \
    !defined(PIPE_SUBSYSTEM_WINDOWS_CE)
#ifdef _WIN32_WCE
#define PIPE_SUBSYSTEM_WINDOWS_CE
#else /* !_WIN32_WCE */
#error No PIPE_SUBSYSTEM_WINDOWS_xxx subsystem defined.
#endif /* !_WIN32_WCE */
#endif /* PIPE_OS_WINDOWS without a subsystem */


#endif /* P_CONFIG_H_ */
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef PIPE_CONTEXT_H +#define PIPE_CONTEXT_H + +#include "p_state.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct pipe_screen; +struct pipe_fence_handle; +struct pipe_state_cache; +struct pipe_query; + + +/** + * Gallium rendering context. Basically: + * - state setting functions + * - VBO drawing functions + * - surface functions + */ +struct pipe_context { + struct pipe_winsys *winsys; + struct pipe_screen *screen; + + void *priv; /**< context private data (for DRI for example) */ + void *draw; /**< private, for draw module (temporary?) */ + + void (*destroy)( struct pipe_context * ); + + + /* Possible interface for setting edgeflags. These aren't really + * vertex elements, so don't fit there. + */ + void (*set_edgeflags)( struct pipe_context *, + const unsigned *bitfield ); + + + /** + * VBO drawing (return false on fallbacks (temporary??)) + */ + /*@{*/ + boolean (*draw_arrays)( struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count); + + boolean (*draw_elements)( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); + + /* XXX: this is (probably) a temporary entrypoint, as the range + * information should be available from the vertex_buffer state. + * Using this to quickly evaluate a specialized path in the draw + * module. 
+ */ + boolean (*draw_range_elements)( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); + /*@}*/ + + + /** + * Query objects + */ + /*@{*/ + struct pipe_query *(*create_query)( struct pipe_context *pipe, + unsigned query_type ); + + void (*destroy_query)(struct pipe_context *pipe, + struct pipe_query *q); + + void (*begin_query)(struct pipe_context *pipe, struct pipe_query *q); + void (*end_query)(struct pipe_context *pipe, struct pipe_query *q); + + boolean (*get_query_result)(struct pipe_context *pipe, + struct pipe_query *q, + boolean wait, + uint64 *result); + /*@}*/ + + /** + * State functions (create/bind/destroy state objects) + */ + /*@{*/ + void * (*create_blend_state)(struct pipe_context *, + const struct pipe_blend_state *); + void (*bind_blend_state)(struct pipe_context *, void *); + void (*delete_blend_state)(struct pipe_context *, void *); + + void * (*create_sampler_state)(struct pipe_context *, + const struct pipe_sampler_state *); + void (*bind_sampler_states)(struct pipe_context *, unsigned num, void **); + void (*delete_sampler_state)(struct pipe_context *, void *); + + void * (*create_rasterizer_state)(struct pipe_context *, + const struct pipe_rasterizer_state *); + void (*bind_rasterizer_state)(struct pipe_context *, void *); + void (*delete_rasterizer_state)(struct pipe_context *, void *); + + void * (*create_depth_stencil_alpha_state)(struct pipe_context *, + const struct pipe_depth_stencil_alpha_state *); + void (*bind_depth_stencil_alpha_state)(struct pipe_context *, void *); + void (*delete_depth_stencil_alpha_state)(struct pipe_context *, void *); + + void * (*create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*bind_fs_state)(struct pipe_context *, void *); + void (*delete_fs_state)(struct pipe_context *, void *); + + void * (*create_vs_state)(struct pipe_context *, + const 
struct pipe_shader_state *); + void (*bind_vs_state)(struct pipe_context *, void *); + void (*delete_vs_state)(struct pipe_context *, void *); + /*@}*/ + + /** + * Parameter-like state (or properties) + */ + /*@{*/ + void (*set_blend_color)( struct pipe_context *, + const struct pipe_blend_color * ); + + void (*set_clip_state)( struct pipe_context *, + const struct pipe_clip_state * ); + + void (*set_constant_buffer)( struct pipe_context *, + uint shader, uint index, + const struct pipe_constant_buffer *buf ); + + void (*set_framebuffer_state)( struct pipe_context *, + const struct pipe_framebuffer_state * ); + + void (*set_polygon_stipple)( struct pipe_context *, + const struct pipe_poly_stipple * ); + + void (*set_scissor_state)( struct pipe_context *, + const struct pipe_scissor_state * ); + + void (*set_viewport_state)( struct pipe_context *, + const struct pipe_viewport_state * ); + + void (*set_sampler_textures)( struct pipe_context *, + unsigned num_textures, + struct pipe_texture ** ); + + void (*set_vertex_buffers)( struct pipe_context *, + unsigned num_buffers, + const struct pipe_vertex_buffer * ); + + void (*set_vertex_elements)( struct pipe_context *, + unsigned num_elements, + const struct pipe_vertex_element * ); + /*@}*/ + + + /** + * Surface functions + */ + /*@{*/ + void (*surface_copy)(struct pipe_context *pipe, + boolean do_flip,/**< flip surface contents vertically */ + struct pipe_surface *dest, + unsigned destx, unsigned desty, + struct pipe_surface *src, /* don't make this const - + need to map/unmap */ + unsigned srcx, unsigned srcy, + unsigned width, unsigned height); + + void (*surface_fill)(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, + unsigned value); + + void (*clear)(struct pipe_context *pipe, + struct pipe_surface *ps, + unsigned clearValue); + /*@}*/ + + + /** Flush rendering (flags = bitmask of PIPE_FLUSH_x tokens) */ + void (*flush)( struct pipe_context 
*pipe, + unsigned flags, + struct pipe_fence_handle **fence ); +}; + + +#ifdef __cplusplus +} +#endif + +#endif /* PIPE_CONTEXT_H */ diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h new file mode 100644 index 0000000000..cb6196aa9f --- /dev/null +++ b/src/gallium/include/pipe/p_debug.h @@ -0,0 +1,356 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Cross-platform debugging helpers. + * + * For now it just has assert and printf replacements, but it might be extended + * with stack trace reports and more advanced logging in the near future. 
+ * + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + +#ifndef P_DEBUG_H_ +#define P_DEBUG_H_ + + +#include <stdarg.h> + +#include "p_compiler.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifdef DBG +#ifndef DEBUG +#define DEBUG 1 +#endif +#else +#ifndef NDEBUG +#define NDEBUG 1 +#endif +#endif + + +/* MSVC before VC7 does not have the __FUNCTION__ macro */ +#if defined(_MSC_VER) && _MSC_VER < 1300 +#define __FUNCTION__ "???" +#endif + + +void _debug_vprintf(const char *format, va_list ap); + + +static INLINE void +_debug_printf(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + _debug_vprintf(format, ap); + va_end(ap); +} + + +/** + * Print debug messages. + * + * The actual channel used to output debug messages is platform specific. To + * avoid misformatting or truncation, follow these rules of thumb: + * - output whole lines + * - avoid outputting large strings (512 bytes is the current maximum length + * that is guaranteed to be printed in all platforms) + */ +static INLINE void +debug_printf(const char *format, ...) +{ +#ifdef DEBUG + va_list ap; + va_start(ap, format); + _debug_vprintf(format, ap); + va_end(ap); +#else + (void) format; /* silence warning */ +#endif +} + + +#ifdef DEBUG +#define debug_vprintf(_format, _ap) _debug_vprintf(_format, _ap) +#else +#define debug_vprintf(_format, _ap) ((void)0) +#endif + + +#ifdef DEBUG +/** + * Dump a blob in hex to the same place that debug_printf sends its + * messages. + */ +void debug_print_blob( const char *name, const void *blob, unsigned size ); + +/* Print a message along with a prettified format string + */ +void debug_print_format(const char *msg, unsigned fmt ); +#else +#define debug_print_blob(_name, _blob, _size) ((void)0) +#define debug_print_format(_msg, _fmt) ((void)0) +#endif + + +void _debug_break(void); + + +/** + * Hard-coded breakpoint. 
+ */ +#ifdef DEBUG +#if defined(PIPE_ARCH_X86) && defined(PIPE_CC_GCC) +#define debug_break() __asm("int3") +#elif defined(PIPE_ARCH_X86) && defined(PIPE_CC_MSVC) +#define debug_break() do { _asm {int 3} } while(0) +#else +#define debug_break() _debug_break() +#endif +#else /* !DEBUG */ +#define debug_break() ((void)0) +#endif /* !DEBUG */ + + +long +debug_get_num_option(const char *name, long dfault); + +void _debug_assert_fail(const char *expr, + const char *file, + unsigned line, + const char *function); + + +/** + * Assert macro + * + * Do not expect that the assert call terminates -- errors must be handled + * regardless of assert behavior. + */ +#ifdef DEBUG +#define debug_assert(expr) ((expr) ? (void)0 : _debug_assert_fail(#expr, __FILE__, __LINE__, __FUNCTION__)) +#else +#define debug_assert(expr) ((void)0) +#endif + + +/** Override standard assert macro */ +#ifdef assert +#undef assert +#endif +#define assert(expr) debug_assert(expr) + + +/** + * Output the current function name. + */ +#ifdef DEBUG +#define debug_checkpoint() \ + _debug_printf("%s\n", __FUNCTION__) +#else +#define debug_checkpoint() \ + ((void)0) +#endif + + +/** + * Output the full source code position. + */ +#ifdef DEBUG +#define debug_checkpoint_full() \ + _debug_printf("%s:%u:%s", __FILE__, __LINE__, __FUNCTION__) +#else +#define debug_checkpoint_full() \ + ((void)0) +#endif + + +/** + * Output a warning message. Muted on release version. + */ +#ifdef DEBUG +#define debug_warning(__msg) \ + _debug_printf("%s:%u:%s: warning: %s\n", __FILE__, __LINE__, __FUNCTION__, __msg) +#else +#define debug_warning(__msg) \ + ((void)0) +#endif + + +/** + * Output an error message. Not muted on release version. + */ +#ifdef DEBUG +#define debug_error(__msg) \ + _debug_printf("%s:%u:%s: error: %s\n", __FILE__, __LINE__, __FUNCTION__, __msg) +#else +#define debug_error(__msg) \ + _debug_printf("error: %s\n", __msg) +#endif + + +/** + * Used by debug_dump_enum and debug_dump_flags to describe symbols. 
+ */ +struct debug_named_value +{ + const char *name; + unsigned long value; +}; + + +/** + * Some C pre-processor magic to simplify creating named values. + * + * Example: + * @code + * static const struct debug_named_value my_names[] = { + * DEBUG_NAMED_VALUE(MY_ENUM_VALUE_X), + * DEBUG_NAMED_VALUE(MY_ENUM_VALUE_Y), + * DEBUG_NAMED_VALUE(MY_ENUM_VALUE_Z), + * DEBUG_NAMED_VALUE_END + * }; + * + * ... + * debug_printf("%s = %s\n", + * name, + * debug_dump_enum(my_names, my_value)); + * ... + * @endcode + */ +#define DEBUG_NAMED_VALUE(__symbol) {#__symbol, (unsigned long)__symbol} +#define DEBUG_NAMED_VALUE_END {NULL, 0} + + +/** + * Convert an enum value to a string. + */ +const char * +debug_dump_enum(const struct debug_named_value *names, + unsigned long value); + + +/** + * Convert binary flags value to a string. + */ +const char * +debug_dump_flags(const struct debug_named_value *names, + unsigned long value); + + +/** + * Get option. + * + * It is an alias for getenv on Linux. + * + * On Windows it reads C:\gallium.cfg, which is a text file with CR+LF line + * endings with one option per line as + * + * NAME=value + * + * This file must be terminated with an extra empty line. 
+ */ +const char * +debug_get_option(const char *name, const char *dfault); + +boolean +debug_get_bool_option(const char *name, boolean dfault); + +long +debug_get_num_option(const char *name, long dfault); + +unsigned long +debug_get_flags_option(const char *name, + const struct debug_named_value *flags, + unsigned long dfault); + + +void * +debug_malloc(const char *file, unsigned line, const char *function, + size_t size); + +void +debug_free(const char *file, unsigned line, const char *function, + void *ptr); + +void * +debug_calloc(const char *file, unsigned line, const char *function, + size_t count, size_t size ); + +void * +debug_realloc(const char *file, unsigned line, const char *function, + void *old_ptr, size_t old_size, size_t new_size ); + +unsigned long +debug_memory_begin(void); + +void +debug_memory_end(unsigned long beginning); + + +#if defined(PROFILE) && defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + +void +debug_profile_start(void); + +void +debug_profile_stop(void); + +#endif + + +#ifdef DEBUG +struct pipe_surface; +void debug_dump_image(const char *prefix, + unsigned format, unsigned cpp, + unsigned width, unsigned height, + unsigned stride, + const void *data); +void debug_dump_surface(const char *prefix, + struct pipe_surface *surface); +void debug_dump_surface_bmp(const char *filename, + struct pipe_surface *surface); +#else +#define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0) +#define debug_dump_surface(prefix, surface) ((void)0) +#define debug_dump_surface_bmp(filename, surface) ((void)0) +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* P_DEBUG_H_ */ diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h new file mode 100644 index 0000000000..cda10a2f06 --- /dev/null +++ b/src/gallium/include/pipe/p_defines.h @@ -0,0 +1,294 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 
+ * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef PIPE_DEFINES_H +#define PIPE_DEFINES_H + +#include "p_format.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define PIPE_BLENDFACTOR_ONE 0x1 +#define PIPE_BLENDFACTOR_SRC_COLOR 0x2 +#define PIPE_BLENDFACTOR_SRC_ALPHA 0x3 +#define PIPE_BLENDFACTOR_DST_ALPHA 0x4 +#define PIPE_BLENDFACTOR_DST_COLOR 0x5 +#define PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 +#define PIPE_BLENDFACTOR_CONST_COLOR 0x7 +#define PIPE_BLENDFACTOR_CONST_ALPHA 0x8 +#define PIPE_BLENDFACTOR_SRC1_COLOR 0x9 +#define PIPE_BLENDFACTOR_SRC1_ALPHA 0x0A +#define PIPE_BLENDFACTOR_ZERO 0x11 +#define PIPE_BLENDFACTOR_INV_SRC_COLOR 0x12 +#define PIPE_BLENDFACTOR_INV_SRC_ALPHA 0x13 +#define PIPE_BLENDFACTOR_INV_DST_ALPHA 0x14 +#define PIPE_BLENDFACTOR_INV_DST_COLOR 0x15 +#define PIPE_BLENDFACTOR_INV_CONST_COLOR 0x17 +#define PIPE_BLENDFACTOR_INV_CONST_ALPHA 0x18 +#define PIPE_BLENDFACTOR_INV_SRC1_COLOR 0x19 +#define PIPE_BLENDFACTOR_INV_SRC1_ALPHA 0x1A + +#define PIPE_BLEND_ADD 0 +#define PIPE_BLEND_SUBTRACT 1 +#define PIPE_BLEND_REVERSE_SUBTRACT 2 +#define PIPE_BLEND_MIN 3 +#define PIPE_BLEND_MAX 4 + +#define PIPE_LOGICOP_CLEAR 0 +#define PIPE_LOGICOP_NOR 1 +#define PIPE_LOGICOP_AND_INVERTED 2 +#define PIPE_LOGICOP_COPY_INVERTED 3 +#define PIPE_LOGICOP_AND_REVERSE 4 +#define PIPE_LOGICOP_INVERT 5 +#define PIPE_LOGICOP_XOR 6 +#define PIPE_LOGICOP_NAND 7 +#define PIPE_LOGICOP_AND 8 +#define PIPE_LOGICOP_EQUIV 9 +#define PIPE_LOGICOP_NOOP 10 +#define PIPE_LOGICOP_OR_INVERTED 11 +#define PIPE_LOGICOP_COPY 12 +#define PIPE_LOGICOP_OR_REVERSE 13 +#define PIPE_LOGICOP_OR 14 +#define PIPE_LOGICOP_SET 15 + +#define PIPE_MASK_R 0x1 +#define PIPE_MASK_G 0x2 +#define PIPE_MASK_B 0x4 +#define PIPE_MASK_A 0x8 +#define PIPE_MASK_RGBA 0xf + + +/** + * Inequality functions. Used for depth test, stencil compare, alpha + * test, shadow compare, etc. 
+ */ +#define PIPE_FUNC_NEVER 0 +#define PIPE_FUNC_LESS 1 +#define PIPE_FUNC_EQUAL 2 +#define PIPE_FUNC_LEQUAL 3 +#define PIPE_FUNC_GREATER 4 +#define PIPE_FUNC_NOTEQUAL 5 +#define PIPE_FUNC_GEQUAL 6 +#define PIPE_FUNC_ALWAYS 7 + +/** Polygon fill mode */ +#define PIPE_POLYGON_MODE_FILL 0 +#define PIPE_POLYGON_MODE_LINE 1 +#define PIPE_POLYGON_MODE_POINT 2 + +/** Polygon front/back winding, also for culling */ +#define PIPE_WINDING_NONE 0 +#define PIPE_WINDING_CW 1 +#define PIPE_WINDING_CCW 2 +#define PIPE_WINDING_BOTH (PIPE_WINDING_CW | PIPE_WINDING_CCW) + +/** Stencil ops */ +#define PIPE_STENCIL_OP_KEEP 0 +#define PIPE_STENCIL_OP_ZERO 1 +#define PIPE_STENCIL_OP_REPLACE 2 +#define PIPE_STENCIL_OP_INCR 3 +#define PIPE_STENCIL_OP_DECR 4 +#define PIPE_STENCIL_OP_INCR_WRAP 5 +#define PIPE_STENCIL_OP_DECR_WRAP 6 +#define PIPE_STENCIL_OP_INVERT 7 + +/** Texture types */ +enum pipe_texture_target { + PIPE_TEXTURE_1D = 0, + PIPE_TEXTURE_2D = 1, + PIPE_TEXTURE_3D = 2, + PIPE_TEXTURE_CUBE = 3 +}; + +#define PIPE_TEX_FACE_POS_X 0 +#define PIPE_TEX_FACE_NEG_X 1 +#define PIPE_TEX_FACE_POS_Y 2 +#define PIPE_TEX_FACE_NEG_Y 3 +#define PIPE_TEX_FACE_POS_Z 4 +#define PIPE_TEX_FACE_NEG_Z 5 + +#define PIPE_TEX_WRAP_REPEAT 0 +#define PIPE_TEX_WRAP_CLAMP 1 +#define PIPE_TEX_WRAP_CLAMP_TO_EDGE 2 +#define PIPE_TEX_WRAP_CLAMP_TO_BORDER 3 +#define PIPE_TEX_WRAP_MIRROR_REPEAT 4 +#define PIPE_TEX_WRAP_MIRROR_CLAMP 5 +#define PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE 6 +#define PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER 7 + +/* Between mipmaps, ie mipfilter + */ +#define PIPE_TEX_MIPFILTER_NEAREST 0 +#define PIPE_TEX_MIPFILTER_LINEAR 1 +#define PIPE_TEX_MIPFILTER_NONE 2 + +/* Within a mipmap, ie min/mag filter + */ +#define PIPE_TEX_FILTER_NEAREST 0 +#define PIPE_TEX_FILTER_LINEAR 1 +#define PIPE_TEX_FILTER_ANISO 2 + + +#define PIPE_TEX_COMPARE_NONE 0 +#define PIPE_TEX_COMPARE_R_TO_TEXTURE 1 + +#define PIPE_TEX_FACE_POS_X 0 +#define PIPE_TEX_FACE_NEG_X 1 +#define PIPE_TEX_FACE_POS_Y 2 +#define 
PIPE_TEX_FACE_NEG_Y 3 +#define PIPE_TEX_FACE_POS_Z 4 +#define PIPE_TEX_FACE_NEG_Z 5 +#define PIPE_TEX_FACE_MAX 6 + +#define PIPE_TEXTURE_USAGE_RENDER_TARGET 0x1 +#define PIPE_TEXTURE_USAGE_DISPLAY_TARGET 0x2 /* ie a backbuffer */ +#define PIPE_TEXTURE_USAGE_PRIMARY 0x4 /* ie a frontbuffer */ +#define PIPE_TEXTURE_USAGE_DEPTH_STENCIL 0x8 +#define PIPE_TEXTURE_USAGE_SAMPLER 0x10 + +#define PIPE_TEXTURE_GEOM_NON_SQUARE 0x1 +#define PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO 0x2 + + +/** + * Surface layout + */ +#define PIPE_SURFACE_LAYOUT_LINEAR 0 + + +/** + * Surface status + */ +#define PIPE_SURFACE_STATUS_UNDEFINED 0 +#define PIPE_SURFACE_STATUS_DEFINED 1 +#define PIPE_SURFACE_STATUS_CLEAR 2 + + +/** + * Buffer usage flags + */ +#define PIPE_BUFFER_USAGE_CPU_READ (1 << 0) +#define PIPE_BUFFER_USAGE_CPU_WRITE (1 << 1) +#define PIPE_BUFFER_USAGE_GPU_READ (1 << 2) +#define PIPE_BUFFER_USAGE_GPU_WRITE (1 << 3) +#define PIPE_BUFFER_USAGE_PIXEL (1 << 4) +#define PIPE_BUFFER_USAGE_VERTEX (1 << 5) +#define PIPE_BUFFER_USAGE_INDEX (1 << 6) +#define PIPE_BUFFER_USAGE_CONSTANT (1 << 7) +/** Pipe driver custom usage flags should be greater or equal to this value */ +#define PIPE_BUFFER_USAGE_CUSTOM (1 << 16) + + +/** + * Flush types: + */ +#define PIPE_FLUSH_RENDER_CACHE 0x1 +#define PIPE_FLUSH_TEXTURE_CACHE 0x2 +#define PIPE_FLUSH_SWAPBUFFERS 0x4 +#define PIPE_FLUSH_FRAME 0x8 /**< Mark the end of a frame */ + + +/** + * Shaders + */ +#define PIPE_SHADER_VERTEX 0 +#define PIPE_SHADER_FRAGMENT 1 +#define PIPE_SHADER_TYPES 2 + + +/** + * Primitive types: + */ +#define PIPE_PRIM_POINTS 0 +#define PIPE_PRIM_LINES 1 +#define PIPE_PRIM_LINE_LOOP 2 +#define PIPE_PRIM_LINE_STRIP 3 +#define PIPE_PRIM_TRIANGLES 4 +#define PIPE_PRIM_TRIANGLE_STRIP 5 +#define PIPE_PRIM_TRIANGLE_FAN 6 +#define PIPE_PRIM_QUADS 7 +#define PIPE_PRIM_QUAD_STRIP 8 +#define PIPE_PRIM_POLYGON 9 + + +/** + * Query object types + */ +#define PIPE_QUERY_OCCLUSION_COUNTER 0 +#define PIPE_QUERY_PRIMITIVES_GENERATED 1 +#define 
PIPE_QUERY_PRIMITIVES_EMITTED 2 +#define PIPE_QUERY_TYPES 3 + + +/** + * Point sprite coord modes + */ +#define PIPE_SPRITE_COORD_NONE 0 +#define PIPE_SPRITE_COORD_UPPER_LEFT 1 +#define PIPE_SPRITE_COORD_LOWER_LEFT 2 + + +/** + * Implementation capabilities/limits + * Passed to pipe->get_param() + * XXX this will need some fine tuning... + */ +#define PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS 1 +#define PIPE_CAP_NPOT_TEXTURES 2 +#define PIPE_CAP_TWO_SIDED_STENCIL 3 +#define PIPE_CAP_GLSL 4 /* XXX need something better */ +#define PIPE_CAP_S3TC 5 +#define PIPE_CAP_ANISOTROPIC_FILTER 6 +#define PIPE_CAP_POINT_SPRITE 7 +#define PIPE_CAP_MAX_RENDER_TARGETS 8 +#define PIPE_CAP_OCCLUSION_QUERY 9 +#define PIPE_CAP_TEXTURE_SHADOW_MAP 10 +#define PIPE_CAP_MAX_TEXTURE_2D_LEVELS 11 +#define PIPE_CAP_MAX_TEXTURE_3D_LEVELS 12 +#define PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS 13 +#define PIPE_CAP_MAX_LINE_WIDTH 14 +#define PIPE_CAP_MAX_LINE_WIDTH_AA 15 +#define PIPE_CAP_MAX_POINT_WIDTH 16 +#define PIPE_CAP_MAX_POINT_WIDTH_AA 17 +#define PIPE_CAP_MAX_TEXTURE_ANISOTROPY 18 +#define PIPE_CAP_MAX_TEXTURE_LOD_BIAS 19 +#define PIPE_CAP_GUARD_BAND_LEFT 20 /*< float */ +#define PIPE_CAP_GUARD_BAND_TOP 21 /*< float */ +#define PIPE_CAP_GUARD_BAND_RIGHT 22 /*< float */ +#define PIPE_CAP_GUARD_BAND_BOTTOM 23 /*< float */ +#define PIPE_CAP_TEXTURE_MIRROR_CLAMP 24 +#define PIPE_CAP_TEXTURE_MIRROR_REPEAT 25 + + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/include/pipe/p_error.h b/src/gallium/include/pipe/p_error.h new file mode 100644 index 0000000000..b865b22635 --- /dev/null +++ b/src/gallium/include/pipe/p_error.h @@ -0,0 +1,65 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Gallium error codes. + * + * @author José Fonseca <jrfonseca@tungstengraphics.com> + */ + +#ifndef P_ERROR_H_ +#define P_ERROR_H_ + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Gallium error codes. + * + * - A zero value always means success. + * - A negative value always means failure. + * - The meaning of a positive value is function dependent. 
+ */ +enum pipe_error { + PIPE_OK = 0, + PIPE_ERROR = -1, /**< Generic error */ + PIPE_ERROR_BAD_INPUT = -2, + PIPE_ERROR_OUT_OF_MEMORY = -3, + PIPE_ERROR_RETRY = -4 + /* TODO */ +}; + + +#ifdef __cplusplus +} +#endif + +#endif /* P_ERROR_H_ */ diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h new file mode 100644 index 0000000000..97a4c8c510 --- /dev/null +++ b/src/gallium/include/pipe/p_format.h @@ -0,0 +1,567 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef PIPE_FORMAT_H +#define PIPE_FORMAT_H + +#include "p_compiler.h" +#include "p_debug.h" + +#include "util/u_string.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * The pipe_format enum is a 32-bit wide bitfield that encodes all the + * information needed to uniquely describe a pixel format. + */ + +/** + * Possible format layouts are encoded in the first 2 bits. + * The interpretation of the remaining 30 bits depends on a particular + * format layout. + */ +#define PIPE_FORMAT_LAYOUT_RGBAZS 0 +#define PIPE_FORMAT_LAYOUT_YCBCR 1 +#define PIPE_FORMAT_LAYOUT_DXT 2 /**< XXX temporary? */ +#define PIPE_FORMAT_LAYOUT_MIXED 3 + +static INLINE uint pf_layout(uint f) /**< PIPE_FORMAT_LAYOUT_ */ +{ + return f & 0x3; +} + +/** + * RGBAZS Format Layout. + */ + +/** + * Format component selectors for RGBAZS & MIXED layout. + */ +#define PIPE_FORMAT_COMP_R 0 +#define PIPE_FORMAT_COMP_G 1 +#define PIPE_FORMAT_COMP_B 2 +#define PIPE_FORMAT_COMP_A 3 +#define PIPE_FORMAT_COMP_0 4 +#define PIPE_FORMAT_COMP_1 5 +#define PIPE_FORMAT_COMP_Z 6 +#define PIPE_FORMAT_COMP_S 7 + +/** + * Format types for RGBAZS layout. + */ +#define PIPE_FORMAT_TYPE_UNKNOWN 0 +#define PIPE_FORMAT_TYPE_FLOAT 1 /**< 16/32/64-bit/channel formats */ +#define PIPE_FORMAT_TYPE_UNORM 2 /**< uints, normalized to [0,1] */ +#define PIPE_FORMAT_TYPE_SNORM 3 /**< ints, normalized to [-1,1] */ +#define PIPE_FORMAT_TYPE_USCALED 4 /**< uints, not normalized */ +#define PIPE_FORMAT_TYPE_SSCALED 5 /**< ints, not normalized */ +#define PIPE_FORMAT_TYPE_SRGB 6 /**< sRGB colorspace */ +#define PIPE_FORMAT_TYPE_FIXED 7 /**< 16.16 fixed point */ + + +/** + * Because the destination vector is assumed to be RGBA FLOAT, we + * need to know how to swizzle and expand components from the source + * vector. + * Let's take U_A1_R5_G5_B5 as an example. X swizzle is A, X size + * is 1 bit and type is UNORM. 
So we take the most significant bit + * from source vector, convert 0 to 0.0 and 1 to 1.0 and save it + * in the last component of the destination RGBA vector. + * Next, Y swizzle is R, Y size is 5 and type is UNORM. We normalize + * those 5 bits into [0.0; 1.0] range and put it into the first + * component of the destination vector. Rinse and repeat for + * components Z and W. + * If any of the size fields is zero, it means the source format contains + * fewer than four components. + * If any swizzle is 0 or 1, the corresponding destination component + * should be filled with 0.0 and 1.0, respectively. + */ +typedef uint pipe_format_rgbazs_t; + +static INLINE uint pf_get(pipe_format_rgbazs_t f, uint shift, uint mask) +{ + return (f >> shift) & mask; +} + +#define pf_swizzle_x(f) pf_get(f, 2, 0x7) /**< PIPE_FORMAT_COMP_ */ +#define pf_swizzle_y(f) pf_get(f, 5, 0x7) /**< PIPE_FORMAT_COMP_ */ +#define pf_swizzle_z(f) pf_get(f, 8, 0x7) /**< PIPE_FORMAT_COMP_ */ +#define pf_swizzle_w(f) pf_get(f, 11, 0x7) /**< PIPE_FORMAT_COMP_ */ +#define pf_swizzle_xyzw(f,i) pf_get(f, 2+((i)*3), 0x7) +#define pf_size_x(f) pf_get(f, 14, 0x7) /**< Size of X */ +#define pf_size_y(f) pf_get(f, 17, 0x7) /**< Size of Y */ +#define pf_size_z(f) pf_get(f, 20, 0x7) /**< Size of Z */ +#define pf_size_w(f) pf_get(f, 23, 0x7) /**< Size of W */ +#define pf_size_xyzw(f,i) pf_get(f, 14+((i)*3), 0x7) +#define pf_exp2(f) pf_get(f, 26, 0x7) /**< Scale size by 2 ^ exp2 */ +#define pf_type(f) pf_get(f, 29, 0x7) /**< PIPE_FORMAT_TYPE_ */ + +/** + * Helper macro to encode the above structure into a 32-bit value. + */ +#define _PIPE_FORMAT_RGBAZS( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, EXP2, TYPE ) (\ + (PIPE_FORMAT_LAYOUT_RGBAZS << 0) |\ + ((SWZ) << 2) |\ + ((SIZEX) << 14) |\ + ((SIZEY) << 17) |\ + ((SIZEZ) << 20) |\ + ((SIZEW) << 23) |\ + ((EXP2) << 26) |\ + ((TYPE) << 29) ) + +/** + * Helper macro to encode the swizzle part of the structure above. 
+ */ +#define _PIPE_FORMAT_SWZ( SWZX, SWZY, SWZZ, SWZW ) (((SWZX) << 0) | ((SWZY) << 3) | ((SWZZ) << 6) | ((SWZW) << 9)) + +/** + * Shorthand macro for RGBAZS layout with component sizes in 1-bit units. + */ +#define _PIPE_FORMAT_RGBAZS_1( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, TYPE )\ + _PIPE_FORMAT_RGBAZS( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, 0, TYPE ) + +/** + * Shorthand macro for RGBAZS layout with component sizes in 2-bit units. + */ +#define _PIPE_FORMAT_RGBAZS_2( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, TYPE )\ + _PIPE_FORMAT_RGBAZS( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, 1, TYPE ) + +/** + * Shorthand macro for RGBAZS layout with component sizes in 8-bit units. + */ +#define _PIPE_FORMAT_RGBAZS_8( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, TYPE )\ + _PIPE_FORMAT_RGBAZS( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, 3, TYPE ) + +/** + * Shorthand macro for RGBAZS layout with component sizes in 64-bit units. + */ +#define _PIPE_FORMAT_RGBAZS_64( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, TYPE )\ + _PIPE_FORMAT_RGBAZS( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, 6, TYPE ) + +typedef uint pipe_format_mixed_t; + +/* NOTE: Use pf_swizzle_* and pf_size_* macros for swizzles and sizes. + */ + +#define pf_mixed_sign_x(f) pf_get( f, 26, 0x1 ) /**< Sign of X */ +#define pf_mixed_sign_y(f) pf_get( f, 27, 0x1 ) /**< Sign of Y */ +#define pf_mixed_sign_z(f) pf_get( f, 28, 0x1 ) /**< Sign of Z */ +#define pf_mixed_sign_w(f) pf_get( f, 29, 0x1 ) /**< Sign of W */ +#define pf_mixed_sign_xyzw(f, i) pf_get( f, 26 + (i), 0x1 ) +#define pf_mixed_normalized(f) pf_get( f, 30, 0x1 ) /**< Type is NORM (1) or SCALED (0) */ +#define pf_mixed_scale8(f) pf_get( f, 31, 0x1 ) /**< Scale size by either one (0) or eight (1) */ + +/** + * Helper macro to encode the above structure into a 32-bit value. 
+ */ +#define _PIPE_FORMAT_MIXED( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, SIGNX, SIGNY, SIGNZ, SIGNW, NORMALIZED, SCALE8 ) (\ + (PIPE_FORMAT_LAYOUT_MIXED << 0) |\ + ((SWZ) << 2) |\ + ((SIZEX) << 14) |\ + ((SIZEY) << 17) |\ + ((SIZEZ) << 20) |\ + ((SIZEW) << 23) |\ + ((SIGNX) << 26) |\ + ((SIGNY) << 27) |\ + ((SIGNZ) << 28) |\ + ((SIGNW) << 29) |\ + ((NORMALIZED) << 30) |\ + ((SCALE8) << 31) ) + +/** + * Shorthand macro for common format swizzles. + */ +#define _PIPE_FORMAT_R001 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_1 ) +#define _PIPE_FORMAT_RG01 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_1 ) +#define _PIPE_FORMAT_RGB1 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_1 ) +#define _PIPE_FORMAT_RGBA _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_A ) +#define _PIPE_FORMAT_ARGB _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_A, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_B ) +#define _PIPE_FORMAT_ABGR _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_A, PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_R ) +#define _PIPE_FORMAT_BGRA _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_A ) +#define _PIPE_FORMAT_1RGB _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_1, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_B ) +#define _PIPE_FORMAT_1BGR _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_1, PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_R ) +#define _PIPE_FORMAT_BGR1 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_1 ) +#define _PIPE_FORMAT_0000 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) +#define _PIPE_FORMAT_000R _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_R ) +#define _PIPE_FORMAT_RRR1 
_PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_1 ) +#define _PIPE_FORMAT_RRRR _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R ) +#define _PIPE_FORMAT_RRRG _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G ) +#define _PIPE_FORMAT_Z000 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_Z, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) +#define _PIPE_FORMAT_0Z00 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_Z, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) +#define _PIPE_FORMAT_SZ00 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_S, PIPE_FORMAT_COMP_Z, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) +#define _PIPE_FORMAT_ZS00 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_Z, PIPE_FORMAT_COMP_S, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) +#define _PIPE_FORMAT_S000 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_S, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) + +/** + * YCBCR Format Layout. + */ + +/** + * This only contains a flag that indicates whether the format is reversed or + * not. + */ +typedef uint pipe_format_ycbcr_t; + +/** + * Helper macro to encode the above structure into a 32-bit value. + */ +#define _PIPE_FORMAT_YCBCR( REV ) (\ + (PIPE_FORMAT_LAYOUT_YCBCR << 0) |\ + ((REV) << 2) ) + +static INLINE uint pf_rev(pipe_format_ycbcr_t f) +{ + return (f >> 2) & 0x1; +} + + +/** + * Compressed format layouts (this will probably change) + */ +#define _PIPE_FORMAT_DXT( LEVEL, RSIZE, GSIZE, BSIZE, ASIZE ) \ + ((PIPE_FORMAT_LAYOUT_DXT << 0) | \ + ((LEVEL) << 2) | \ + ((RSIZE) << 5) | \ + ((GSIZE) << 8) | \ + ((BSIZE) << 11) | \ + ((ASIZE) << 14) ) + + + +/** + * Texture/surface image formats (preliminary) + */ + +/* KW: Added lots of surface formats to support vertex element layout + * definitions, and eventually render-to-vertex-buffer. 
Could + * consider making float/int/uint/scaled/normalized a separate + * parameter, but on the other hand there are special cases like + * z24s8, compressed textures, ycbcr, etc that won't fit that model. + */ + +enum pipe_format { + PIPE_FORMAT_NONE = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_0000, 0, 0, 0, 0, PIPE_FORMAT_TYPE_UNKNOWN ), + PIPE_FORMAT_A8R8G8B8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_ARGB, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_X8R8G8B8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_1RGB, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_B8G8R8A8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGRA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_B8G8R8X8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGR1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_A1R5G5B5_UNORM = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_ARGB, 1, 5, 5, 5, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_A4R4G4B4_UNORM = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_ARGB, 4, 4, 4, 4, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R5G6B5_UNORM = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_RGB1, 5, 6, 5, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_A2B10G10R10_UNORM = _PIPE_FORMAT_RGBAZS_2 ( _PIPE_FORMAT_ABGR, 1, 5, 5, 5, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_L8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRR1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_UNORM ), /**< ubyte luminance */ + PIPE_FORMAT_A8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_000R, 0, 0, 0, 1, PIPE_FORMAT_TYPE_UNORM ), /**< ubyte alpha */ + PIPE_FORMAT_I8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRR, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), /**< ubyte intensity */ + PIPE_FORMAT_A8L8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRG, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), /**< ubyte alpha, luminance */ + PIPE_FORMAT_L16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRR1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_UNORM ), /**< ushort luminance */ + PIPE_FORMAT_YCBCR = _PIPE_FORMAT_YCBCR( 0 ), + PIPE_FORMAT_YCBCR_REV = _PIPE_FORMAT_YCBCR( 1 ), + 
PIPE_FORMAT_Z16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_Z000, 2, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_Z32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_Z000, 4, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_Z32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_Z000, 4, 0, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), + PIPE_FORMAT_S8Z24_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_SZ00, 1, 3, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_Z24S8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_ZS00, 3, 1, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_X8Z24_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_0Z00, 1, 3, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_Z24X8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_Z000, 3, 1, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_S8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_S000, 1, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), /**< ubyte stencil */ + PIPE_FORMAT_R64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), + PIPE_FORMAT_R64G64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), + PIPE_FORMAT_R64G64B64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_FLOAT ), + PIPE_FORMAT_R64G64B64A64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_FLOAT ), + PIPE_FORMAT_R32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), + PIPE_FORMAT_R32G32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), + PIPE_FORMAT_R32G32B32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_FLOAT ), + PIPE_FORMAT_R32G32B32A32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_FLOAT ), + PIPE_FORMAT_R32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R32G32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + 
PIPE_FORMAT_R32G32B32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R32G32B32A32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R32G32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R32G32B32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R32G32B32A32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R32G32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R32G32B32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R32G32B32A32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R32G32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R32G32B32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R32G32B32A32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 2, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R16G16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 2, 2, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R16G16B16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R16G16B16A16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( 
_PIPE_FORMAT_RGBA, 2, 2, 2, 2, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 2, 0, 0, 0, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R16G16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 2, 2, 0, 0, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R16G16B16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R16G16B16A16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 2, 2, 2, 2, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 2, 0, 0, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R16G16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 2, 2, 0, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R16G16B16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R16G16B16A16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 2, 2, 2, 2, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 2, 0, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R16G16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 2, 2, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R16G16B16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R16G16B16A16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 2, 2, 2, 2, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R8G8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R8G8B8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R8G8B8A8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R8G8B8X8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R8_USCALED 
= _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R8G8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R8G8B8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R8G8B8A8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R8G8B8X8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R8G8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R8G8B8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R8G8B8A8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R8G8B8X8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_B6G5R5_SNORM = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_BGR1, 6, 5, 5, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_A8B8G8R8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGRA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_X8B8G8R8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R8G8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R8G8B8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R8G8B8A8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R8G8B8X8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SSCALED ), + 
PIPE_FORMAT_R32_FIXED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_FIXED ), + PIPE_FORMAT_R32G32_FIXED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_FIXED ), + PIPE_FORMAT_R32G32B32_FIXED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_FIXED ), + PIPE_FORMAT_R32G32B32A32_FIXED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_FIXED ), + /* sRGB formats */ + PIPE_FORMAT_L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRR1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_A8_L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRG, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_R8G8B8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_R8G8B8A8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_R8G8B8X8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), + + /* mixed formats */ + PIPE_FORMAT_X8UB8UG8SR8S_NORM = _PIPE_FORMAT_MIXED( _PIPE_FORMAT_1BGR, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1 ), + PIPE_FORMAT_B6UG5SR5S_NORM = _PIPE_FORMAT_MIXED( _PIPE_FORMAT_BGR1, 6, 5, 5, 0, 0, 1, 1, 0, 1, 0 ), + + /* compressed formats */ + PIPE_FORMAT_DXT1_RGB = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 0 ), + PIPE_FORMAT_DXT1_RGBA = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 8 ), + PIPE_FORMAT_DXT3_RGBA = _PIPE_FORMAT_DXT( 3, 8, 8, 8, 8 ), + PIPE_FORMAT_DXT5_RGBA = _PIPE_FORMAT_DXT( 5, 8, 8, 8, 8 ) +}; + +/** + * Builds pipe format name from format token. + */ +extern const char *pf_name( enum pipe_format format ); + +/** + * Return bits for a particular component. 
+ * \param comp component index, starting at 0 + */ +static INLINE uint pf_get_component_bits( enum pipe_format format, uint comp ) +{ + uint size; + /* The first swizzle slot (checked in x, y, z, w order) that equals comp supplies the stored size; size is 0 when no slot matches. */ + if (pf_swizzle_x(format) == comp) { + size = pf_size_x(format); + } + else if (pf_swizzle_y(format) == comp) { + size = pf_size_y(format); + } + else if (pf_swizzle_z(format) == comp) { + size = pf_size_z(format); + } + else if (pf_swizzle_w(format) == comp) { + size = pf_size_w(format); + } + else { + size = 0; + } /* Scale the stored size: RGBAZS layouts shift it left by pf_exp2(); every other layout shifts it left by 3 times pf_mixed_scale8() (presumably a 0/1 flag, i.e. x1 or x8 -- TODO confirm). */ + if (pf_layout( format ) == PIPE_FORMAT_LAYOUT_RGBAZS) + return size << pf_exp2( format ); + return size << (pf_mixed_scale8( format ) * 3); +} + +/** + * Return total bits needed for the pixel format. + * + * RGBAZS and MIXED layouts: the sum of pf_get_component_bits() over the + * PIPE_FORMAT_COMP_0/1/R/G/B/A/Z/S values. YCBCR layout: 16 (effective + * bits per pixel). Any other layout trips assert(0) and yields 0. + */ +static INLINE uint pf_get_bits( enum pipe_format format ) +{ + switch (pf_layout(format)) { + case PIPE_FORMAT_LAYOUT_RGBAZS: + case PIPE_FORMAT_LAYOUT_MIXED: + return + pf_get_component_bits( format, PIPE_FORMAT_COMP_0 ) + + pf_get_component_bits( format, PIPE_FORMAT_COMP_1 ) + + pf_get_component_bits( format, PIPE_FORMAT_COMP_R ) + + pf_get_component_bits( format, PIPE_FORMAT_COMP_G ) + + pf_get_component_bits( format, PIPE_FORMAT_COMP_B ) + + pf_get_component_bits( format, PIPE_FORMAT_COMP_A ) + + pf_get_component_bits( format, PIPE_FORMAT_COMP_Z ) + + pf_get_component_bits( format, PIPE_FORMAT_COMP_S ); + case PIPE_FORMAT_LAYOUT_YCBCR: + assert( format == PIPE_FORMAT_YCBCR || format == PIPE_FORMAT_YCBCR_REV ); + /* return effective bits per pixel */ + return 16; + default: + assert( 0 ); + return 0; + } +} + +/** + * Return bytes per pixel for the given format. + * + * Asserts that pf_get_bits() is a multiple of 8, then returns it divided by 8. + */ +static INLINE uint pf_get_size( enum pipe_format format ) +{ + assert(pf_get_bits(format) % 8 == 0); + return pf_get_bits(format) / 8; +} + +/** + * Describe accurately the pixel format. + * + * The chars-per-pixel concept falls apart with compressed and yuv images, where + * more than one pixel are coded in a single data block. This structure + * describes that block. + * + * Simple pixel formats are effectively a 1x1xcpp block. 
+ */ +struct pipe_format_block +{ + /** Block size in bytes */ + unsigned size; + + /** Block width in pixels */ + unsigned width; + + /** Block height in pixels */ + unsigned height; +}; + +/** + * Describe pixel format's block. + * + * Fills *block with the size (bytes), width and height (pixels) of one block: + * DXT1 is 8 bytes per 4x4 block, DXT3/DXT5 are 16 bytes per 4x4 block, + * YCBCR/YCBCR_REV are 4 bytes per 2x1 block, and every other format is a + * 1x1 block of pf_get_size(format) bytes. + * + * @sa http://msdn2.microsoft.com/en-us/library/ms796147.aspx + */ +static INLINE void +pf_get_block(enum pipe_format format, struct pipe_format_block *block) +{ + switch(format) { + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT1_RGB: + block->size = 8; + block->width = 4; + block->height = 4; + break; + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + block->size = 16; + block->width = 4; + block->height = 4; + break; + case PIPE_FORMAT_YCBCR: + case PIPE_FORMAT_YCBCR_REV: + block->size = 4; /* 2*cpp */ + block->width = 2; + block->height = 1; + break; + default: + block->size = pf_get_size(format); + block->width = 1; + block->height = 1; + break; + } +} + /** Number of blocks needed to cover x pixels horizontally (x divided by block->width, rounded up). block->width must be nonzero. */ +static INLINE unsigned +pf_get_nblocksx(const struct pipe_format_block *block, unsigned x) +{ + return (x + block->width - 1)/block->width; +} + /** Number of blocks needed to cover y pixels vertically (y divided by block->height, rounded up). block->height must be nonzero. */ +static INLINE unsigned +pf_get_nblocksy(const struct pipe_format_block *block, unsigned y) +{ + return (y + block->height - 1)/block->height; +} + /** Total block count for a width x height area: pf_get_nblocksx() times pf_get_nblocksy(). */ +static INLINE unsigned +pf_get_nblocks(const struct pipe_format_block *block, unsigned width, unsigned height) +{ + return pf_get_nblocksx(block, width)*pf_get_nblocksy(block, height); +} + /** TRUE when pf_layout(format) is PIPE_FORMAT_LAYOUT_DXT, FALSE otherwise. */ +static INLINE boolean +pf_is_compressed( enum pipe_format format ) +{ + return pf_layout(format) == PIPE_FORMAT_LAYOUT_DXT ? TRUE : FALSE; +} + /** TRUE when pf_layout(format) is PIPE_FORMAT_LAYOUT_YCBCR, FALSE otherwise. */ +static INLINE boolean +pf_is_ycbcr( enum pipe_format format ) +{ + return pf_layout(format) == PIPE_FORMAT_LAYOUT_YCBCR ? TRUE : FALSE; +} + /** TRUE for the special-cased A8/A8L8 formats, for RGBAZS/MIXED formats with nonzero alpha component bits, and for the DXT1/3/5 RGBA formats; FALSE for YCBCR and the remaining DXT formats. Unknown layouts trip assert(0) and give FALSE. */ +static INLINE boolean +pf_has_alpha( enum pipe_format format ) +{ + switch (pf_layout(format)) { + case PIPE_FORMAT_LAYOUT_RGBAZS: + case PIPE_FORMAT_LAYOUT_MIXED: + /* FIXME: pf_get_component_bits( PIPE_FORMAT_A8L8_UNORM, PIPE_FORMAT_COMP_A ) should not return 0 right? 
+ * (PIPE_FORMAT_A8_UNORM is declared with the 000R swizzle and the A8L8 formats + * with RRRG, so no swizzle slot equals PIPE_FORMAT_COMP_A and + * pf_get_component_bits() finds nothing to report; hence the explicit + * format checks that follow.) + */ + if(format == PIPE_FORMAT_A8_UNORM || + format == PIPE_FORMAT_A8L8_UNORM || + format == PIPE_FORMAT_A8_L8_SRGB) + return TRUE; + return pf_get_component_bits( format, PIPE_FORMAT_COMP_A ) ? TRUE : FALSE; + case PIPE_FORMAT_LAYOUT_YCBCR: + return FALSE; + case PIPE_FORMAT_LAYOUT_DXT: + switch (format) { + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return TRUE; + default: + return FALSE; + } + default: + assert( 0 ); + return FALSE; + } +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h new file mode 100644 index 0000000000..d70de8e301 --- /dev/null +++ b/src/gallium/include/pipe/p_inlines.h @@ -0,0 +1,216 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef P_INLINES_H +#define P_INLINES_H + +#include "p_context.h" +#include "p_defines.h" +#include "p_screen.h" +#include "p_winsys.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* XXX: these are a kludge. will fix when all surfaces are views into + * textures, and free-floating winsys surfaces go away. + * + * pipe_surface_map(): when surf->texture is set, the request goes to the + * surface_map hook of the screen owning that texture; otherwise the winsys + * buffer is mapped and surf->offset is added to the returned address (a NULL + * mapping is returned as NULL). + */ +static INLINE void * +pipe_surface_map( struct pipe_surface *surf, unsigned flags ) +{ + if (surf->texture) { + struct pipe_screen *screen = surf->texture->screen; + return surf->texture->screen->surface_map( screen, surf, flags ); + } + else { + struct pipe_winsys *winsys = surf->winsys; + char *map = (char *)winsys->buffer_map( winsys, surf->buffer, flags ); + if (map == NULL) + return NULL; + return (void *)(map + surf->offset); + } +} + /** Counterpart of pipe_surface_map(): unmaps through the texture screen when surf->texture is set, otherwise through winsys->buffer_unmap(). */ +static INLINE void +pipe_surface_unmap( struct pipe_surface *surf ) +{ + if (surf->texture) { + struct pipe_screen *screen = surf->texture->screen; + surf->texture->screen->surface_unmap( screen, surf ); + } + else { + struct pipe_winsys *winsys = surf->winsys; + winsys->buffer_unmap( winsys, surf->buffer ); + } +} + + + +/** + * Set 'ptr' to point to 'surf' and update reference counting. + * The old thing pointed to, if any, will be unreferenced first. + * 'surf' may be NULL. + */ +static INLINE void +pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf) +{ + /* bump the refcount first (presumably so that passing the surface already held in *ptr cannot free it -- TODO confirm) */ + if (surf) + surf->refcount++; + + if (*ptr) { + + /* There are currently two sorts of surfaces... This needs to be + * fixed so that all surfaces are views into a texture. 
+ * + * Both release hooks receive ptr itself and are expected to leave + * *ptr NULL, which the assert below checks. + */ + if ((*ptr)->texture) { + struct pipe_screen *screen = (*ptr)->texture->screen; + screen->tex_surface_release( screen, ptr ); + } + else { + struct pipe_winsys *winsys = (*ptr)->winsys; + winsys->surface_release(winsys, ptr); + } + + assert(!*ptr); + } + + *ptr = surf; +} + + +/* XXX: thread safety issues! + * + * Makes *ptr refer to buf: buf (when non-NULL) gains a reference, then the + * buffer previously in *ptr loses one and is destroyed through + * winsys->buffer_destroy() if its count reaches zero. The counts are + * updated with plain ++ and -- operators. + */ +static INLINE void +winsys_buffer_reference(struct pipe_winsys *winsys, + struct pipe_buffer **ptr, + struct pipe_buffer *buf) +{ + if (buf) + buf->refcount++; + + if (*ptr && --(*ptr)->refcount == 0) + winsys->buffer_destroy( winsys, *ptr ); + + *ptr = buf; +} + + + +/** + * Texture version of the reference helper: pt (when non-NULL) gains a + * reference and any texture currently in *ptr is handed to + * screen->texture_release(), which is expected to clear *ptr (asserted). + * + * \sa pipe_surface_reference + */ +static INLINE void +pipe_texture_reference(struct pipe_texture **ptr, + struct pipe_texture *pt) +{ + assert(ptr); + + if (pt) + pt->refcount++; + + if (*ptr) { + struct pipe_screen *screen = (*ptr)->screen; + assert(screen); + screen->texture_release(screen, ptr); + + assert(!*ptr); + } + + *ptr = pt; +} + + /** Hand *ptr to the texture_release hook of its screen and then set *ptr to NULL. (*ptr)->screen is read unconditionally, so *ptr must be non-NULL. */ +static INLINE void +pipe_texture_release(struct pipe_texture **ptr) +{ + struct pipe_screen *screen; + assert(ptr); + screen = (*ptr)->screen; + screen->texture_release(screen, ptr); + *ptr = NULL; +} + + +/** + * Convenience wrappers for winsys buffer functions. 
+ */ + /* Each wrapper below forwards its arguments to the matching hook of screen->winsys, passing screen->winsys as the first argument, and returns whatever that hook returns. */ +static INLINE struct pipe_buffer * +pipe_buffer_create( struct pipe_screen *screen, + unsigned alignment, unsigned usage, unsigned size ) +{ + return screen->winsys->buffer_create(screen->winsys, alignment, usage, size); +} + +static INLINE struct pipe_buffer * +pipe_user_buffer_create( struct pipe_screen *screen, void *ptr, unsigned size ) +{ + return screen->winsys->user_buffer_create(screen->winsys, ptr, size); +} + +static INLINE void +pipe_buffer_destroy( struct pipe_screen *screen, struct pipe_buffer *buf ) +{ + screen->winsys->buffer_destroy(screen->winsys, buf); +} + +static INLINE void * +pipe_buffer_map(struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned usage) +{ + return screen->winsys->buffer_map(screen->winsys, buf, usage); +} + +static INLINE void +pipe_buffer_unmap(struct pipe_screen *screen, + struct pipe_buffer *buf) +{ + screen->winsys->buffer_unmap(screen->winsys, buf); +} + +/* XXX when we're using this everywhere, get rid of + * winsys_buffer_reference() above. + * + * For now this only calls winsys_buffer_reference() with screen->winsys. + */ +static INLINE void +pipe_buffer_reference(struct pipe_screen *screen, + struct pipe_buffer **ptr, + struct pipe_buffer *buf) +{ + winsys_buffer_reference(screen->winsys, ptr, buf); +} + + +#ifdef __cplusplus +} +#endif + +#endif /* P_INLINES_H */ diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h new file mode 100644 index 0000000000..3bedc75294 --- /dev/null +++ b/src/gallium/include/pipe/p_screen.h @@ -0,0 +1,139 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * + * Screen, Adapter or GPU + * + * These are driver functions/facilities that are context independent. + */ + + +#ifndef P_SCREEN_H +#define P_SCREEN_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + + + +#ifdef __cplusplus +extern "C" { +#endif + + + +/** + * Gallium screen/adapter context. Basically everything + * hardware-specific that doesn't actually require a rendering + * context. 
+ */ +struct pipe_screen { + struct pipe_winsys *winsys; + + void (*destroy)( struct pipe_screen * ); + + + const char *(*get_name)( struct pipe_screen * ); + + const char *(*get_vendor)( struct pipe_screen * ); + + /** + * Query an integer-valued capability/parameter/limit + * \param param one of PIPE_CAP_x + */ + int (*get_param)( struct pipe_screen *, int param ); + + /** + * Query a float-valued capability/parameter/limit + * \param param one of PIPE_CAP_x + */ + float (*get_paramf)( struct pipe_screen *, int param ); + + /** + * Check if the given pipe_format is supported as a texture or + * drawing surface. + * \param tex_usage bitmask of PIPE_TEXTURE_USAGE_* + * \param geom_flags bitmask of PIPE_TEXTURE_GEOM_* + */ + boolean (*is_format_supported)( struct pipe_screen *, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ); + + /** + * Create a new texture object, using the given template info. + */ + struct pipe_texture * (*texture_create)(struct pipe_screen *, + const struct pipe_texture *templat); + + /** + * Create a new texture object, using the given template info, but on top of + * existing memory. + * + * It is assumed that the buffer data is laid out as expected + * by the hardware. NULL will be returned if any inconsistency is found. 
+ */ + struct pipe_texture * (*texture_blanket)(struct pipe_screen *, + const struct pipe_texture *templat, + const unsigned *pitch, + struct pipe_buffer *buffer); + /** Release the texture in *pt. The callers in p_inlines.h (pipe_texture_reference) assert that *pt is NULL after this hook returns. */ + void (*texture_release)(struct pipe_screen *, + struct pipe_texture **pt); + + /** Get a surface which is a "view" into a texture */ + struct pipe_surface *(*get_tex_surface)(struct pipe_screen *, + struct pipe_texture *texture, + unsigned face, unsigned level, + unsigned zslice, + unsigned usage ); + + /* Surfaces allocated by the above must be released here. + * pipe_surface_reference() in p_inlines.h asserts that the surface + * pointer is NULL after this hook returns. + */ + void (*tex_surface_release)( struct pipe_screen *, + struct pipe_surface ** ); + + /** Map and unmap hooks; pipe_surface_map() and pipe_surface_unmap() in p_inlines.h call these for surfaces that have a texture. */ + void *(*surface_map)( struct pipe_screen *, + struct pipe_surface *surface, + unsigned flags ); + + void (*surface_unmap)( struct pipe_screen *, + struct pipe_surface *surface ); + +}; + + +#ifdef __cplusplus +} +#endif + +#endif /* P_SCREEN_H */ diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h new file mode 100644 index 0000000000..78c20de3e2 --- /dev/null +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -0,0 +1,809 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_TOKEN_H +#define TGSI_TOKEN_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "p_compiler.h" + +struct tgsi_version +{ + unsigned MajorVersion : 8; + unsigned MinorVersion : 8; + unsigned Padding : 16; +}; + +struct tgsi_header +{ + unsigned HeaderSize : 8; + unsigned BodySize : 24; +}; + +#define TGSI_PROCESSOR_FRAGMENT 0 +#define TGSI_PROCESSOR_VERTEX 1 +#define TGSI_PROCESSOR_GEOMETRY 2 + +struct tgsi_processor +{ + unsigned Processor : 4; /* TGSI_PROCESSOR_ */ + unsigned Padding : 28; +}; + +#define TGSI_TOKEN_TYPE_DECLARATION 0 +#define TGSI_TOKEN_TYPE_IMMEDIATE 1 +#define TGSI_TOKEN_TYPE_INSTRUCTION 2 + +struct tgsi_token +{ + unsigned Type : 4; /* TGSI_TOKEN_TYPE_ */ + unsigned Size : 8; /* UINT */ + unsigned Padding : 19; + unsigned Extended : 1; /* BOOL */ +}; + +enum tgsi_file_type { + TGSI_FILE_NULL =0, + TGSI_FILE_CONSTANT =1, + TGSI_FILE_INPUT =2, + TGSI_FILE_OUTPUT =3, + TGSI_FILE_TEMPORARY =4, + TGSI_FILE_SAMPLER =5, + TGSI_FILE_ADDRESS =6, + TGSI_FILE_IMMEDIATE =7, + TGSI_FILE_COUNT /**< how many TGSI_FILE_ types */ +}; + + +#define TGSI_WRITEMASK_NONE 0x00 +#define TGSI_WRITEMASK_X 0x01 +#define TGSI_WRITEMASK_Y 0x02 +#define TGSI_WRITEMASK_XY 0x03 +#define TGSI_WRITEMASK_Z 0x04 +#define TGSI_WRITEMASK_XZ 0x05 +#define TGSI_WRITEMASK_YZ 0x06 +#define TGSI_WRITEMASK_XYZ 0x07 +#define TGSI_WRITEMASK_W 0x08 +#define TGSI_WRITEMASK_XW 0x09 +#define 
TGSI_WRITEMASK_YW 0x0A +#define TGSI_WRITEMASK_XYW 0x0B +#define TGSI_WRITEMASK_ZW 0x0C +#define TGSI_WRITEMASK_XZW 0x0D +#define TGSI_WRITEMASK_YZW 0x0E +#define TGSI_WRITEMASK_XYZW 0x0F + +#define TGSI_INTERPOLATE_CONSTANT 0 +#define TGSI_INTERPOLATE_LINEAR 1 +#define TGSI_INTERPOLATE_PERSPECTIVE 2 +#define TGSI_INTERPOLATE_COUNT 3 + +struct tgsi_declaration +{ + unsigned Type : 4; /* TGSI_TOKEN_TYPE_DECLARATION */ + unsigned Size : 8; /* UINT */ + unsigned File : 4; /* one of TGSI_FILE_x */ + unsigned UsageMask : 4; /* bitmask of TGSI_WRITEMASK_x flags */ + unsigned Interpolate : 4; /* TGSI_INTERPOLATE_ */ + unsigned Semantic : 1; /* BOOL, any semantic info? */ + unsigned Padding : 6; + unsigned Extended : 1; /* BOOL */ +}; + +struct tgsi_declaration_range +{ + unsigned First : 16; /* UINT */ + unsigned Last : 16; /* UINT */ +}; + +#define TGSI_SEMANTIC_POSITION 0 +#define TGSI_SEMANTIC_COLOR 1 +#define TGSI_SEMANTIC_BCOLOR 2 /**< back-face color */ +#define TGSI_SEMANTIC_FOG 3 +#define TGSI_SEMANTIC_PSIZE 4 +#define TGSI_SEMANTIC_GENERIC 5 +#define TGSI_SEMANTIC_NORMAL 6 +#define TGSI_SEMANTIC_COUNT 7 /**< number of semantic values */ + +struct tgsi_declaration_semantic +{ + unsigned SemanticName : 8; /* one of TGSI_SEMANTIC_ */ + unsigned SemanticIndex : 16; /* UINT */ + unsigned Padding : 8; +}; + +#define TGSI_IMM_FLOAT32 0 + +struct tgsi_immediate +{ + unsigned Type : 4; /* TGSI_TOKEN_TYPE_IMMEDIATE */ + unsigned Size : 8; /* UINT */ + unsigned DataType : 4; /* TGSI_IMM_ */ + unsigned Padding : 15; + unsigned Extended : 1; /* BOOL */ +}; + +struct tgsi_immediate_float32 +{ + float Float; +}; + +/* + * GL_NV_vertex_program + */ +#define TGSI_OPCODE_ARL 0 +#define TGSI_OPCODE_MOV 1 +#define TGSI_OPCODE_LIT 2 +#define TGSI_OPCODE_RCP 3 +#define TGSI_OPCODE_RSQ 4 +#define TGSI_OPCODE_EXP 5 +#define TGSI_OPCODE_LOG 6 +#define TGSI_OPCODE_MUL 7 +#define TGSI_OPCODE_ADD 8 +#define TGSI_OPCODE_DP3 9 +#define TGSI_OPCODE_DP4 10 +#define TGSI_OPCODE_DST 11 +#define 
TGSI_OPCODE_MIN 12 +#define TGSI_OPCODE_MAX 13 +#define TGSI_OPCODE_SLT 14 +#define TGSI_OPCODE_SGE 15 +#define TGSI_OPCODE_MAD 16 + +/* + * GL_ATI_fragment_shader + */ +#define TGSI_OPCODE_SUB 17 +#define TGSI_OPCODE_DOT3 TGSI_OPCODE_DP3 +#define TGSI_OPCODE_DOT4 TGSI_OPCODE_DP4 +#define TGSI_OPCODE_LERP 18 +#define TGSI_OPCODE_CND 19 +#define TGSI_OPCODE_CND0 20 +#define TGSI_OPCODE_DOT2ADD 21 + +/* + * GL_EXT_vertex_shader + */ +#define TGSI_OPCODE_INDEX 22 +#define TGSI_OPCODE_NEGATE 23 +#define TGSI_OPCODE_MADD TGSI_OPCODE_MAD +#define TGSI_OPCODE_FRAC 24 +#define TGSI_OPCODE_SETGE TGSI_OPCODE_SGE +#define TGSI_OPCODE_SETLT TGSI_OPCODE_SLT +#define TGSI_OPCODE_CLAMP 25 +#define TGSI_OPCODE_FLOOR 26 +#define TGSI_OPCODE_ROUND 27 +#define TGSI_OPCODE_EXPBASE2 28 +#define TGSI_OPCODE_LOGBASE2 29 +#define TGSI_OPCODE_POWER 30 +#define TGSI_OPCODE_RECIP TGSI_OPCODE_RCP +#define TGSI_OPCODE_RECIPSQRT TGSI_OPCODE_RSQ +#define TGSI_OPCODE_CROSSPRODUCT 31 +#define TGSI_OPCODE_MULTIPLYMATRIX 32 + +/* + * GL_NV_vertex_program1_1 + */ +#define TGSI_OPCODE_ABS 33 +#define TGSI_OPCODE_RCC 34 +#define TGSI_OPCODE_DPH 35 + +/* + * GL_NV_fragment_program + */ +#define TGSI_OPCODE_COS 36 +#define TGSI_OPCODE_DDX 37 +#define TGSI_OPCODE_DDY 38 +#define TGSI_OPCODE_EX2 TGSI_OPCODE_EXPBASE2 +#define TGSI_OPCODE_FLR TGSI_OPCODE_FLOOR +#define TGSI_OPCODE_FRC TGSI_OPCODE_FRAC +#define TGSI_OPCODE_KILP 39 /* predicated kill */ +#define TGSI_OPCODE_LG2 TGSI_OPCODE_LOGBASE2 +#define TGSI_OPCODE_LRP TGSI_OPCODE_LERP +#define TGSI_OPCODE_PK2H 40 +#define TGSI_OPCODE_PK2US 41 +#define TGSI_OPCODE_PK4B 42 +#define TGSI_OPCODE_PK4UB 43 +#define TGSI_OPCODE_POW TGSI_OPCODE_POWER +#define TGSI_OPCODE_RFL 44 +#define TGSI_OPCODE_SEQ 45 +#define TGSI_OPCODE_SFL 46 +#define TGSI_OPCODE_SGT 47 +#define TGSI_OPCODE_SIN 48 +#define TGSI_OPCODE_SLE 49 +#define TGSI_OPCODE_SNE 50 +#define TGSI_OPCODE_STR 51 +#define TGSI_OPCODE_TEX 52 +#define TGSI_OPCODE_TXD 53 +#define TGSI_OPCODE_TXP 54 +#define 
TGSI_OPCODE_UP2H 55 +#define TGSI_OPCODE_UP2US 56 +#define TGSI_OPCODE_UP4B 57 +#define TGSI_OPCODE_UP4UB 58 +#define TGSI_OPCODE_X2D 59 + +/* + * GL_NV_vertex_program2 + */ +#define TGSI_OPCODE_ARA 60 +#define TGSI_OPCODE_ARR 61 +#define TGSI_OPCODE_BRA 62 +#define TGSI_OPCODE_CAL 63 +#define TGSI_OPCODE_RET 64 +#define TGSI_OPCODE_SSG 65 + +/* + * GL_ARB_vertex_program + */ +#define TGSI_OPCODE_SWZ 118 +#define TGSI_OPCODE_XPD TGSI_OPCODE_CROSSPRODUCT + +/* + * GL_ARB_fragment_program + */ +#define TGSI_OPCODE_CMP 66 +#define TGSI_OPCODE_KIL 116 /* conditional kill */ +#define TGSI_OPCODE_SCS 67 +#define TGSI_OPCODE_TXB 68 + +/* + * GL_NV_fragment_program_option + */ +/* No new opcode */ + +/* + * GL_NV_fragment_program2 + */ +#define TGSI_OPCODE_NRM 69 +#define TGSI_OPCODE_DIV 70 +#define TGSI_OPCODE_DP2 71 +#define TGSI_OPCODE_DP2A TGSI_OPCODE_DOT2ADD +#define TGSI_OPCODE_TXL 72 +#define TGSI_OPCODE_BRK 73 +#define TGSI_OPCODE_IF 74 +#define TGSI_OPCODE_LOOP 75 +#define TGSI_OPCODE_REP 76 +#define TGSI_OPCODE_ELSE 77 +#define TGSI_OPCODE_ENDIF 78 +#define TGSI_OPCODE_ENDLOOP 79 +#define TGSI_OPCODE_ENDREP 80 + +/* + * GL_NV_vertex_program2_option + */ + +/* + * GL_NV_vertex_program3 + */ +#define TGSI_OPCODE_PUSHA 81 +#define TGSI_OPCODE_POPA 82 + +/* + * GL_NV_gpu_program4 + */ +#define TGSI_OPCODE_CEIL 83 +#define TGSI_OPCODE_I2F 84 +#define TGSI_OPCODE_NOT 85 +#define TGSI_OPCODE_TRUNC 86 +#define TGSI_OPCODE_SHL 87 +#define TGSI_OPCODE_SHR 88 +#define TGSI_OPCODE_AND 89 +#define TGSI_OPCODE_OR 90 +#define TGSI_OPCODE_MOD 91 +#define TGSI_OPCODE_XOR 92 +#define TGSI_OPCODE_SAD 93 +#define TGSI_OPCODE_TXF 94 +#define TGSI_OPCODE_TXQ 95 +#define TGSI_OPCODE_CONT 96 + +/* + * GL_NV_vertex_program4 + */ +/* Same as GL_NV_gpu_program4 */ + +/* + * GL_NV_fragment_program4 + */ +/* Same as GL_NV_gpu_program4 */ + +/* + * GL_NV_geometry_program4 + */ +/* Same as GL_NV_gpu_program4 */ +#define TGSI_OPCODE_EMIT 97 +#define TGSI_OPCODE_ENDPRIM 98 + +/* + * GLSL + */ 
+#define TGSI_OPCODE_BGNLOOP2 99 +#define TGSI_OPCODE_BGNSUB 100 +#define TGSI_OPCODE_ENDLOOP2 101 +#define TGSI_OPCODE_ENDSUB 102 +#define TGSI_OPCODE_INT TGSI_OPCODE_TRUNC +#define TGSI_OPCODE_NOISE1 103 +#define TGSI_OPCODE_NOISE2 104 +#define TGSI_OPCODE_NOISE3 105 +#define TGSI_OPCODE_NOISE4 106 +#define TGSI_OPCODE_NOP 107 + +/* + * ps_1_1 + */ +#define TGSI_OPCODE_TEXKILL TGSI_OPCODE_KIL + +/* + * ps_1_2 + */ +/* CMP - use TGSI_OPCODE_CND0 */ + +/* + * ps_1_3 + */ +/* CMP - use TGSI_OPCODE_CND0 */ + +/* + * ps_1_4 + */ +#define TGSI_OPCODE_TEXLD TGSI_OPCODE_TEX + +/* + * ps_2_0 + */ +#define TGSI_OPCODE_M4X4 TGSI_OPCODE_MULTIPLYMATRIX +#define TGSI_OPCODE_M4X3 108 +#define TGSI_OPCODE_M3X4 109 +#define TGSI_OPCODE_M3X3 110 +#define TGSI_OPCODE_M3X2 111 +#define TGSI_OPCODE_CRS TGSI_OPCODE_XPD +#define TGSI_OPCODE_NRM4 112 +#define TGSI_OPCODE_SINCOS TGSI_OPCODE_SCS +#define TGSI_OPCODE_TEXLDB TGSI_OPCODE_TXB +#define TGSI_OPCODE_DP2ADD TGSI_OPCODE_DP2A + +/* + * ps_2_x + */ +#define TGSI_OPCODE_CALL TGSI_OPCODE_CAL +#define TGSI_OPCODE_CALLNZ 113 +#define TGSI_OPCODE_IFC 114 +#define TGSI_OPCODE_BREAK TGSI_OPCODE_BRK +#define TGSI_OPCODE_BREAKC 115 +#define TGSI_OPCODE_DSX TGSI_OPCODE_DDX +#define TGSI_OPCODE_DSY TGSI_OPCODE_DDY +#define TGSI_OPCODE_TEXLDD TGSI_OPCODE_TXD + +/* + * vs_1_1 + */ +#define TGSI_OPCODE_EXPP TGSI_OPCODE_EXP +#define TGSI_OPCODE_LOGP TGSI_OPCODE_LG2 + +/* + * vs_2_0 + */ +#define TGSI_OPCODE_SGN TGSI_OPCODE_SSG +#define TGSI_OPCODE_MOVA TGSI_OPCODE_ARR +/* EXPP - use TGSI_OPCODE_EX2 */ + +/* + * vs_2_x + */ + +#define TGSI_OPCODE_END 117 /* aka HALT */ + +#define TGSI_OPCODE_LAST 119 + +#define TGSI_SAT_NONE 0 /* do not saturate */ +#define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */ +#define TGSI_SAT_MINUS_PLUS_ONE 2 /* clamp to [-1,1] */ + +/** + * Opcode is the operation code to execute. 
A given operation defines the + * semantics how the source registers (if any) are interpreted and what is + * written to the destination registers (if any) as a result of execution. + * + * NumDstRegs and NumSrcRegs is the number of destination and source registers, + * respectively. For a given operation code, those numbers are fixed and are + * present here only for convenience. + * + * If Extended is TRUE, it is now executed. + * + * Saturate controls how are final results in destination registers modified. + */ + +struct tgsi_instruction +{ + unsigned Type : 4; /* TGSI_TOKEN_TYPE_INSTRUCTION */ + unsigned Size : 8; /* UINT */ + unsigned Opcode : 8; /* TGSI_OPCODE_ */ + unsigned Saturate : 2; /* TGSI_SAT_ */ + unsigned NumDstRegs : 2; /* UINT */ + unsigned NumSrcRegs : 4; /* UINT */ + unsigned Padding : 3; + unsigned Extended : 1; /* BOOL */ +}; + +/* + * If tgsi_instruction::Extended is TRUE, tgsi_instruction_ext follows. + * + * Then, tgsi_instruction::NumDstRegs of tgsi_dst_register follow. + * + * Then, tgsi_instruction::NumSrcRegs of tgsi_src_register follow. + * + * tgsi_instruction::Size contains the total number of words that make the + * instruction, including the instruction word. + */ + +#define TGSI_INSTRUCTION_EXT_TYPE_NV 0 +#define TGSI_INSTRUCTION_EXT_TYPE_LABEL 1 +#define TGSI_INSTRUCTION_EXT_TYPE_TEXTURE 2 +#define TGSI_INSTRUCTION_EXT_TYPE_PREDICATE 3 + +struct tgsi_instruction_ext +{ + unsigned Type : 4; /* TGSI_INSTRUCTION_EXT_TYPE_ */ + unsigned Padding : 27; + unsigned Extended : 1; /* BOOL */ +}; + +/* + * If tgsi_instruction_ext::Type is TGSI_INSTRUCTION_EXT_TYPE_NV, it should + * be cast to tgsi_instruction_ext_nv. + * + * If tgsi_instruction_ext::Type is TGSI_INSTRUCTION_EXT_TYPE_LABEL, it + * should be cast to tgsi_instruction_ext_label. + * + * If tgsi_instruction_ext::Type is TGSI_INSTRUCTION_EXT_TYPE_TEXTURE, it + * should be cast to tgsi_instruction_ext_texture. 
+ * + * If tgsi_instruction_ext::Type is TGSI_INSTRUCTION_EXT_TYPE_PREDICATE, it + * should be cast to tgsi_instruction_ext_predicate. + * + * If tgsi_instruction_ext::Extended is TRUE, another tgsi_instruction_ext + * follows. + */ + +#define TGSI_PRECISION_DEFAULT 0 +#define TGSI_PRECISION_FLOAT32 1 +#define TGSI_PRECISION_FLOAT16 2 +#define TGSI_PRECISION_FIXED12 3 + +#define TGSI_CC_GT 0 +#define TGSI_CC_EQ 1 +#define TGSI_CC_LT 2 +#define TGSI_CC_GE 3 +#define TGSI_CC_LE 4 +#define TGSI_CC_NE 5 +#define TGSI_CC_TR 6 +#define TGSI_CC_FL 7 + +#define TGSI_SWIZZLE_X 0 +#define TGSI_SWIZZLE_Y 1 +#define TGSI_SWIZZLE_Z 2 +#define TGSI_SWIZZLE_W 3 + +/** + * Precision controls the precision at which the operation should be executed. + * + * CondDstUpdate enables condition code register writes. When this field is + * TRUE, CondDstIndex specifies the index of the condition code register to + * update. + * + * CondFlowEnable enables conditional execution of the operation. When this + * field is TRUE, CondFlowIndex specifies the index of the condition code + * register to test against CondMask with component swizzle controlled by + * CondSwizzleX, CondSwizzleY, CondSwizzleZ and CondSwizzleW. If the test fails, + * the operation is not executed. 
+ */ + +struct tgsi_instruction_ext_nv +{ + unsigned Type : 4; /* TGSI_INSTRUCTION_EXT_TYPE_NV */ + unsigned Precision : 4; /* TGSI_PRECISION_ */ + unsigned CondDstIndex : 4; /* UINT */ + unsigned CondFlowIndex : 4; /* UINT */ + unsigned CondMask : 4; /* TGSI_CC_ */ + unsigned CondSwizzleX : 2; /* TGSI_SWIZZLE_ */ + unsigned CondSwizzleY : 2; /* TGSI_SWIZZLE_ */ + unsigned CondSwizzleZ : 2; /* TGSI_SWIZZLE_ */ + unsigned CondSwizzleW : 2; /* TGSI_SWIZZLE_ */ + unsigned CondDstUpdate : 1; /* BOOL */ + unsigned CondFlowEnable : 1; /* BOOL */ + unsigned Padding : 1; + unsigned Extended : 1; /* BOOL */ +}; + +struct tgsi_instruction_ext_label +{ + unsigned Type : 4; /* TGSI_INSTRUCTION_EXT_TYPE_LABEL */ + unsigned Label : 24; /* UINT */ + unsigned Padding : 3; + unsigned Extended : 1; /* BOOL */ +}; + +#define TGSI_TEXTURE_UNKNOWN 0 +#define TGSI_TEXTURE_1D 1 +#define TGSI_TEXTURE_2D 2 +#define TGSI_TEXTURE_3D 3 +#define TGSI_TEXTURE_CUBE 4 +#define TGSI_TEXTURE_RECT 5 +#define TGSI_TEXTURE_SHADOW1D 6 +#define TGSI_TEXTURE_SHADOW2D 7 +#define TGSI_TEXTURE_SHADOWRECT 8 +#define TGSI_TEXTURE_COUNT 9 + +struct tgsi_instruction_ext_texture +{ + unsigned Type : 4; /* TGSI_INSTRUCTION_EXT_TYPE_TEXTURE */ + unsigned Texture : 8; /* TGSI_TEXTURE_ */ + unsigned Padding : 19; + unsigned Extended : 1; /* BOOL */ +}; + +struct tgsi_instruction_ext_predicate +{ + unsigned Type : 4; /* TGSI_INSTRUCTION_EXT_TYPE_PREDICATE */ + unsigned PredDstIndex : 4; /* UINT */ + unsigned PredWriteMask : 4; /* TGSI_WRITEMASK_ */ + unsigned Padding : 19; + unsigned Extended : 1; /* BOOL */ +}; + +/** + * File specifies the register array to access. + * + * Index specifies the element number of a register in the register file. + * + * If Indirect is TRUE, Index should be offset by the X component of a source + * register that follows. The register can be now fetched into local storage + * for further processing. + * + * If Negate is TRUE, all components of the fetched register are negated. 
+ * + * The fetched register components are swizzled according to SwizzleX, SwizzleY, + * SwizzleZ and SwizzleW. + * + * If Extended is TRUE, any further modifications to the source register are + * made to this temporary storage. + */ + +struct tgsi_src_register +{ + unsigned File : 4; /* TGSI_FILE_ */ + unsigned SwizzleX : 2; /* TGSI_SWIZZLE_ */ + unsigned SwizzleY : 2; /* TGSI_SWIZZLE_ */ + unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */ + unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */ + unsigned Negate : 1; /* BOOL */ + unsigned Indirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ + int Index : 16; /* SINT */ + unsigned Extended : 1; /* BOOL */ +}; + +/** + * If tgsi_src_register::Extended is TRUE, tgsi_src_register_ext follows. + * + * Then, if tgsi_src_register::Indirect is TRUE, another tgsi_src_register + * follows. + * + * Then, if tgsi_src_register::Dimension is TRUE, tgsi_dimension follows. + */ + +#define TGSI_SRC_REGISTER_EXT_TYPE_SWZ 0 +#define TGSI_SRC_REGISTER_EXT_TYPE_MOD 1 + +struct tgsi_src_register_ext +{ + unsigned Type : 4; /* TGSI_SRC_REGISTER_EXT_TYPE_ */ + unsigned Padding : 27; + unsigned Extended : 1; /* BOOL */ +}; + +/** + * If tgsi_src_register_ext::Type is TGSI_SRC_REGISTER_EXT_TYPE_SWZ, + * it should be cast to tgsi_src_register_ext_swz. + * + * If tgsi_src_register_ext::Type is TGSI_SRC_REGISTER_EXT_TYPE_MOD, + * it should be cast to tgsi_src_register_ext_mod. + * + * If tgsi_src_register_ext::Extended is TRUE, another tgsi_src_register_ext + * follows. + */ + +#define TGSI_EXTSWIZZLE_X TGSI_SWIZZLE_X +#define TGSI_EXTSWIZZLE_Y TGSI_SWIZZLE_Y +#define TGSI_EXTSWIZZLE_Z TGSI_SWIZZLE_Z +#define TGSI_EXTSWIZZLE_W TGSI_SWIZZLE_W +#define TGSI_EXTSWIZZLE_ZERO 4 +#define TGSI_EXTSWIZZLE_ONE 5 + +/** + * ExtSwizzleX, ExtSwizzleY, ExtSwizzleZ and ExtSwizzleW swizzle the source + * register in an extended manner. + * + * NegateX, NegateY, NegateZ and NegateW negate individual components of the + * source register. 
+ * + * NOTE: To simplify matter, if this token is present, the corresponding Swizzle + * and Negate fields in tgsi_src_register should be set to X,Y,Z,W + * and FALSE, respectively. + */ + +struct tgsi_src_register_ext_swz +{ + unsigned Type : 4; /* TGSI_SRC_REGISTER_EXT_TYPE_SWZ */ + unsigned ExtSwizzleX : 4; /* TGSI_EXTSWIZZLE_ */ + unsigned ExtSwizzleY : 4; /* TGSI_EXTSWIZZLE_ */ + unsigned ExtSwizzleZ : 4; /* TGSI_EXTSWIZZLE_ */ + unsigned ExtSwizzleW : 4; /* TGSI_EXTSWIZZLE_ */ + unsigned NegateX : 1; /* BOOL */ + unsigned NegateY : 1; /* BOOL */ + unsigned NegateZ : 1; /* BOOL */ + unsigned NegateW : 1; /* BOOL */ + unsigned Padding : 7; + unsigned Extended : 1; /* BOOL */ +}; + +/** + * Extra src register modifiers + * + * If Complement is TRUE, the source register is modified by subtracting it + * from 1.0. + * + * If Bias is TRUE, the source register is modified by subtracting 0.5 from it. + * + * If Scale2X is TRUE, the source register is modified by multiplying it by 2.0. + * + * If Absolute is TRUE, the source register is modified by removing the sign. + * + * If Negate is TRUE, the source register is modified by negating it. 
+ */ + +struct tgsi_src_register_ext_mod +{ + unsigned Type : 4; /* TGSI_SRC_REGISTER_EXT_TYPE_MOD */ + unsigned Complement : 1; /* BOOL */ + unsigned Bias : 1; /* BOOL */ + unsigned Scale2X : 1; /* BOOL */ + unsigned Absolute : 1; /* BOOL */ + unsigned Negate : 1; /* BOOL */ + unsigned Padding : 22; + unsigned Extended : 1; /* BOOL */ +}; + +struct tgsi_dimension +{ + unsigned Indirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ + unsigned Padding : 13; + int Index : 16; /* SINT */ + unsigned Extended : 1; /* BOOL */ +}; + +struct tgsi_dst_register +{ + unsigned File : 4; /* TGSI_FILE_ */ + unsigned WriteMask : 4; /* TGSI_WRITEMASK_ */ + unsigned Indirect : 1; /* BOOL */ + unsigned Dimension : 1; /* BOOL */ + int Index : 16; /* SINT */ + unsigned Padding : 5; + unsigned Extended : 1; /* BOOL */ +}; + +/* + * If tgsi_dst_register::Extended is TRUE, tgsi_dst_register_ext follows. + * + * Then, if tgsi_dst_register::Indirect is TRUE, tgsi_src_register follows. + */ + +#define TGSI_DST_REGISTER_EXT_TYPE_CONDCODE 0 +#define TGSI_DST_REGISTER_EXT_TYPE_MODULATE 1 +#define TGSI_DST_REGISTER_EXT_TYPE_PREDICATE 2 + +struct tgsi_dst_register_ext +{ + unsigned Type : 4; /* TGSI_DST_REGISTER_EXT_TYPE_ */ + unsigned Padding : 27; + unsigned Extended : 1; /* BOOL */ +}; + +/** + * Extra destination register modifiers + * + * If tgsi_dst_register_ext::Type is TGSI_DST_REGISTER_EXT_TYPE_CONDCODE, + * it should be cast to tgsi_dst_register_ext_concode. + * + * If tgsi_dst_register_ext::Type is TGSI_DST_REGISTER_EXT_TYPE_MODULATE, + * it should be cast to tgsi_dst_register_ext_modulate. + * + * If tgsi_dst_register_ext::Type is TGSI_DST_REGISTER_EXT_TYPE_PREDICATE, + * it should be cast to tgsi_dst_register_ext_predicate. + * + * If tgsi_dst_register_ext::Extended is TRUE, another tgsi_dst_register_ext + * follows. 
+ */ +struct tgsi_dst_register_ext_concode +{ + unsigned Type : 4; /* TGSI_DST_REGISTER_EXT_TYPE_CONDCODE */ + unsigned CondMask : 4; /* TGSI_CC_ */ + unsigned CondSwizzleX : 2; /* TGSI_SWIZZLE_ */ + unsigned CondSwizzleY : 2; /* TGSI_SWIZZLE_ */ + unsigned CondSwizzleZ : 2; /* TGSI_SWIZZLE_ */ + unsigned CondSwizzleW : 2; /* TGSI_SWIZZLE_ */ + unsigned CondSrcIndex : 4; /* UINT */ + unsigned Padding : 11; + unsigned Extended : 1; /* BOOL */ +}; + +#define TGSI_MODULATE_1X 0 +#define TGSI_MODULATE_2X 1 +#define TGSI_MODULATE_4X 2 +#define TGSI_MODULATE_8X 3 +#define TGSI_MODULATE_HALF 4 +#define TGSI_MODULATE_QUARTER 5 +#define TGSI_MODULATE_EIGHTH 6 +#define TGSI_MODULATE_COUNT 7 + +struct tgsi_dst_register_ext_modulate +{ + unsigned Type : 4; /* TGSI_DST_REGISTER_EXT_TYPE_MODULATE */ + unsigned Modulate : 4; /* TGSI_MODULATE_ */ + unsigned Padding : 23; + unsigned Extended : 1; /* BOOL */ +}; + +/* + * Currently, the following constraints apply. + * + * - PredSwizzleXYZW is either set to identity or replicate. + * - PredSrcIndex is 0. + */ + +struct tgsi_dst_register_ext_predicate +{ + unsigned Type : 4; /* TGSI_DST_REGISTER_EXT_TYPE_PREDICATE */ + unsigned PredSwizzleX : 2; /* TGSI_SWIZZLE_ */ + unsigned PredSwizzleY : 2; /* TGSI_SWIZZLE_ */ + unsigned PredSwizzleZ : 2; /* TGSI_SWIZZLE_ */ + unsigned PredSwizzleW : 2; /* TGSI_SWIZZLE_ */ + unsigned PredSrcIndex : 4; /* UINT */ + unsigned Negate : 1; /* BOOL */ + unsigned Padding : 14; + unsigned Extended : 1; /* BOOL */ +}; + + +#ifdef __cplusplus +} +#endif + +#endif /* TGSI_TOKEN_H */ + diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h new file mode 100644 index 0000000000..342f17260a --- /dev/null +++ b/src/gallium/include/pipe/p_state.h @@ -0,0 +1,368 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * + * Abstract graphics pipe state objects. + * + * Basic notes: + * 1. Want compact representations, so we use bitfields. + * 2. Put bitfields before other (GLfloat) fields. 
+ */ + + +#ifndef PIPE_STATE_H +#define PIPE_STATE_H + +#include "p_compiler.h" +#include "p_defines.h" +#include "p_format.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Implementation limits + */ +#define PIPE_MAX_ATTRIBS 32 +#define PIPE_MAX_CLIP_PLANES 6 +#define PIPE_MAX_COLOR_BUFS 8 +#define PIPE_MAX_CONSTANT 32 +#define PIPE_MAX_SAMPLERS 16 +#define PIPE_MAX_SHADER_INPUTS 16 +#define PIPE_MAX_SHADER_OUTPUTS 16 +#define PIPE_MAX_TEXTURE_LEVELS 16 + + +/* fwd decls */ +struct pipe_screen; +struct pipe_surface; +struct pipe_winsys; + + + +/** + * The driver will certainly subclass this to include actual memory + * management information. + */ +struct pipe_buffer +{ + unsigned alignment; + unsigned usage; + unsigned size; + + /** Reference count */ + unsigned refcount; +}; + + +/** + * Primitive (point/line/tri) rasterization info + */ +struct pipe_rasterizer_state +{ + unsigned flatshade:1; + unsigned light_twoside:1; + unsigned front_winding:2; /**< PIPE_WINDING_x */ + unsigned cull_mode:2; /**< PIPE_WINDING_x */ + unsigned fill_cw:2; /**< PIPE_POLYGON_MODE_x */ + unsigned fill_ccw:2; /**< PIPE_POLYGON_MODE_x */ + unsigned offset_cw:1; + unsigned offset_ccw:1; + unsigned scissor:1; + unsigned poly_smooth:1; + unsigned poly_stipple_enable:1; + unsigned point_smooth:1; + unsigned point_sprite:1; + unsigned point_size_per_vertex:1; /**< size computed in vertex shader */ + unsigned multisample:1; /* XXX maybe more ms state in future */ + unsigned line_smooth:1; + unsigned line_stipple_enable:1; + unsigned line_stipple_factor:8; /**< [1..256] actually */ + unsigned line_stipple_pattern:16; + unsigned line_last_pixel:1; + unsigned bypass_clipping:1; + unsigned bypass_vs:1; /**< Skip the vertex shader. Note that the shader is + still needed though, to indicate inputs/outputs */ + unsigned origin_lower_left:1; /**< Is (0,0) the lower-left corner? 
*/ + unsigned flatshade_first:1; /**< take color attribute from the first vertex of a primitive */ + unsigned gl_rasterization_rules:1; /**< enable tweaks for GL rasterization? */ + + float line_width; + float point_size; /**< used when no per-vertex size */ + float point_size_min; /* XXX - temporary, will go away */ + float point_size_max; /* XXX - temporary, will go away */ + float offset_units; + float offset_scale; + ubyte sprite_coord_mode[PIPE_MAX_SHADER_OUTPUTS]; /**< PIPE_SPRITE_COORD_ */ +}; + + +struct pipe_poly_stipple +{ + unsigned stipple[32]; +}; + + +struct pipe_viewport_state +{ + float scale[4]; + float translate[4]; +}; + + +struct pipe_scissor_state +{ + unsigned minx:16; + unsigned miny:16; + unsigned maxx:16; + unsigned maxy:16; +}; + + +struct pipe_clip_state +{ + float ucp[PIPE_MAX_CLIP_PLANES][4]; + unsigned nr; +}; + + +/** + * Constants for vertex/fragment shaders + */ +struct pipe_constant_buffer +{ + struct pipe_buffer *buffer; + unsigned size; /** in bytes (XXX: redundant!) */ +}; + + +struct pipe_shader_state +{ + const struct tgsi_token *tokens; +}; + + +struct pipe_depth_state { + unsigned enabled:1; /**< depth test enabled? */ + unsigned writemask:1; /**< allow depth buffer writes? */ + unsigned func:3; /**< depth test func (PIPE_FUNC_x) */ + unsigned occlusion_count:1; /**< do occlusion counting? 
*/ +}; + + +struct pipe_stencil_state { + unsigned enabled:1; /**< stencil[0]: stencil enabled, stencil[1]: two-side enabled */ + unsigned func:3; /**< PIPE_FUNC_x */ + unsigned fail_op:3; /**< PIPE_STENCIL_OP_x */ + unsigned zpass_op:3; /**< PIPE_STENCIL_OP_x */ + unsigned zfail_op:3; /**< PIPE_STENCIL_OP_x */ + ubyte ref_value; + ubyte value_mask; + ubyte write_mask; +}; + + +struct pipe_alpha_state { + unsigned enabled:1; + unsigned func:3; /**< PIPE_FUNC_x */ + float ref; /**< reference value */ +}; + + +struct pipe_depth_stencil_alpha_state +{ + struct pipe_depth_state depth; + struct pipe_stencil_state stencil[2]; /**< [0] = front, [1] = back */ + struct pipe_alpha_state alpha; +}; + + +struct pipe_blend_state +{ + unsigned blend_enable:1; + + unsigned rgb_func:3; /**< PIPE_BLEND_x */ + unsigned rgb_src_factor:5; /**< PIPE_BLENDFACTOR_x */ + unsigned rgb_dst_factor:5; /**< PIPE_BLENDFACTOR_x */ + + unsigned alpha_func:3; /**< PIPE_BLEND_x */ + unsigned alpha_src_factor:5; /**< PIPE_BLENDFACTOR_x */ + unsigned alpha_dst_factor:5; /**< PIPE_BLENDFACTOR_x */ + + unsigned logicop_enable:1; + unsigned logicop_func:4; /**< PIPE_LOGICOP_x */ + + unsigned colormask:4; /**< bitmask of PIPE_MASK_R/G/B/A */ + unsigned dither:1; +}; + + +struct pipe_blend_color +{ + float color[4]; +}; + + +struct pipe_framebuffer_state +{ + unsigned width, height; + + /** multiple colorbuffers for multiple render targets */ + unsigned num_cbufs; + struct pipe_surface *cbufs[PIPE_MAX_COLOR_BUFS]; + + struct pipe_surface *zsbuf; /**< Z/stencil buffer */ +}; + + +/** + * Texture sampler state. 
+ */ +struct pipe_sampler_state +{ + unsigned wrap_s:3; /**< PIPE_TEX_WRAP_x */ + unsigned wrap_t:3; /**< PIPE_TEX_WRAP_x */ + unsigned wrap_r:3; /**< PIPE_TEX_WRAP_x */ + unsigned min_img_filter:2; /**< PIPE_TEX_FILTER_x */ + unsigned min_mip_filter:2; /**< PIPE_TEX_MIPFILTER_x */ + unsigned mag_img_filter:2; /**< PIPE_TEX_FILTER_x */ + unsigned compare_mode:1; /**< PIPE_TEX_COMPARE_x */ + unsigned compare_func:3; /**< PIPE_FUNC_x */ + unsigned normalized_coords:1; /**< Are coords normalized to [0,1]? */ + unsigned prefilter:4; /**< Weird sampling state exposed by some APIs */ + float shadow_ambient; /**< shadow test fail color/intensity */ + float lod_bias; /**< LOD/lambda bias */ + float min_lod, max_lod; /**< LOD clamp range, after bias */ + float border_color[4]; + float max_anisotropy; +}; + + +/** + * 2D surface. This is basically a view into a memory buffer. + * May be a renderbuffer, texture mipmap level, etc. + */ +struct pipe_surface +{ + struct pipe_buffer *buffer; /**< surface's buffer/memory */ + enum pipe_format format; /**< PIPE_FORMAT_x */ + unsigned status; /**< PIPE_SURFACE_STATUS_x */ + unsigned clear_value; /**< XXX may be temporary */ + unsigned width; /**< logical width in pixels */ + unsigned height; /**< logical height in pixels */ + struct pipe_format_block block; + unsigned nblocksx; /**< allocated width in blocks */ + unsigned nblocksy; /**< allocated height in blocks */ + unsigned stride; /**< stride in bytes between rows of blocks */ + unsigned layout; /**< PIPE_SURFACE_LAYOUT_x */ + unsigned offset; /**< offset from start of buffer, in bytes */ + unsigned refcount; + unsigned usage; /**< PIPE_BUFFER_USAGE_* */ + + struct pipe_winsys *winsys; /**< winsys which owns/created the surface */ + + struct pipe_texture *texture; /**< optional texture into which this is a view */ + unsigned face; + unsigned level; + unsigned zslice; +}; + + +/** + * Texture object. 
+ */ +struct pipe_texture +{ + enum pipe_texture_target target; /**< PIPE_TEXTURE_x */ + enum pipe_format format; /**< PIPE_FORMAT_x */ + + unsigned width[PIPE_MAX_TEXTURE_LEVELS]; + unsigned height[PIPE_MAX_TEXTURE_LEVELS]; + unsigned depth[PIPE_MAX_TEXTURE_LEVELS]; + + struct pipe_format_block block; + unsigned nblocksx[PIPE_MAX_TEXTURE_LEVELS]; /**< allocated width in blocks */ + unsigned nblocksy[PIPE_MAX_TEXTURE_LEVELS]; /**< allocated height in blocks */ + + unsigned last_level:8; /**< Index of last mipmap level present/defined */ + unsigned compressed:1; + + unsigned nr_samples:8; /**< for multisampled surfaces, nr of samples */ + + unsigned tex_usage; /* PIPE_TEXTURE_USAGE_* */ + + /* These are also refcounted: + */ + unsigned refcount; + + struct pipe_screen *screen; /**< screen that this texture belongs to */ +}; + + +/** + * A vertex buffer. Typically, all the vertex data/attributes for + * drawing something will be in one buffer. But it's also possible, for + * example, to put colors in one buffer and texcoords in another. + */ +struct pipe_vertex_buffer +{ + unsigned pitch; /**< stride to same attrib in next vertex, in bytes */ + unsigned max_index; /**< number of vertices in this buffer */ + unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */ + struct pipe_buffer *buffer; /**< the actual buffer */ +}; + + +/** + * Information to describe a vertex attribute (position, color, etc) + */ +struct pipe_vertex_element +{ + /** Offset of this attribute, in bytes, from the start of the vertex */ + unsigned src_offset; + + /** Which vertex_buffer (as given to pipe->set_vertex_buffer()) does + * this attribute live in? 
+ */ + unsigned vertex_buffer_index:8; + unsigned nr_components:8; + + enum pipe_format src_format; /**< PIPE_FORMAT_* */ +}; + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/include/pipe/p_thread.h b/src/gallium/include/pipe/p_thread.h new file mode 100644 index 0000000000..8af3cd958b --- /dev/null +++ b/src/gallium/include/pipe/p_thread.h @@ -0,0 +1,276 @@ +/************************************************************************** + * + * Copyright 1999-2006 Brian Paul + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * + * Thread, mutex, condition var and thread-specific data functions. 
+ */ + + +#ifndef _P_THREAD2_H_ +#define _P_THREAD2_H_ + + +#include "pipe/p_compiler.h" + + +#if defined(PIPE_OS_LINUX) + +#include <pthread.h> /* POSIX threads headers */ +#include <stdio.h> /* for perror() */ + +typedef pthread_t pipe_thread; + +#define PIPE_THREAD_ROUTINE( name, param ) \ + void *name( void *param ) + +static INLINE pipe_thread pipe_thread_create( void *(* routine)( void *), void *param ) +{ + pipe_thread thread; + if (pthread_create( &thread, NULL, routine, param )) + return 0; + return thread; +} + +static INLINE int pipe_thread_wait( pipe_thread thread ) +{ + return pthread_join( thread, NULL ); +} + +static INLINE int pipe_thread_destroy( pipe_thread thread ) +{ + return pthread_detach( thread ); +} + +typedef pthread_mutex_t pipe_mutex; +typedef pthread_cond_t pipe_condvar; + +#define pipe_static_mutex(mutex) \ + static pipe_mutex mutex = PTHREAD_MUTEX_INITIALIZER + +#define pipe_mutex_init(mutex) \ + pthread_mutex_init(&(mutex), NULL) + +#define pipe_mutex_destroy(mutex) \ + pthread_mutex_destroy(&(mutex)) + +#define pipe_mutex_lock(mutex) \ + (void) pthread_mutex_lock(&(mutex)) + +#define pipe_mutex_unlock(mutex) \ + (void) pthread_mutex_unlock(&(mutex)) + +#define pipe_static_condvar(mutex) \ + static pipe_condvar mutex = PTHREAD_COND_INITIALIZER + +#define pipe_condvar_init(cond) \ + pthread_cond_init(&(cond), NULL) + +#define pipe_condvar_destroy(cond) \ + pthread_cond_destroy(&(cond)) + +#define pipe_condvar_wait(cond, mutex) \ + pthread_cond_wait(&(cond), &(mutex)) + +#define pipe_condvar_signal(cond) \ + pthread_cond_signal(&(cond)) + +#define pipe_condvar_broadcast(cond) \ + pthread_cond_broadcast(&(cond)) + + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) + +#include <windows.h> + +typedef HANDLE pipe_thread; + +#define PIPE_THREAD_ROUTINE( name, param ) \ + void * WINAPI name( void *param ) + +static INLINE pipe_thread pipe_thread_create( void *(WINAPI * routine)( void *), void *param ) +{ + DWORD id; + return CreateThread( NULL, 
0, (LPTHREAD_START_ROUTINE) routine, param, 0, &id ); +} + +static INLINE int pipe_thread_wait( pipe_thread thread ) +{ + if (WaitForSingleObject( thread, INFINITE ) == WAIT_OBJECT_0) + return 0; + return -1; +} + +static INLINE int pipe_thread_destroy( pipe_thread thread ) +{ + if (CloseHandle( thread )) + return 0; + return -1; +} + +typedef CRITICAL_SECTION pipe_mutex; + +#define pipe_static_mutex(name) \ + /*static*/ pipe_mutex name = {0,0,0,0,0,0} + +#define pipe_mutex_init(name) \ + InitializeCriticalSection(&name) + +#define pipe_mutex_destroy(name) \ + DeleteCriticalSection(&name) + +#define pipe_mutex_lock(name) \ + EnterCriticalSection(&name) + +#define pipe_mutex_unlock(name) \ + LeaveCriticalSection(&name) + +/* XXX: dummy definitions, make it compile */ + +typedef unsigned pipe_condvar; + +#define pipe_condvar_init(condvar) \ + (void) condvar + +#define pipe_condvar_broadcast(condvar) \ + (void) condvar + +#else + +/** Dummy definitions */ + +typedef unsigned pipe_thread; +typedef unsigned pipe_mutex; +typedef unsigned pipe_condvar; + +#define pipe_static_mutex(mutex) \ + static pipe_mutex mutex = 0 + +#define pipe_mutex_init(mutex) \ + (void) mutex + +#define pipe_mutex_destroy(mutex) \ + (void) mutex + +#define pipe_mutex_lock(mutex) \ + (void) mutex + +#define pipe_mutex_unlock(mutex) \ + (void) mutex + +#define pipe_static_condvar(condvar) \ + static unsigned condvar = 0 + +#define pipe_condvar_init(condvar) \ + (void) condvar + +#define pipe_condvar_destroy(condvar) \ + (void) condvar + +#define pipe_condvar_wait(condvar, mutex) \ + (void) condvar + +#define pipe_condvar_signal(condvar) \ + (void) condvar + +#define pipe_condvar_broadcast(condvar) \ + (void) condvar + + +#endif /* PIPE_OS_? */ + + + +/* + * Thread-specific data. 
+ */ + +typedef struct { +#if defined(PIPE_OS_LINUX) + pthread_key_t key; +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) + DWORD key; +#endif + int initMagic; +} pipe_tsd; + + +#define PIPE_TSD_INIT_MAGIC 0xff8adc98 + + +static INLINE void +pipe_tsd_init(pipe_tsd *tsd) +{ +#if defined(PIPE_OS_LINUX) + if (pthread_key_create(&tsd->key, NULL/*free*/) != 0) { + perror("pthread_key_create(): failed to allocate key for thread specific data"); + exit(-1); + } +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) + assert(0); +#endif + tsd->initMagic = PIPE_TSD_INIT_MAGIC; +} + +static INLINE void * +pipe_tsd_get(pipe_tsd *tsd) +{ + if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) { + pipe_tsd_init(tsd); + } +#if defined(PIPE_OS_LINUX) + return pthread_getspecific(tsd->key); +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) + assert(0); + return NULL; +#else + assert(0); + return NULL; +#endif +} + +static INLINE void +pipe_tsd_set(pipe_tsd *tsd, void *value) +{ + if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) { + pipe_tsd_init(tsd); + } +#if defined(PIPE_OS_LINUX) + if (pthread_setspecific(tsd->key, value) != 0) { + perror("pthread_set_specific() failed"); + exit(-1); + } +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) + assert(0); +#else + assert(0); +#endif +} + + + +#endif /* _P_THREAD2_H_ */ diff --git a/src/gallium/include/pipe/p_winsys.h b/src/gallium/include/pipe/p_winsys.h new file mode 100644 index 0000000000..5d18291dc6 --- /dev/null +++ b/src/gallium/include/pipe/p_winsys.h @@ -0,0 +1,186 @@ + /************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * This is the interface that Gallium3D requires any window system + * hosting it to implement. This is the only include file in Gallium3D + * which is public. + */ + +#ifndef P_WINSYS_H +#define P_WINSYS_H + + +#include "p_format.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** Opaque type */ +struct pipe_fence_handle; + +struct pipe_surface; + + +/** + * Gallium3D drivers are (meant to be!) independent of both GL and the + * window system. The window system provides a buffer manager and a + * set of additional hooks for things like command buffer submission, + * etc. + * + * There clearly has to be some agreement between the window system + * driver and the hardware driver about the format of command buffers, + * etc. 
struct pipe_winsys
{
   /** Destroy hook for this winsys object. */
   void (*destroy)( struct pipe_winsys *ws );

   /** Returns name of this winsys interface */
   const char *(*get_name)( struct pipe_winsys *ws );

   /**
    * Do any special operations to ensure frontbuffer contents are
    * displayed, eg copy fake frontbuffer.
    */
   void (*flush_frontbuffer)( struct pipe_winsys *ws,
                              struct pipe_surface *surf,
                              void *context_private );


   /** allocate a new surface (no context dependency) */
   struct pipe_surface *(*surface_alloc)(struct pipe_winsys *ws);

   /**
    * Allocate storage for a pipe_surface.
    * \param flags XXX unused, remove someday
    * \return  0 if succeeds.
    */
   int (*surface_alloc_storage)(struct pipe_winsys *ws,
                                struct pipe_surface *surf,
                                unsigned width, unsigned height,
                                enum pipe_format format,
                                unsigned flags,
                                unsigned tex_usage);

   /** Release a surface; takes the address of the caller's pointer. */
   void (*surface_release)(struct pipe_winsys *ws, struct pipe_surface **s);


   /**
    * Buffer management. Buffer attributes are mostly fixed over its lifetime.
    *
    * Remember that gallium gets to choose the interface it needs, and the
    * window systems must then implement that interface (rather than the
    * other way around...).
    *
    * usage is a bitmask of PIPE_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This
    * usage argument is only an optimization hint, not a guarantee, therefore
    * proper behavior must be observed in all circumstances.
    *
    * alignment indicates the client's alignment requirements, eg for
    * SSE instructions.
    */
   struct pipe_buffer *(*buffer_create)( struct pipe_winsys *ws,
                                         unsigned alignment,
                                         unsigned usage,
                                         unsigned size );

   /**
    * Create a buffer that wraps user-space data.
    *
    * Effectively this schedules a delayed call to buffer_create
    * followed by an upload of the data at *some point in the future*,
    * or perhaps never.  Basically the allocate/upload is delayed
    * until the buffer is actually passed to hardware.
    *
    * The intention is to provide a quick way to turn regular data
    * into a buffer, and secondly to avoid a copy operation if that
    * data subsequently turns out to be only accessed by the CPU.
    *
    * Common example is OpenGL vertex buffers that are subsequently
    * processed either by software TNL in the driver or by passing to
    * hardware.
    *
    * XXX: What happens if the delayed call to buffer_create() fails?
    *
    * Note that ptr may be accessed at any time up to the time when the
    * buffer is destroyed, so the data must not be freed before then.
    */
   struct pipe_buffer *(*user_buffer_create)(struct pipe_winsys *ws,
                                             void *ptr,
                                             unsigned bytes);

   /**
    * Map the entire data store of a buffer object into the client's address.
    * usage is a bitmask of PIPE_BUFFER_USAGE_CPU_READ/WRITE flags.
    */
   void *(*buffer_map)( struct pipe_winsys *ws,
                        struct pipe_buffer *buf,
                        unsigned usage );

   /** Undo a previous buffer_map() on the same buffer. */
   void (*buffer_unmap)( struct pipe_winsys *ws,
                         struct pipe_buffer *buf );

   /** Destroy a buffer created by buffer_create()/user_buffer_create(). */
   void (*buffer_destroy)( struct pipe_winsys *ws,
                           struct pipe_buffer *buf );


   /** Set ptr = fence, with reference counting */
   void (*fence_reference)( struct pipe_winsys *ws,
                            struct pipe_fence_handle **ptr,
                            struct pipe_fence_handle *fence );

   /**
    * Checks whether the fence has been signalled.
    * \param flag  driver-specific meaning
    * \return zero on success.
    */
   int (*fence_signalled)( struct pipe_winsys *ws,
                           struct pipe_fence_handle *fence,
                           unsigned flag );

   /**
    * Wait for the fence to finish.
    * \param flag  driver-specific meaning
    * \return zero on success.
    */
   int (*fence_finish)( struct pipe_winsys *ws,
                        struct pipe_fence_handle *fence,
                        unsigned flag );

};
+ + +This is still a work in progress: +- errors are not handled properly and almost always result in a crash +- state atoms with array members are awkward to set +- there is no efficient way to view images + +-- +Jose Fonseca <jrfonseca@tungstengraphics.com> diff --git a/src/gallium/state_trackers/python/SConscript b/src/gallium/state_trackers/python/SConscript new file mode 100644 index 0000000000..1581182aec --- /dev/null +++ b/src/gallium/state_trackers/python/SConscript @@ -0,0 +1,34 @@ +import sys +import os.path + +Import('*') + +if 'python' in env['statetrackers']: + + env = env.Clone() + + env.Tool('python') + + env.Tool('swig') + env.Append(SWIGPATH = ['#src/gallium/include', '#src/gallium/include/pipe']) + env.Append(SWIGFLAGS = ['-python', '-keyword']) + + env.Append(CPPPATH = '.') + + pyst = env.ConvenienceLibrary( + target = 'pyst', + source = [ + 'gallium.i', + 'st_device.c', + 'st_sample.c', + 'st_softpipe_winsys.c', + ], + ) + + env.SharedLibrary( + target = '_gallium', + source = [ + 'st_hardpipe_winsys.c', + ], + LIBS = [pyst, softpipe, trace] + auxiliaries + env['LIBS'], + ) diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i new file mode 100644 index 0000000000..68d2db3325 --- /dev/null +++ b/src/gallium/state_trackers/python/gallium.i @@ -0,0 +1,101 @@ + /************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * SWIG interface definition for Gallium types. 
/* Top-level SWIG module: the generated Python module is named "gallium". */
%module gallium;

/* Headers needed by the C code written in the %extend blocks of the
 * included interface files. */
%{

#include <stdio.h>
#include <Python.h>

#include "pipe/p_screen.h"
#include "pipe/p_context.h"
#include "pipe/p_inlines.h"
#include "util/u_memory.h"
#include "pipe/p_shader_tokens.h"
#include "cso_cache/cso_context.h"
#include "util/u_draw_quad.h"
#include "util/u_tile.h"
#include "tgsi/tgsi_text.h"
#include "tgsi/tgsi_dump.h"

#include "st_device.h"
#include "st_sample.h"

%}

%include "typemaps.i"

/* Array helper classes for passing raw C arrays from scripts. */
%include "carrays.i"
%array_class(unsigned char, ByteArray);
%array_class(int, IntArray);
%array_class(unsigned, UnsignedArray);
%array_class(float, FloatArray);


/* Script-side names for the wrapped objects. */
%rename(Device) st_device;
%rename(Context) st_context;
%rename(Texture) pipe_texture;
%rename(Surface) pipe_surface;
%rename(Buffer) st_buffer;

/* Script-side names for the state structures. */
%rename(BlendColor) pipe_blend_color;
%rename(Blend) pipe_blend_state;
%rename(Clip) pipe_clip_state;
%rename(ConstantBuffer) pipe_constant_buffer;
%rename(Depth) pipe_depth_state;
%rename(Stencil) pipe_stencil_state;
%rename(Alpha) pipe_alpha_state;
%rename(DepthStencilAlpha) pipe_depth_stencil_alpha_state;
%rename(FormatBlock) pipe_format_block;
%rename(Framebuffer) pipe_framebuffer_state;
%rename(PolyStipple) pipe_poly_stipple;
%rename(Rasterizer) pipe_rasterizer_state;
%rename(Sampler) pipe_sampler_state;
%rename(Scissor) pipe_scissor_state;
%rename(Shader) pipe_shader_state;
%rename(VertexBuffer) pipe_vertex_buffer;
%rename(VertexElement) pipe_vertex_element;
%rename(Viewport) pipe_viewport_state;


/* Basic types, constants and formats come first; the object and state
 * interfaces below refer to them. */
%include "p_compiler.i"
%include "pipe/p_defines.h";
%include "p_format.i"

%include "p_device.i"
%include "p_context.i"
%include "p_texture.i"
%include "p_state.i"
/* Declares ubyte to SWIG as unsigned char.
 * NOTE(review): presumably mirrors the typedef in pipe/p_compiler.h, which
 * is not %included directly -- confirm. */
typedef unsigned char ubyte;
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * SWIG interface definition for Gallium types. 
/* The context is only ever created through Device.context_create(), so no
 * default constructor/destructor is generated for it. */
%nodefaultctor st_context;
%nodefaultdtor st_context;

/* Opaque to scripts: every operation goes through the methods below. */
struct st_context {
};

%extend st_context {

   ~st_context() {
      st_context_destroy($self);
   }

   /*
    * State functions (create/bind/destroy state objects)
    */

   void set_blend( const struct pipe_blend_state *state ) {
      cso_set_blend($self->cso, state);
   }

   /** Set a single sampler state and commit it immediately. */
   void set_sampler( unsigned index, const struct pipe_sampler_state *state ) {
      cso_single_sampler($self->cso, index, state);
      cso_single_sampler_done($self->cso);
   }

   void set_rasterizer( const struct pipe_rasterizer_state *state ) {
      cso_set_rasterizer($self->cso, state);
   }

   void set_depth_stencil_alpha(const struct pipe_depth_stencil_alpha_state *state) {
      cso_set_depth_stencil_alpha($self->cso, state);
   }

   /**
    * Create and bind a fragment shader; a NULL state unbinds the shader.
    * The previously bound shader object is deleted only after the new one
    * has been bound successfully.
    */
   void set_fragment_shader( const struct pipe_shader_state *state ) {
      void *fs;

      if(!state) {
         cso_set_fragment_shader_handle($self->cso, NULL);
         return;
      }

      fs = $self->pipe->create_fs_state($self->pipe, state);
      if(!fs)
         return;

      /* NOTE(review): fs is not released when binding fails -- confirm
       * which call should be used to delete an unbound shader here. */
      if(cso_set_fragment_shader_handle($self->cso, fs) != PIPE_OK)
         return;

      cso_delete_fragment_shader($self->cso, $self->fs);
      $self->fs = fs;
   }

   /**
    * Create and bind a vertex shader; a NULL state unbinds the shader.
    * Mirrors set_fragment_shader().
    */
   void set_vertex_shader( const struct pipe_shader_state *state ) {
      void *vs;

      if(!state) {
         cso_set_vertex_shader_handle($self->cso, NULL);
         return;
      }

      vs = $self->pipe->create_vs_state($self->pipe, state);
      if(!vs)
         return;

      /* NOTE(review): same leak-on-failure question as the fragment path. */
      if(cso_set_vertex_shader_handle($self->cso, vs) != PIPE_OK)
         return;

      cso_delete_vertex_shader($self->cso, $self->vs);
      $self->vs = vs;
   }

   /*
    * Parameter-like state (or properties)
    */

   void set_blend_color(const struct pipe_blend_color *state ) {
      cso_set_blend_color($self->cso, state);
   }

   void set_clip(const struct pipe_clip_state *state ) {
      $self->pipe->set_clip_state($self->pipe, state);
   }

   /**
    * Bind a constant buffer; a NULL buffer binds an empty
    * (NULL buffer, size 0) constant buffer state.
    */
   void set_constant_buffer(unsigned shader, unsigned index,
                            struct st_buffer *buffer )
   {
      struct pipe_constant_buffer state;
      memset(&state, 0, sizeof(state));
      /* buffer may be NULL: do not dereference it to read the size;
       * the memset above already left buffer/size as NULL/0. */
      if(buffer) {
         state.buffer = buffer->buffer;
         state.size = buffer->buffer->size;
      }
      $self->pipe->set_constant_buffer($self->pipe, shader, index, &state);
   }

   void set_framebuffer(const struct pipe_framebuffer_state *state ) {
      cso_set_framebuffer($self->cso, state);
   }

   void set_polygon_stipple(const struct pipe_poly_stipple *state ) {
      $self->pipe->set_polygon_stipple($self->pipe, state);
   }

   void set_scissor(const struct pipe_scissor_state *state ) {
      $self->pipe->set_scissor_state($self->pipe, state);
   }

   void set_viewport(const struct pipe_viewport_state *state) {
      cso_set_viewport($self->cso, state);
   }

   /**
    * Bind a texture to a sampler slot; a NULL texture selects the
    * context's default texture.  Out-of-range slots are ignored.
    */
   void set_sampler_texture(unsigned index,
                            struct pipe_texture *texture) {
      /* sampler_textures is handed to the driver as PIPE_MAX_SAMPLERS
       * entries below, so that is the valid index range. */
      if(index >= PIPE_MAX_SAMPLERS)
         return;
      if(!texture)
         texture = $self->default_texture;
      pipe_texture_reference(&$self->sampler_textures[index], texture);
      $self->pipe->set_sampler_textures($self->pipe,
                                        PIPE_MAX_SAMPLERS,
                                        $self->sampler_textures);
   }

   /**
    * Set one vertex buffer slot and re-submit all slots to the driver.
    * A NULL buffer clears the slot.  Out-of-range slots are ignored.
    */
   void set_vertex_buffer(unsigned index,
                          unsigned pitch,
                          unsigned max_index,
                          unsigned buffer_offset,
                          struct st_buffer *buffer)
   {
      unsigned i;
      struct pipe_vertex_buffer state;

      /* The scan below treats vertex_buffers as PIPE_MAX_ATTRIBS entries. */
      if(index >= PIPE_MAX_ATTRIBS)
         return;

      memset(&state, 0, sizeof(state));
      state.pitch = pitch;
      state.max_index = max_index;
      state.buffer_offset = buffer_offset;
      state.buffer = buffer ? buffer->buffer : NULL;

      memcpy(&$self->vertex_buffers[index], &state, sizeof(state));

      /* Recompute the count from scratch: it is one past the highest slot
       * that still has a buffer, and zero when every slot is empty. */
      $self->num_vertex_buffers = 0;
      for(i = 0; i < PIPE_MAX_ATTRIBS; ++i)
         if($self->vertex_buffers[i].buffer)
            $self->num_vertex_buffers = i + 1;

      $self->pipe->set_vertex_buffers($self->pipe,
                                      $self->num_vertex_buffers,
                                      $self->vertex_buffers);
   }

   /**
    * Store one vertex element; it takes effect on the next
    * set_vertex_elements() call.
    */
   void set_vertex_element(unsigned index,
                           const struct pipe_vertex_element *element)
   {
      /* NOTE(review): index is not range-checked here because the size of
       * vertex_elements is not visible in this file -- confirm and add. */
      if(!element)
         return;
      memcpy(&$self->vertex_elements[index], element, sizeof(*element));
   }

   /** Submit the first num stored vertex elements to the driver. */
   void set_vertex_elements(unsigned num)
   {
      $self->num_vertex_elements = num;
      $self->pipe->set_vertex_elements($self->pipe,
                                       $self->num_vertex_elements,
                                       $self->vertex_elements);
   }

   /*
    * Draw functions
    */

   void draw_arrays(unsigned mode, unsigned start, unsigned count) {
      $self->pipe->draw_arrays($self->pipe, mode, start, count);
   }

   void draw_elements( struct st_buffer *indexBuffer,
                       unsigned indexSize,
                       unsigned mode, unsigned start, unsigned count)
   {
      /* A missing index buffer would be dereferenced below. */
      if(!indexBuffer)
         return;
      $self->pipe->draw_elements($self->pipe,
                                 indexBuffer->buffer,
                                 indexSize,
                                 mode, start, count);
   }

   void draw_range_elements( struct st_buffer *indexBuffer,
                             unsigned indexSize, unsigned minIndex, unsigned maxIndex,
                             unsigned mode, unsigned start, unsigned count)
   {
      /* A missing index buffer would be dereferenced below. */
      if(!indexBuffer)
         return;
      $self->pipe->draw_range_elements($self->pipe,
                                       indexBuffer->buffer,
                                       indexSize, minIndex, maxIndex,
                                       mode, start, count);
   }

   /**
    * Draw from a plain float array: the data is copied into a temporary
    * vertex buffer (4 floats per attribute), drawn, and the buffer is
    * released again.
    */
   void draw_vertices(unsigned prim,
                      unsigned num_verts,
                      unsigned num_attribs,
                      const float *vertices)
   {
      struct pipe_context *pipe = $self->pipe;
      struct pipe_screen *screen = pipe->screen;
      struct pipe_buffer *vbuf;
      float *map;
      unsigned size;

      size = num_verts * num_attribs * 4 * sizeof(float);

      vbuf = pipe_buffer_create(screen,
                                32,
                                PIPE_BUFFER_USAGE_VERTEX,
                                size);
      if(!vbuf)
         goto error1;

      map = pipe_buffer_map(screen, vbuf, PIPE_BUFFER_USAGE_CPU_WRITE);
      if (!map)
         goto error2;
      memcpy(map, vertices, size);
      pipe_buffer_unmap(screen, vbuf);

      util_draw_vertex_buffer(pipe, vbuf, prim, num_verts, num_attribs);

      /* The success path falls through: the temporary buffer is always
       * released. */
error2:
      pipe_buffer_reference(screen, &vbuf, NULL);
error1:
      ;
   }

   /** Flush the pipe (always including the render cache) and wait for it. */
   void
   flush(unsigned flags = 0) {
      struct pipe_fence_handle *fence = NULL;
      $self->pipe->flush($self->pipe, flags | PIPE_FLUSH_RENDER_CACHE, &fence);
      /* TODO: allow asynchronous operation */
      $self->pipe->winsys->fence_finish( $self->pipe->winsys, fence, 0 );
      $self->pipe->winsys->fence_reference( $self->pipe->winsys, &fence, NULL );
   }

   /*
    * Surface functions
    */

   void surface_copy(int do_flip,
                     struct pipe_surface *dest,
                     unsigned destx, unsigned desty,
                     struct pipe_surface *src,
                     unsigned srcx, unsigned srcy,
                     unsigned width, unsigned height) {
      $self->pipe->surface_copy($self->pipe, do_flip, dest, destx, desty, src, srcx, srcy, width, height);
   }

   void surface_fill(struct pipe_surface *dst,
                     unsigned x, unsigned y,
                     unsigned width, unsigned height,
                     unsigned value) {
      $self->pipe->surface_fill($self->pipe, dst, x, y, width, height, value);
   }

   void surface_clear(struct pipe_surface *surface, unsigned value = 0) {
      $self->pipe->clear($self->pipe, surface, value);
   }

};
/* Devices are built through the explicit constructor in %extend below. */
%nodefaultctor st_device;
%nodefaultdtor st_device;


/* Opaque to scripts. */
struct st_device {
};

/* Objects returned by these factories are owned by the caller. */
%newobject st_device::texture_create;
%newobject st_device::context_create;
%newobject st_device::buffer_create;

%extend st_device {

   /** Create a device; a non-zero \p hardware is passed on as TRUE. */
   st_device(int hardware = 1) {
      if(hardware)
         return st_device_create(TRUE);
      return st_device_create(FALSE);
   }

   ~st_device() {
      st_device_destroy($self);
   }

   /** Name reported by the underlying screen. */
   const char * get_name( void ) {
      return $self->screen->get_name($self->screen);
   }

   /** Vendor reported by the underlying screen. */
   const char * get_vendor( void ) {
      return $self->screen->get_vendor($self->screen);
   }

   /**
    * Query an integer-valued capability/parameter/limit
    * \param param  one of PIPE_CAP_x
    */
   int get_param( int param ) {
      return $self->screen->get_param($self->screen, param);
   }

   /**
    * Query a float-valued capability/parameter/limit
    * \param param  one of PIPE_CAP_x
    */
   float get_paramf( int param ) {
      return $self->screen->get_paramf($self->screen, param);
   }

   /**
    * Ask the screen whether \p format is supported for the given
    * texture target, texture usage and geometry flags.
    */
   int is_format_supported( enum pipe_format format,
                            enum pipe_texture_target target,
                            unsigned tex_usage,
                            unsigned geom_flags ) {
      return $self->screen->is_format_supported( $self->screen,
                                                 format,
                                                 target,
                                                 tex_usage,
                                                 geom_flags );
   }

   /** Create a new context on this device. */
   struct st_context *
   context_create(void) {
      return st_context_create($self);
   }

   /**
    * Create a texture from a zeroed template filled with the given
    * format, level-0 dimensions, last mipmap level, target and usage.
    */
   struct pipe_texture *
   texture_create(
         enum pipe_format format,
         unsigned width,
         unsigned height,
         unsigned depth = 1,
         unsigned last_level = 0,
         enum pipe_texture_target target = PIPE_TEXTURE_2D,
         unsigned tex_usage = 0
      ) {
      struct pipe_texture tmpl;

      memset(&tmpl, 0, sizeof(tmpl));

      tmpl.target = target;
      tmpl.tex_usage = tex_usage;
      tmpl.last_level = last_level;

      tmpl.width[0] = width;
      tmpl.height[0] = height;
      tmpl.depth[0] = depth;

      /* The block layout is derived from the format stored just before. */
      tmpl.format = format;
      pf_get_block(tmpl.format, &tmpl.block);

      return $self->screen->texture_create($self->screen, &tmpl);
   }

   /** Create a buffer of \p size bytes on this device. */
   struct st_buffer *
   buffer_create(unsigned size, unsigned alignment = 0, unsigned usage = 0) {
      return st_buffer_create($self, alignment, usage, size);
   }

};
b/src/gallium/state_trackers/python/p_format.i new file mode 100644 index 0000000000..51ad4bebcd --- /dev/null +++ b/src/gallium/state_trackers/python/p_format.i @@ -0,0 +1,152 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/*
 * Hand-written copy of the pipe_format enumerator names for SWIG (see the
 * XXX note above).  No explicit values are given here.
 * NOTE(review): the list must be kept in sync with pipe/p_format.h by hand
 * -- confirm whenever formats are added or removed there.
 */
enum pipe_format {
   PIPE_FORMAT_NONE,
   PIPE_FORMAT_A8R8G8B8_UNORM,
   PIPE_FORMAT_X8R8G8B8_UNORM,
   PIPE_FORMAT_B8G8R8A8_UNORM,
   PIPE_FORMAT_B8G8R8X8_UNORM,
   PIPE_FORMAT_A1R5G5B5_UNORM,
   PIPE_FORMAT_A4R4G4B4_UNORM,
   PIPE_FORMAT_R5G6B5_UNORM,
   PIPE_FORMAT_A2B10G10R10_UNORM,
   PIPE_FORMAT_L8_UNORM,
   PIPE_FORMAT_A8_UNORM,
   PIPE_FORMAT_I8_UNORM,
   PIPE_FORMAT_A8L8_UNORM,
   PIPE_FORMAT_L16_UNORM,
   PIPE_FORMAT_YCBCR,
   PIPE_FORMAT_YCBCR_REV,
   PIPE_FORMAT_Z16_UNORM,
   PIPE_FORMAT_Z32_UNORM,
   PIPE_FORMAT_Z32_FLOAT,
   PIPE_FORMAT_S8Z24_UNORM,
   PIPE_FORMAT_Z24S8_UNORM,
   PIPE_FORMAT_X8Z24_UNORM,
   PIPE_FORMAT_Z24X8_UNORM,
   PIPE_FORMAT_S8_UNORM,
   PIPE_FORMAT_R64_FLOAT,
   PIPE_FORMAT_R64G64_FLOAT,
   PIPE_FORMAT_R64G64B64_FLOAT,
   PIPE_FORMAT_R64G64B64A64_FLOAT,
   PIPE_FORMAT_R32_FLOAT,
   PIPE_FORMAT_R32G32_FLOAT,
   PIPE_FORMAT_R32G32B32_FLOAT,
   PIPE_FORMAT_R32G32B32A32_FLOAT,
   PIPE_FORMAT_R32_UNORM,
   PIPE_FORMAT_R32G32_UNORM,
   PIPE_FORMAT_R32G32B32_UNORM,
   PIPE_FORMAT_R32G32B32A32_UNORM,
   PIPE_FORMAT_R32_USCALED,
   PIPE_FORMAT_R32G32_USCALED,
   PIPE_FORMAT_R32G32B32_USCALED,
   PIPE_FORMAT_R32G32B32A32_USCALED,
   PIPE_FORMAT_R32_SNORM,
   PIPE_FORMAT_R32G32_SNORM,
   PIPE_FORMAT_R32G32B32_SNORM,
   PIPE_FORMAT_R32G32B32A32_SNORM,
   PIPE_FORMAT_R32_SSCALED,
   PIPE_FORMAT_R32G32_SSCALED,
   PIPE_FORMAT_R32G32B32_SSCALED,
   PIPE_FORMAT_R32G32B32A32_SSCALED,
   PIPE_FORMAT_R16_UNORM,
   PIPE_FORMAT_R16G16_UNORM,
   PIPE_FORMAT_R16G16B16_UNORM,
   PIPE_FORMAT_R16G16B16A16_UNORM,
   PIPE_FORMAT_R16_USCALED,
   PIPE_FORMAT_R16G16_USCALED,
   PIPE_FORMAT_R16G16B16_USCALED,
   PIPE_FORMAT_R16G16B16A16_USCALED,
   PIPE_FORMAT_R16_SNORM,
   PIPE_FORMAT_R16G16_SNORM,
   PIPE_FORMAT_R16G16B16_SNORM,
   PIPE_FORMAT_R16G16B16A16_SNORM,
   PIPE_FORMAT_R16_SSCALED,
   PIPE_FORMAT_R16G16_SSCALED,
   PIPE_FORMAT_R16G16B16_SSCALED,
   PIPE_FORMAT_R16G16B16A16_SSCALED,
   PIPE_FORMAT_R8_UNORM,
   PIPE_FORMAT_R8G8_UNORM,
   PIPE_FORMAT_R8G8B8_UNORM,
   PIPE_FORMAT_R8G8B8A8_UNORM,
   PIPE_FORMAT_R8G8B8X8_UNORM,
   PIPE_FORMAT_R8_USCALED,
   PIPE_FORMAT_R8G8_USCALED,
   PIPE_FORMAT_R8G8B8_USCALED,
   PIPE_FORMAT_R8G8B8A8_USCALED,
   PIPE_FORMAT_R8G8B8X8_USCALED,
   PIPE_FORMAT_R8_SNORM,
   PIPE_FORMAT_R8G8_SNORM,
   PIPE_FORMAT_R8G8B8_SNORM,
   PIPE_FORMAT_R8G8B8A8_SNORM,
   PIPE_FORMAT_R8G8B8X8_SNORM,
   PIPE_FORMAT_B6G5R5_SNORM,
   PIPE_FORMAT_A8B8G8R8_SNORM,
   PIPE_FORMAT_X8B8G8R8_SNORM,
   PIPE_FORMAT_R8_SSCALED,
   PIPE_FORMAT_R8G8_SSCALED,
   PIPE_FORMAT_R8G8B8_SSCALED,
   PIPE_FORMAT_R8G8B8A8_SSCALED,
   PIPE_FORMAT_R8G8B8X8_SSCALED,
   PIPE_FORMAT_R32_FIXED,
   PIPE_FORMAT_R32G32_FIXED,
   PIPE_FORMAT_R32G32B32_FIXED,
   PIPE_FORMAT_R32G32B32A32_FIXED,

   PIPE_FORMAT_L8_SRGB,
   PIPE_FORMAT_A8_L8_SRGB,
   PIPE_FORMAT_R8G8B8_SRGB,
   PIPE_FORMAT_R8G8B8A8_SRGB,
   PIPE_FORMAT_R8G8B8X8_SRGB,

   PIPE_FORMAT_X8UB8UG8SR8S_NORM,
   PIPE_FORMAT_B6UG5SR5S_NORM,

   PIPE_FORMAT_DXT1_RGB,
   PIPE_FORMAT_DXT1_RGBA,
   PIPE_FORMAT_DXT3_RGBA,
   PIPE_FORMAT_DXT5_RGBA,
};


/*
 * Block description of a format (renamed to FormatBlock for scripts by
 * gallium.i).
 * NOTE(review): units of size/width/height are not stated here --
 * presumably bytes and pixels per block; confirm against pipe/p_format.h.
 */
struct pipe_format_block
{
   unsigned size;
   unsigned width;
   unsigned height;
};
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * SWIG interface definition for Gallium types. 
%module gallium;

/* Not exposed to scripts. */
%ignore winsys;
%ignore pipe_vertex_buffer::buffer;

%include "pipe/p_state.h";


%array_class(struct pipe_stencil_state, StencilArray);


%extend pipe_framebuffer_state {

   /** Allocate a zero-initialized framebuffer state. */
   pipe_framebuffer_state(void) {
      return CALLOC_STRUCT(pipe_framebuffer_state);
   }

   /** Drop every surface reference held by the state, then free it. */
   ~pipe_framebuffer_state() {
      unsigned index;
      for(index = 0; index < PIPE_MAX_COLOR_BUFS; ++index)
         pipe_surface_reference(&$self->cbufs[index], NULL);
      pipe_surface_reference(&$self->zsbuf, NULL);
      FREE($self);
   }

   /**
    * Reference \p surface as color buffer \p index.
    * Out-of-range indices are ignored.
    */
   void
   set_cbuf(unsigned index, struct pipe_surface *surface) {
      /* cbufs holds PIPE_MAX_COLOR_BUFS entries (see the destructor);
       * an unchecked index from a script would write past the array. */
      if(index >= PIPE_MAX_COLOR_BUFS)
         return;
      pipe_surface_reference(&$self->cbufs[index], surface);
   }

   /** Reference \p surface as the depth/stencil buffer. */
   void
   set_zsbuf(struct pipe_surface *surface) {
      pipe_surface_reference(&$self->zsbuf, surface);
   }

};


%extend pipe_shader_state {

   /**
    * Build a shader state by translating TGSI text.
    * \param text        shader source text
    * \param num_tokens  capacity of the token buffer to allocate
    * \return the new shader state, or NULL if \p text is NULL, an
    *         allocation fails, or the text cannot be translated.
    */
   pipe_shader_state(const char *text, unsigned num_tokens = 1024) {
      struct tgsi_token *tokens;
      struct pipe_shader_state *shader;

      /* Nothing to translate; do not hand a NULL string to the parser. */
      if(!text)
         return NULL;

      tokens = MALLOC(num_tokens * sizeof(struct tgsi_token));
      if(!tokens)
         return NULL;

      if(tgsi_text_translate(text, tokens, num_tokens ) != TRUE)
         goto error;

      shader = CALLOC_STRUCT(pipe_shader_state);
      if(!shader)
         goto error;

      /* The shader state takes ownership of the token buffer; it is freed
       * by the destructor below. */
      shader->tokens = tokens;

      return shader;

error:
      FREE(tokens);
      return NULL;
   }

   /** Free the token buffer and the state itself. */
   ~pipe_shader_state() {
      FREE((void*)$self->tokens);
      FREE($self);
   }

   /** Dump the shader tokens via tgsi_dump(). */
   void dump(unsigned flags = 0) {
      tgsi_dump($self->tokens, flags);
   }
}
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * SWIG interface definition for Gallium types. 
+ *
+ * @author Jose Fonseca <jrfonseca@tungstengraphics.com>
+ */
+
+
+%nodefaultctor pipe_texture;
+%nodefaultctor pipe_surface;
+%nodefaultctor st_buffer;
+
+%nodefaultdtor pipe_texture;
+%nodefaultdtor pipe_surface;
+%nodefaultdtor st_buffer;
+
+%ignore pipe_texture::screen;
+
+%ignore pipe_surface::winsys;
+%immutable pipe_surface::texture;
+%immutable pipe_surface::buffer;
+
+%newobject pipe_texture::get_surface;
+
+
+/*
+ * Script-side destructor and accessors for pipe_texture.
+ *
+ * NOTE(review): none of the per-level getters below validates `level`
+ * before indexing the per-level arrays -- confirm callers stay within the
+ * array bounds.
+ */
+%extend pipe_texture {
+
+   ~pipe_texture() {
+      /* Work on a local copy of the pointer; pipe_texture_reference()
+       * takes the address of the pointer it resets to NULL. */
+      struct pipe_texture *ptr = $self;
+      pipe_texture_reference(&ptr, NULL);
+   }
+
+   /** Value of width[] for the given mipmap level (default 0). */
+   unsigned get_width(unsigned level=0) {
+      return $self->width[level];
+   }
+
+   /** Value of height[] for the given mipmap level (default 0). */
+   unsigned get_height(unsigned level=0) {
+      return $self->height[level];
+   }
+
+   /** Value of depth[] for the given mipmap level (default 0). */
+   unsigned get_depth(unsigned level=0) {
+      return $self->depth[level];
+   }
+
+   /** Value of nblocksx[] for the given mipmap level (default 0). */
+   unsigned get_nblocksx(unsigned level=0) {
+      return $self->nblocksx[level];
+   }
+
+   /** Value of nblocksy[] for the given mipmap level (default 0). */
+   unsigned get_nblocksy(unsigned level=0) {
+      return $self->nblocksy[level];
+   }
+
+   /** Get a surface which is a "view" into a texture */
+   struct pipe_surface *
+   get_surface(unsigned face=0, unsigned level=0, unsigned zslice=0, unsigned usage=0 )
+   {
+      /* The request is forwarded to the screen that the texture records
+       * in its own `screen` member. */
+      struct pipe_screen *screen = $self->screen;
+      return screen->get_tex_surface(screen, $self, face, level, zslice, usage);
+   }
+
+};
+
+
+/*
+ * Script-side destructor, map/unmap and tile transfer helpers for
+ * pipe_surface.  The tile helpers forward their arguments unchanged to the
+ * matching pipe_get_tile_* / pipe_put_tile_* function.
+ */
+%extend pipe_surface {
+
+   ~pipe_surface() {
+      /* Same local-copy pattern as ~pipe_texture above. */
+      struct pipe_surface *ptr = $self;
+      pipe_surface_reference(&ptr, NULL);
+   }
+
+   // gets mapped to pipe_surface_map automatically
+   void * map( unsigned flags );
+
+   // gets mapped to pipe_surface_unmap automatically
+   void unmap( void );
+
+   /* Read the w x h tile at (x, y) into `raw` via pipe_get_tile_raw(). */
+   void
+   get_tile_raw(unsigned x, unsigned y, unsigned w, unsigned h, char *raw, unsigned stride) {
+      pipe_get_tile_raw($self, x, y, w, h, raw, stride);
+   }
+
+   /* Write `raw` into the w x h tile at (x, y) via pipe_put_tile_raw(). */
+   void
+   put_tile_raw(unsigned x, unsigned y, unsigned w, unsigned h, const char *raw, unsigned stride) {
+      pipe_put_tile_raw($self, x, y, w, h, raw, stride);
+   }
+
+   /* Read the w x h tile at (x, y) into `rgba` via pipe_get_tile_rgba(). */
+   void
+   get_tile_rgba(unsigned x, unsigned y, unsigned w, unsigned h, float *rgba) {
+      pipe_get_tile_rgba($self, x, y, w, h,
rgba);
+   }
+
+   /* Write `rgba` into the w x h tile at (x, y) via pipe_put_tile_rgba(). */
+   void
+   put_tile_rgba(unsigned x, unsigned y, unsigned w, unsigned h, const float *rgba) {
+      pipe_put_tile_rgba($self, x, y, w, h, rgba);
+   }
+
+   /* Read the w x h tile at (x, y) into `z` via pipe_get_tile_z(). */
+   void
+   get_tile_z(unsigned x, unsigned y, unsigned w, unsigned h, unsigned *z) {
+      pipe_get_tile_z($self, x, y, w, h, z);
+   }
+
+   /* Write `z` into the w x h tile at (x, y) via pipe_put_tile_z(). */
+   void
+   put_tile_z(unsigned x, unsigned y, unsigned w, unsigned h, const unsigned *z) {
+      pipe_put_tile_z($self, x, y, w, h, z);
+   }
+
+   /* Forward to st_sample_surface(), which fills `rgba`. */
+   void
+   sample_rgba(float *rgba) {
+      st_sample_surface($self, rgba);
+   }
+
+   /**
+    * Compare the w x h tile at (x, y) against the reference data in
+    * @p rgba, four floats per pixel.
+    *
+    * Returns the number of pixels for which at least one of the four
+    * channels differs from the reference by more than @p tol, or ~0 when
+    * the temporary read-back buffer cannot be allocated.
+    */
+   unsigned
+   compare_tile_rgba(unsigned x, unsigned y, unsigned w, unsigned h, const float *rgba, float tol = 0.0)
+   {
+      float *rgba2;
+      const float *p1;
+      const float *p2;
+      unsigned i, j, n;
+
+      rgba2 = MALLOC(h*w*4*sizeof(float));
+      if(!rgba2)
+         return ~0;
+
+      pipe_get_tile_rgba($self, x, y, w, h, rgba2);
+
+      p1 = rgba;
+      p2 = rgba2;
+      n = 0;
+      for(i = h*w; i; --i) {
+         /* A pixel counts once, however many of its channels differ. */
+         unsigned differs = 0;
+         for(j = 4; j; --j) {
+            float delta = *p2++ - *p1++;
+            if (delta < -tol || delta > tol)
+               differs = 1;
+         }
+         n += differs;
+      }
+
+      FREE(rgba2);
+
+      return n;
+   }
+
+};
+
+struct st_buffer {
+};
+
+%extend st_buffer {
+
+   ~st_buffer() {
+      st_buffer_destroy($self);
+   }
+
+   /**
+    * Copy LENGTH bytes from STRING into the buffer, starting at @p offset.
+    *
+    * A range that does not fit inside the buffer sets a Python ValueError
+    * and writes nothing.  If the buffer cannot be mapped the call is
+    * silently a no-op.
+    */
+   void write( const char *STRING, unsigned LENGTH, unsigned offset = 0) {
+      struct pipe_screen *screen = $self->st_dev->screen;
+      char *map;
+
+      assert($self->buffer->refcount);
+
+      if(offset > $self->buffer->size) {
+         PyErr_SetString(PyExc_ValueError, "offset must be smaller than buffer size");
+         return;
+      }
+
+      /* Compare against the space left after `offset` instead of computing
+       * offset + LENGTH: that sum can wrap around in unsigned arithmetic,
+       * pass the check, and let memcpy() run past the end of the mapping.
+       * The subtraction cannot underflow because offset <= size was
+       * established just above. */
+      if(LENGTH > $self->buffer->size - offset) {
+         PyErr_SetString(PyExc_ValueError, "data length must fit inside the buffer");
+         return;
+      }
+
+      map = pipe_buffer_map(screen, $self->buffer, PIPE_BUFFER_USAGE_CPU_WRITE);
+      if(map) {
+         memcpy(map + offset, STRING, LENGTH);
+         pipe_buffer_unmap(screen, $self->buffer);
+      }
+   }
+};
diff --git a/src/gallium/state_trackers/python/retrace/README b/src/gallium/state_trackers/python/retrace/README
new file mode 100644
index 0000000000..822cd11404
--- /dev/null
+++
b/src/gallium/state_trackers/python/retrace/README @@ -0,0 +1,17 @@ +This is an application written in Python to replay the traces captured by the + trace pipe driver. + + +To use it, follow the instructions in src/gallium/drivers/trace/README and +src/gallium/state_trackers/python/README, and then run + + python src/gallium/state_trackers/python/retrace/interpreter.py filename.trace + + +This is still a work in progress: +- not everything is captured/replayed + - surface/textures contents +- any tiny error will result in a crash + +-- +Jose Fonseca <jrfonseca@tungstengraphics.com> diff --git a/src/gallium/state_trackers/python/retrace/interpreter.py b/src/gallium/state_trackers/python/retrace/interpreter.py new file mode 100755 index 0000000000..351a6e739b --- /dev/null +++ b/src/gallium/state_trackers/python/retrace/interpreter.py @@ -0,0 +1,480 @@ +#!/usr/bin/env python +############################################################################# +# +# Copyright 2008 Tungsten Graphics, Inc. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# +############################################################################# + + +import sys +import gallium +import model +import parser + + +def make_image(surface): + pixels = gallium.FloatArray(surface.height*surface.width*4) + surface.get_tile_rgba(0, 0, surface.width, surface.height, pixels) + + import Image + outimage = Image.new( + mode='RGB', + size=(surface.width, surface.height), + color=(0,0,0)) + outpixels = outimage.load() + for y in range(0, surface.height): + for x in range(0, surface.width): + offset = (y*surface.width + x)*4 + r, g, b, a = [int(pixels[offset + ch]*255) for ch in range(4)] + outpixels[x, y] = r, g, b + return outimage + +def save_image(filename, surface): + outimage = make_image(surface) + outimage.save(filename, "PNG") + +def show_image(surface): + outimage = make_image(surface) + + import Tkinter as tk + from PIL import Image, ImageTk + root = tk.Tk() + + root.title('background image') + + image1 = ImageTk.PhotoImage(outimage) + w = image1.width() + h = image1.height() + x = 100 + y = 100 + root.geometry("%dx%d+%d+%d" % (w, h, x, y)) + panel1 = tk.Label(root, image=image1) + panel1.pack(side='top', fill='both', expand='yes') + panel1.image = image1 + root.mainloop() + + + + +class Struct: + """C-like struct""" + + # A basic Python class can pass as a C-like structure + pass + + +struct_factories = { + "pipe_blend_color": gallium.BlendColor, + "pipe_blend_state": gallium.Blend, + #"pipe_clip_state": gallium.Clip, + #"pipe_constant_buffer": gallium.ConstantBuffer, + "pipe_depth_state": gallium.Depth, + "pipe_stencil_state": gallium.Stencil, + "pipe_alpha_state": gallium.Alpha, + "pipe_depth_stencil_alpha_state": gallium.DepthStencilAlpha, + "pipe_format_block": gallium.FormatBlock, + #"pipe_framebuffer_state": gallium.Framebuffer, + "pipe_poly_stipple": gallium.PolyStipple, + "pipe_rasterizer_state": gallium.Rasterizer, + "pipe_sampler_state": gallium.Sampler, + "pipe_scissor_state": gallium.Scissor, + #"pipe_shader_state": 
gallium.Shader, + #"pipe_vertex_buffer": gallium.VertexBuffer, + "pipe_vertex_element": gallium.VertexElement, + "pipe_viewport_state": gallium.Viewport, + #"pipe_texture": gallium.Texture, +} + + +member_array_factories = { + "pipe_rasterizer_state": {"sprite_coord_mode": gallium.ByteArray}, + "pipe_poly_stipple": {"stipple": gallium.UnsignedArray}, + "pipe_viewport_state": {"scale": gallium.FloatArray, "translate": gallium.FloatArray}, + #"pipe_clip_state": {"ucp": gallium.FloatArray}, + "pipe_depth_stencil_alpha_state": {"stencil": gallium.StencilArray}, + "pipe_blend_color": {"color": gallium.FloatArray}, + "pipe_sampler_state": {"border_color": gallium.FloatArray}, +} + + +class Translator(model.Visitor): + """Translate model arguments into regular Python objects""" + + def __init__(self, interpreter): + self.interpreter = interpreter + self.result = None + + def visit(self, node): + self.result = None + node.visit(self) + return self.result + + def visit_literal(self, node): + self.result = node.value + + def visit_named_constant(self, node): + # lookup the named constant in the gallium module + self.result = getattr(gallium, node.name) + + def visit_array(self, node): + array = [] + for element in node.elements: + array.append(self.visit(element)) + self.result = array + + def visit_struct(self, node): + struct_factory = struct_factories.get(node.name, Struct) + struct = struct_factory() + for member_name, member_node in node.members: + member_value = self.visit(member_node) + try: + array_factory = member_array_factories[node.name][member_name] + except KeyError: + pass + else: + assert isinstance(member_value, list) + array = array_factory(len(member_value)) + for i in range(len(member_value)): + array[i] = member_value[i] + member_value = array + #print node.name, member_name, member_value + assert isinstance(struct, Struct) or hasattr(struct, member_name) + setattr(struct, member_name, member_value) + self.result = struct + + def visit_pointer(self, 
node): + self.result = self.interpreter.lookup_object(node.address) + + +class Object: + + def __init__(self, interpreter, real): + self.interpreter = interpreter + self.real = real + + +class Global(Object): + + def __init__(self, interpreter, real): + self.interpreter = interpreter + self.real = real + + def pipe_winsys_create(self): + return Winsys(self.interpreter, gallium.Device()) + + def pipe_screen_create(self, winsys): + return Screen(self.interpreter, winsys.real) + + def pipe_context_create(self, screen): + context = screen.real.context_create() + return Context(self.interpreter, context) + + +class Winsys(Object): + + def __init__(self, interpreter, real): + self.interpreter = interpreter + self.real = real + + def get_name(self): + pass + + def user_buffer_create(self, data, size): + # We don't really care to distinguish between user and regular buffers + buffer = self.real.buffer_create(size, + 4, + gallium.PIPE_BUFFER_USAGE_CPU_READ | + gallium.PIPE_BUFFER_USAGE_CPU_WRITE ) + buffer.write(data, size) + return buffer + + def buffer_create(self, alignment, usage, size): + return self.real.buffer_create(size, alignment, usage) + + def buffer_destroy(self, buffer): + pass + + def buffer_write(self, buffer, data, size): + buffer.write(data, size) + + def fence_finish(self, fence, flags): + pass + + def fence_reference(self, dst, src): + pass + + def flush_frontbuffer(self, surface): + pass + + def surface_alloc(self): + return None + + def surface_release(self, surface): + pass + + +class Screen(Object): + + def get_name(self): + pass + + def get_vendor(self): + pass + + def get_param(self, param): + pass + + def get_paramf(self, param): + pass + + def is_format_supported(self, format, target, tex_usage, geom_flags): + return self.real.is_format_supported(format, target, tex_usage, geom_flags) + + def texture_create(self, template): + return self.real.texture_create( + format = template.format, + width = template.width[0], + height = template.height[0], + 
depth = template.depth[0], + last_level = template.last_level, + target = template.target, + tex_usage = template.tex_usage, + ) + + def texture_destroy(self, texture): + self.interpreter.unregister_object(texture) + + def texture_release(self, surface): + pass + + def get_tex_surface(self, texture, face, level, zslice, usage): + return texture.get_surface(face, level, zslice, usage) + + def tex_surface_destroy(self, surface): + self.interpreter.unregister_object(surface) + + def tex_surface_release(self, surface): + pass + + def surface_write(self, surface, data, stride, size): + assert surface.nblocksy * stride == size + surface.put_tile_raw(0, 0, surface.width, surface.height, data, stride) + + +class Context(Object): + + def __init__(self, interpreter, real): + Object.__init__(self, interpreter, real) + self.cbufs = [] + self.zsbuf = None + + def destroy(self): + pass + + def create_blend_state(self, state): + return state + + def bind_blend_state(self, state): + if state is not None: + self.real.set_blend(state) + + def delete_blend_state(self, state): + pass + + def create_sampler_state(self, state): + return state + + def delete_sampler_state(self, state): + pass + + def bind_sampler_states(self, n, states): + for i in range(n): + self.real.set_sampler(i, states[i]) + + def create_rasterizer_state(self, state): + return state + + def bind_rasterizer_state(self, state): + if state is not None: + self.real.set_rasterizer(state) + + def delete_rasterizer_state(self, state): + pass + + def create_depth_stencil_alpha_state(self, state): + return state + + def bind_depth_stencil_alpha_state(self, state): + if state is not None: + self.real.set_depth_stencil_alpha(state) + + def delete_depth_stencil_alpha_state(self, state): + pass + + def create_fs_state(self, state): + tokens = str(state.tokens) + shader = gallium.Shader(tokens) + return shader + + create_vs_state = create_fs_state + + def bind_fs_state(self, state): + self.real.set_fragment_shader(state) + + def 
bind_vs_state(self, state): + self.real.set_vertex_shader(state) + + def delete_fs_state(self, state): + pass + + delete_vs_state = delete_fs_state + + def set_blend_color(self, state): + self.real.set_blend_color(state) + + def set_clip_state(self, state): + _state = gallium.Clip() + _state.nr = state.nr + if state.nr: + # FIXME + ucp = gallium.FloatArray(gallium.PIPE_MAX_CLIP_PLANES*4) + for i in range(len(state.ucp)): + for j in range(len(state.ucp[i])): + ucp[i*4 + j] = state.ucp[i][j] + _state.ucp = ucp + self.real.set_clip(_state) + + def set_constant_buffer(self, shader, index, state): + if state is not None: + self.real.set_constant_buffer(shader, index, state.buffer) + + def set_framebuffer_state(self, state): + _state = gallium.Framebuffer() + _state.width = state.width + _state.height = state.height + _state.num_cbufs = state.num_cbufs + for i in range(len(state.cbufs)): + _state.set_cbuf(i, state.cbufs[i]) + _state.set_zsbuf(state.zsbuf) + self.real.set_framebuffer(_state) + + self.cbufs = state.cbufs + self.zsbuf = state.zsbuf + + def set_polygon_stipple(self, state): + self.real.set_polygon_stipple(state) + + def set_scissor_state(self, state): + self.real.set_scissor(state) + + def set_viewport_state(self, state): + self.real.set_viewport(state) + + def set_sampler_textures(self, n, textures): + for i in range(n): + self.real.set_sampler_texture(i, textures[i]) + + def set_vertex_buffers(self, n, vbufs): + for i in range(n): + vbuf = vbufs[i] + self.real.set_vertex_buffer( + i, + pitch = vbuf.pitch, + max_index = vbuf.max_index, + buffer_offset = vbuf.buffer_offset, + buffer = vbuf.buffer, + ) + + def set_vertex_elements(self, n, elements): + for i in range(n): + self.real.set_vertex_element(i, elements[i]) + self.real.set_vertex_elements(n) + + def set_edgeflags(self, bitfield): + # FIXME + pass + + def draw_arrays(self, mode, start, count): + self.real.draw_arrays(mode, start, count) + + def draw_elements(self, indexBuffer, indexSize, mode, start, 
count): + self.real.draw_elements(indexBuffer, indexSize, mode, start, count) + + def draw_range_elements(self, indexBuffer, indexSize, minIndex, maxIndex, mode, start, count): + self.real.draw_range_elements(indexBuffer, indexSize, minIndex, maxIndex, mode, start, count) + + def flush(self, flags): + self.real.flush(flags) + if flags & gallium.PIPE_FLUSH_FRAME: + self._update() + return None + + def clear(self, surface, value): + self.real.surface_clear(surface, value) + + def _update(self): + self.real.flush() + + if self.cbufs and self.cbufs[0]: + show_image(self.cbufs[0]) + + +class Interpreter(parser.TraceParser): + + def __init__(self, stream): + parser.TraceParser.__init__(self, stream) + self.objects = {} + self.result = None + self.globl = Global(self, None) + + def register_object(self, address, object): + self.objects[address] = object + + def unregister_object(self, object): + # FIXME: + pass + + def lookup_object(self, address): + return self.objects[address] + + def interpret(self, trace): + for call in trace.calls: + self.interpret_call(call) + + def handle_call(self, call): + sys.stderr.write("%s\n" % call) + + args = [self.interpret_arg(arg) for name, arg in call.args] + + if call.klass: + obj = args[0] + args = args[1:] + else: + obj = self.globl + + method = getattr(obj, call.method) + ret = method(*args) + + if call.ret and isinstance(call.ret, model.Pointer): + self.register_object(call.ret.address, ret) + + def interpret_arg(self, node): + translator = Translator(self) + return translator.visit(node) + + +if __name__ == '__main__': + parser.main(Interpreter) diff --git a/src/gallium/state_trackers/python/retrace/model.py b/src/gallium/state_trackers/python/retrace/model.py new file mode 100755 index 0000000000..a17a765914 --- /dev/null +++ b/src/gallium/state_trackers/python/retrace/model.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python +############################################################################# +# +# Copyright 2008 Tungsten 
Graphics, Inc. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +############################################################################# + + +'''Trace data model.''' + + +class Node: + + def visit(self, visitor): + raise NotImplementedError + + +class Literal(Node): + + def __init__(self, value): + self.value = value + + def visit(self, visitor): + visitor.visit_literal(self) + + def __str__(self): + if isinstance(self.value, str) and len(self.value) > 32: + return '...' 
+        else:
+            return repr(self.value)
+
+
+class NamedConstant(Node):
+    """A value referred to by name; printed as that name."""
+
+    def __init__(self, name):
+        self.name = name
+
+    def visit(self, visitor):
+        visitor.visit_named_constant(self)
+
+    def __str__(self):
+        return self.name
+
+
+class Array(Node):
+    """An ordered collection of element nodes."""
+
+    def __init__(self, elements):
+        self.elements = elements
+
+    def visit(self, visitor):
+        visitor.visit_array(self)
+
+    def __str__(self):
+        return '{' + ', '.join([str(value) for value in self.elements]) + '}'
+
+
+class Struct(Node):
+    """A named structure; `members` is a sequence of (name, value) pairs."""
+
+    def __init__(self, name, members):
+        self.name = name
+        self.members = members
+
+    def visit(self, visitor):
+        visitor.visit_struct(self)
+
+    def __str__(self):
+        return '{' + ', '.join([name + ' = ' + str(value) for name, value in self.members]) + '}'
+
+
+class Pointer(Node):
+    """A pointer value identified by its address."""
+
+    def __init__(self, address):
+        self.address = address
+
+    def visit(self, visitor):
+        visitor.visit_pointer(self)
+
+    def __str__(self):
+        # The address is returned as-is, so it has to be a string already.
+        return self.address
+
+
+class Call:
+    """One recorded call.
+
+    `klass` may be empty for a call without a class, `args` is a sequence of
+    (name, value) pairs and `ret` is the returned value or None.
+    """
+
+    def __init__(self, klass, method, args, ret):
+        self.klass = klass
+        self.method = method
+        self.args = args
+        self.ret = ret
+
+    def visit(self, visitor):
+        visitor.visit_call(self)
+
+    def __str__(self):
+        # Rendered as: [klass::]method(name = value, ...)[ = ret]
+        s = self.method
+        if self.klass:
+            s = self.klass + '::' + s
+        s += '(' + ', '.join([name + ' = ' + str(value) for name, value in self.args]) + ')'
+        if self.ret is not None:
+            s += ' = ' + str(self.ret)
+        return s
+
+
+class Trace:
+    """A whole trace: a sequence of calls, printed one call per line."""
+
+    def __init__(self, calls):
+        self.calls = calls
+
+    def visit(self, visitor):
+        visitor.visit_trace(self)
+
+    def __str__(self):
+        return '\n'.join([str(call) for call in self.calls])
+
+
+class Visitor:
+    """Base class for model visitors.
+
+    Each model class dispatches to the matching visit_* method from its own
+    visit(); every method here raises NotImplementedError until a subclass
+    overrides it.
+    """
+
+    def visit_literal(self, node):
+        raise NotImplementedError
+
+    def visit_named_constant(self, node):
+        raise NotImplementedError
+
+    def visit_array(self, node):
+        raise NotImplementedError
+
+    def visit_struct(self, node):
+        raise NotImplementedError
+
+    def visit_pointer(self, node):
+        raise NotImplementedError
+
+    def visit_call(self, node):
+        raise NotImplementedError
+
+    def
visit_trace(self, node): + raise NotImplementedError + + diff --git a/src/gallium/state_trackers/python/retrace/parser.py b/src/gallium/state_trackers/python/retrace/parser.py new file mode 100755 index 0000000000..6bc75ad685 --- /dev/null +++ b/src/gallium/state_trackers/python/retrace/parser.py @@ -0,0 +1,345 @@ +#!/usr/bin/env python +############################################################################# +# +# Copyright 2008 Tungsten Graphics, Inc. +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# +############################################################################# + + +import sys +import xml.parsers.expat +import binascii + +from model import * + + +ELEMENT_START, ELEMENT_END, CHARACTER_DATA, EOF = range(4) + + +class XmlToken: + + def __init__(self, type, name_or_data, attrs = None, line = None, column = None): + assert type in (ELEMENT_START, ELEMENT_END, CHARACTER_DATA, EOF) + self.type = type + self.name_or_data = name_or_data + self.attrs = attrs + self.line = line + self.column = column + + def __str__(self): + if self.type == ELEMENT_START: + return '<' + self.name_or_data + ' ...>' + if self.type == ELEMENT_END: + return '</' + self.name_or_data + '>' + if self.type == CHARACTER_DATA: + return self.name_or_data + if self.type == EOF: + return 'end of file' + assert 0 + + +class XmlTokenizer: + """Expat based XML tokenizer.""" + + def __init__(self, fp, skip_ws = True): + self.fp = fp + self.tokens = [] + self.index = 0 + self.final = False + self.skip_ws = skip_ws + + self.character_pos = 0, 0 + self.character_data = '' + + self.parser = xml.parsers.expat.ParserCreate() + self.parser.StartElementHandler = self.handle_element_start + self.parser.EndElementHandler = self.handle_element_end + self.parser.CharacterDataHandler = self.handle_character_data + + def handle_element_start(self, name, attributes): + self.finish_character_data() + line, column = self.pos() + token = XmlToken(ELEMENT_START, name, attributes, line, column) + self.tokens.append(token) + + def handle_element_end(self, name): + self.finish_character_data() + line, column = self.pos() + token = XmlToken(ELEMENT_END, name, None, line, column) + self.tokens.append(token) + + def handle_character_data(self, data): + if not self.character_data: + self.character_pos = self.pos() + self.character_data += data + + def finish_character_data(self): + if self.character_data: + if not self.skip_ws or not self.character_data.isspace(): + line, column = self.character_pos + token = 
XmlToken(CHARACTER_DATA, self.character_data, None, line, column) + self.tokens.append(token) + self.character_data = '' + + def next(self): + size = 16*1024 + while self.index >= len(self.tokens) and not self.final: + self.tokens = [] + self.index = 0 + data = self.fp.read(size) + self.final = len(data) < size + data = data.rstrip('\0') + try: + self.parser.Parse(data, self.final) + except xml.parsers.expat.ExpatError, e: + #if e.code == xml.parsers.expat.errors.XML_ERROR_NO_ELEMENTS: + if e.code == 3: + pass + else: + raise e + if self.index >= len(self.tokens): + line, column = self.pos() + token = XmlToken(EOF, None, None, line, column) + else: + token = self.tokens[self.index] + self.index += 1 + return token + + def pos(self): + return self.parser.CurrentLineNumber, self.parser.CurrentColumnNumber + + +class TokenMismatch(Exception): + + def __init__(self, expected, found): + self.expected = expected + self.found = found + + def __str__(self): + return '%u:%u: %s expected, %s found' % (self.found.line, self.found.column, str(self.expected), str(self.found)) + + + +class XmlParser: + """Base XML document parser.""" + + def __init__(self, fp): + self.tokenizer = XmlTokenizer(fp) + self.consume() + + def consume(self): + self.token = self.tokenizer.next() + + def match_element_start(self, name): + return self.token.type == ELEMENT_START and self.token.name_or_data == name + + def match_element_end(self, name): + return self.token.type == ELEMENT_END and self.token.name_or_data == name + + def element_start(self, name): + while self.token.type == CHARACTER_DATA: + self.consume() + if self.token.type != ELEMENT_START: + raise TokenMismatch(XmlToken(ELEMENT_START, name), self.token) + if self.token.name_or_data != name: + raise TokenMismatch(XmlToken(ELEMENT_START, name), self.token) + attrs = self.token.attrs + self.consume() + return attrs + + def element_end(self, name): + while self.token.type == CHARACTER_DATA: + self.consume() + if self.token.type != 
ELEMENT_END: + raise TokenMismatch(XmlToken(ELEMENT_END, name), self.token) + if self.token.name_or_data != name: + raise TokenMismatch(XmlToken(ELEMENT_END, name), self.token) + self.consume() + + def character_data(self, strip = True): + data = '' + while self.token.type == CHARACTER_DATA: + data += self.token.name_or_data + self.consume() + if strip: + data = data.strip() + return data + + +class TraceParser(XmlParser): + + def parse(self): + self.element_start('trace') + while self.token.type not in (ELEMENT_END, EOF): + call = self.parse_call() + self.handle_call(call) + if self.token.type != EOF: + self.element_end('trace') + + def parse_call(self): + attrs = self.element_start('call') + klass = attrs['class'] + method = attrs['method'] + args = [] + ret = None + while self.token.type == ELEMENT_START: + if self.token.name_or_data == 'arg': + arg = self.parse_arg() + args.append(arg) + elif self.token.name_or_data == 'ret': + ret = self.parse_ret() + elif self.token.name_or_data == 'call': + # ignore nested function calls + self.parse_call() + else: + raise TokenMismatch("<arg ...> or <ret ...>", self.token) + self.element_end('call') + + return Call(klass, method, args, ret) + + def parse_arg(self): + attrs = self.element_start('arg') + name = attrs['name'] + value = self.parse_value() + self.element_end('arg') + + return name, value + + def parse_ret(self): + attrs = self.element_start('ret') + value = self.parse_value() + self.element_end('ret') + + return value + + def parse_value(self): + expected_tokens = ('null', 'bool', 'int', 'uint', 'float', 'string', 'enum', 'array', 'struct', 'ptr', 'bytes') + if self.token.type == ELEMENT_START: + if self.token.name_or_data in expected_tokens: + method = getattr(self, 'parse_' + self.token.name_or_data) + return method() + raise TokenMismatch(" or " .join(expected_tokens), self.token) + + def parse_null(self): + self.element_start('null') + self.element_end('null') + return Literal(None) + + def parse_bool(self): 
+ self.element_start('bool') + value = int(self.character_data()) + self.element_end('bool') + return Literal(value) + + def parse_int(self): + self.element_start('int') + value = int(self.character_data()) + self.element_end('int') + return Literal(value) + + def parse_uint(self): + self.element_start('uint') + value = int(self.character_data()) + self.element_end('uint') + return Literal(value) + + def parse_float(self): + self.element_start('float') + value = float(self.character_data()) + self.element_end('float') + return Literal(value) + + def parse_enum(self): + self.element_start('enum') + name = self.character_data() + self.element_end('enum') + return NamedConstant(name) + + def parse_string(self): + self.element_start('string') + value = self.character_data() + self.element_end('string') + return Literal(value) + + def parse_bytes(self): + self.element_start('bytes') + value = binascii.a2b_hex(self.character_data()) + self.element_end('bytes') + return Literal(value) + + def parse_array(self): + self.element_start('array') + elems = [] + while self.token.type != ELEMENT_END: + elems.append(self.parse_elem()) + self.element_end('array') + return Array(elems) + + def parse_elem(self): + self.element_start('elem') + value = self.parse_value() + self.element_end('elem') + return value + + def parse_struct(self): + attrs = self.element_start('struct') + name = attrs['name'] + members = [] + while self.token.type != ELEMENT_END: + members.append(self.parse_member()) + self.element_end('struct') + return Struct(name, members) + + def parse_member(self): + attrs = self.element_start('member') + name = attrs['name'] + value = self.parse_value() + self.element_end('member') + + return name, value + + def parse_ptr(self): + self.element_start('ptr') + address = self.character_data() + self.element_end('ptr') + + return Pointer(address) + + def handle_call(self, call): + + pass + + +class TraceDumper(TraceParser): + + + def handle_call(self, call): + print call + + 
+def main(ParserFactory): + for arg in sys.argv[1:]: + if arg.endswith('.gz'): + import gzip + stream = gzip.GzipFile(arg, 'rt') + else: + stream = open(arg, 'rt') + parser = ParserFactory(stream) + parser.parse() + + +if __name__ == '__main__': + main(TraceDumper) diff --git a/src/gallium/state_trackers/python/samples/tri.py b/src/gallium/state_trackers/python/samples/tri.py new file mode 100644 index 0000000000..193479f7d6 --- /dev/null +++ b/src/gallium/state_trackers/python/samples/tri.py @@ -0,0 +1,229 @@ +#!/usr/bin/env python +########################################################################## +# +# Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. +# All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sub license, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice (including the +# next paragraph) shall be included in all copies or substantial portions +# of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +# IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR +# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# +########################################################################## + + +from gallium import * + + +def make_image(surface): + pixels = FloatArray(surface.height*surface.width*4) + surface.get_tile_rgba(0, 0, surface.width, surface.height, pixels) + + import Image + outimage = Image.new( + mode='RGB', + size=(surface.width, surface.height), + color=(0,0,0)) + outpixels = outimage.load() + for y in range(0, surface.height): + for x in range(0, surface.width): + offset = (y*surface.width + x)*4 + r, g, b, a = [int(pixels[offset + ch]*255) for ch in range(4)] + outpixels[x, y] = r, g, b + return outimage + +def save_image(filename, surface): + outimage = make_image(surface) + outimage.save(filename, "PNG") + +def show_image(surface): + outimage = make_image(surface) + + import Tkinter as tk + from PIL import Image, ImageTk + root = tk.Tk() + + root.title('background image') + + image1 = ImageTk.PhotoImage(outimage) + w = image1.width() + h = image1.height() + x = 100 + y = 100 + root.geometry("%dx%d+%d+%d" % (w, h, x, y)) + panel1 = tk.Label(root, image=image1) + panel1.pack(side='top', fill='both', expand='yes') + panel1.image = image1 + root.mainloop() + + +def test(dev): + ctx = dev.context_create() + + width = 255 + height = 255 + + # disabled blending/masking + blend = Blend() + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.colormask = PIPE_MASK_RGBA + ctx.set_blend(blend) + + # no-op depth/stencil/alpha + depth_stencil_alpha = DepthStencilAlpha() + ctx.set_depth_stencil_alpha(depth_stencil_alpha) + + # rasterizer + rasterizer = Rasterizer() + rasterizer.front_winding = PIPE_WINDING_CW + rasterizer.cull_mode = PIPE_WINDING_NONE + rasterizer.bypass_clipping = 1 + rasterizer.scissor = 1 + #rasterizer.bypass_vs = 1 + ctx.set_rasterizer(rasterizer) + + # viewport (identity, we setup vertices in wincoords) + 
viewport = Viewport() + scale = FloatArray(4) + scale[0] = 1.0 + scale[1] = 1.0 + scale[2] = 1.0 + scale[3] = 1.0 + viewport.scale = scale + translate = FloatArray(4) + translate[0] = 0.0 + translate[1] = 0.0 + translate[2] = 0.0 + translate[3] = 0.0 + viewport.translate = translate + ctx.set_viewport(viewport) + + # samplers + sampler = Sampler() + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE + sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST + sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST + sampler.normalized_coords = 1 + ctx.set_sampler(0, sampler) + + # scissor + scissor = Scissor() + scissor.minx = 0 + scissor.miny = 0 + scissor.maxx = width + scissor.maxy = height + ctx.set_scissor(scissor) + + clip = Clip() + clip.nr = 0 + ctx.set_clip(clip) + + # framebuffer + cbuf = dev.texture_create( + PIPE_FORMAT_X8R8G8B8_UNORM, + width, height, + tex_usage=PIPE_TEXTURE_USAGE_DISPLAY_TARGET, + ) + _cbuf = cbuf.get_surface(usage = PIPE_BUFFER_USAGE_GPU_READ|PIPE_BUFFER_USAGE_GPU_WRITE) + fb = Framebuffer() + fb.width = width + fb.height = height + fb.num_cbufs = 1 + fb.set_cbuf(0, _cbuf) + ctx.set_framebuffer(fb) + _cbuf.clear_value = 0x00000000 + ctx.surface_clear(_cbuf, _cbuf.clear_value) + del _cbuf + + # vertex shader + vs = Shader(''' + VERT1.1 + DCL IN[0], POSITION, CONSTANT + DCL IN[1], COLOR, CONSTANT + DCL OUT[0], POSITION, CONSTANT + DCL OUT[1], COLOR, CONSTANT + 0:MOV OUT[0], IN[0] + 1:MOV OUT[1], IN[1] + 2:END + ''') + ctx.set_vertex_shader(vs) + + # fragment shader + fs = Shader(''' + FRAG1.1 + DCL IN[0], COLOR, LINEAR + DCL OUT[0], COLOR, CONSTANT + 0:MOV OUT[0], IN[0] + 1:END + ''') + ctx.set_fragment_shader(fs) + + nverts = 3 + nattrs = 2 + verts = FloatArray(nverts * nattrs * 4) + + verts[ 0] = 128.0 # x1 + verts[ 1] = 32.0 # y1 + verts[ 2] = 0.0 # z1 + verts[ 3] = 1.0 # w1 + verts[ 4] = 1.0 # r1 + 
verts[ 5] = 0.0 # g1 + verts[ 6] = 0.0 # b1 + verts[ 7] = 1.0 # a1 + verts[ 8] = 32.0 # x2 + verts[ 9] = 224.0 # y2 + verts[10] = 0.0 # z2 + verts[11] = 1.0 # w2 + verts[12] = 0.0 # r2 + verts[13] = 1.0 # g2 + verts[14] = 0.0 # b2 + verts[15] = 1.0 # a2 + verts[16] = 224.0 # x3 + verts[17] = 224.0 # y3 + verts[18] = 0.0 # z3 + verts[19] = 1.0 # w3 + verts[20] = 0.0 # r3 + verts[21] = 0.0 # g3 + verts[22] = 1.0 # b3 + verts[23] = 1.0 # a3 + + ctx.draw_vertices(PIPE_PRIM_TRIANGLES, + nverts, + nattrs, + verts) + + ctx.flush() + + show_image(cbuf.get_surface(usage = PIPE_BUFFER_USAGE_CPU_READ|PIPE_BUFFER_USAGE_CPU_WRITE)) + #save_image('tri.png', cbuf.get_surface(usage = PIPE_BUFFER_USAGE_CPU_READ|PIPE_BUFFER_USAGE_CPU_WRITE)) + + + +def main(): + dev = Device() + test(dev) + + +if __name__ == '__main__': + main() diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c new file mode 100644 index 0000000000..95c1378a03 --- /dev/null +++ b/src/gallium/state_trackers/python/st_device.c @@ -0,0 +1,323 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_winsys.h"
+#include "pipe/p_context.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_inlines.h"
+#include "cso_cache/cso_context.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_simple_shaders.h"
+#include "trace/tr_screen.h"
+#include "trace/tr_context.h"
+
+#include "st_device.h"
+#include "st_winsys.h"
+
+
+/**
+ * Destroy the device's screen and free the device itself.
+ *
+ * When the trace wrapper exists only the wrapper is destroyed, exactly as
+ * before (NOTE(review): this presumes the wrapper tears down the screen it
+ * wraps -- confirm against the trace driver).  When wrapping never
+ * happened, the real screen is destroyed directly so it is not leaked.
+ */
+static void
+st_device_really_destroy(struct st_device *st_dev)
+{
+   if(st_dev->screen)
+      st_dev->screen->destroy(st_dev->screen);
+   else if(st_dev->real_screen)
+      st_dev->real_screen->destroy(st_dev->real_screen);
+
+   FREE(st_dev);
+}
+
+
+/**
+ * Drop one reference to the device, destroying it when the count hits zero.
+ *
+ * A NULL device is ignored, matching st_context_destroy() and
+ * st_buffer_destroy().
+ */
+void
+st_device_destroy(struct st_device *st_dev)
+{
+   if(!st_dev)
+      return;
+
+   if(!--st_dev->refcount)
+      st_device_really_destroy(st_dev);
+}
+
+
+/**
+ * Create a device on top of the given winsys.
+ *
+ * Returns NULL when the winsys lacks either entry point, on allocation
+ * failure, or when the real or the trace screen cannot be created.  The
+ * returned device starts with a reference count of one.
+ */
+static struct st_device *
+st_device_create_from_st_winsys(const struct st_winsys *st_ws)
+{
+   struct st_device *st_dev;
+
+   if(!st_ws->screen_create ||
+      !st_ws->context_create)
+      return NULL;
+
+   st_dev = CALLOC_STRUCT(st_device);
+   if(!st_dev)
+      return NULL;
+
+   st_dev->refcount = 1;
+   st_dev->st_ws = st_ws;
+
+   st_dev->real_screen = st_ws->screen_create();
+   if(!st_dev->real_screen) {
+      st_device_destroy(st_dev);
+      return NULL;
+   }
+
+   st_dev->screen = trace_screen_create(st_dev->real_screen);
+   if(!st_dev->screen) {
+      /* st_device_destroy() releases real_screen as well */
+      st_device_destroy(st_dev);
+      return NULL;
+   }
+
+   return st_dev;
+}
+
+
+/**
+ * Create a device using the hardpipe winsys when hardware is true and the
+ * softpipe winsys otherwise.
+ */
+struct st_device *
+st_device_create(boolean hardware) {
+   if(hardware)
+      return st_device_create_from_st_winsys(&st_hardpipe_winsys);
+   else
+      return st_device_create_from_st_winsys(&st_softpipe_winsys);
+}
+
+
+/**
+ * Destroy a context and drop the device reference it holds.
+ *
+ * Safe to call with NULL and with a partially constructed context, which
+ * is how st_context_create() cleans up after a failure.
+ */
+void
+st_context_destroy(struct st_context *st_ctx)
+{
+   unsigned i;
+
+   if(st_ctx) {
+      struct st_device *st_dev = st_ctx->st_dev;
+
+      if(st_ctx->cso) {
+         cso_delete_vertex_shader(st_ctx->cso, st_ctx->vs);
+         cso_delete_fragment_shader(st_ctx->cso, st_ctx->fs);
+
+         cso_destroy_context(st_ctx->cso);
+      }
+
+      if(st_ctx->pipe)
+         st_ctx->pipe->destroy(st_ctx->pipe);
+      else if(st_ctx->real_pipe)
+         /* trace wrapping failed: the real context would otherwise leak */
+         st_ctx->real_pipe->destroy(st_ctx->real_pipe);
+
+      for(i = 0; i < PIPE_MAX_SAMPLERS; ++i)
+         pipe_texture_reference(&st_ctx->sampler_textures[i], NULL);
+      pipe_texture_reference(&st_ctx->default_texture, NULL);
+
+      FREE(st_ctx);
+
+      /* release the reference taken in st_context_create() */
+      st_device_destroy(st_dev);
+   }
+}
+
+
+struct st_context *
+st_context_create(struct st_device *st_dev)
+{
+   struct st_context *st_ctx;
+
+   st_ctx = CALLOC_STRUCT(st_context);
+   if(!st_ctx)
+      return NULL;
+
+   st_ctx->st_dev = st_dev;
+   ++st_dev->refcount;
+
+   st_ctx->real_pipe = st_dev->st_ws->context_create(st_dev->real_screen);
+   if(!st_ctx->real_pipe) {
+      st_context_destroy(st_ctx);
+      return NULL;
+   }
+
+   st_ctx->pipe = trace_context_create(st_dev->screen, st_ctx->real_pipe);
+   if(!st_ctx->pipe) {
+      st_context_destroy(st_ctx);
+      return NULL;
+   }
+
+   st_ctx->cso = cso_create_context(st_ctx->pipe);
+   if(!st_ctx->cso) {
+      st_context_destroy(st_ctx);
+      return NULL;
+   }
+
+   /* disabled blending/masking */
+   {
+      struct pipe_blend_state blend;
+      memset(&blend, 0, sizeof(blend));
+      blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE;
+      blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE;
+      blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO;
+      blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO;
+      blend.colormask = PIPE_MASK_RGBA;
+      cso_set_blend(st_ctx->cso, &blend);
+   }
+
+   /* no-op depth/stencil/alpha */
+   {
+      struct pipe_depth_stencil_alpha_state depthstencil;
+      memset(&depthstencil, 0, sizeof(depthstencil));
+      cso_set_depth_stencil_alpha(st_ctx->cso, &depthstencil);
+   }
+
+   /* rasterizer */
+   {
+      struct pipe_rasterizer_state rasterizer;
+      memset(&rasterizer,
0, sizeof(rasterizer)); + rasterizer.front_winding = PIPE_WINDING_CW; + rasterizer.cull_mode = PIPE_WINDING_NONE; + rasterizer.bypass_clipping = 1; + /*rasterizer.bypass_vs = 1;*/ + cso_set_rasterizer(st_ctx->cso, &rasterizer); + } + + /* identity viewport */ + { + struct pipe_viewport_state viewport; + viewport.scale[0] = 1.0; + viewport.scale[1] = 1.0; + viewport.scale[2] = 1.0; + viewport.scale[3] = 1.0; + viewport.translate[0] = 0.0; + viewport.translate[1] = 0.0; + viewport.translate[2] = 0.0; + viewport.translate[3] = 0.0; + cso_set_viewport(st_ctx->cso, &viewport); + } + + /* samplers */ + { + struct pipe_sampler_state sampler; + unsigned i; + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; + sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST; + sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST; + sampler.normalized_coords = 1; + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + cso_single_sampler(st_ctx->cso, i, &sampler); + cso_single_sampler_done(st_ctx->cso); + } + + /* default textures */ + { + struct pipe_screen *screen = st_dev->screen; + struct pipe_texture templat; + struct pipe_surface *surface; + unsigned i; + + memset( &templat, 0, sizeof( templat ) ); + templat.target = PIPE_TEXTURE_2D; + templat.format = PIPE_FORMAT_A8R8G8B8_UNORM; + templat.block.size = 4; + templat.block.width = 1; + templat.block.height = 1; + templat.width[0] = 1; + templat.height[0] = 1; + templat.depth[0] = 1; + templat.last_level = 0; + + st_ctx->default_texture = screen->texture_create( screen, &templat ); + if(st_ctx->default_texture) { + surface = screen->get_tex_surface( screen, + st_ctx->default_texture, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE ); + if(surface) { + uint32_t *map; + map = (uint32_t *) pipe_surface_map(surface, PIPE_BUFFER_USAGE_CPU_WRITE ); + if(map) { + *map = 
0x00000000; + pipe_surface_unmap( surface ); + } + pipe_surface_reference(&surface, NULL); + } + } + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + pipe_texture_reference(&st_ctx->sampler_textures[i], st_ctx->default_texture); + + cso_set_sampler_textures(st_ctx->cso, PIPE_MAX_SAMPLERS, st_ctx->sampler_textures); + } + + /* vertex shader */ + { + struct pipe_shader_state vert_shader; + + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC }; + const uint semantic_indexes[] = { 0, 0 }; + st_ctx->vs = util_make_vertex_passthrough_shader(st_ctx->pipe, + 2, + semantic_names, + semantic_indexes, + &vert_shader); + cso_set_vertex_shader_handle(st_ctx->cso, st_ctx->vs); + } + + /* fragment shader */ + { + struct pipe_shader_state frag_shader; + st_ctx->fs = util_make_fragment_passthrough_shader(st_ctx->pipe, + &frag_shader); + cso_set_fragment_shader_handle(st_ctx->cso, st_ctx->fs); + } + + return st_ctx; +} + + +void +st_buffer_destroy(struct st_buffer *st_buf) +{ + if(st_buf) { + struct pipe_screen *screen = st_buf->st_dev->screen; + pipe_buffer_reference(screen, &st_buf->buffer, NULL); + FREE(st_buf); + } +} + + +struct st_buffer * +st_buffer_create(struct st_device *st_dev, + unsigned alignment, unsigned usage, unsigned size) +{ + struct pipe_screen *screen = st_dev->screen; + struct st_buffer *st_buf; + + st_buf = CALLOC_STRUCT(st_buffer); + if(!st_buf) + return NULL; + + st_buf->st_dev = st_dev; + + st_buf->buffer = pipe_buffer_create(screen, alignment, usage, size); + if(!st_buf->buffer) { + st_buffer_destroy(st_buf); + return NULL; + } + + return st_buf; +} + diff --git a/src/gallium/state_trackers/python/st_device.h b/src/gallium/state_trackers/python/st_device.h new file mode 100644 index 0000000000..7cfe6de9f6 --- /dev/null +++ b/src/gallium/state_trackers/python/st_device.h @@ -0,0 +1,101 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. 
+ * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef ST_DEVICE_H_ +#define ST_DEVICE_H_ + + +#include "pipe/p_state.h" + +struct cso_context; +struct pipe_screen; +struct pipe_context; +struct st_winsys; + + +struct st_buffer { + struct st_device *st_dev; + + struct pipe_buffer *buffer; +}; + + +struct st_context { + struct st_device *st_dev; + + struct pipe_context *real_pipe; + struct pipe_context *pipe; + + struct cso_context *cso; + + void *vs; + void *fs; + + struct pipe_texture *default_texture; + struct pipe_texture *sampler_textures[PIPE_MAX_SAMPLERS]; + + unsigned num_vertex_buffers; + struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; + + unsigned num_vertex_elements; + struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS]; +}; + + +struct st_device { + const struct st_winsys *st_ws; + + struct pipe_screen *real_screen; + struct pipe_screen *screen; + + /* FIXME: we also need to refcount for textures and surfaces... */ + unsigned refcount; +}; + + +struct st_buffer * +st_buffer_create(struct st_device *st_dev, + unsigned alignment, unsigned usage, unsigned size); + +void +st_buffer_destroy(struct st_buffer *st_buf); + +struct st_context * +st_context_create(struct st_device *st_dev); + +void +st_context_destroy(struct st_context *st_ctx); + +struct st_device * +st_device_create(boolean hardware); + +void +st_device_destroy(struct st_device *st_dev); + + +#endif /* ST_DEVICE_H_ */ diff --git a/src/gallium/state_trackers/python/st_hardpipe_winsys.c b/src/gallium/state_trackers/python/st_hardpipe_winsys.c new file mode 100644 index 0000000000..8b33c70fd7 --- /dev/null +++ b/src/gallium/state_trackers/python/st_hardpipe_winsys.c @@ -0,0 +1,62 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/** + * @file + * Stub for hardware pipe driver support. 
+ */ + + +#include "pipe/p_compiler.h" + +#include "st_winsys.h" + + +/* XXX: Force init_gallium symbol to be linked */ +extern void init_gallium(void); +void (*force_init_gallium_linkage)(void) = &init_gallium; + + +static struct pipe_screen * +st_hardpipe_screen_create(void) +{ + return st_softpipe_winsys.screen_create(); +} + + +static struct pipe_context * +st_hardpipe_context_create(struct pipe_screen *screen) +{ + return st_softpipe_winsys.context_create(screen); +} + + +const struct st_winsys st_hardpipe_winsys = { + &st_hardpipe_screen_create, + &st_hardpipe_context_create +}; diff --git a/src/gallium/state_trackers/python/st_sample.c b/src/gallium/state_trackers/python/st_sample.c new file mode 100644 index 0000000000..7765df3c4a --- /dev/null +++ b/src/gallium/state_trackers/python/st_sample.c @@ -0,0 +1,549 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_format.h"
+#include "pipe/p_state.h"
+#include "pipe/p_inlines.h"
+#include "util/u_tile.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "st_sample.h"
+
+
+/**
+ * Private pseudo random generator, so that the produced sequence is the
+ * same on every run and on every platform.
+ *
+ * The state is advanced as a 64-bit linear congruential generator from a
+ * fixed seed.  Only 16 bits of the state (bits 32..47) are handed back, so
+ * the result is always in the range [0, 65535] despite the uint32_t
+ * return type.
+ *
+ * @sa http://en.wikipedia.org/wiki/Linear_congruential_generator
+ */
+static uint32_t st_random(void) {
+   static uint64_t state = UINT64_C(0xbb9a063afb0a739d);
+
+   state = state * UINT64_C(134775813) + UINT64_C(1);
+
+   return (uint32_t)((state >> 32) & UINT64_C(0xffff));
+}
+
+
+/**
+ * We don't want to include the patent-encumbered DXT code here, so instead
+ * we store several uncompressed/compressed data pairs for hardware testing
+ * purposes.
+ */ +struct dxt_data +{ + uint8_t rgba[16*4]; + uint8_t raw[16]; +}; + + +static const struct dxt_data +dxt1_rgb_data[] = { + { + { + 0x99, 0xb0, 0x8e, 0xff, + 0x5d, 0x62, 0x89, 0xff, + 0x99, 0xb0, 0x8e, 0xff, + 0x99, 0xb0, 0x8e, 0xff, + 0xd6, 0xff, 0x94, 0xff, + 0x5d, 0x62, 0x89, 0xff, + 0x99, 0xb0, 0x8e, 0xff, + 0xd6, 0xff, 0x94, 0xff, + 0x5d, 0x62, 0x89, 0xff, + 0x5d, 0x62, 0x89, 0xff, + 0x99, 0xb0, 0x8e, 0xff, + 0x21, 0x14, 0x84, 0xff, + 0x5d, 0x62, 0x89, 0xff, + 0x21, 0x14, 0x84, 0xff, + 0x21, 0x14, 0x84, 0xff, + 0x99, 0xb0, 0x8e, 0xff + }, + {0xf2, 0xd7, 0xb0, 0x20, 0xae, 0x2c, 0x6f, 0x97} + }, + { + { + 0xb5, 0xcf, 0x9c, 0xff, + 0x83, 0x8c, 0x8b, 0xff, + 0x21, 0x08, 0x6b, 0xff, + 0x83, 0x8c, 0x8b, 0xff, + 0x52, 0x4a, 0x7b, 0xff, + 0x83, 0x8c, 0x8b, 0xff, + 0x83, 0x8c, 0x8b, 0xff, + 0xb5, 0xcf, 0x9c, 0xff, + 0x21, 0x08, 0x6b, 0xff, + 0xb5, 0xcf, 0x9c, 0xff, + 0x83, 0x8c, 0x8b, 0xff, + 0x52, 0x4a, 0x7b, 0xff, + 0xb5, 0xcf, 0x9c, 0xff, + 0x83, 0x8c, 0x8b, 0xff, + 0x52, 0x4a, 0x7b, 0xff, + 0x83, 0x8c, 0x8b, 0xff + }, + {0x73, 0xb6, 0x4d, 0x20, 0x98, 0x2b, 0xe1, 0xb8} + }, + { + { + 0x00, 0x2c, 0xff, 0xff, + 0x94, 0x8d, 0x7b, 0xff, + 0x4a, 0x5c, 0xbd, 0xff, + 0x4a, 0x5c, 0xbd, 0xff, + 0x4a, 0x5c, 0xbd, 0xff, + 0x94, 0x8d, 0x7b, 0xff, + 0x94, 0x8d, 0x7b, 0xff, + 0x94, 0x8d, 0x7b, 0xff, + 0xde, 0xbe, 0x39, 0xff, + 0x94, 0x8d, 0x7b, 0xff, + 0xde, 0xbe, 0x39, 0xff, + 0xde, 0xbe, 0x39, 0xff, + 0xde, 0xbe, 0x39, 0xff, + 0xde, 0xbe, 0x39, 0xff, + 0xde, 0xbe, 0x39, 0xff, + 0x94, 0x8d, 0x7b, 0xff + }, + {0xe7, 0xdd, 0x7f, 0x01, 0xf9, 0xab, 0x08, 0x80} + }, + { + { + 0x6b, 0x24, 0x21, 0xff, + 0x7b, 0x4f, 0x5d, 0xff, + 0x7b, 0x4f, 0x5d, 0xff, + 0x8b, 0x7a, 0x99, 0xff, + 0x7b, 0x4f, 0x5d, 0xff, + 0x7b, 0x4f, 0x5d, 0xff, + 0x6b, 0x24, 0x21, 0xff, + 0x8b, 0x7a, 0x99, 0xff, + 0x9c, 0xa6, 0xd6, 0xff, + 0x6b, 0x24, 0x21, 0xff, + 0x7b, 0x4f, 0x5d, 0xff, + 0x8b, 0x7a, 0x99, 0xff, + 0x6b, 0x24, 0x21, 0xff, + 0x8b, 0x7a, 0x99, 0xff, + 0x7b, 0x4f, 0x5d, 0xff, + 0x9c, 0xa6, 0xd6, 0xff 
+ }, + {0x3a, 0x9d, 0x24, 0x69, 0xbd, 0x9f, 0xb4, 0x39} + } +}; + + +static const struct dxt_data +dxt1_rgba_data[] = { + { + { + 0x00, 0x00, 0x00, 0x00, + 0x4e, 0xaa, 0x90, 0xff, + 0x4e, 0xaa, 0x90, 0xff, + 0x00, 0x00, 0x00, 0x00, + 0x4e, 0xaa, 0x90, 0xff, + 0x29, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, + 0x4e, 0xaa, 0x90, 0xff, + 0x73, 0x55, 0x21, 0xff, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x4e, 0xaa, 0x90, 0xff, + 0x4e, 0xaa, 0x90, 0xff, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x4e, 0xaa, 0x90, 0xff + }, + {0xff, 0x2f, 0xa4, 0x72, 0xeb, 0xb2, 0xbd, 0xbe} + }, + { + { + 0xb5, 0xe3, 0x63, 0xff, + 0x00, 0x00, 0x00, 0x00, + 0x6b, 0x24, 0x84, 0xff, + 0xb5, 0xe3, 0x63, 0xff, + 0x00, 0x00, 0x00, 0x00, + 0xb5, 0xe3, 0x63, 0xff, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x6b, 0x24, 0x84, 0xff, + 0x6b, 0x24, 0x84, 0xff, + 0x00, 0x00, 0x00, 0x00, + 0xb5, 0xe3, 0x63, 0xff, + 0x90, 0x83, 0x73, 0xff, + 0xb5, 0xe3, 0x63, 0xff + }, + {0x30, 0x69, 0x0c, 0xb7, 0x4d, 0xf7, 0x0f, 0x67} + }, + { + { + 0x00, 0x00, 0x00, 0x00, + 0xc6, 0x86, 0x8c, 0xff, + 0xc6, 0x86, 0x8c, 0xff, + 0x21, 0x65, 0x42, 0xff, + 0x21, 0x65, 0x42, 0xff, + 0x21, 0x65, 0x42, 0xff, + 0x21, 0x65, 0x42, 0xff, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x21, 0x65, 0x42, 0xff, + 0xc6, 0x86, 0x8c, 0xff, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0xc6, 0x86, 0x8c, 0xff + }, + {0x28, 0x23, 0x31, 0xc4, 0x17, 0xc0, 0xd3, 0x7f} + }, + { + { + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0xc6, 0xe3, 0x9c, 0xff, + 0x7b, 0x1c, 0x52, 0xff, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x7b, 0x1c, 0x52, 0xff, + 0x00, 0x00, 0x00, 0x00, + 0x7b, 0x1c, 0x52, 0xff, + 0xa0, 0x7f, 0x77, 0xff, + 0xc6, 0xe3, 0x9c, 0xff, + 0x00, 0x00, 0x00, 0x00, + 0xa0, 0x7f, 0x77, 0xff + }, 
+ {0xea, 0x78, 0x13, 0xc7, 0x7f, 0xfc, 0x33, 0xb6} + }, +}; + + +static const struct dxt_data +dxt3_rgba_data[] = { + { + { + 0x6d, 0xc6, 0x96, 0x77, + 0x6d, 0xc6, 0x96, 0xee, + 0x6d, 0xc6, 0x96, 0xaa, + 0x8c, 0xff, 0xb5, 0x44, + 0x6d, 0xc6, 0x96, 0xff, + 0x6d, 0xc6, 0x96, 0x88, + 0x31, 0x55, 0x5a, 0x66, + 0x6d, 0xc6, 0x96, 0x99, + 0x31, 0x55, 0x5a, 0xbb, + 0x31, 0x55, 0x5a, 0x55, + 0x31, 0x55, 0x5a, 0x11, + 0x6d, 0xc6, 0x96, 0xcc, + 0x6d, 0xc6, 0x96, 0xcc, + 0x6d, 0xc6, 0x96, 0x11, + 0x31, 0x55, 0x5a, 0x44, + 0x31, 0x55, 0x5a, 0x88 + }, + {0xe7, 0x4a, 0x8f, 0x96, 0x5b, 0xc1, 0x1c, 0x84, 0xf6, 0x8f, 0xab, 0x32, 0x2a, 0x9a, 0x95, 0x5a} + }, + { + { + 0xad, 0xeb, 0x73, 0x99, + 0x97, 0xaa, 0x86, 0x66, + 0x6b, 0x28, 0xad, 0x99, + 0xad, 0xeb, 0x73, 0x99, + 0x6b, 0x28, 0xad, 0x22, + 0xad, 0xeb, 0x73, 0xff, + 0x97, 0xaa, 0x86, 0x55, + 0x6b, 0x28, 0xad, 0x55, + 0x6b, 0x28, 0xad, 0x44, + 0xad, 0xeb, 0x73, 0x33, + 0x6b, 0x28, 0xad, 0xee, + 0x6b, 0x28, 0xad, 0x99, + 0x97, 0xaa, 0x86, 0x66, + 0xad, 0xeb, 0x73, 0xbb, + 0x97, 0xaa, 0x86, 0x99, + 0xad, 0xeb, 0x73, 0xbb + }, + {0x69, 0x99, 0xf2, 0x55, 0x34, 0x9e, 0xb6, 0xb9, 0x4e, 0xaf, 0x55, 0x69, 0x18, 0x61, 0x51, 0x22} + }, + { + { + 0x63, 0xd7, 0xd6, 0x00, + 0x57, 0x62, 0x5d, 0xdd, + 0x57, 0x62, 0x5d, 0xcc, + 0x57, 0x62, 0x5d, 0xbb, + 0x52, 0x28, 0x21, 0xaa, + 0x57, 0x62, 0x5d, 0xcc, + 0x57, 0x62, 0x5d, 0xcc, + 0x57, 0x62, 0x5d, 0x66, + 0x57, 0x62, 0x5d, 0x22, + 0x57, 0x62, 0x5d, 0xdd, + 0x63, 0xd7, 0xd6, 0xee, + 0x57, 0x62, 0x5d, 0x33, + 0x63, 0xd7, 0xd6, 0x55, + 0x52, 0x28, 0x21, 0x55, + 0x57, 0x62, 0x5d, 0x11, + 0x5d, 0x9c, 0x99, 0xee + }, + {0xd0, 0xbc, 0xca, 0x6c, 0xd2, 0x3e, 0x55, 0xe1, 0xba, 0x66, 0x44, 0x51, 0xfc, 0xfd, 0xcf, 0xb4} + }, + { + { + 0x94, 0x6f, 0x60, 0x22, + 0x94, 0x6f, 0x60, 0x22, + 0xc5, 0xab, 0x76, 0x11, + 0xc5, 0xab, 0x76, 0xee, + 0x63, 0x34, 0x4a, 0xdd, + 0x63, 0x34, 0x4a, 0x33, + 0x94, 0x6f, 0x60, 0x77, + 0xf7, 0xe7, 0x8c, 0x00, + 0x94, 0x6f, 0x60, 0x33, + 0x63, 0x34, 0x4a, 0xaa, + 0x94, 0x6f, 0x60, 
0x77, + 0x63, 0x34, 0x4a, 0xcc, + 0x94, 0x6f, 0x60, 0xaa, + 0xf7, 0xe7, 0x8c, 0x99, + 0x63, 0x34, 0x4a, 0x44, + 0xc5, 0xab, 0x76, 0xaa + }, + {0x22, 0xe1, 0x3d, 0x07, 0xa3, 0xc7, 0x9a, 0xa4, 0x31, 0xf7, 0xa9, 0x61, 0xaf, 0x35, 0x77, 0x93} + }, +}; + + +static const struct dxt_data +dxt5_rgba_data[] = { + { + { + 0x6d, 0xc6, 0x96, 0x74, + 0x6d, 0xc6, 0x96, 0xf8, + 0x6d, 0xc6, 0x96, 0xb6, + 0x8c, 0xff, 0xb5, 0x53, + 0x6d, 0xc6, 0x96, 0xf8, + 0x6d, 0xc6, 0x96, 0x95, + 0x31, 0x55, 0x5a, 0x53, + 0x6d, 0xc6, 0x96, 0x95, + 0x31, 0x55, 0x5a, 0xb6, + 0x31, 0x55, 0x5a, 0x53, + 0x31, 0x55, 0x5a, 0x11, + 0x6d, 0xc6, 0x96, 0xd7, + 0x6d, 0xc6, 0x96, 0xb6, + 0x6d, 0xc6, 0x96, 0x11, + 0x31, 0x55, 0x5a, 0x32, + 0x31, 0x55, 0x5a, 0x95 + }, + {0xf8, 0x11, 0xc5, 0x0c, 0x9a, 0x73, 0xb4, 0x9c, 0xf6, 0x8f, 0xab, 0x32, 0x2a, 0x9a, 0x95, 0x5a} + }, + { + { + 0xad, 0xeb, 0x73, 0xa1, + 0x97, 0xaa, 0x86, 0x65, + 0x6b, 0x28, 0xad, 0xa1, + 0xad, 0xeb, 0x73, 0xa1, + 0x6b, 0x28, 0xad, 0x2a, + 0xad, 0xeb, 0x73, 0xfb, + 0x97, 0xaa, 0x86, 0x47, + 0x6b, 0x28, 0xad, 0x65, + 0x6b, 0x28, 0xad, 0x47, + 0xad, 0xeb, 0x73, 0x47, + 0x6b, 0x28, 0xad, 0xdd, + 0x6b, 0x28, 0xad, 0xa1, + 0x97, 0xaa, 0x86, 0x65, + 0xad, 0xeb, 0x73, 0xbf, + 0x97, 0xaa, 0x86, 0xa1, + 0xad, 0xeb, 0x73, 0xbf + }, + {0xfb, 0x2a, 0x34, 0x19, 0xdc, 0xbf, 0xe8, 0x71, 0x4e, 0xaf, 0x55, 0x69, 0x18, 0x61, 0x51, 0x22} + }, + { + { + 0x63, 0xd7, 0xd6, 0x00, + 0x57, 0x62, 0x5d, 0xf5, + 0x57, 0x62, 0x5d, 0xd2, + 0x57, 0x62, 0x5d, 0xaf, + 0x52, 0x28, 0x21, 0xaf, + 0x57, 0x62, 0x5d, 0xd2, + 0x57, 0x62, 0x5d, 0xd2, + 0x57, 0x62, 0x5d, 0x69, + 0x57, 0x62, 0x5d, 0x23, + 0x57, 0x62, 0x5d, 0xd2, + 0x63, 0xd7, 0xd6, 0xf5, + 0x57, 0x62, 0x5d, 0x46, + 0x63, 0xd7, 0xd6, 0x46, + 0x52, 0x28, 0x21, 0x69, + 0x57, 0x62, 0x5d, 0x23, + 0x5d, 0x9c, 0x99, 0xf5 + }, + {0xf5, 0x00, 0x81, 0x36, 0xa9, 0x17, 0xec, 0x1e, 0xba, 0x66, 0x44, 0x51, 0xfc, 0xfd, 0xcf, 0xb4} + }, + { + { + 0x94, 0x6f, 0x60, 0x25, + 0x94, 0x6f, 0x60, 0x25, + 0xc5, 0xab, 0x76, 0x05, + 0xc5, 0xab, 
0x76, 0xe8, + 0x63, 0x34, 0x4a, 0xe8, + 0x63, 0x34, 0x4a, 0x25, + 0x94, 0x6f, 0x60, 0x86, + 0xf7, 0xe7, 0x8c, 0x05, + 0x94, 0x6f, 0x60, 0x25, + 0x63, 0x34, 0x4a, 0xa7, + 0x94, 0x6f, 0x60, 0x66, + 0x63, 0x34, 0x4a, 0xc7, + 0x94, 0x6f, 0x60, 0xa7, + 0xf7, 0xe7, 0x8c, 0xa7, + 0x63, 0x34, 0x4a, 0x45, + 0xc5, 0xab, 0x76, 0xa7 + }, + {0xe8, 0x05, 0x7f, 0x80, 0x33, 0x5f, 0xb5, 0x79, 0x31, 0xf7, 0xa9, 0x61, 0xaf, 0x35, 0x77, 0x93} + }, +}; + + +static INLINE void +st_sample_dxt_pixel_block(enum pipe_format format, + const struct pipe_format_block *block, + uint8_t *raw, + float *rgba, unsigned rgba_stride, + unsigned w, unsigned h) +{ + const struct dxt_data *data; + unsigned n; + unsigned i; + unsigned x, y, ch; + + switch(format) { + case PIPE_FORMAT_DXT1_RGB: + data = dxt1_rgb_data; + n = sizeof(dxt1_rgb_data)/sizeof(dxt1_rgb_data[0]); + break; + case PIPE_FORMAT_DXT1_RGBA: + data = dxt1_rgba_data; + n = sizeof(dxt1_rgba_data)/sizeof(dxt1_rgba_data[0]); + break; + case PIPE_FORMAT_DXT3_RGBA: + data = dxt3_rgba_data; + n = sizeof(dxt3_rgba_data)/sizeof(dxt3_rgba_data[0]); + break; + case PIPE_FORMAT_DXT5_RGBA: + data = dxt5_rgba_data; + n = sizeof(dxt5_rgba_data)/sizeof(dxt5_rgba_data[0]); + break; + default: + assert(0); + } + + i = st_random() % n; + + for(y = 0; y < h; ++y) + for(x = 0; x < w; ++x) + for(ch = 0; ch < 4; ++ch) + rgba[y*rgba_stride + x*4 + ch] = (float)(data[i].rgba[y*4*4 + x*4 + ch])/255.0f; + + memcpy(raw, data[i].raw, block->size); +} + + +static INLINE void +st_sample_generic_pixel_block(enum pipe_format format, + const struct pipe_format_block *block, + uint8_t *raw, + float *rgba, unsigned rgba_stride, + unsigned w, unsigned h) +{ + unsigned i; + unsigned x, y, ch; + + for(i = 0; i < block->size; ++i) + raw[i] = (uint8_t)st_random(); + + + pipe_tile_raw_to_rgba(format, + raw, + w, h, + rgba, rgba_stride); + + if(format == PIPE_FORMAT_YCBCR || format == PIPE_FORMAT_YCBCR_REV) { + for(y = 0; y < h; ++y) { + for(x = 0; x < w; ++x) { + for(ch = 0; ch 
< 4; ++ch) { + unsigned offset = y*rgba_stride + x*4 + ch; + rgba[offset] = CLAMP(rgba[offset], 0.0f, 1.0f); + } + } + } + } +} + + +/** + * Randomly sample pixels. + */ +void +st_sample_pixel_block(enum pipe_format format, + const struct pipe_format_block *block, + void *raw, + float *rgba, unsigned rgba_stride, + unsigned w, unsigned h) +{ + switch(format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + st_sample_dxt_pixel_block(format, block, raw, rgba, rgba_stride, w, h); + break; + + default: + st_sample_generic_pixel_block(format, block, raw, rgba, rgba_stride, w, h); + break; + } +} + + +void +st_sample_surface(struct pipe_surface *surface, float *rgba) +{ + const struct pipe_format_block *block = &surface->block; + unsigned rgba_stride = surface->width*4; + void *raw; + unsigned x, y; + + raw = pipe_surface_map(surface, PIPE_BUFFER_USAGE_CPU_READ); + if(!raw) + return; + + for (y = 0; y < surface->nblocksy; ++y) { + for(x = 0; x < surface->nblocksx; ++x) { + st_sample_pixel_block(surface->format, + block, + (uint8_t*)raw + y*surface->stride + x*block->size, + rgba + y*block->height*rgba_stride + x*block->width*4, + rgba_stride, + MIN2(block->width, surface->width - x*block->width), + MIN2(block->height, surface->height - y*block->height)); + } + } + + pipe_surface_unmap(surface); +} diff --git a/src/gallium/state_trackers/python/st_sample.h b/src/gallium/state_trackers/python/st_sample.h new file mode 100644 index 0000000000..ff04a12613 --- /dev/null +++ b/src/gallium/state_trackers/python/st_sample.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef ST_SAMPLE_H_ +#define ST_SAMPLE_H_ + + +#include "pipe/p_format.h" + + +void +st_sample_pixel_block(enum pipe_format format, + const struct pipe_format_block *block, + void *raw, + float *rgba, unsigned rgba_stride, + unsigned w, unsigned h); + +void +st_sample_surface(struct pipe_surface *surface, float *rgba); + + +#endif /* ST_SAMPLE_H_ */ diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c new file mode 100644 index 0000000000..f62113a469 --- /dev/null +++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c @@ -0,0 +1,311 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/** + * @file + * Softpipe support. + * + * @author Keith Whitwell + * @author Brian Paul + * @author Jose Fonseca + */ + + +#include "pipe/p_winsys.h" +#include "pipe/p_format.h" +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "softpipe/sp_winsys.h" +#include "st_winsys.h" + + +struct st_softpipe_buffer +{ + struct pipe_buffer base; + boolean userBuffer; /** Is this a user-space buffer? */ + void *data; + void *mapped; +}; + + +/** Cast wrapper */ +static INLINE struct st_softpipe_buffer * +st_softpipe_buffer( struct pipe_buffer *buf ) +{ + return (struct st_softpipe_buffer *)buf; +} + + +static void * +st_softpipe_buffer_map(struct pipe_winsys *winsys, + struct pipe_buffer *buf, + unsigned flags) +{ + struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf); + st_softpipe_buf->mapped = st_softpipe_buf->data; + return st_softpipe_buf->mapped; +} + + +static void +st_softpipe_buffer_unmap(struct pipe_winsys *winsys, + struct pipe_buffer *buf) +{ + struct st_softpipe_buffer *st_softpipe_buf = st_softpipe_buffer(buf); + st_softpipe_buf->mapped = NULL; +} + + +static void +st_softpipe_buffer_destroy(struct pipe_winsys *winsys, + struct pipe_buffer *buf) +{ + struct st_softpipe_buffer *oldBuf = st_softpipe_buffer(buf); + + if (oldBuf->data) { + if (!oldBuf->userBuffer) + align_free(oldBuf->data); + + oldBuf->data = NULL; + } + + FREE(oldBuf); +} + + +static void +st_softpipe_flush_frontbuffer(struct pipe_winsys *winsys, + struct pipe_surface *surf, + void *context_private) +{ +} + + + +static const char * +st_softpipe_get_name(struct pipe_winsys *winsys) +{ + return "softpipe"; +} + + +static struct pipe_buffer * 
+st_softpipe_buffer_create(struct pipe_winsys *winsys, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct st_softpipe_buffer *buffer = CALLOC_STRUCT(st_softpipe_buffer); + + buffer->base.refcount = 1; + buffer->base.alignment = alignment; + buffer->base.usage = usage; + buffer->base.size = size; + + buffer->data = align_malloc(size, alignment); + + return &buffer->base; +} + + +/** + * Create buffer which wraps user-space data. + */ +static struct pipe_buffer * +st_softpipe_user_buffer_create(struct pipe_winsys *winsys, + void *ptr, + unsigned bytes) +{ + struct st_softpipe_buffer *buffer; + + buffer = CALLOC_STRUCT(st_softpipe_buffer); + if(!buffer) + return NULL; + + buffer->base.refcount = 1; + buffer->base.size = bytes; + buffer->userBuffer = TRUE; + buffer->data = ptr; + + return &buffer->base; +} + + +/** + * Round n up to next multiple. + */ +static INLINE unsigned +round_up(unsigned n, unsigned multiple) +{ + return (n + multiple - 1) & ~(multiple - 1); +} + + +static int +st_softpipe_surface_alloc_storage(struct pipe_winsys *winsys, + struct pipe_surface *surf, + unsigned width, unsigned height, + enum pipe_format format, + unsigned flags, + unsigned tex_usage) +{ + const unsigned alignment = 64; + + surf->width = width; + surf->height = height; + surf->format = format; + pf_get_block(format, &surf->block); + surf->nblocksx = pf_get_nblocksx(&surf->block, width); + surf->nblocksy = pf_get_nblocksy(&surf->block, height); + surf->stride = round_up(surf->nblocksx * surf->block.size, alignment); + surf->usage = flags; + + assert(!surf->buffer); + surf->buffer = winsys->buffer_create(winsys, alignment, + PIPE_BUFFER_USAGE_PIXEL, + surf->stride * surf->nblocksy); + if(!surf->buffer) + return -1; + + return 0; +} + + +static struct pipe_surface * +st_softpipe_surface_alloc(struct pipe_winsys *winsys) +{ + struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); + + assert(winsys); + + surface->refcount = 1; + surface->winsys = winsys; + + 
return surface; +} + + +static void +st_softpipe_surface_release(struct pipe_winsys *winsys, + struct pipe_surface **s) +{ + struct pipe_surface *surf = *s; + assert(!surf->texture); + surf->refcount--; + if (surf->refcount == 0) { + if (surf->buffer) + winsys_buffer_reference(winsys, &surf->buffer, NULL); + free(surf); + } + *s = NULL; +} + + +static void +st_softpipe_fence_reference(struct pipe_winsys *winsys, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ +} + + +static int +st_softpipe_fence_signalled(struct pipe_winsys *winsys, + struct pipe_fence_handle *fence, + unsigned flag) +{ + return 0; +} + + +static int +st_softpipe_fence_finish(struct pipe_winsys *winsys, + struct pipe_fence_handle *fence, + unsigned flag) +{ + return 0; +} + + +static void +st_softpipe_destroy(struct pipe_winsys *winsys) +{ + FREE(winsys); +} + + +static struct pipe_screen * +st_softpipe_screen_create(void) +{ + static struct pipe_winsys *winsys; + struct pipe_screen *screen; + + winsys = CALLOC_STRUCT(pipe_winsys); + if(!winsys) + return NULL; + + winsys->destroy = st_softpipe_destroy; + + winsys->buffer_create = st_softpipe_buffer_create; + winsys->user_buffer_create = st_softpipe_user_buffer_create; + winsys->buffer_map = st_softpipe_buffer_map; + winsys->buffer_unmap = st_softpipe_buffer_unmap; + winsys->buffer_destroy = st_softpipe_buffer_destroy; + + winsys->surface_alloc = st_softpipe_surface_alloc; + winsys->surface_alloc_storage = st_softpipe_surface_alloc_storage; + winsys->surface_release = st_softpipe_surface_release; + + winsys->fence_reference = st_softpipe_fence_reference; + winsys->fence_signalled = st_softpipe_fence_signalled; + winsys->fence_finish = st_softpipe_fence_finish; + + winsys->flush_frontbuffer = st_softpipe_flush_frontbuffer; + winsys->get_name = st_softpipe_get_name; + + screen = softpipe_create_screen(winsys); + if(!screen) + st_softpipe_destroy(winsys); + + return screen; +} + + +static struct pipe_context * 
+st_softpipe_context_create(struct pipe_screen *screen) +{ + return softpipe_create(screen, screen->winsys, NULL); +} + + +const struct st_winsys st_softpipe_winsys = { + &st_softpipe_screen_create, + &st_softpipe_context_create, +}; diff --git a/src/gallium/state_trackers/python/st_winsys.h b/src/gallium/state_trackers/python/st_winsys.h new file mode 100644 index 0000000000..b8cb612d86 --- /dev/null +++ b/src/gallium/state_trackers/python/st_winsys.h @@ -0,0 +1,52 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef ST_WINSYS_H_ +#define ST_WINSYS_H_ + + +struct pipe_screen; +struct pipe_context; + + +struct st_winsys +{ + struct pipe_screen * + (*screen_create)(void); + + struct pipe_context * + (*context_create)(struct pipe_screen *screen); +}; + + +extern const struct st_winsys st_softpipe_winsys; + +extern const struct st_winsys st_hardpipe_winsys; + + +#endif /* ST_WINSYS_H_ */ diff --git a/src/gallium/state_trackers/python/tests/base.py b/src/gallium/state_trackers/python/tests/base.py new file mode 100644 index 0000000000..8477aa5fc9 --- /dev/null +++ b/src/gallium/state_trackers/python/tests/base.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python +########################################################################## +# +# Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. +# All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sub license, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice (including the +# next paragraph) shall be included in all copies or substantial portions +# of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
# Enumerate all pixel formats
formats = {}
# Iterate over a snapshot: the loop variables are module globals themselves,
# so walking the live dictionary would change its size during iteration.
for name, value in list(globals().items()):
    if name.startswith("PIPE_FORMAT_") and isinstance(value, int):
        formats[value] = name


def make_image(width, height, rgba):
    """Build an RGB image from a flat sequence of RGBA floats.

    rgba holds 4 values per pixel, row after row; each value is clamped to
    [0, 1] and scaled to 0..255.  The alpha channel is dropped.
    """
    try:
        from PIL import Image
    except ImportError:
        # Legacy PIL installs expose the module at top level.
        import Image
    outimage = Image.new(
        mode='RGB',
        size=(width, height),
        color=(0,0,0))
    outpixels = outimage.load()
    for y in range(0, height):
        for x in range(0, width):
            offset = (y*width + x)*4
            r, g, b, a = [int(min(max(rgba[offset + ch], 0.0), 1.0)*255) for ch in range(4)]
            outpixels[x, y] = r, g, b
    return outimage

def save_image(width, height, rgba, filename):
    """Write the image described by rgba (see make_image) as a PNG file."""
    outimage = make_image(width, height, rgba)
    outimage.save(filename, "PNG")

def show_image(width, height, **rgbas):
    """Show one window per keyword argument, titled with the keyword.

    Every value is a flat RGBA float sequence as accepted by make_image.
    Windows are laid out left to right in label order.  Blocks until the
    windows are closed.
    """
    try:
        import Tkinter as tk
    except ImportError:
        import tkinter as tk
    from PIL import ImageTk

    root = tk.Tk()

    x = 64
    y = 64

    labels = sorted(rgbas.keys())
    for i in range(len(labels)):
        label = labels[i]
        outimage = make_image(width, height, rgbas[label])

        # The first image reuses the root window, the others get their own.
        if i:
            window = tk.Toplevel(root)
        else:
            window = root
        window.title(label)
        image1 = ImageTk.PhotoImage(outimage)
        w = image1.width()
        h = image1.height()
        window.geometry("%dx%d+%d+%d" % (w, h, x, y))
        panel1 = tk.Label(window, image=image1)
        panel1.pack(side='top', fill='both', expand='yes')
        # Keep a reference on the widget so the image is not collected.
        panel1.image = image1
        x += w + 2

    root.mainloop()


class TestFailure(Exception):
    """Raised by a test to report a failure."""

    pass

class TestSkip(Exception):
    """Raised by a test to report that it cannot run."""

    pass


class Test:
    """Common interface of single tests and test suites."""

    def __init__(self):
        pass

    def _run(self, result):
        """Run the test(s), recording the outcome in result."""
        raise NotImplementedError

    def run(self):
        """Run with a fresh TestResult and print its summary."""
        result = TestResult()
        self._run(result)
        result.summary()


class TestCase(Test):
    """A single test; subclasses implement description() and test()."""

    def __init__(self, dev, **kargs):
        Test.__init__(self)
        self.dev = dev
        # Every extra keyword becomes an attribute of the test case.
        self.__dict__.update(kargs)

    def description(self):
        raise NotImplementedError

    def test(self):
        raise NotImplementedError

    def _run(self, result):
        """Run test() and record pass, skip or failure in result.

        Exceptions other than TestSkip and TestFailure are not caught.
        """
        result.test_start(self)
        try:
            self.test()
        except KeyboardInterrupt:
            raise
        except TestSkip:
            result.test_skipped(self)
        except TestFailure:
            result.test_failed(self)
        else:
            result.test_passed(self)


class TestSuite(Test):
    """An ordered collection of tests run one after the other."""

    def __init__(self, tests = None):
        Test.__init__(self)
        if tests is None:
            self.tests = []
        else:
            self.tests = tests

    def add_test(self, test):
        self.tests.append(test)

    def _run(self, result):
        for test in self.tests:
            test._run(result)


class TestResult:
    """Counts test outcomes and prints progress while tests run."""

    def __init__(self):
        self.tests = 0
        self.passed = 0
        self.skipped = 0
        self.failed = 0
        self.failed_descriptions = []

    def test_start(self, test):
        self.tests += 1
        print("Running %s..." % test.description())

    def test_passed(self, test):
        self.passed += 1
        print("PASS")

    def test_skipped(self, test):
        self.skipped += 1
        print("SKIP")

    def test_failed(self, test):
        self.failed += 1
        self.failed_descriptions.append(test.description())
        print("FAIL")

    def summary(self):
        """Print the totals followed by the description of each failure."""
        print("%u tests, %u passed, %u skipped, %u failed" % (self.tests, self.passed, self.skipped, self.failed))
        for description in self.failed_descriptions:
            print("  %s" % description)
\ No newline at end of file diff --git a/src/gallium/state_trackers/python/tests/texture.py b/src/gallium/state_trackers/python/tests/texture.py new file mode 100644 index 0000000000..880a61306c --- /dev/null +++ b/src/gallium/state_trackers/python/tests/texture.py @@ -0,0 +1,397 @@ +#!/usr/bin/env python +########################################################################## +# +# Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. +# All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sub license, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice (including the +# next paragraph) shall be included in all copies or substantial portions +# of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +# IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR +# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
def lods(*dims):
    """Return the number of mipmap levels for the largest of dims.

    This is the number of times the largest dimension can be halved before
    it reaches zero, e.g. 7 for 64 and 1 for 1.
    """
    size = max(dims)
    levels = 0
    while size:
        levels += 1
        size >>= 1
    return levels


def minify(dims, level = 1):
    """Return dims scaled down to the given mipmap level, never below 1."""
    return [max(dim>>level, 1) for dim in dims]


def tex_coords(texture, face, level, zslice):
    """Return the texture coordinates of the four corners of a quad.

    The result is a list of four [s, t, r] lists, in the corner order
    (0,0), (1,0), (1,1), (0,1).  2D, 3D and cube textures are supported;
    any other target, or an unknown cube face, raises ValueError.
    """
    st = [
        [0.0, 0.0],
        [1.0, 0.0],
        [1.0, 1.0],
        [0.0, 1.0],
    ]

    if texture.target == PIPE_TEXTURE_2D:
        return [[s, t, 0.0] for s, t in st]
    elif texture.target == PIPE_TEXTURE_3D:
        depth = texture.get_depth(level)
        # Spread the slices evenly over [0, 1]; a single slice sits at 0.
        if depth > 1:
            r = float(zslice)/float(depth - 1)
        else:
            r = 0.0
        return [[s, t, r] for s, t in st]
    elif texture.target == PIPE_TEXTURE_CUBE:
        result = []
        for s, t in st:
            # See http://developer.nvidia.com/object/cube_map_ogl_tutorial.html
            sc = 2.0*s - 1.0
            tc = 2.0*t - 1.0
            if face == PIPE_TEX_FACE_POS_X:
                rx = 1.0
                ry = -tc
                rz = -sc
            elif face == PIPE_TEX_FACE_NEG_X:
                rx = -1.0
                ry = -tc
                rz = sc
            elif face == PIPE_TEX_FACE_POS_Y:
                rx = sc
                ry = 1.0
                rz = tc
            elif face == PIPE_TEX_FACE_NEG_Y:
                rx = sc
                ry = -1.0
                rz = -tc
            elif face == PIPE_TEX_FACE_POS_Z:
                rx = sc
                ry = -tc
                rz = 1.0
            elif face == PIPE_TEX_FACE_NEG_Z:
                rx = -sc
                ry = -tc
                rz = -1.0
            else:
                raise ValueError("unknown cube face %r" % (face,))
            result.append([rx, ry, rz])
        return result
    else:
        # Previously fell off the end and returned None, which only failed
        # later when the caller indexed the result.
        raise ValueError("unsupported texture target %r" % (texture.target,))

def is_pot(n):
    """Return True when n is a power of two (zero is not one)."""
    return n > 0 and n & (n - 1) == 0
self.last_level, + face, self.level, self.zslice, + ) + + def test(self): + dev = self.dev + + target = self.target + format = self.format + width = self.width + height = self.height + depth = self.depth + last_level = self.last_level + face = self.face + level = self.level + zslice = self.zslice + + tex_usage = PIPE_TEXTURE_USAGE_SAMPLER + geom_flags = 0 + if width != height: + geom_flags |= PIPE_TEXTURE_GEOM_NON_SQUARE + if not is_pot(width) or not is_pot(height) or not is_pot(depth): + geom_flags |= PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO + + if not dev.is_format_supported(format, target, tex_usage, geom_flags): + raise TestSkip + + ctx = self.dev.context_create() + + # disabled blending/masking + blend = Blend() + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO + blend.colormask = PIPE_MASK_RGBA + ctx.set_blend(blend) + + # no-op depth/stencil/alpha + depth_stencil_alpha = DepthStencilAlpha() + ctx.set_depth_stencil_alpha(depth_stencil_alpha) + + # rasterizer + rasterizer = Rasterizer() + rasterizer.front_winding = PIPE_WINDING_CW + rasterizer.cull_mode = PIPE_WINDING_NONE + rasterizer.bypass_clipping = 1 + #rasterizer.bypass_vs = 1 + ctx.set_rasterizer(rasterizer) + + # viewport (identity, we setup vertices in wincoords) + viewport = Viewport() + scale = FloatArray(4) + scale[0] = 1.0 + scale[1] = 1.0 + scale[2] = 1.0 + scale[3] = 1.0 + viewport.scale = scale + translate = FloatArray(4) + translate[0] = 0.0 + translate[1] = 0.0 + translate[2] = 0.0 + translate[3] = 0.0 + viewport.translate = translate + ctx.set_viewport(viewport) + + # samplers + sampler = Sampler() + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST + sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST + 
sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST + sampler.normalized_coords = 1 + sampler.min_lod = 0 + sampler.max_lod = PIPE_MAX_TEXTURE_LEVELS - 1 + ctx.set_sampler(0, sampler) + + # texture + texture = dev.texture_create( + target = target, + format = format, + width = width, + height = height, + depth = depth, + last_level = last_level, + tex_usage = tex_usage, + ) + + expected_rgba = FloatArray(height*width*4) + texture.get_surface( + usage = PIPE_BUFFER_USAGE_CPU_READ|PIPE_BUFFER_USAGE_CPU_WRITE, + face = face, + level = level, + zslice = zslice, + ).sample_rgba(expected_rgba) + + ctx.set_sampler_texture(0, texture) + + # framebuffer + cbuf_tex = dev.texture_create( + PIPE_FORMAT_A8R8G8B8_UNORM, + width, + height, + tex_usage = PIPE_TEXTURE_USAGE_RENDER_TARGET, + ) + + cbuf = cbuf_tex.get_surface(usage = PIPE_BUFFER_USAGE_GPU_WRITE|PIPE_BUFFER_USAGE_GPU_READ) + fb = Framebuffer() + fb.width = width + fb.height = height + fb.num_cbufs = 1 + fb.set_cbuf(0, cbuf) + ctx.set_framebuffer(fb) + ctx.surface_clear(cbuf, 0x00000000) + del fb + + # vertex shader + vs = Shader(''' + VERT1.1 + DCL IN[0], POSITION, CONSTANT + DCL IN[1], GENERIC, CONSTANT + DCL OUT[0], POSITION, CONSTANT + DCL OUT[1], GENERIC, CONSTANT + 0:MOV OUT[0], IN[0] + 1:MOV OUT[1], IN[1] + 2:END + ''') + #vs.dump() + ctx.set_vertex_shader(vs) + + # fragment shader + op = { + PIPE_TEXTURE_1D: "1D", + PIPE_TEXTURE_2D: "2D", + PIPE_TEXTURE_3D: "3D", + PIPE_TEXTURE_CUBE: "CUBE", + }[target] + fs = Shader(''' + FRAG1.1 + DCL IN[0], GENERIC[0], LINEAR + DCL OUT[0], COLOR, CONSTANT + DCL SAMP[0], CONSTANT + 0:TEX OUT[0], IN[0], SAMP[0], %s + 1:END + ''' % op) + #fs.dump() + ctx.set_fragment_shader(fs) + + nverts = 4 + nattrs = 2 + verts = FloatArray(nverts * nattrs * 4) + + x = 0 + y = 0 + w, h = minify((width, height), level) + + pos = [ + [x, y], + [x+w, y], + [x+w, y+h], + [x, y+h], + ] + + tex = tex_coords(texture, face, level, zslice) + + for i in range(0, 4): + j = 8*i + verts[j + 0] = pos[i][0] 
# x + verts[j + 1] = pos[i][1] # y + verts[j + 2] = 0.0 # z + verts[j + 3] = 1.0 # w + verts[j + 4] = tex[i][0] # s + verts[j + 5] = tex[i][1] # r + verts[j + 6] = tex[i][2] # q + verts[j + 7] = 1.0 + + ctx.draw_vertices(PIPE_PRIM_TRIANGLE_FAN, + nverts, + nattrs, + verts) + + ctx.flush() + + cbuf = cbuf_tex.get_surface(usage = PIPE_BUFFER_USAGE_CPU_READ) + + total = h*w + different = cbuf.compare_tile_rgba(x, y, w, h, expected_rgba, tol=4.0/256) + if different: + sys.stderr.write("%u out of %u pixels differ\n" % (different, total)) + + if float(total - different)/float(total) < 0.85: + + if 0: + rgba = FloatArray(h*w*4) + cbuf.get_tile_rgba(x, y, w, h, rgba) + show_image(w, h, Result=rgba, Expected=expected_rgba) + save_image(w, h, rgba, "result.png") + save_image(w, h, expected_rgba, "expected.png") + #sys.exit(0) + + raise TestFailure + + del ctx + + + +def main(): + dev = Device() + suite = TestSuite() + + targets = [] + targets += [PIPE_TEXTURE_2D] + targets += [PIPE_TEXTURE_CUBE] + targets += [PIPE_TEXTURE_3D] + + formats = [] + formats += [PIPE_FORMAT_A8R8G8B8_UNORM] + formats += [PIPE_FORMAT_R5G6B5_UNORM] + formats += [PIPE_FORMAT_L8_UNORM] + formats += [PIPE_FORMAT_YCBCR] + formats += [PIPE_FORMAT_DXT1_RGB] + + sizes = [64, 32, 16, 8, 4, 2, 1] + #sizes = [1020, 508, 252, 62, 30, 14, 6, 3] + #sizes = [64] + #sizes = [63] + + for target in targets: + for format in formats: + for size in sizes: + if target == PIPE_TEXTURE_CUBE: + faces = [ + PIPE_TEX_FACE_POS_X, + PIPE_TEX_FACE_NEG_X, + PIPE_TEX_FACE_POS_Y, + PIPE_TEX_FACE_NEG_Y, + PIPE_TEX_FACE_POS_Z, + PIPE_TEX_FACE_NEG_Z, + ] + #faces = [PIPE_TEX_FACE_NEG_X] + else: + faces = [0] + if target == PIPE_TEXTURE_3D: + depth = size + else: + depth = 1 + for face in faces: + levels = lods(size) + for last_level in range(levels): + for level in range(0, last_level + 1): + zslice = 0 + while zslice < depth >> level: + test = TextureTest( + dev = dev, + target = target, + format = format, + width = size, + height = 
size, + depth = depth, + last_level = last_level, + face = face, + level = level, + zslice = zslice, + ) + suite.add_test(test) + zslice = (zslice + 1)*2 - 1 + suite.run() + + +if __name__ == '__main__': + main() diff --git a/src/gallium/winsys/Makefile b/src/gallium/winsys/Makefile new file mode 100644 index 0000000000..2360a6a94a --- /dev/null +++ b/src/gallium/winsys/Makefile @@ -0,0 +1,24 @@ +TOP = ../../.. +include $(TOP)/configs/current + + +SUBDIRS = $(GALLIUM_WINSYS_DIRS) + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` + + +# Dummy install target +install: diff --git a/src/gallium/winsys/SConscript b/src/gallium/winsys/SConscript new file mode 100644 index 0000000000..30c3378dff --- /dev/null +++ b/src/gallium/winsys/SConscript @@ -0,0 +1,16 @@ +Import('*') + +if env['dri']: + SConscript([ + 'drm/SConscript', + ]) + +if 'xlib' in env['winsys']: + SConscript([ + 'xlib/SConscript', + ]) + +if 'gdi' in env['winsys']: + SConscript([ + 'gdi/SConscript', + ]) diff --git a/src/gallium/winsys/drm/Makefile b/src/gallium/winsys/drm/Makefile new file mode 100644 index 0000000000..f466ce6c3c --- /dev/null +++ b/src/gallium/winsys/drm/Makefile @@ -0,0 +1,38 @@ +# src/mesa/drivers/dri/Makefile + +TOP = ../../../.. 
+ +include $(TOP)/configs/current + + + +default: $(TOP)/$(LIB_DIR) subdirs + + +$(TOP)/$(LIB_DIR): + -mkdir $(TOP)/$(LIB_DIR) + + +subdirs: + @for dir in $(DRI_DIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +install: + @for dir in $(DRI_DIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE) install) || exit 1 ; \ + fi \ + done + + +clean: + @for dir in $(DRI_DIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE) clean) ; \ + fi \ + done + -rm -f common/*.o diff --git a/src/gallium/winsys/drm/Makefile.template b/src/gallium/winsys/drm/Makefile.template new file mode 100644 index 0000000000..80e817b808 --- /dev/null +++ b/src/gallium/winsys/drm/Makefile.template @@ -0,0 +1,125 @@ +# -*-makefile-*- + +MESA_MODULES = \ + $(TOP)/src/mesa/libmesa.a \ + $(GALLIUM_AUXILIARIES) + +COMMON_GALLIUM_SOURCES = \ + $(TOP)/src/mesa/drivers/dri/common/utils.c \ + $(TOP)/src/mesa/drivers/dri/common/vblank.c \ + $(TOP)/src/mesa/drivers/dri/common/dri_util.c \ + $(TOP)/src/mesa/drivers/dri/common/xmlconfig.c + +COMMON_SOURCES = $(COMMON_GALLIUM_SOURCES) \ + $(TOP)/src/mesa/drivers/common/driverfuncs.c \ + $(TOP)/src/mesa/drivers/dri/common/texmem.c \ + $(TOP)/src/mesa/drivers/dri/common/drirenderbuffer.c + +COMMON_BM_SOURCES = \ + $(TOP)/src/mesa/drivers/dri/common/dri_bufmgr.c \ + $(TOP)/src/mesa/drivers/dri/common/dri_drmpool.c + + +ifeq ($(WINDOW_SYSTEM),dri) +WINOBJ= +WINLIB= +INCLUDES = $(SHARED_INCLUDES) $(EXPAT_INCLUDES) + +OBJECTS = \ + $(C_SOURCES:.c=.o) \ + $(ASM_SOURCES:.S=.o) + +else +# miniglx +WINOBJ= +WINLIB=-L$(MESA)/src/glx/mini +MINIGLX_INCLUDES = -I$(TOP)/src/glx/mini +INCLUDES = $(MINIGLX_INCLUDES) \ + $(SHARED_INCLUDES) \ + $(PCIACCESS_CFLAGS) + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(MINIGLX_SOURCES:.c=.o) \ + $(ASM_SOURCES:.S=.o) +endif + + +### Include directories +SHARED_INCLUDES = \ + -I. 
\ + -I$(TOP)/src/mesa/drivers/dri/common \ + -Iserver \ + -I$(TOP)/include \ + -I$(TOP)/include/GL/internal \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/auxiliary \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/winsys/common \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main \ + -I$(TOP)/src/mesa/glapi \ + -I$(TOP)/src/mesa/math \ + -I$(TOP)/src/mesa/transform \ + -I$(TOP)/src/mesa/shader \ + -I$(TOP)/src/mesa/swrast \ + -I$(TOP)/src/mesa/swrast_setup \ + -I$(TOP)/src/egl/main \ + -I$(TOP)/src/egl/drivers/dri \ + $(LIBDRM_CFLAGS) + + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + + +##### TARGETS ##### + +default: depend symlinks $(LIBNAME) $(TOP)/$(LIB_DIR)/$(LIBNAME) $(LIBNAME_EGL) $(TOP)/$(LIB_DIR)/$(LIBNAME_EGL) + + +$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(PIPE_DRIVERS) $(WINOBJ) Makefile $(TOP)/src/mesa/drivers/dri/Makefile.template + $(TOP)/bin/mklib -noprefix -o $@ \ + $(OBJECTS) $(PIPE_DRIVERS) $(MESA_MODULES) $(WINOBJ) $(DRI_LIB_DEPS) + +$(LIBNAME_EGL): $(WINSYS_OBJECTS) $(LIBS) + $(TOP)/bin/mklib -o $(LIBNAME_EGL) \ + -linker "$(CC)" \ + -noprefix \ + $(OBJECTS) $(MKLIB_OPTIONS) $(WINSYS_OBJECTS) $(PIPE_DRIVERS) $(WINOBJ) $(DRI_LIB_DEPS) \ + --whole-archive $(LIBS) $(GALLIUM_AUXILIARIES) --no-whole-archive + +$(TOP)/$(LIB_DIR)/$(LIBNAME): $(LIBNAME) + $(INSTALL) $(LIBNAME) $(TOP)/$(LIB_DIR) + +$(TOP)/$(LIB_DIR)/$(LIBNAME_EGL): $(LIBNAME_EGL) + $(INSTALL) $(LIBNAME_EGL) $(TOP)/$(LIB_DIR) + +depend: $(C_SOURCES) $(ASM_SOURCES) $(SYMLINKS) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) \ + $(ASM_SOURCES) 2> /dev/null + + +# Emacs tags +tags: + etags `find . 
-name \*.[ch]` `find ../include` + + +# Remove .o and backup files +clean: + -rm -f *.o */*.o *~ *.so *~ server/*.o $(SYMLINKS) + -rm -f depend depend.bak + + +install: $(LIBNAME) + $(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR) + $(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR) + + +include depend diff --git a/src/gallium/winsys/drm/SConscript b/src/gallium/winsys/drm/SConscript new file mode 100644 index 0000000000..aef5210a32 --- /dev/null +++ b/src/gallium/winsys/drm/SConscript @@ -0,0 +1,54 @@ +Import('*') + +if env['dri']: + + drienv = env.Clone() + + drienv.Replace(CPPPATH = [ + '#src/mesa/drivers/dri/common', + '#include', + '#include/GL/internal', + '#src/gallium/include', + '#src/gallium/auxiliary', + '#src/gallium/drivers', + '#src/mesa', + '#src/mesa/main', + '#src/mesa/glapi', + '#src/mesa/math', + '#src/mesa/transform', + '#src/mesa/shader', + '#src/mesa/swrast', + '#src/mesa/swrast_setup', + '#src/egl/main', + '#src/egl/drivers/dri', + ]) + + drienv.ParseConfig('pkg-config --cflags --libs libdrm') + + COMMON_GALLIUM_SOURCES = [ + '#src/mesa/drivers/dri/common/utils.c', + '#src/mesa/drivers/dri/common/vblank.c', + '#src/mesa/drivers/dri/common/dri_util.c', + '#src/mesa/drivers/dri/common/xmlconfig.c', + ] + + COMMON_BM_SOURCES = [ + '#src/mesa/drivers/dri/common/dri_bufmgr.c', + '#src/mesa/drivers/dri/common/dri_drmpool.c', + ] + + Export([ + 'drienv', + 'COMMON_GALLIUM_SOURCES', + 'COMMON_BM_SOURCES', + ]) + + # TODO: Installation + #install: $(LIBNAME) + # $(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR) + # $(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR) + + if 'intel' in env['winsys']: + SConscript([ + 'intel/SConscript', + ]) diff --git a/src/gallium/winsys/drm/intel/Makefile b/src/gallium/winsys/drm/intel/Makefile new file mode 100644 index 0000000000..a670ac044d --- /dev/null +++ b/src/gallium/winsys/drm/intel/Makefile @@ -0,0 +1,25 @@ +TOP = ../../../../.. 
+include $(TOP)/configs/current + + +SUBDIRS = common dri egl + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` + rm -f `find . -name depend` + + +# Dummy install target +install: diff --git a/src/gallium/winsys/drm/intel/common/Makefile b/src/gallium/winsys/drm/intel/common/Makefile new file mode 100644 index 0000000000..bf1a7d691f --- /dev/null +++ b/src/gallium/winsys/drm/intel/common/Makefile @@ -0,0 +1,23 @@ +TOP = ../../../../../.. +include $(TOP)/configs/current + +LIBNAME = inteldrm + +C_SOURCES = \ + intel_be_batchbuffer.c \ + intel_be_context.c \ + intel_be_device.c \ + ws_dri_bufmgr.c \ + ws_dri_drmpool.c \ + ws_dri_fencemgr.c \ + ws_dri_mallocpool.c \ + ws_dri_slabpool.c + + +include ./Makefile.template + +DRIVER_DEFINES = $(shell pkg-config libdrm --cflags \ + && pkg-config libdrm --atleast-version=2.3.1 \ + && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") +symlinks: + diff --git a/src/gallium/winsys/drm/intel/common/Makefile.template b/src/gallium/winsys/drm/intel/common/Makefile.template new file mode 100644 index 0000000000..02ed363a43 --- /dev/null +++ b/src/gallium/winsys/drm/intel/common/Makefile.template @@ -0,0 +1,64 @@ +# -*-makefile-*- + + +# We still have a dependency on the "dri" buffer manager. Most likely +# the interface can be reused in non-dri environments, and also as a +# frontend to simpler memory managers. +# +COMMON_SOURCES = + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(CPP_SOURCES:.cpp=.o) \ + $(ASM_SOURCES:.S=.o) + + +### Include directories +INCLUDES = \ + -I. 
\ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/auxiliary \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/include \ + $(DRIVER_INCLUDES) + + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + + +##### TARGETS ##### + +default: depend symlinks $(LIBNAME) + + +$(LIBNAME): $(OBJECTS) Makefile Makefile.template + $(TOP)/bin/mklib -o $@ -static $(OBJECTS) $(DRIVER_LIBS) + + +depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \ + $(ASM_SOURCES) 2> /dev/null + + +# Emacs tags +tags: + etags `find . -name \*.[ch]` `find ../include` + + +# Remove .o and backup files +clean:: + -rm -f *.o */*.o *~ *.so *~ server/*.o $(SYMLINKS) + -rm -f depend depend.bak + + +include depend diff --git a/src/gallium/winsys/drm/intel/common/intel_be_batchbuffer.c b/src/gallium/winsys/drm/intel/common/intel_be_batchbuffer.c new file mode 100644 index 0000000000..bc13a5761e --- /dev/null +++ b/src/gallium/winsys/drm/intel/common/intel_be_batchbuffer.c @@ -0,0 +1,429 @@ + +#include "intel_be_batchbuffer.h" +#include "intel_be_context.h" +#include "intel_be_device.h" +#include <errno.h> + +#include "xf86drm.h" + +static void +intel_realloc_relocs(struct intel_be_batchbuffer *batch, int num_relocs) +{ + unsigned long size = num_relocs * I915_RELOC0_STRIDE + I915_RELOC_HEADER; + + size *= sizeof(uint32_t); + batch->reloc = realloc(batch->reloc, size); + batch->reloc_size = num_relocs; +} + + +void +intel_be_batchbuffer_reset(struct intel_be_batchbuffer *batch) +{ + /* + * Get a new, free batchbuffer. 
+ */ + drmBO *bo; + struct drm_bo_info_req *req; + + driBOUnrefUserList(batch->list); + driBOResetList(batch->list); + + /* base.size is the size available to the i915simple driver */ + batch->base.size = batch->device->max_batch_size - BATCH_RESERVED; + batch->base.actual_size = batch->device->max_batch_size; + driBOData(batch->buffer, batch->base.actual_size, NULL, NULL, 0); + + /* + * Add the batchbuffer to the validate list. + */ + + driBOAddListItem(batch->list, batch->buffer, + DRM_BO_FLAG_EXE | DRM_BO_FLAG_MEM_TT, + DRM_BO_FLAG_EXE | DRM_BO_MASK_MEM, + &batch->dest_location, &batch->node); + + req = &batch->node->bo_arg.d.req.bo_req; + + /* + * Set up information needed for us to make relocations + * relative to the underlying drm buffer objects. + */ + + driReadLockKernelBO(); + bo = driBOKernel(batch->buffer); + req->presumed_offset = (uint64_t) bo->offset; + req->hint = DRM_BO_HINT_PRESUMED_OFFSET; + batch->drmBOVirtual = (uint8_t *) bo->virtual; + driReadUnlockKernelBO(); + + /* + * Adjust the relocation buffer size. + */ + + if (batch->reloc_size > INTEL_MAX_RELOCS || + batch->reloc == NULL) + intel_realloc_relocs(batch, INTEL_DEFAULT_RELOCS); + + assert(batch->reloc != NULL); + batch->reloc[0] = 0; /* No relocs yet. */ + batch->reloc[1] = 1; /* Reloc type 1 */ + batch->reloc[2] = 0; /* Only a single relocation list. */ + batch->reloc[3] = 0; /* Only a single relocation list. 
*/ + + batch->base.map = driBOMap(batch->buffer, DRM_BO_FLAG_WRITE, 0); + batch->poolOffset = driBOPoolOffset(batch->buffer); + batch->base.ptr = batch->base.map; + batch->dirty_state = ~0; + batch->nr_relocs = 0; + batch->flags = 0; + batch->id = 0;//batch->intel->intelScreen->batch_id++; +} + +/*====================================================================== + * Public functions + */ + +/** + * Allocate a batchbuffer for the given context and run + * intel_be_batchbuffer_reset() on it. + * + * Returns the new batchbuffer, or NULL if the struct cannot be allocated. + */ +struct intel_be_batchbuffer * +intel_be_batchbuffer_alloc(struct intel_be_context *intel) +{ + struct intel_be_batchbuffer *batch = calloc(1, sizeof(*batch)); + + /* Bail out here rather than dereference a NULL batch below. */ + if (!batch) + return NULL; + + batch->intel = intel; + batch->device = intel->device; + + driGenBuffers(intel->device->batchPool, "batchbuffer", 1, + &batch->buffer, 4096, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE, 0); + batch->last_fence = NULL; + batch->list = driBOCreateList(20); + batch->reloc = NULL; + intel_be_batchbuffer_reset(batch); + return batch; +} + +/** + * Tear down a batchbuffer: finish and unreference its last fence, unmap and + * unreference the buffer object, then free the buffer list, the relocation + * array and the struct itself. A NULL batch is ignored. + */ +void +intel_be_batchbuffer_free(struct intel_be_batchbuffer *batch) +{ + if (!batch) + return; + + if (batch->last_fence) { + driFenceFinish(batch->last_fence, + DRM_FENCE_TYPE_EXE, FALSE); + driFenceUnReference(&batch->last_fence); + } + if (batch->base.map) { + driBOUnmap(batch->buffer); + batch->base.map = NULL; + } + driBOUnReference(batch->buffer); + driBOFreeList(batch->list); + free(batch->reloc); /* free(NULL) is a no-op */ + batch->buffer = NULL; + free(batch); +} + +void +intel_be_offset_relocation(struct intel_be_batchbuffer *batch, + unsigned pre_add, + struct _DriBufferObject *driBO, + uint64_t val_flags, + uint64_t val_mask) +{ + int itemLoc; + struct _drmBONode *node; + uint32_t *reloc; + struct drm_bo_info_req *req; + + driBOAddListItem(batch->list, driBO, val_flags, val_mask, + &itemLoc, &node); + req = &node->bo_arg.d.req.bo_req; + + if (!(req->hint & DRM_BO_HINT_PRESUMED_OFFSET)) { + + /* + * Stop other threads from tampering with the underlying + * drmBO while we're reading its offset.
+ */ + + driReadLockKernelBO(); + req->presumed_offset = (uint64_t) driBOKernel(driBO)->offset; + driReadUnlockKernelBO(); + req->hint = DRM_BO_HINT_PRESUMED_OFFSET; + } + + pre_add += driBOPoolOffset(driBO); + + if (batch->nr_relocs == batch->reloc_size) + intel_realloc_relocs(batch, batch->reloc_size * 2); + + reloc = batch->reloc + + (I915_RELOC_HEADER + batch->nr_relocs * I915_RELOC0_STRIDE); + + reloc[0] = ((uint8_t *)batch->base.ptr - batch->drmBOVirtual); + i915_batchbuffer_dword(&batch->base, req->presumed_offset + pre_add); + reloc[1] = pre_add; + reloc[2] = itemLoc; + reloc[3] = batch->dest_location; + batch->nr_relocs++; +} + +static void +i915_drm_copy_reply(const struct drm_bo_info_rep * rep, drmBO * buf) +{ + buf->handle = rep->handle; + buf->flags = rep->flags; + buf->size = rep->size; + buf->offset = rep->offset; + buf->mapHandle = rep->arg_handle; + buf->proposedFlags = rep->proposed_flags; + buf->start = rep->buffer_start; + buf->fenceFlags = rep->fence_flags; + buf->replyFlags = rep->rep_flags; + buf->pageAlignment = rep->page_alignment; +} + +static int +i915_execbuf(struct intel_be_batchbuffer *batch, + unsigned int used, + boolean ignore_cliprects, + drmBOList *list, + struct drm_i915_execbuffer *ea) +{ +// struct intel_be_context *intel = batch->intel; + drmBONode *node; + drmMMListHead *l; + struct drm_i915_op_arg *arg, *first; + struct drm_bo_op_req *req; + struct drm_bo_info_rep *rep; + uint64_t *prevNext = NULL; + drmBO *buf; + int ret = 0; + uint32_t count = 0; + + first = NULL; + for (l = list->list.next; l != &list->list; l = l->next) { + node = DRMLISTENTRY(drmBONode, l, head); + + arg = &node->bo_arg; + req = &arg->d.req; + + if (!first) + first = arg; + + if (prevNext) + *prevNext = (unsigned long)arg; + + prevNext = &arg->next; + req->bo_req.handle = node->buf->handle; + req->op = drm_bo_validate; + req->bo_req.flags = node->arg0; + req->bo_req.mask = node->arg1; + req->bo_req.hint |= 0; + count++; + } + + memset(ea, 0, 
sizeof(*ea)); + ea->num_buffers = count; + ea->batch.start = batch->poolOffset; + ea->batch.used = used; +#if 0 /* ZZZ JB: no cliprects used */ + ea->batch.cliprects = intel->pClipRects; + ea->batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects; + ea->batch.DR1 = 0; + ea->batch.DR4 = 0;((((GLuint) intel->drawX) & 0xffff) | + (((GLuint) intel->drawY) << 16)); +#else + ea->batch.cliprects = NULL; + ea->batch.num_cliprects = 0; + ea->batch.DR1 = 0; + ea->batch.DR4 = 0; +#endif + ea->fence_arg.flags = DRM_I915_FENCE_FLAG_FLUSHED; + ea->ops_list = (unsigned long) first; + first->reloc_ptr = (unsigned long) batch->reloc; + batch->reloc[0] = batch->nr_relocs; + + //return -EFAULT; + do { + ret = drmCommandWriteRead(batch->device->fd, DRM_I915_EXECBUFFER, ea, + sizeof(*ea)); + } while (ret == -EAGAIN); + + if (ret != 0) + return ret; + + for (l = list->list.next; l != &list->list; l = l->next) { + node = DRMLISTENTRY(drmBONode, l, head); + arg = &node->bo_arg; + rep = &arg->d.rep.bo_info; + + if (!arg->handled) { + return -EFAULT; + } + if (arg->d.rep.ret) + return arg->d.rep.ret; + + buf = node->buf; + i915_drm_copy_reply(rep, buf); + } + return 0; +} + +/* TODO: Push this whole function into bufmgr. 
+ */ +static struct _DriFenceObject * +do_flush_locked(struct intel_be_batchbuffer *batch, + unsigned int used, + boolean ignore_cliprects, boolean allow_unlock) +{ + struct intel_be_context *intel = batch->intel; + struct _DriFenceObject *fo; + drmFence fence; + drmBOList *boList; + struct drm_i915_execbuffer ea; + int ret = 0; + + driBOValidateUserList(batch->list); + boList = driGetdrmBOList(batch->list); + +#if 0 /* ZZZ JB Allways run */ + if (!(intel->numClipRects == 0 && !ignore_cliprects)) { +#else + if (1) { +#endif + ret = i915_execbuf(batch, used, ignore_cliprects, boList, &ea); + } else { + driPutdrmBOList(batch->list); + fo = NULL; + goto out; + } + driPutdrmBOList(batch->list); + if (ret) + abort(); + + if (ea.fence_arg.error != 0) { + + /* + * The hardware has been idled by the kernel. + * Don't fence the driBOs. + */ + + if (batch->last_fence) + driFenceUnReference(&batch->last_fence); +#if 0 /* ZZZ JB: no _mesa_* funcs in gallium */ + _mesa_printf("fence error\n"); +#endif + batch->last_fence = NULL; + fo = NULL; + goto out; + } + + fence.handle = ea.fence_arg.handle; + fence.fence_class = ea.fence_arg.fence_class; + fence.type = ea.fence_arg.type; + fence.flags = ea.fence_arg.flags; + fence.signaled = ea.fence_arg.signaled; + + fo = driBOFenceUserList(batch->device->fenceMgr, batch->list, + "SuperFence", &fence); + + if (driFenceType(fo) & DRM_I915_FENCE_TYPE_RW) { + if (batch->last_fence) + driFenceUnReference(&batch->last_fence); + /* + * FIXME: Context last fence?? 
+ */ + batch->last_fence = fo; + driFenceReference(fo); + } + out: +#if 0 /* ZZZ JB: fix this */ + intel->vtbl.lost_hardware(intel); +#else + (void)intel; +#endif + return fo; +} + + +struct _DriFenceObject * +intel_be_batchbuffer_flush(struct intel_be_batchbuffer *batch) +{ + struct intel_be_context *intel = batch->intel; + unsigned int used = batch->base.ptr - batch->base.map; + boolean was_locked = batch->intel->hardware_locked(intel); + struct _DriFenceObject *fence; + + if (used == 0) { + driFenceReference(batch->last_fence); + return batch->last_fence; + } + + /* Add the MI_BATCH_BUFFER_END. Always add an MI_FLUSH - this is a + * performance drain that we would like to avoid. + */ +#if 0 /* ZZZ JB: what should we do here? */ + if (used & 4) { + ((int *) batch->base.ptr)[0] = intel->vtbl.flush_cmd(); + ((int *) batch->base.ptr)[1] = 0; + ((int *) batch->base.ptr)[2] = MI_BATCH_BUFFER_END; + used += 12; + } + else { + ((int *) batch->base.ptr)[0] = intel->vtbl.flush_cmd(); + ((int *) batch->base.ptr)[1] = MI_BATCH_BUFFER_END; + used += 8; + } +#else + if (used & 4) { + ((int *) batch->base.ptr)[0] = ((0<<29)|(4<<23)); // MI_FLUSH; + ((int *) batch->base.ptr)[1] = 0; + ((int *) batch->base.ptr)[2] = (0xA<<23); // MI_BATCH_BUFFER_END; + used += 12; + } + else { + ((int *) batch->base.ptr)[0] = ((0<<29)|(4<<23)); // MI_FLUSH; + ((int *) batch->base.ptr)[1] = (0xA<<23); // MI_BATCH_BUFFER_END; + used += 8; + } +#endif + driBOUnmap(batch->buffer); + batch->base.ptr = NULL; + batch->base.map = NULL; + + /* TODO: Just pass the relocation list and dma buffer up to the + * kernel. 
+ */ + if (!was_locked) + intel->hardware_lock(intel); + + fence = do_flush_locked(batch, used, !(batch->flags & INTEL_BATCH_CLIPRECTS), + FALSE); + + if (!was_locked) + intel->hardware_unlock(intel); + + /* Reset the buffer: + */ + intel_be_batchbuffer_reset(batch); + return fence; +} + +void +intel_be_batchbuffer_finish(struct intel_be_batchbuffer *batch) +{ + struct _DriFenceObject *fence = intel_be_batchbuffer_flush(batch); + driFenceFinish(fence, driFenceType(fence), FALSE); + driFenceUnReference(&fence); +} + +#if 0 +void +intel_be_batchbuffer_data(struct intel_be_batchbuffer *batch, + const void *data, unsigned int bytes, unsigned int flags) +{ + assert((bytes & 3) == 0); + intel_batchbuffer_require_space(batch, bytes, flags); + memcpy(batch->base.ptr, data, bytes); + batch->base.ptr += bytes; +} +#endif diff --git a/src/gallium/winsys/drm/intel/common/intel_be_batchbuffer.h b/src/gallium/winsys/drm/intel/common/intel_be_batchbuffer.h new file mode 100644 index 0000000000..f150e3a674 --- /dev/null +++ b/src/gallium/winsys/drm/intel/common/intel_be_batchbuffer.h @@ -0,0 +1,69 @@ + +#ifndef INTEL_BE_BATCHBUFFER_H +#define INTEL_BE_BATCHBUFFER_H + +#include "i915simple/i915_batch.h" + +#include "ws_dri_bufmgr.h" + +#define BATCH_RESERVED 16 + +#define INTEL_DEFAULT_RELOCS 100 +#define INTEL_MAX_RELOCS 400 + +#define INTEL_BATCH_NO_CLIPRECTS 0x1 +#define INTEL_BATCH_CLIPRECTS 0x2 + +struct intel_be_context; +struct intel_be_device; + +struct intel_be_batchbuffer +{ + struct i915_batchbuffer base; + + struct intel_be_context *intel; + struct intel_be_device *device; + + struct _DriBufferObject *buffer; + struct _DriFenceObject *last_fence; + uint32_t flags; + + struct _DriBufferList *list; + size_t list_count; + + uint32_t *reloc; + size_t reloc_size; + size_t nr_relocs; + + uint32_t dirty_state; + uint32_t id; + + uint32_t poolOffset; + uint8_t *drmBOVirtual; + struct _drmBONode *node; /* Validation list node for this buffer */ + int dest_location; /* Validation 
list sequence for this buffer */ +}; + +struct intel_be_batchbuffer * +intel_be_batchbuffer_alloc(struct intel_be_context *intel); + +void +intel_be_batchbuffer_free(struct intel_be_batchbuffer *batch); + +void +intel_be_batchbuffer_finish(struct intel_be_batchbuffer *batch); + +struct _DriFenceObject * +intel_be_batchbuffer_flush(struct intel_be_batchbuffer *batch); + +void +intel_be_batchbuffer_reset(struct intel_be_batchbuffer *batch); + +void +intel_be_offset_relocation(struct intel_be_batchbuffer *batch, + unsigned pre_add, + struct _DriBufferObject *driBO, + uint64_t val_flags, + uint64_t val_mask); + +#endif diff --git a/src/gallium/winsys/drm/intel/common/intel_be_context.c b/src/gallium/winsys/drm/intel/common/intel_be_context.c new file mode 100644 index 0000000000..1af39674f4 --- /dev/null +++ b/src/gallium/winsys/drm/intel/common/intel_be_context.c @@ -0,0 +1,107 @@ + +/* + * Authors: Jakob Bornecrantz <jakob-at-tungstengraphics.com> + */ + +#include "ws_dri_fencemgr.h" +#include "intel_be_device.h" +#include "intel_be_context.h" +#include "intel_be_batchbuffer.h" + +static INLINE struct intel_be_context * +intel_be_context(struct i915_winsys *sws) +{ + return (struct intel_be_context *)sws; +} + +/* Simple batchbuffer interface: + */ + +static struct i915_batchbuffer* +intel_i915_batch_get(struct i915_winsys *sws) +{ + struct intel_be_context *intel = intel_be_context(sws); + return &intel->batch->base; +} + +static void intel_i915_batch_reloc(struct i915_winsys *sws, + struct pipe_buffer *buf, + unsigned access_flags, + unsigned delta) +{ + struct intel_be_context *intel = intel_be_context(sws); + + unsigned flags = DRM_BO_FLAG_MEM_TT; + unsigned mask = DRM_BO_MASK_MEM; + + if (access_flags & I915_BUFFER_ACCESS_WRITE) { + flags |= DRM_BO_FLAG_WRITE; + mask |= DRM_BO_FLAG_WRITE; + } + + if (access_flags & I915_BUFFER_ACCESS_READ) { + flags |= DRM_BO_FLAG_READ; + mask |= DRM_BO_FLAG_READ; + } + + intel_be_offset_relocation(intel->batch, + delta, + 
dri_bo(buf), + flags, + mask); +} + +static void intel_i915_batch_flush(struct i915_winsys *sws, + struct pipe_fence_handle **fence) +{ + struct intel_be_context *intel = intel_be_context(sws); + + union { + struct _DriFenceObject *dri; + struct pipe_fence_handle *pipe; + } fu; + + if (fence) + assert(!*fence); + + fu.dri = intel_be_batchbuffer_flush(intel->batch); + + if (!fu.dri) { + assert(0); + *fence = NULL; + return; + } + + if (fu.dri) { + if (fence) + *fence = fu.pipe; + else + driFenceUnReference(&fu.dri); + } + +} + +boolean +intel_be_init_context(struct intel_be_context *intel, struct intel_be_device *device) +{ + assert(intel); + assert(device); + + intel->device = device; + + /* TODO move framebuffer createion to the driver */ + + intel->base.batch_get = intel_i915_batch_get; + intel->base.batch_reloc = intel_i915_batch_reloc; + intel->base.batch_flush = intel_i915_batch_flush; + + intel->batch = intel_be_batchbuffer_alloc(intel); + + return true; +} + +void +intel_be_destroy_context(struct intel_be_context *intel) +{ + intel_be_batchbuffer_free(intel->batch); +} diff --git a/src/gallium/winsys/drm/intel/common/intel_be_context.h b/src/gallium/winsys/drm/intel/common/intel_be_context.h new file mode 100644 index 0000000000..d5cbc93594 --- /dev/null +++ b/src/gallium/winsys/drm/intel/common/intel_be_context.h @@ -0,0 +1,40 @@ +/* These need to be diffrent from the intel winsys */ +#ifndef INTEL_BE_CONTEXT_H +#define INTEL_BE_CONTEXT_H + +#include "i915simple/i915_winsys.h" + +struct intel_be_context +{ + /** Interface to i915simple driver */ + struct i915_winsys base; + + struct intel_be_device *device; + struct intel_be_batchbuffer *batch; + + /* + * Hardware lock functions. + * + * Needs to be filled in by the winsys. 
+ */ + void (*hardware_lock)(struct intel_be_context *context); + void (*hardware_unlock)(struct intel_be_context *context); + boolean (*hardware_locked)(struct intel_be_context *context); +}; + +/** + * Initialize an allocated intel_be_context struct. + * + * Remember to set the hardware_* functions. + */ +boolean +intel_be_init_context(struct intel_be_context *intel, + struct intel_be_device *device); + +/** + * Destroy an intel_be_context. + * Does not free the struct itself; that is up to the winsys. + */ +void +intel_be_destroy_context(struct intel_be_context *intel); +#endif diff --git a/src/gallium/winsys/drm/intel/common/intel_be_device.c b/src/gallium/winsys/drm/intel/common/intel_be_device.c new file mode 100644 index 0000000000..019ee5cbd2 --- /dev/null +++ b/src/gallium/winsys/drm/intel/common/intel_be_device.c @@ -0,0 +1,308 @@ + + +/* + * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com> + * Jakob Bornecrantz <jakob-at-tungstengraphics-dot-com> + */ + +#include "intel_be_device.h" +#include "ws_dri_bufmgr.h" +#include "ws_dri_bufpool.h" +#include "ws_dri_fencemgr.h" + +#include "pipe/p_winsys.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" +#include "util/u_memory.h" + +#include "i915simple/i915_screen.h" + +/* Turn a pipe winsys into an intel/pipe winsys: + */ +static INLINE struct intel_be_device * +intel_be_device( struct pipe_winsys *winsys ) +{ + return (struct intel_be_device *)winsys; +} + + +/* + * Buffer functions.
+ * + * Most callbacks map directly onto dri_bufmgr operations: + */ + +static void *intel_be_buffer_map(struct pipe_winsys *winsys, + struct pipe_buffer *buf, + unsigned flags ) +{ + unsigned drm_flags = 0; + + if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) + drm_flags |= DRM_BO_FLAG_WRITE; + + if (flags & PIPE_BUFFER_USAGE_CPU_READ) + drm_flags |= DRM_BO_FLAG_READ; + + return driBOMap( dri_bo(buf), drm_flags, 0 ); +} + +static void intel_be_buffer_unmap(struct pipe_winsys *winsys, + struct pipe_buffer *buf) +{ + driBOUnmap( dri_bo(buf) ); +} + +static void +intel_be_buffer_destroy(struct pipe_winsys *winsys, + struct pipe_buffer *buf) +{ + driBOUnReference( dri_bo(buf) ); + FREE(buf); +} + +/** + * pipe_winsys buffer_create callback. + * + * Picks the backing pool from usage: VERTEX or CONSTANT buffers come from + * mallocPool, CUSTOM buffers from vertexPool, everything else from + * regionPool. + * + * Returns the new pipe buffer, or NULL if the wrapper struct cannot be + * allocated. + */ +static struct pipe_buffer * +intel_be_buffer_create(struct pipe_winsys *winsys, + unsigned alignment, + unsigned usage, + unsigned size ) +{ + struct intel_be_buffer *buffer = CALLOC_STRUCT( intel_be_buffer ); + struct intel_be_device *iws = intel_be_device(winsys); + unsigned flags = 0; + struct _DriBufferPool *pool; + + /* Same allocation check as intel_be_buffer_from_handle(). */ + if (!buffer) + return NULL; + + buffer->base.refcount = 1; + buffer->base.alignment = alignment; + buffer->base.usage = usage; + buffer->base.size = size; + + if (usage & (PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_CONSTANT)) { + flags |= DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED; + pool = iws->mallocPool; + } else if (usage & PIPE_BUFFER_USAGE_CUSTOM) { + /* For vertex buffers */ + flags |= DRM_BO_FLAG_MEM_VRAM | DRM_BO_FLAG_MEM_TT; + pool = iws->vertexPool; + } else { + flags |= DRM_BO_FLAG_MEM_VRAM | DRM_BO_FLAG_MEM_TT; + pool = iws->regionPool; + } + + if (usage & PIPE_BUFFER_USAGE_GPU_READ) + flags |= DRM_BO_FLAG_READ; + + if (usage & PIPE_BUFFER_USAGE_GPU_WRITE) + flags |= DRM_BO_FLAG_WRITE; + + /* drm complains if we don't set any read/write flags.
+ */ + if ((flags & (DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE)) == 0) + flags |= DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE; + + buffer->pool = pool; + driGenBuffers( buffer->pool, + "pipe buffer", 1, &buffer->driBO, alignment, flags, 0 ); + + driBOData( buffer->driBO, size, NULL, buffer->pool, 0 ); + + return &buffer->base; +} + + +/** + * pipe_winsys user_buffer_create callback: wrap caller-owned memory (ptr, + * bytes) in a user buffer generated from regionPool. + * + * Returns the new pipe buffer, or NULL if the wrapper struct cannot be + * allocated. + */ +static struct pipe_buffer * +intel_be_user_buffer_create(struct pipe_winsys *winsys, void *ptr, unsigned bytes) +{ + struct intel_be_buffer *buffer = CALLOC_STRUCT( intel_be_buffer ); + struct intel_be_device *iws = intel_be_device(winsys); + + /* Same allocation check as intel_be_buffer_from_handle(). */ + if (!buffer) + return NULL; + + driGenUserBuffer( iws->regionPool, + "pipe user buffer", &buffer->driBO, ptr, bytes ); + + buffer->base.refcount = 1; + + return &buffer->base; +} + +struct pipe_buffer * +intel_be_buffer_from_handle(struct intel_be_device *device, + const char* name, unsigned handle) +{ + struct intel_be_buffer *be_buf = malloc(sizeof(*be_buf)); + struct pipe_buffer *buffer; + + if (!be_buf) + goto err; + + memset(be_buf, 0, sizeof(*be_buf)); + + driGenBuffers(device->staticPool, name, 1, &be_buf->driBO, 0, 0, 0); + driBOSetReferenced(be_buf->driBO, handle); + + if (0) /** XXX TODO check error */ + goto err_bo; + + buffer = &be_buf->base; + buffer->refcount = 1; + buffer->alignment = 0; + buffer->usage = 0; + buffer->size = driBOSize(be_buf->driBO); + + return buffer; +err_bo: + free(be_buf); +err: + return NULL; +} + + +/* + * Surface functions. + * + * Deprecated!
+ */ + +static struct pipe_surface * +intel_i915_surface_alloc(struct pipe_winsys *winsys) +{ + assert((size_t)"intel_i915_surface_alloc is deprecated" & 0); + return NULL; +} + +static int +intel_i915_surface_alloc_storage(struct pipe_winsys *winsys, + struct pipe_surface *surf, + unsigned width, unsigned height, + enum pipe_format format, + unsigned flags, + unsigned tex_usage) +{ + assert((size_t)"intel_i915_surface_alloc_storage is deprecated" & 0); + return -1; +} + +static void +intel_i915_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) +{ + assert((size_t)"intel_i915_surface_release is deprecated" & 0); +} + + +/* + * Fence functions + */ + +static void +intel_be_fence_reference( struct pipe_winsys *sws, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence ) +{ + if (*ptr) + driFenceUnReference((struct _DriFenceObject **)ptr); + + if (fence) + *ptr = (struct pipe_fence_handle *)driFenceReference((struct _DriFenceObject *)fence); +} + +static int +intel_be_fence_signalled( struct pipe_winsys *sws, + struct pipe_fence_handle *fence, + unsigned flag ) +{ + return driFenceSignaled((struct _DriFenceObject *)fence, flag); +} + +static int +intel_be_fence_finish( struct pipe_winsys *sws, + struct pipe_fence_handle *fence, + unsigned flag ) +{ + return driFenceFinish((struct _DriFenceObject *)fence, flag, 0); +} + + +/* + * Misc functions + */ + +boolean +intel_be_init_device(struct intel_be_device *dev, int fd, unsigned id) +{ + dev->fd = fd; + dev->max_batch_size = 16 * 4096; + dev->max_vertex_size = 128 * 4096; + + dev->base.buffer_create = intel_be_buffer_create; + dev->base.user_buffer_create = intel_be_user_buffer_create; + dev->base.buffer_map = intel_be_buffer_map; + dev->base.buffer_unmap = intel_be_buffer_unmap; + dev->base.buffer_destroy = intel_be_buffer_destroy; + dev->base.surface_alloc = intel_i915_surface_alloc; + dev->base.surface_alloc_storage = intel_i915_surface_alloc_storage; + dev->base.surface_release = 
intel_i915_surface_release; + dev->base.fence_reference = intel_be_fence_reference; + dev->base.fence_signalled = intel_be_fence_signalled; + dev->base.fence_finish = intel_be_fence_finish; + +#if 0 /* Set by the winsys */ + dev->base.flush_frontbuffer = intel_flush_frontbuffer; + dev->base.get_name = intel_get_name; +#endif + + dev->fMan = driInitFreeSlabManager(10, 10); + dev->fenceMgr = driFenceMgrTTMInit(dev->fd); + + dev->mallocPool = driMallocPoolInit(); + dev->staticPool = driDRMPoolInit(dev->fd); + /* Sizes: 64 128 256 512 1024 2048 4096 8192 16384 32768 */ + dev->regionPool = driSlabPoolInit(dev->fd, + DRM_BO_FLAG_READ | + DRM_BO_FLAG_WRITE | + DRM_BO_FLAG_MEM_TT, + DRM_BO_FLAG_READ | + DRM_BO_FLAG_WRITE | + DRM_BO_FLAG_MEM_TT, + 64, + 10, 120, 4096 * 64, 0, + dev->fMan); + + dev->vertexPool = driSlabPoolInit(dev->fd, + DRM_BO_FLAG_READ | + DRM_BO_FLAG_WRITE | + DRM_BO_FLAG_MEM_TT, + DRM_BO_FLAG_READ | + DRM_BO_FLAG_WRITE | + DRM_BO_FLAG_MEM_TT, + dev->max_vertex_size, + 1, 120, dev->max_vertex_size * 4, 0, + dev->fMan); + + dev->batchPool = driSlabPoolInit(dev->fd, + DRM_BO_FLAG_EXE | + DRM_BO_FLAG_MEM_TT, + DRM_BO_FLAG_EXE | + DRM_BO_FLAG_MEM_TT, + dev->max_batch_size, + 1, 40, dev->max_batch_size * 16, 0, + dev->fMan); + + /* Fill in this struct with callbacks that i915simple will need to + * communicate with the window system, buffer manager, etc. 
+ */ + dev->screen = i915_create_screen(&dev->base, id); + + return true; +} + +void +intel_be_destroy_device(struct intel_be_device *dev) +{ + driPoolTakeDown(dev->mallocPool); + driPoolTakeDown(dev->staticPool); + driPoolTakeDown(dev->regionPool); + driPoolTakeDown(dev->vertexPool); + driPoolTakeDown(dev->batchPool); + + /** TODO takedown fenceMgr and fMan */ +} diff --git a/src/gallium/winsys/drm/intel/common/intel_be_device.h b/src/gallium/winsys/drm/intel/common/intel_be_device.h new file mode 100644 index 0000000000..3f8b3f585c --- /dev/null +++ b/src/gallium/winsys/drm/intel/common/intel_be_device.h @@ -0,0 +1,72 @@ +#ifndef INTEL_DRM_DEVICE_H +#define INTEL_DRM_DEVICE_H + +#include "pipe/p_winsys.h" +#include "pipe/p_context.h" + +/* + * Device + */ + +struct intel_be_device +{ + struct pipe_winsys base; + + /** + * Hw level screen + */ + struct pipe_screen *screen; + + int fd; /**< Drm file discriptor */ + + size_t max_batch_size; + size_t max_vertex_size; + + struct _DriFenceMgr *fenceMgr; + + struct _DriBufferPool *batchPool; + struct _DriBufferPool *regionPool; + struct _DriBufferPool *mallocPool; + struct _DriBufferPool *vertexPool; + struct _DriBufferPool *staticPool; + struct _DriFreeSlabManager *fMan; +}; + +boolean +intel_be_init_device(struct intel_be_device *device, int fd, unsigned id); + +void +intel_be_destroy_device(struct intel_be_device *dev); + +/* + * Buffer + */ + +struct intel_be_buffer { + struct pipe_buffer base; + struct _DriBufferPool *pool; + struct _DriBufferObject *driBO; +}; + +/** + * Create a be buffer from a drm bo handle + * + * Takes a reference + */ +struct pipe_buffer * +intel_be_buffer_from_handle(struct intel_be_device *device, + const char* name, unsigned handle); + +static INLINE struct intel_be_buffer * +intel_be_buffer(struct pipe_buffer *buf) +{ + return (struct intel_be_buffer *)buf; +} + +static INLINE struct _DriBufferObject * +dri_bo(struct pipe_buffer *buf) +{ + return intel_be_buffer(buf)->driBO; +} + +#endif 
diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.c b/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.c new file mode 100644 index 0000000000..517a97b3ee --- /dev/null +++ b/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.c @@ -0,0 +1,949 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * + **************************************************************************/ +/* + * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> + * Keith Whitwell <keithw-at-tungstengraphics-dot-com> + */ + +#include <xf86drm.h> +#include <stdlib.h> +#include <stdio.h> +#include "pipe/p_thread.h" +#include "errno.h" +#include "ws_dri_bufmgr.h" +#include "string.h" +#include "pipe/p_debug.h" +#include "ws_dri_bufpool.h" +#include "ws_dri_fencemgr.h" + + +/* + * This lock is here to protect drmBO structs changing underneath us during a + * validate list call, since validatelist cannot take individiual locks for + * each drmBO. Validatelist takes this lock in write mode. Any access to an + * individual drmBO should take this lock in read mode, since in that case, the + * driBufferObject mutex will protect the access. Locking order is + * driBufferObject mutex - > this rw lock. + */ + +pipe_static_mutex(bmMutex); +pipe_static_condvar(bmCond); + +static int kernelReaders = 0; +static int num_buffers = 0; +static int num_user_buffers = 0; + +static drmBO *drmBOListBuf(void *iterator) +{ + drmBONode *node; + drmMMListHead *l = (drmMMListHead *) iterator; + node = DRMLISTENTRY(drmBONode, l, head); + return node->buf; +} + +static void *drmBOListIterator(drmBOList *list) +{ + void *ret = list->list.next; + + if (ret == &list->list) + return NULL; + return ret; +} + +static void *drmBOListNext(drmBOList *list, void *iterator) +{ + void *ret; + + drmMMListHead *l = (drmMMListHead *) iterator; + ret = l->next; + if (ret == &list->list) + return NULL; + return ret; +} + +static drmBONode *drmAddListItem(drmBOList *list, drmBO *item, + uint64_t arg0, + uint64_t arg1) +{ + drmBONode *node; + drmMMListHead *l; + + l = list->free.next; + if (l == &list->free) { + node = (drmBONode *) malloc(sizeof(*node)); + if (!node) { + return NULL; + } + list->numCurrent++; + } + else { + DRMLISTDEL(l); + node = DRMLISTENTRY(drmBONode, l, head); + } + node->buf = item; + node->arg0 
= arg0; + node->arg1 = arg1; + DRMLISTADD(&node->head, &list->list); + list->numOnList++; + return node; +} + +static int drmAddValidateItem(drmBOList *list, drmBO *buf, uint64_t flags, + uint64_t mask, int *newItem) +{ + drmBONode *node, *cur; + drmMMListHead *l; + + *newItem = 0; + cur = NULL; + + for (l = list->list.next; l != &list->list; l = l->next) { + node = DRMLISTENTRY(drmBONode, l, head); + if (node->buf == buf) { + cur = node; + break; + } + } + if (!cur) { + cur = drmAddListItem(list, buf, flags, mask); + if (!cur) { + return -ENOMEM; + } + *newItem = 1; + cur->arg0 = flags; + cur->arg1 = mask; + } + else { + uint64_t memFlags = cur->arg0 & flags & DRM_BO_MASK_MEM; + uint64_t accFlags = (cur->arg0 | flags) & ~DRM_BO_MASK_MEM; + + if (mask & cur->arg1 & ~DRM_BO_MASK_MEM & (cur->arg0 ^ flags)) { + return -EINVAL; + } + + cur->arg1 |= mask; + cur->arg0 = (cur->arg0 & ~mask) | ((memFlags | accFlags) & mask); + + if (((cur->arg1 & DRM_BO_MASK_MEM) != 0) && + (cur->arg0 & DRM_BO_MASK_MEM) == 0) { + return -EINVAL; + } + } + return 0; +} + +static void drmBOFreeList(drmBOList *list) +{ + drmBONode *node; + drmMMListHead *l; + + l = list->list.next; + while(l != &list->list) { + DRMLISTDEL(l); + node = DRMLISTENTRY(drmBONode, l, head); + free(node); + l = list->list.next; + list->numCurrent--; + list->numOnList--; + } + + l = list->free.next; + while(l != &list->free) { + DRMLISTDEL(l); + node = DRMLISTENTRY(drmBONode, l, head); + free(node); + l = list->free.next; + list->numCurrent--; + } +} + +static int drmAdjustListNodes(drmBOList *list) +{ + drmBONode *node; + drmMMListHead *l; + int ret = 0; + + while(list->numCurrent < list->numTarget) { + node = (drmBONode *) malloc(sizeof(*node)); + if (!node) { + ret = -ENOMEM; + break; + } + list->numCurrent++; + DRMLISTADD(&node->head, &list->free); + } + + while(list->numCurrent > list->numTarget) { + l = list->free.next; + if (l == &list->free) + break; + DRMLISTDEL(l); + node = DRMLISTENTRY(drmBONode, l, head); 
+ free(node); + list->numCurrent--; + } + return ret; +} + +static int drmBOCreateList(int numTarget, drmBOList *list) +{ + DRMINITLISTHEAD(&list->list); + DRMINITLISTHEAD(&list->free); + list->numTarget = numTarget; + list->numCurrent = 0; + list->numOnList = 0; + return drmAdjustListNodes(list); +} + +static int drmBOResetList(drmBOList *list) +{ + drmMMListHead *l; + int ret; + + ret = drmAdjustListNodes(list); + if (ret) + return ret; + + l = list->list.next; + while (l != &list->list) { + DRMLISTDEL(l); + DRMLISTADD(l, &list->free); + list->numOnList--; + l = list->list.next; + } + return drmAdjustListNodes(list); +} + +void driWriteLockKernelBO(void) +{ + pipe_mutex_lock(bmMutex); + while(kernelReaders != 0) + pipe_condvar_wait(bmCond, bmMutex); +} + +void driWriteUnlockKernelBO(void) +{ + pipe_mutex_unlock(bmMutex); +} + +void driReadLockKernelBO(void) +{ + pipe_mutex_lock(bmMutex); + kernelReaders++; + pipe_mutex_unlock(bmMutex); +} + +void driReadUnlockKernelBO(void) +{ + pipe_mutex_lock(bmMutex); + if (--kernelReaders == 0) + pipe_condvar_broadcast(bmCond); + pipe_mutex_unlock(bmMutex); +} + + + + +/* + * TODO: Introduce fence pools in the same way as + * buffer object pools. 
+ */ + +typedef struct _DriBufferObject +{ + DriBufferPool *pool; + pipe_mutex mutex; + int refCount; + const char *name; + uint64_t flags; + unsigned hint; + unsigned alignment; + unsigned createdByReference; + void *private; + /* user-space buffer: */ + unsigned userBuffer; + void *userData; + unsigned userSize; +} DriBufferObject; + +typedef struct _DriBufferList { + drmBOList drmBuffers; /* List of kernel buffers needing validation */ + drmBOList driBuffers; /* List of user-space buffers needing validation */ +} DriBufferList; + + +void +bmError(int val, const char *file, const char *function, int line) +{ + printf("Fatal video memory manager error \"%s\".\n" + "Check kernel logs or set the LIBGL_DEBUG\n" + "environment variable to \"verbose\" for more info.\n" + "Detected in file %s, line %d, function %s.\n", + strerror(-val), file, line, function); +#ifndef NDEBUG + abort(); +#else + abort(); +#endif +} + +extern drmBO * +driBOKernel(struct _DriBufferObject *buf) +{ + drmBO *ret; + + driReadLockKernelBO(); + pipe_mutex_lock(buf->mutex); + assert(buf->private != NULL); + ret = buf->pool->kernel(buf->pool, buf->private); + if (!ret) + BM_CKFATAL(-EINVAL); + pipe_mutex_unlock(buf->mutex); + driReadUnlockKernelBO(); + + return ret; +} + +void +driBOWaitIdle(struct _DriBufferObject *buf, int lazy) +{ + + /* + * This function may block. Is it sane to keep the mutex held during + * that time?? + */ + + pipe_mutex_lock(buf->mutex); + BM_CKFATAL(buf->pool->waitIdle(buf->pool, buf->private, &buf->mutex, lazy)); + pipe_mutex_unlock(buf->mutex); +} + +void * +driBOMap(struct _DriBufferObject *buf, unsigned flags, unsigned hint) +{ + void *virtual; + int retval; + + if (buf->userBuffer) { + return buf->userData; + } + + pipe_mutex_lock(buf->mutex); + assert(buf->private != NULL); + retval = buf->pool->map(buf->pool, buf->private, flags, hint, + &buf->mutex, &virtual); + pipe_mutex_unlock(buf->mutex); + + return retval == 0 ? 
virtual : NULL; +} + +void +driBOUnmap(struct _DriBufferObject *buf) +{ + if (buf->userBuffer) + return; + + assert(buf->private != NULL); + pipe_mutex_lock(buf->mutex); + BM_CKFATAL(buf->pool->unmap(buf->pool, buf->private)); + pipe_mutex_unlock(buf->mutex); +} + +unsigned long +driBOOffset(struct _DriBufferObject *buf) +{ + unsigned long ret; + + assert(buf->private != NULL); + + pipe_mutex_lock(buf->mutex); + ret = buf->pool->offset(buf->pool, buf->private); + pipe_mutex_unlock(buf->mutex); + return ret; +} + +unsigned long +driBOPoolOffset(struct _DriBufferObject *buf) +{ + unsigned long ret; + + assert(buf->private != NULL); + + pipe_mutex_lock(buf->mutex); + ret = buf->pool->poolOffset(buf->pool, buf->private); + pipe_mutex_unlock(buf->mutex); + return ret; +} + +uint64_t +driBOFlags(struct _DriBufferObject *buf) +{ + uint64_t ret; + + assert(buf->private != NULL); + + driReadLockKernelBO(); + pipe_mutex_lock(buf->mutex); + ret = buf->pool->flags(buf->pool, buf->private); + pipe_mutex_unlock(buf->mutex); + driReadUnlockKernelBO(); + return ret; +} + +struct _DriBufferObject * +driBOReference(struct _DriBufferObject *buf) +{ + pipe_mutex_lock(buf->mutex); + if (++buf->refCount == 1) { + pipe_mutex_unlock(buf->mutex); + BM_CKFATAL(-EINVAL); + } + pipe_mutex_unlock(buf->mutex); + return buf; +} + +void +driBOUnReference(struct _DriBufferObject *buf) +{ + int tmp; + + if (!buf) + return; + + pipe_mutex_lock(buf->mutex); + tmp = --buf->refCount; + if (!tmp) { + pipe_mutex_unlock(buf->mutex); + if (buf->private) { + if (buf->createdByReference) + buf->pool->unreference(buf->pool, buf->private); + else + buf->pool->destroy(buf->pool, buf->private); + } + if (buf->userBuffer) + num_user_buffers--; + else + num_buffers--; + free(buf); + } else + pipe_mutex_unlock(buf->mutex); + +} + + +int +driBOData(struct _DriBufferObject *buf, + unsigned size, const void *data, + DriBufferPool *newPool, + uint64_t flags) +{ + void *virtual = NULL; + int newBuffer; + int retval = 0; 
+ struct _DriBufferPool *pool; + + assert(!buf->userBuffer); /* XXX just do a memcpy? */ + + pipe_mutex_lock(buf->mutex); + pool = buf->pool; + + if (pool == NULL && newPool != NULL) { + buf->pool = newPool; + pool = newPool; + } + if (newPool == NULL) + newPool = pool; + + if (!pool->create) { + assert((size_t)"driBOData called on invalid buffer\n" & 0); + BM_CKFATAL(-EINVAL); + } + + newBuffer = (!buf->private || pool != newPool || + pool->size(pool, buf->private) < size); + + if (!flags) + flags = buf->flags; + + if (newBuffer) { + + if (buf->createdByReference) { + assert((size_t)"driBOData requiring resizing called on shared buffer.\n" & 0); + BM_CKFATAL(-EINVAL); + } + + if (buf->private) + buf->pool->destroy(buf->pool, buf->private); + + pool = newPool; + buf->pool = newPool; + buf->private = pool->create(pool, size, flags, DRM_BO_HINT_DONT_FENCE, + buf->alignment); + if (!buf->private) + retval = -ENOMEM; + + if (retval == 0) + retval = pool->map(pool, buf->private, + DRM_BO_FLAG_WRITE, + DRM_BO_HINT_DONT_BLOCK, &buf->mutex, &virtual); + } else if (pool->map(pool, buf->private, DRM_BO_FLAG_WRITE, + DRM_BO_HINT_DONT_BLOCK, &buf->mutex, &virtual)) { + /* + * Buffer is busy. need to create a new one. + */ + + void *newBuf; + + newBuf = pool->create(pool, size, flags, DRM_BO_HINT_DONT_FENCE, + buf->alignment); + if (newBuf) { + buf->pool->destroy(buf->pool, buf->private); + buf->private = newBuf; + } + + retval = pool->map(pool, buf->private, + DRM_BO_FLAG_WRITE, 0, &buf->mutex, &virtual); + } else { + uint64_t flag_diff = flags ^ buf->flags; + + /* + * We might need to change buffer flags. 
+ */ + + if (flag_diff){ + assert(pool->setStatus != NULL); + BM_CKFATAL(pool->unmap(pool, buf->private)); + BM_CKFATAL(pool->setStatus(pool, buf->private, flag_diff, + buf->flags)); + if (!data) + goto out; + + retval = pool->map(pool, buf->private, + DRM_BO_FLAG_WRITE, 0, &buf->mutex, &virtual); + } + } + + if (retval == 0) { + if (data) + memcpy(virtual, data, size); + + BM_CKFATAL(pool->unmap(pool, buf->private)); + } + + out: + pipe_mutex_unlock(buf->mutex); + + return retval; +} + +void +driBOSubData(struct _DriBufferObject *buf, + unsigned long offset, unsigned long size, const void *data) +{ + void *virtual; + + assert(!buf->userBuffer); /* XXX just do a memcpy? */ + + pipe_mutex_lock(buf->mutex); + if (size && data) { + BM_CKFATAL(buf->pool->map(buf->pool, buf->private, + DRM_BO_FLAG_WRITE, 0, &buf->mutex, + &virtual)); + memcpy((unsigned char *) virtual + offset, data, size); + BM_CKFATAL(buf->pool->unmap(buf->pool, buf->private)); + } + pipe_mutex_unlock(buf->mutex); +} + +void +driBOGetSubData(struct _DriBufferObject *buf, + unsigned long offset, unsigned long size, void *data) +{ + void *virtual; + + assert(!buf->userBuffer); /* XXX just do a memcpy? 
*/ + + pipe_mutex_lock(buf->mutex); + if (size && data) { + BM_CKFATAL(buf->pool->map(buf->pool, buf->private, + DRM_BO_FLAG_READ, 0, &buf->mutex, &virtual)); + memcpy(data, (unsigned char *) virtual + offset, size); + BM_CKFATAL(buf->pool->unmap(buf->pool, buf->private)); + } + pipe_mutex_unlock(buf->mutex); +} + +void +driBOSetReferenced(struct _DriBufferObject *buf, + unsigned long handle) +{ + pipe_mutex_lock(buf->mutex); + if (buf->private != NULL) { + assert((size_t)"Invalid buffer for setReferenced\n" & 0); + BM_CKFATAL(-EINVAL); + + } + if (buf->pool->reference == NULL) { + assert((size_t)"Invalid buffer pool for setReferenced\n" & 0); + BM_CKFATAL(-EINVAL); + } + buf->private = buf->pool->reference(buf->pool, handle); + if (!buf->private) { + assert((size_t)"Invalid buffer pool for setStatic\n" & 0); + BM_CKFATAL(-ENOMEM); + } + buf->createdByReference = TRUE; + buf->flags = buf->pool->kernel(buf->pool, buf->private)->flags; + pipe_mutex_unlock(buf->mutex); +} + +int +driGenBuffers(struct _DriBufferPool *pool, + const char *name, + unsigned n, + struct _DriBufferObject *buffers[], + unsigned alignment, uint64_t flags, unsigned hint) +{ + struct _DriBufferObject *buf; + int i; + + flags = (flags) ? 
flags : DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_MEM_VRAM | + DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE; + + ++num_buffers; + + assert(pool); + + for (i = 0; i < n; ++i) { + buf = (struct _DriBufferObject *) calloc(1, sizeof(*buf)); + if (!buf) + return -ENOMEM; + + pipe_mutex_init(buf->mutex); + pipe_mutex_lock(buf->mutex); + buf->refCount = 1; + buf->flags = flags; + buf->hint = hint; + buf->name = name; + buf->alignment = alignment; + buf->pool = pool; + buf->createdByReference = 0; + pipe_mutex_unlock(buf->mutex); + buffers[i] = buf; + } + return 0; +} + +void +driGenUserBuffer(struct _DriBufferPool *pool, + const char *name, + struct _DriBufferObject **buffers, + void *ptr, unsigned bytes) +{ + const unsigned alignment = 1, flags = 0, hint = 0; + + --num_buffers; /* JB: is inced in GenBuffes */ + driGenBuffers(pool, name, 1, buffers, alignment, flags, hint); + ++num_user_buffers; + + (*buffers)->userBuffer = 1; + (*buffers)->userData = ptr; + (*buffers)->userSize = bytes; +} + +void +driDeleteBuffers(unsigned n, struct _DriBufferObject *buffers[]) +{ + int i; + + for (i = 0; i < n; ++i) { + driBOUnReference(buffers[i]); + } +} + + +void +driInitBufMgr(int fd) +{ + ; +} + +/* + * Note that lists are per-context and don't need mutex protection. + */ + +struct _DriBufferList * +driBOCreateList(int target) +{ + struct _DriBufferList *list = calloc(sizeof(*list), 1); + + BM_CKFATAL(drmBOCreateList(target, &list->drmBuffers)); + BM_CKFATAL(drmBOCreateList(target, &list->driBuffers)); + return list; +} + +int +driBOResetList(struct _DriBufferList * list) +{ + int ret; + ret = drmBOResetList(&list->drmBuffers); + if (ret) + return ret; + ret = drmBOResetList(&list->driBuffers); + return ret; +} + +void +driBOFreeList(struct _DriBufferList * list) +{ + drmBOFreeList(&list->drmBuffers); + drmBOFreeList(&list->driBuffers); + free(list); +} + + +/* + * Copied from libdrm, because it is needed by driAddValidateItem. 
+ */ + +static drmBONode * +driAddListItem(drmBOList * list, drmBO * item, + uint64_t arg0, uint64_t arg1) +{ + drmBONode *node; + drmMMListHead *l; + + l = list->free.next; + if (l == &list->free) { + node = (drmBONode *) malloc(sizeof(*node)); + if (!node) { + return NULL; + } + list->numCurrent++; + } else { + DRMLISTDEL(l); + node = DRMLISTENTRY(drmBONode, l, head); + } + memset(&node->bo_arg, 0, sizeof(node->bo_arg)); + node->buf = item; + node->arg0 = arg0; + node->arg1 = arg1; + DRMLISTADDTAIL(&node->head, &list->list); + list->numOnList++; + return node; +} + +/* + * Slightly modified version compared to the libdrm version. + * This one returns the list index of the buffer put on the list. + */ + +static int +driAddValidateItem(drmBOList * list, drmBO * buf, uint64_t flags, + uint64_t mask, int *itemLoc, + struct _drmBONode **pnode) +{ + drmBONode *node, *cur; + drmMMListHead *l; + int count = 0; + + cur = NULL; + + for (l = list->list.next; l != &list->list; l = l->next) { + node = DRMLISTENTRY(drmBONode, l, head); + if (node->buf == buf) { + cur = node; + break; + } + count++; + } + if (!cur) { + cur = driAddListItem(list, buf, flags, mask); + if (!cur) + return -ENOMEM; + + cur->arg0 = flags; + cur->arg1 = mask; + } else { + uint64_t memFlags = cur->arg0 & flags & DRM_BO_MASK_MEM; + uint64_t accFlags = (cur->arg0 | flags) & ~DRM_BO_MASK_MEM; + + if (mask & cur->arg1 & ~DRM_BO_MASK_MEM & (cur->arg0 ^ flags)) { + return -EINVAL; + } + + cur->arg1 |= mask; + cur->arg0 = (cur->arg0 & ~mask) | ((memFlags | accFlags) & mask); + + if (((cur->arg1 & DRM_BO_MASK_MEM) != 0) && + (cur->arg0 & DRM_BO_MASK_MEM) == 0) { + return -EINVAL; + } + } + *itemLoc = count; + *pnode = cur; + return 0; +} + + +void +driBOAddListItem(struct _DriBufferList * list, struct _DriBufferObject *buf, + uint64_t flags, uint64_t mask, int *itemLoc, + struct _drmBONode **node) +{ + int newItem; + + pipe_mutex_lock(buf->mutex); + BM_CKFATAL(driAddValidateItem(&list->drmBuffers, + 
buf->pool->kernel(buf->pool, buf->private), + flags, mask, itemLoc, node)); + BM_CKFATAL(drmAddValidateItem(&list->driBuffers, (drmBO *) buf, + flags, mask, &newItem)); + if (newItem) + buf->refCount++; + + pipe_mutex_unlock(buf->mutex); +} + +drmBOList *driGetdrmBOList(struct _DriBufferList *list) +{ + driWriteLockKernelBO(); + return &list->drmBuffers; +} + +void driPutdrmBOList(struct _DriBufferList *list) +{ + driWriteUnlockKernelBO(); +} + + +void +driBOFence(struct _DriBufferObject *buf, struct _DriFenceObject *fence) +{ + pipe_mutex_lock(buf->mutex); + if (buf->pool->fence) + BM_CKFATAL(buf->pool->fence(buf->pool, buf->private, fence)); + pipe_mutex_unlock(buf->mutex); + +} + +void +driBOUnrefUserList(struct _DriBufferList *list) +{ + struct _DriBufferObject *buf; + void *curBuf; + + curBuf = drmBOListIterator(&list->driBuffers); + while (curBuf) { + buf = (struct _DriBufferObject *)drmBOListBuf(curBuf); + driBOUnReference(buf); + curBuf = drmBOListNext(&list->driBuffers, curBuf); + } +} + +struct _DriFenceObject * +driBOFenceUserList(struct _DriFenceMgr *mgr, + struct _DriBufferList *list, const char *name, + drmFence *kFence) +{ + struct _DriFenceObject *fence; + struct _DriBufferObject *buf; + void *curBuf; + + fence = driFenceCreate(mgr, kFence->fence_class, kFence->type, + kFence, sizeof(*kFence)); + curBuf = drmBOListIterator(&list->driBuffers); + + /* + * User-space fencing callbacks. + */ + + while (curBuf) { + buf = (struct _DriBufferObject *) drmBOListBuf(curBuf); + driBOFence(buf, fence); + driBOUnReference(buf); + curBuf = drmBOListNext(&list->driBuffers, curBuf); + } + + driBOResetList(list); + return fence; +} + +void +driBOValidateUserList(struct _DriBufferList * list) +{ + void *curBuf; + struct _DriBufferObject *buf; + + curBuf = drmBOListIterator(&list->driBuffers); + + /* + * User-space validation callbacks. 
+ */ + + while (curBuf) { + buf = (struct _DriBufferObject *) drmBOListBuf(curBuf); + pipe_mutex_lock(buf->mutex); + if (buf->pool->validate) + BM_CKFATAL(buf->pool->validate(buf->pool, buf->private, &buf->mutex)); + pipe_mutex_unlock(buf->mutex); + curBuf = drmBOListNext(&list->driBuffers, curBuf); + } +} + + +void +driPoolTakeDown(struct _DriBufferPool *pool) +{ + pool->takeDown(pool); + +} + +unsigned long +driBOSize(struct _DriBufferObject *buf) +{ + unsigned long size; + + pipe_mutex_lock(buf->mutex); + size = buf->pool->size(buf->pool, buf->private); + pipe_mutex_unlock(buf->mutex); + + return size; + +} + +drmBOList *driBOGetDRMBuffers(struct _DriBufferList *list) +{ + return &list->drmBuffers; +} + +drmBOList *driBOGetDRIBuffers(struct _DriBufferList *list) +{ + return &list->driBuffers; +} + diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.h b/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.h new file mode 100644 index 0000000000..e6c0cff0a0 --- /dev/null +++ b/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.h @@ -0,0 +1,138 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ +/* + * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> + * Keith Whitwell <keithw-at-tungstengraphics-dot-com> + */ + +#ifndef _PSB_BUFMGR_H_ +#define _PSB_BUFMGR_H_ +#include <xf86mm.h> +#include "i915_drm.h" +#include "ws_dri_fencemgr.h" + +typedef struct _drmBONode +{ + drmMMListHead head; + drmBO *buf; + struct drm_i915_op_arg bo_arg; + uint64_t arg0; + uint64_t arg1; +} drmBONode; + +typedef struct _drmBOList { + unsigned numTarget; + unsigned numCurrent; + unsigned numOnList; + drmMMListHead list; + drmMMListHead free; +} drmBOList; + + +struct _DriFenceObject; +struct _DriBufferObject; +struct _DriBufferPool; +struct _DriBufferList; + +/* + * Return a pointer to the libdrm buffer object this DriBufferObject + * uses. 
+ */ + +extern drmBO *driBOKernel(struct _DriBufferObject *buf); +extern void *driBOMap(struct _DriBufferObject *buf, unsigned flags, + unsigned hint); +extern void driBOUnmap(struct _DriBufferObject *buf); +extern unsigned long driBOOffset(struct _DriBufferObject *buf); +extern unsigned long driBOPoolOffset(struct _DriBufferObject *buf); + +extern uint64_t driBOFlags(struct _DriBufferObject *buf); +extern struct _DriBufferObject *driBOReference(struct _DriBufferObject *buf); +extern void driBOUnReference(struct _DriBufferObject *buf); + +extern int driBOData(struct _DriBufferObject *r_buf, + unsigned size, const void *data, + struct _DriBufferPool *pool, uint64_t flags); + +extern void driBOSubData(struct _DriBufferObject *buf, + unsigned long offset, unsigned long size, + const void *data); +extern void driBOGetSubData(struct _DriBufferObject *buf, + unsigned long offset, unsigned long size, + void *data); +extern int driGenBuffers(struct _DriBufferPool *pool, + const char *name, + unsigned n, + struct _DriBufferObject *buffers[], + unsigned alignment, uint64_t flags, unsigned hint); +extern void driGenUserBuffer(struct _DriBufferPool *pool, + const char *name, + struct _DriBufferObject *buffers[], + void *ptr, unsigned bytes); +extern void driDeleteBuffers(unsigned n, struct _DriBufferObject *buffers[]); +extern void driInitBufMgr(int fd); +extern struct _DriBufferList *driBOCreateList(int target); +extern int driBOResetList(struct _DriBufferList * list); +extern void driBOAddListItem(struct _DriBufferList * list, + struct _DriBufferObject *buf, + uint64_t flags, uint64_t mask, int *itemLoc, + struct _drmBONode **node); + +extern void driBOValidateList(int fd, struct _DriBufferList * list); +extern void driBOFreeList(struct _DriBufferList * list); +extern struct _DriFenceObject *driBOFenceUserList(struct _DriFenceMgr *mgr, + struct _DriBufferList *list, + const char *name, + drmFence *kFence); +extern void driBOUnrefUserList(struct _DriBufferList *list); +extern 
void driBOValidateUserList(struct _DriBufferList * list); +extern drmBOList *driGetdrmBOList(struct _DriBufferList *list); +extern void driPutdrmBOList(struct _DriBufferList *list); + +extern void driBOFence(struct _DriBufferObject *buf, + struct _DriFenceObject *fence); + +extern void driPoolTakeDown(struct _DriBufferPool *pool); +extern void driBOSetReferenced(struct _DriBufferObject *buf, + unsigned long handle); +unsigned long driBOSize(struct _DriBufferObject *buf); +extern void driBOWaitIdle(struct _DriBufferObject *buf, int lazy); +extern void driPoolTakeDown(struct _DriBufferPool *pool); + +extern void driReadLockKernelBO(void); +extern void driReadUnlockKernelBO(void); +extern void driWriteLockKernelBO(void); +extern void driWriteUnlockKernelBO(void); + +/* + * For debugging purposes. + */ + +extern drmBOList *driBOGetDRMBuffers(struct _DriBufferList *list); +extern drmBOList *driBOGetDRIBuffers(struct _DriBufferList *list); +#endif diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_bufpool.h b/src/gallium/winsys/drm/intel/common/ws_dri_bufpool.h new file mode 100644 index 0000000000..ad3b6f3931 --- /dev/null +++ b/src/gallium/winsys/drm/intel/common/ws_dri_bufpool.h @@ -0,0 +1,102 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * + **************************************************************************/ +/* + * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> + */ + +#ifndef _PSB_BUFPOOL_H_ +#define _PSB_BUFPOOL_H_ + +#include <xf86drm.h> +#include "pipe/p_thread.h" +struct _DriFenceObject; + +typedef struct _DriBufferPool +{ + int fd; + int (*map) (struct _DriBufferPool * pool, void *private, + unsigned flags, int hint, pipe_mutex *mutex, + void **virtual); + int (*unmap) (struct _DriBufferPool * pool, void *private); + int (*destroy) (struct _DriBufferPool * pool, void *private); + unsigned long (*offset) (struct _DriBufferPool * pool, void *private); + unsigned long (*poolOffset) (struct _DriBufferPool * pool, void *private); + uint64_t (*flags) (struct _DriBufferPool * pool, void *private); + unsigned long (*size) (struct _DriBufferPool * pool, void *private); + void *(*create) (struct _DriBufferPool * pool, unsigned long size, + uint64_t flags, unsigned hint, unsigned alignment); + void *(*reference) (struct _DriBufferPool * pool, unsigned handle); + int (*unreference) (struct _DriBufferPool * pool, void *private); + int (*fence) (struct _DriBufferPool * pool, void *private, + struct _DriFenceObject * fence); + drmBO *(*kernel) (struct _DriBufferPool * pool, void *private); + int (*validate) (struct _DriBufferPool * pool, void *private, pipe_mutex *mutex); + int (*waitIdle) (struct _DriBufferPool *pool, void *private, pipe_mutex *mutex, + int lazy); + int (*setStatus) (struct _DriBufferPool *pool, void *private, + uint64_t flag_diff, uint64_t old_flags); + void (*takeDown) (struct _DriBufferPool * pool); + void *data; +} DriBufferPool; + +extern void bmError(int val, const char *file, const char *function, + int line); +#define BM_CKFATAL(val) \ + do{ \ + int tstVal = (val); \ + if (tstVal) \ + bmError(tstVal, __FILE__, __FUNCTION__, __LINE__); \ + } while(0); + + +/* + * Builtin pools. + */ + +/* + * Kernel buffer objects. Size in multiples of page size. 
Page size aligned. + */ + +extern struct _DriBufferPool *driDRMPoolInit(int fd); +extern struct _DriBufferPool *driMallocPoolInit(void); + +struct _DriFreeSlabManager; +extern struct _DriBufferPool * driSlabPoolInit(int fd, uint64_t flags, + uint64_t validMask, + uint32_t smallestSize, + uint32_t numSizes, + uint32_t desiredNumBuffers, + uint32_t maxSlabSize, + uint32_t pageAlignment, + struct _DriFreeSlabManager *fMan); +extern void driFinishFreeSlabManager(struct _DriFreeSlabManager *fMan); +extern struct _DriFreeSlabManager * +driInitFreeSlabManager(uint32_t checkIntervalMsec, uint32_t slabTimeoutMsec); + + +#endif diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_drmpool.c b/src/gallium/winsys/drm/intel/common/ws_dri_drmpool.c new file mode 100644 index 0000000000..54618b1c82 --- /dev/null +++ b/src/gallium/winsys/drm/intel/common/ws_dri_drmpool.c @@ -0,0 +1,268 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ +/* + * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> + */ + +#include <xf86drm.h> +#include <stdlib.h> +#include <unistd.h> +#include "ws_dri_bufpool.h" +#include "ws_dri_bufmgr.h" +#include "assert.h" + +/* + * Buffer pool implementation using DRM buffer objects as DRI buffer objects. + */ + +static void * +pool_create(struct _DriBufferPool *pool, + unsigned long size, uint64_t flags, unsigned hint, + unsigned alignment) +{ + drmBO *buf = (drmBO *) malloc(sizeof(*buf)); + int ret; + unsigned pageSize = getpagesize(); + + if (!buf) + return NULL; + + if ((alignment > pageSize) && (alignment % pageSize)) { + free(buf); + return NULL; + } + + ret = drmBOCreate(pool->fd, size, alignment / pageSize, + NULL, + flags, hint, buf); + if (ret) { + free(buf); + return NULL; + } + + return (void *) buf; +} + +static void * +pool_reference(struct _DriBufferPool *pool, unsigned handle) +{ + drmBO *buf = (drmBO *) malloc(sizeof(*buf)); + int ret; + + if (!buf) + return NULL; + + ret = drmBOReference(pool->fd, handle, buf); + + if (ret) { + free(buf); + return NULL; + } + + return (void *) buf; +} + +static int +pool_destroy(struct _DriBufferPool *pool, void *private) +{ + int ret; + drmBO *buf = (drmBO *) private; + driReadLockKernelBO(); + ret = drmBOUnreference(pool->fd, buf); + free(buf); + driReadUnlockKernelBO(); + return ret; +} + +static int +pool_unreference(struct _DriBufferPool *pool, void *private) +{ + int 
ret; + drmBO *buf = (drmBO *) private; + driReadLockKernelBO(); + ret = drmBOUnreference(pool->fd, buf); + free(buf); + driReadUnlockKernelBO(); + return ret; +} + +static int +pool_map(struct _DriBufferPool *pool, void *private, unsigned flags, + int hint, pipe_mutex *mutex, void **virtual) +{ + drmBO *buf = (drmBO *) private; + int ret; + + driReadLockKernelBO(); + ret = drmBOMap(pool->fd, buf, flags, hint, virtual); + driReadUnlockKernelBO(); + return ret; +} + +static int +pool_unmap(struct _DriBufferPool *pool, void *private) +{ + drmBO *buf = (drmBO *) private; + int ret; + + driReadLockKernelBO(); + ret = drmBOUnmap(pool->fd, buf); + driReadUnlockKernelBO(); + + return ret; +} + +static unsigned long +pool_offset(struct _DriBufferPool *pool, void *private) +{ + drmBO *buf = (drmBO *) private; + unsigned long offset; + + driReadLockKernelBO(); + assert(buf->flags & DRM_BO_FLAG_NO_MOVE); + offset = buf->offset; + driReadUnlockKernelBO(); + + return buf->offset; +} + +static unsigned long +pool_poolOffset(struct _DriBufferPool *pool, void *private) +{ + return 0; +} + +static uint64_t +pool_flags(struct _DriBufferPool *pool, void *private) +{ + drmBO *buf = (drmBO *) private; + uint64_t flags; + + driReadLockKernelBO(); + flags = buf->flags; + driReadUnlockKernelBO(); + + return flags; +} + + +static unsigned long +pool_size(struct _DriBufferPool *pool, void *private) +{ + drmBO *buf = (drmBO *) private; + unsigned long size; + + driReadLockKernelBO(); + size = buf->size; + driReadUnlockKernelBO(); + + return buf->size; +} + +static int +pool_fence(struct _DriBufferPool *pool, void *private, + struct _DriFenceObject *fence) +{ + /* + * Noop. The kernel handles all fencing. 
+ */ + + return 0; +} + +static drmBO * +pool_kernel(struct _DriBufferPool *pool, void *private) +{ + return (drmBO *) private; +} + +static int +pool_waitIdle(struct _DriBufferPool *pool, void *private, pipe_mutex *mutex, + int lazy) +{ + drmBO *buf = (drmBO *) private; + int ret; + + driReadLockKernelBO(); + ret = drmBOWaitIdle(pool->fd, buf, (lazy) ? DRM_BO_HINT_WAIT_LAZY:0); + driReadUnlockKernelBO(); + + return ret; +} + + +static void +pool_takedown(struct _DriBufferPool *pool) +{ + free(pool); +} + +/*static int +pool_setStatus(struct _DriBufferPool *pool, void *private, + uint64_t flag_diff, uint64_t old_flags) +{ + drmBO *buf = (drmBO *) private; + uint64_t new_flags = old_flags ^ flag_diff; + int ret; + + driReadLockKernelBO(); + ret = drmBOSetStatus(pool->fd, buf, new_flags, flag_diff, + 0, 0, 0); + driReadUnlockKernelBO(); + return ret; +}*/ + +struct _DriBufferPool * +driDRMPoolInit(int fd) +{ + struct _DriBufferPool *pool; + + pool = (struct _DriBufferPool *) malloc(sizeof(*pool)); + + if (!pool) + return NULL; + + pool->fd = fd; + pool->map = &pool_map; + pool->unmap = &pool_unmap; + pool->destroy = &pool_destroy; + pool->offset = &pool_offset; + pool->poolOffset = &pool_poolOffset; + pool->flags = &pool_flags; + pool->size = &pool_size; + pool->create = &pool_create; + pool->fence = &pool_fence; + pool->kernel = &pool_kernel; + pool->validate = NULL; + pool->waitIdle = &pool_waitIdle; + pool->takeDown = &pool_takedown; + pool->reference = &pool_reference; + pool->unreference = &pool_unreference; + pool->data = NULL; + return pool; +} diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.c b/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.c new file mode 100644 index 0000000000..831c75d30c --- /dev/null +++ b/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.c @@ -0,0 +1,377 @@ +#include "ws_dri_fencemgr.h" +#include "pipe/p_thread.h" +#include <xf86mm.h> +#include <string.h> +#include <unistd.h> + +/* + * Note: Locking order is + * 
_DriFenceObject::mutex + * _DriFenceMgr::mutex + */ + +struct _DriFenceMgr { + /* + * Constant members. Need no mutex protection. + */ + struct _DriFenceMgrCreateInfo info; + void *private; + + /* + * These members are protected by this->mutex + */ + pipe_mutex mutex; + int refCount; + drmMMListHead *heads; + int num_fences; +}; + +struct _DriFenceObject { + + /* + * These members are constant and need no mutex protection. + */ + struct _DriFenceMgr *mgr; + uint32_t fence_class; + uint32_t fence_type; + + /* + * These members are protected by mgr->mutex. + */ + drmMMListHead head; + int refCount; + + /* + * These members are protected by this->mutex. + */ + pipe_mutex mutex; + uint32_t signaled_type; + void *private; +}; + +uint32_t +driFenceType(struct _DriFenceObject *fence) +{ + return fence->fence_type; +} + +struct _DriFenceMgr * +driFenceMgrCreate(const struct _DriFenceMgrCreateInfo *info) +{ + struct _DriFenceMgr *tmp; + uint32_t i; + + tmp = calloc(1, sizeof(*tmp)); + if (!tmp) + return NULL; + + pipe_mutex_init(tmp->mutex); + pipe_mutex_lock(tmp->mutex); + tmp->refCount = 1; + tmp->info = *info; + tmp->num_fences = 0; + tmp->heads = calloc(tmp->info.num_classes, sizeof(*tmp->heads)); + if (!tmp->heads) + goto out_err; + + for (i=0; i<tmp->info.num_classes; ++i) { + DRMINITLISTHEAD(&tmp->heads[i]); + } + pipe_mutex_unlock(tmp->mutex); + return tmp; + + out_err: + if (tmp) + free(tmp); + return NULL; +} + +static void +driFenceMgrUnrefUnlock(struct _DriFenceMgr **pMgr) +{ + struct _DriFenceMgr *mgr = *pMgr; + + *pMgr = NULL; + if (--mgr->refCount == 0) + free(mgr); + else + pipe_mutex_unlock(mgr->mutex); +} + +void +driFenceMgrUnReference(struct _DriFenceMgr **pMgr) +{ + pipe_mutex_lock((*pMgr)->mutex); + driFenceMgrUnrefUnlock(pMgr); +} + +static void +driFenceUnReferenceLocked(struct _DriFenceObject **pFence) +{ + struct _DriFenceObject *fence = *pFence; + struct _DriFenceMgr *mgr = fence->mgr; + + *pFence = NULL; + if (--fence->refCount == 0) { + 
DRMLISTDELINIT(&fence->head); + if (fence->private) + mgr->info.unreference(mgr, &fence->private); + --mgr->num_fences; + fence->mgr = NULL; + --mgr->refCount; + free(fence); + + } +} + + +static void +driSignalPreviousFencesLocked(struct _DriFenceMgr *mgr, + drmMMListHead *list, + uint32_t fence_class, + uint32_t fence_type) +{ + struct _DriFenceObject *entry; + drmMMListHead *prev; + + while(list != &mgr->heads[fence_class]) { + entry = DRMLISTENTRY(struct _DriFenceObject, list, head); + + /* + * Up refcount so that entry doesn't disappear from under us + * when we unlock-relock mgr to get the correct locking order. + */ + + ++entry->refCount; + pipe_mutex_unlock(mgr->mutex); + pipe_mutex_lock(entry->mutex); + pipe_mutex_lock(mgr->mutex); + + prev = list->prev; + + + + if (list->prev == list) { + + /* + * Somebody else removed the entry from the list. + */ + + pipe_mutex_unlock(entry->mutex); + driFenceUnReferenceLocked(&entry); + return; + } + + entry->signaled_type |= (fence_type & entry->fence_type); + if (entry->signaled_type == entry->fence_type) { + DRMLISTDELINIT(list); + mgr->info.unreference(mgr, &entry->private); + } + pipe_mutex_unlock(entry->mutex); + driFenceUnReferenceLocked(&entry); + list = prev; + } +} + + +int +driFenceFinish(struct _DriFenceObject *fence, uint32_t fence_type, + int lazy_hint) +{ + struct _DriFenceMgr *mgr = fence->mgr; + int ret = 0; + + pipe_mutex_lock(fence->mutex); + + if ((fence->signaled_type & fence_type) == fence_type) + goto out0; + + ret = mgr->info.finish(mgr, fence->private, fence_type, lazy_hint); + if (ret) + goto out0; + + pipe_mutex_lock(mgr->mutex); + pipe_mutex_unlock(fence->mutex); + + driSignalPreviousFencesLocked(mgr, &fence->head, fence->fence_class, + fence_type); + pipe_mutex_unlock(mgr->mutex); + return 0; + + out0: + pipe_mutex_unlock(fence->mutex); + return ret; +} + +uint32_t driFenceSignaledTypeCached(struct _DriFenceObject *fence) +{ + uint32_t ret; + + pipe_mutex_lock(fence->mutex); + ret = 
fence->signaled_type; + pipe_mutex_unlock(fence->mutex); + + return ret; +} + +int +driFenceSignaledType(struct _DriFenceObject *fence, uint32_t flush_type, + uint32_t *signaled) +{ + int ret = 0; + struct _DriFenceMgr *mgr; + + pipe_mutex_lock(fence->mutex); + mgr = fence->mgr; + *signaled = fence->signaled_type; + if ((fence->signaled_type & flush_type) == flush_type) + goto out0; + + ret = mgr->info.signaled(mgr, fence->private, flush_type, signaled); + if (ret) { + *signaled = fence->signaled_type; + goto out0; + } + + if ((fence->signaled_type | *signaled) == fence->signaled_type) + goto out0; + + pipe_mutex_lock(mgr->mutex); + pipe_mutex_unlock(fence->mutex); + + driSignalPreviousFencesLocked(mgr, &fence->head, fence->fence_class, + *signaled); + + pipe_mutex_unlock(mgr->mutex); + return 0; + out0: + pipe_mutex_unlock(fence->mutex); + return ret; +} + +struct _DriFenceObject * +driFenceReference(struct _DriFenceObject *fence) +{ + pipe_mutex_lock(fence->mgr->mutex); + ++fence->refCount; + pipe_mutex_unlock(fence->mgr->mutex); + return fence; +} + +void +driFenceUnReference(struct _DriFenceObject **pFence) +{ + struct _DriFenceMgr *mgr; + + if (*pFence == NULL) + return; + + mgr = (*pFence)->mgr; + pipe_mutex_lock(mgr->mutex); + ++mgr->refCount; + driFenceUnReferenceLocked(pFence); + driFenceMgrUnrefUnlock(&mgr); +} + +struct _DriFenceObject +*driFenceCreate(struct _DriFenceMgr *mgr, uint32_t fence_class, + uint32_t fence_type, void *private, size_t private_size) +{ + struct _DriFenceObject *fence; + size_t fence_size = sizeof(*fence); + + if (private_size) + fence_size = ((fence_size + 15) & ~15); + + fence = calloc(1, fence_size + private_size); + + if (!fence) { + int ret = mgr->info.finish(mgr, private, fence_type, 0); + + if (ret) + usleep(10000000); + + return NULL; + } + + pipe_mutex_init(fence->mutex); + pipe_mutex_lock(fence->mutex); + pipe_mutex_lock(mgr->mutex); + fence->refCount = 1; + DRMLISTADDTAIL(&fence->head, &mgr->heads[fence_class]); + 
fence->mgr = mgr; + ++mgr->refCount; + ++mgr->num_fences; + pipe_mutex_unlock(mgr->mutex); + fence->fence_class = fence_class; + fence->fence_type = fence_type; + fence->signaled_type = 0; + fence->private = private; + if (private_size) { + fence->private = (void *)(((uint8_t *) fence) + fence_size); + memcpy(fence->private, private, private_size); + } + + pipe_mutex_unlock(fence->mutex); + return fence; +} + + +static int +tSignaled(struct _DriFenceMgr *mgr, void *private, uint32_t flush_type, + uint32_t *signaled_type) +{ + long fd = (long) mgr->private; + int dummy; + drmFence *fence = (drmFence *) private; + int ret; + + *signaled_type = 0; + ret = drmFenceSignaled((int) fd, fence, flush_type, &dummy); + if (ret) + return ret; + + *signaled_type = fence->signaled; + + return 0; +} + +static int +tFinish(struct _DriFenceMgr *mgr, void *private, uint32_t fence_type, + int lazy_hint) +{ + long fd = (long) mgr->private; + unsigned flags = lazy_hint ? DRM_FENCE_FLAG_WAIT_LAZY : 0; + + return drmFenceWait((int)fd, flags, (drmFence *) private, fence_type); +} + +static int +tUnref(struct _DriFenceMgr *mgr, void **private) +{ + long fd = (long) mgr->private; + drmFence *fence = (drmFence *) *private; + *private = NULL; + + return drmFenceUnreference(fd, fence); +} + +struct _DriFenceMgr *driFenceMgrTTMInit(int fd) +{ + struct _DriFenceMgrCreateInfo info; + struct _DriFenceMgr *mgr; + + info.flags = DRI_FENCE_CLASS_ORDERED; + info.num_classes = 4; + info.signaled = tSignaled; + info.finish = tFinish; + info.unreference = tUnref; + + mgr = driFenceMgrCreate(&info); + if (mgr == NULL) + return NULL; + + mgr->private = (void *) (long) fd; + return mgr; +} + diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.h b/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.h new file mode 100644 index 0000000000..4ea58dfe18 --- /dev/null +++ b/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.h @@ -0,0 +1,115 @@ +#ifndef DRI_FENCEMGR_H +#define DRI_FENCEMGR_H + 
+#include <stdint.h> +#include <stdlib.h> + +struct _DriFenceObject; +struct _DriFenceMgr; + +/* + * Do a quick check to see if the fence manager has registered the fence + * object as signaled. Note that this function may return a false negative + * answer. + */ +extern uint32_t driFenceSignaledTypeCached(struct _DriFenceObject *fence); + +/* + * Check if the fence object is signaled. This function can be substantially + * more expensive to call than the above function, but will not return a false + * negative answer. The argument "flush_type" sets the types that the + * underlying mechanism must make sure will eventually signal. + */ +extern int driFenceSignaledType(struct _DriFenceObject *fence, + uint32_t flush_type, uint32_t *signaled); + +/* + * Convenience functions. + */ + +static inline int driFenceSignaled(struct _DriFenceObject *fence, + uint32_t flush_type) +{ + uint32_t signaled_types; + int ret = driFenceSignaledType(fence, flush_type, &signaled_types); + if (ret) + return 0; + return ((signaled_types & flush_type) == flush_type); +} + +static inline int driFenceSignaledCached(struct _DriFenceObject *fence, + uint32_t flush_type) +{ + uint32_t signaled_types = + driFenceSignaledTypeCached(fence); + + return ((signaled_types & flush_type) == flush_type); +} + +/* + * Reference a fence object. + */ +extern struct _DriFenceObject *driFenceReference(struct _DriFenceObject *fence); + +/* + * Unreference a fence object. The fence object pointer will be reset to NULL. + */ + +extern void driFenceUnReference(struct _DriFenceObject **pFence); + + +/* + * Wait for a fence to signal the indicated fence_type. + * If "lazy_hint" is true, it indicates that the wait may sleep to avoid + * busy-wait polling. + */ +extern int driFenceFinish(struct _DriFenceObject *fence, uint32_t fence_type, + int lazy_hint); + +/* + * Create a DriFenceObject for manager "mgr". + * + * "private" is a pointer that should be used for the callbacks in + * struct _DriFenceMgrCreateInfo. 
+ * + * if private_size is nonzero, then the info stored at *private, with size + * private size will be copied and the fence manager will instead use a + * pointer to the copied data for the callbacks in + * struct _DriFenceMgrCreateInfo. In that case, the object pointed to by + * "private" may be destroyed after the call to driFenceCreate. + */ +extern struct _DriFenceObject *driFenceCreate(struct _DriFenceMgr *mgr, + uint32_t fence_class, + uint32_t fence_type, + void *private, + size_t private_size); + +extern uint32_t driFenceType(struct _DriFenceObject *fence); + +/* + * Fence creations are ordered. If a fence signals a fence_type, + * it is safe to assume that all fences of the same class that was + * created before that fence has signaled the same type. + */ + +#define DRI_FENCE_CLASS_ORDERED (1 << 0) + +struct _DriFenceMgrCreateInfo { + uint32_t flags; + uint32_t num_classes; + int (*signaled) (struct _DriFenceMgr *mgr, void *private, uint32_t flush_type, + uint32_t *signaled_type); + int (*finish) (struct _DriFenceMgr *mgr, void *private, uint32_t fence_type, int lazy_hint); + int (*unreference) (struct _DriFenceMgr *mgr, void **private); +}; + +extern struct _DriFenceMgr * +driFenceMgrCreate(const struct _DriFenceMgrCreateInfo *info); + +void +driFenceMgrUnReference(struct _DriFenceMgr **pMgr); + +extern struct _DriFenceMgr * +driFenceMgrTTMInit(int fd); + +#endif diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_mallocpool.c b/src/gallium/winsys/drm/intel/common/ws_dri_mallocpool.c new file mode 100644 index 0000000000..60924eac9e --- /dev/null +++ b/src/gallium/winsys/drm/intel/common/ws_dri_mallocpool.c @@ -0,0 +1,161 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, TX., USA + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * + **************************************************************************/ +/* + * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> + */ + +#include <xf86drm.h> +#include <stdlib.h> +#include <errno.h> +#include "pipe/p_debug.h" +#include "pipe/p_thread.h" +#include "ws_dri_bufpool.h" +#include "ws_dri_bufmgr.h" + +static void * +pool_create(struct _DriBufferPool *pool, + unsigned long size, uint64_t flags, unsigned hint, + unsigned alignment) +{ + unsigned long *private = malloc(size + 2*sizeof(unsigned long)); + if ((flags & DRM_BO_MASK_MEM) != DRM_BO_FLAG_MEM_LOCAL) + abort(); + + *private = size; + return (void *)private; +} + + +static int +pool_destroy(struct _DriBufferPool *pool, void *private) +{ + free(private); + return 0; +} + +static int +pool_waitIdle(struct _DriBufferPool *pool, void *private, + pipe_mutex *mutex, int lazy) +{ + return 0; +} + +static int +pool_map(struct _DriBufferPool *pool, void *private, unsigned flags, + int hint, pipe_mutex *mutex, void **virtual) +{ + *virtual = (void *)((unsigned long *)private + 2); + return 0; +} + +static int +pool_unmap(struct _DriBufferPool *pool, void *private) +{ + return 0; +} + +static unsigned long +pool_offset(struct _DriBufferPool *pool, void *private) +{ + /* + * BUG + */ + abort(); + return 0UL; +} + +static unsigned long +pool_poolOffset(struct _DriBufferPool *pool, void *private) +{ + /* + * BUG + */ + abort(); +} + +static uint64_t +pool_flags(struct _DriBufferPool *pool, void *private) +{ + return DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED; +} + +static unsigned long +pool_size(struct _DriBufferPool *pool, void *private) +{ + return *(unsigned long *) private; +} + + +static int +pool_fence(struct _DriBufferPool *pool, void *private, + struct _DriFenceObject *fence) +{ + abort(); + return 0UL; +} + +static drmBO * +pool_kernel(struct _DriBufferPool *pool, void *private) +{ + abort(); + return NULL; +} + +static void +pool_takedown(struct _DriBufferPool *pool) +{ + 
free(pool); +} + + +struct _DriBufferPool * +driMallocPoolInit(void) +{ + struct _DriBufferPool *pool; + + pool = (struct _DriBufferPool *) malloc(sizeof(*pool)); + if (!pool) + return NULL; + + pool->data = NULL; + pool->fd = -1; + pool->map = &pool_map; + pool->unmap = &pool_unmap; + pool->destroy = &pool_destroy; + pool->offset = &pool_offset; + pool->poolOffset = &pool_poolOffset; + pool->flags = &pool_flags; + pool->size = &pool_size; + pool->create = &pool_create; + pool->fence = &pool_fence; + pool->kernel = &pool_kernel; + pool->validate = NULL; + pool->waitIdle = &pool_waitIdle; + pool->takeDown = &pool_takedown; + return pool; +} diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_slabpool.c b/src/gallium/winsys/drm/intel/common/ws_dri_slabpool.c new file mode 100644 index 0000000000..391cea50a7 --- /dev/null +++ b/src/gallium/winsys/drm/intel/common/ws_dri_slabpool.c @@ -0,0 +1,968 @@ +/************************************************************************** + * + * Copyright 2006-2008 Tungsten Graphics, Inc., Cedar Park, TX., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ +/* + * Authors: Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> + */ + +#include <stdint.h> +#include <sys/time.h> +#include <errno.h> +#include <unistd.h> +#include <assert.h> +#include "ws_dri_bufpool.h" +#include "ws_dri_fencemgr.h" +#include "ws_dri_bufmgr.h" +#include "pipe/p_thread.h" + +#define DRI_SLABPOOL_ALLOC_RETRIES 100 + +struct _DriSlab; + +struct _DriSlabBuffer { + int isSlabBuffer; + drmBO *bo; + struct _DriFenceObject *fence; + struct _DriSlab *parent; + drmMMListHead head; + uint32_t mapCount; + uint32_t start; + uint32_t fenceType; + int unFenced; + pipe_condvar event; +}; + +struct _DriKernelBO { + int fd; + drmBO bo; + drmMMListHead timeoutHead; + drmMMListHead head; + struct timeval timeFreed; + uint32_t pageAlignment; + void *virtual; +}; + +struct _DriSlab{ + drmMMListHead head; + drmMMListHead freeBuffers; + uint32_t numBuffers; + uint32_t numFree; + struct _DriSlabBuffer *buffers; + struct _DriSlabSizeHeader *header; + struct _DriKernelBO *kbo; +}; + + +struct _DriSlabSizeHeader { + drmMMListHead slabs; + drmMMListHead freeSlabs; + drmMMListHead delayedBuffers; + uint32_t numDelayed; + struct _DriSlabPool *slabPool; + uint32_t bufSize; + pipe_mutex mutex; +}; + +struct _DriFreeSlabManager { + struct timeval slabTimeout; + struct timeval checkInterval; + struct timeval nextCheck; + drmMMListHead timeoutList; + drmMMListHead unCached; + drmMMListHead cached; + pipe_mutex mutex; +}; + + 
+struct _DriSlabPool { + + /* + * The data of this structure remains constant after + * initialization and thus needs no mutex protection. + */ + + struct _DriFreeSlabManager *fMan; + uint64_t proposedFlags; + uint64_t validMask; + uint32_t *bucketSizes; + uint32_t numBuckets; + uint32_t pageSize; + int fd; + int pageAlignment; + int maxSlabSize; + int desiredNumBuffers; + struct _DriSlabSizeHeader *headers; +}; + +/* + * FIXME: Perhaps arrange timeout slabs in size buckets for fast + * retreival?? + */ + + +static inline int +driTimeAfterEq(struct timeval *arg1, struct timeval *arg2) +{ + return ((arg1->tv_sec > arg2->tv_sec) || + ((arg1->tv_sec == arg2->tv_sec) && + (arg1->tv_usec > arg2->tv_usec))); +} + +static inline void +driTimeAdd(struct timeval *arg, struct timeval *add) +{ + unsigned int sec; + + arg->tv_sec += add->tv_sec; + arg->tv_usec += add->tv_usec; + sec = arg->tv_usec / 1000000; + arg->tv_sec += sec; + arg->tv_usec -= sec*1000000; +} + +static void +driFreeKernelBO(struct _DriKernelBO *kbo) +{ + if (!kbo) + return; + + (void) drmBOUnreference(kbo->fd, &kbo->bo); + free(kbo); +} + + +static void +driFreeTimeoutKBOsLocked(struct _DriFreeSlabManager *fMan, + struct timeval *time) +{ + drmMMListHead *list, *next; + struct _DriKernelBO *kbo; + + if (!driTimeAfterEq(time, &fMan->nextCheck)) + return; + + for (list = fMan->timeoutList.next, next = list->next; + list != &fMan->timeoutList; + list = next, next = list->next) { + + kbo = DRMLISTENTRY(struct _DriKernelBO, list, timeoutHead); + + if (!driTimeAfterEq(time, &kbo->timeFreed)) + break; + + DRMLISTDELINIT(&kbo->timeoutHead); + DRMLISTDELINIT(&kbo->head); + driFreeKernelBO(kbo); + } + + fMan->nextCheck = *time; + driTimeAdd(&fMan->nextCheck, &fMan->checkInterval); +} + + +/* + * Add a _DriKernelBO to the free slab manager. + * This means that it is available for reuse, but if it's not + * reused in a while, it will be freed. 
+ */ + +static void +driSetKernelBOFree(struct _DriFreeSlabManager *fMan, + struct _DriKernelBO *kbo) +{ + struct timeval time; + + pipe_mutex_lock(fMan->mutex); + gettimeofday(&time, NULL); + driTimeAdd(&time, &fMan->slabTimeout); + + kbo->timeFreed = time; + + if (kbo->bo.flags & DRM_BO_FLAG_CACHED) + DRMLISTADD(&kbo->head, &fMan->cached); + else + DRMLISTADD(&kbo->head, &fMan->unCached); + + DRMLISTADDTAIL(&kbo->timeoutHead, &fMan->timeoutList); + driFreeTimeoutKBOsLocked(fMan, &time); + + pipe_mutex_unlock(fMan->mutex); +} + +/* + * Get a _DriKernelBO for us to use as storage for a slab. + * + */ + +static struct _DriKernelBO * +driAllocKernelBO(struct _DriSlabSizeHeader *header) + +{ + struct _DriSlabPool *slabPool = header->slabPool; + struct _DriFreeSlabManager *fMan = slabPool->fMan; + drmMMListHead *list, *next, *head; + uint32_t size = header->bufSize * slabPool->desiredNumBuffers; + struct _DriKernelBO *kbo; + struct _DriKernelBO *kboTmp; + int ret; + + /* + * FIXME: We should perhaps allow some variation in slabsize in order + * to efficiently reuse slabs. + */ + + size = (size <= slabPool->maxSlabSize) ? size : slabPool->maxSlabSize; + size = (size + slabPool->pageSize - 1) & ~(slabPool->pageSize - 1); + pipe_mutex_lock(fMan->mutex); + + kbo = NULL; + + retry: + head = (slabPool->proposedFlags & DRM_BO_FLAG_CACHED) ? 
+ &fMan->cached : &fMan->unCached; + + for (list = head->next, next = list->next; + list != head; + list = next, next = list->next) { + + kboTmp = DRMLISTENTRY(struct _DriKernelBO, list, head); + + if ((kboTmp->bo.size == size) && + (slabPool->pageAlignment == 0 || + (kboTmp->pageAlignment % slabPool->pageAlignment) == 0)) { + + if (!kbo) + kbo = kboTmp; + + if ((kbo->bo.proposedFlags ^ slabPool->proposedFlags) == 0) + break; + + } + } + + if (kbo) { + DRMLISTDELINIT(&kbo->head); + DRMLISTDELINIT(&kbo->timeoutHead); + } + + pipe_mutex_unlock(fMan->mutex); + + if (kbo) { + uint64_t new_mask = kbo->bo.proposedFlags ^ slabPool->proposedFlags; + + ret = 0; + if (new_mask) { + ret = drmBOSetStatus(kbo->fd, &kbo->bo, slabPool->proposedFlags, + new_mask, DRM_BO_HINT_DONT_FENCE, 0, 0); + } + if (ret == 0) + return kbo; + + driFreeKernelBO(kbo); + kbo = NULL; + goto retry; + } + + kbo = calloc(1, sizeof(struct _DriKernelBO)); + if (!kbo) + return NULL; + + kbo->fd = slabPool->fd; + DRMINITLISTHEAD(&kbo->head); + DRMINITLISTHEAD(&kbo->timeoutHead); + ret = drmBOCreate(kbo->fd, size, slabPool->pageAlignment, NULL, + slabPool->proposedFlags, + DRM_BO_HINT_DONT_FENCE, &kbo->bo); + if (ret) + goto out_err0; + + ret = drmBOMap(kbo->fd, &kbo->bo, + DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, + 0, &kbo->virtual); + + if (ret) + goto out_err1; + + ret = drmBOUnmap(kbo->fd, &kbo->bo); + if (ret) + goto out_err1; + + return kbo; + + out_err1: + drmBOUnreference(kbo->fd, &kbo->bo); + out_err0: + free(kbo); + return NULL; +} + + +static int +driAllocSlab(struct _DriSlabSizeHeader *header) +{ + struct _DriSlab *slab; + struct _DriSlabBuffer *buf; + uint32_t numBuffers; + int ret; + int i; + + slab = calloc(1, sizeof(*slab)); + if (!slab) + return -ENOMEM; + + slab->kbo = driAllocKernelBO(header); + if (!slab->kbo) { + ret = -ENOMEM; + goto out_err0; + } + + numBuffers = slab->kbo->bo.size / header->bufSize; + + slab->buffers = calloc(numBuffers, sizeof(*slab->buffers)); + if (!slab->buffers) { 
+ ret = -ENOMEM; + goto out_err1; + } + + DRMINITLISTHEAD(&slab->head); + DRMINITLISTHEAD(&slab->freeBuffers); + slab->numBuffers = numBuffers; + slab->numFree = 0; + slab->header = header; + + buf = slab->buffers; + for (i=0; i < numBuffers; ++i) { + buf->parent = slab; + buf->start = i* header->bufSize; + buf->mapCount = 0; + buf->isSlabBuffer = 1; + pipe_condvar_init(buf->event); + DRMLISTADDTAIL(&buf->head, &slab->freeBuffers); + slab->numFree++; + buf++; + } + + DRMLISTADDTAIL(&slab->head, &header->slabs); + + return 0; + + out_err1: + driSetKernelBOFree(header->slabPool->fMan, slab->kbo); + free(slab->buffers); + out_err0: + free(slab); + return ret; +} + +/* + * Delete a buffer from the slab header delayed list and put + * it on the slab free list. + */ + +static void +driSlabFreeBufferLocked(struct _DriSlabBuffer *buf) +{ + struct _DriSlab *slab = buf->parent; + struct _DriSlabSizeHeader *header = slab->header; + drmMMListHead *list = &buf->head; + + DRMLISTDEL(list); + DRMLISTADDTAIL(list, &slab->freeBuffers); + slab->numFree++; + + if (slab->head.next == &slab->head) + DRMLISTADDTAIL(&slab->head, &header->slabs); + + if (slab->numFree == slab->numBuffers) { + list = &slab->head; + DRMLISTDEL(list); + DRMLISTADDTAIL(list, &header->freeSlabs); + } + + if (header->slabs.next == &header->slabs || + slab->numFree != slab->numBuffers) { + + drmMMListHead *next; + struct _DriFreeSlabManager *fMan = header->slabPool->fMan; + + for (list = header->freeSlabs.next, next = list->next; + list != &header->freeSlabs; + list = next, next = list->next) { + + slab = DRMLISTENTRY(struct _DriSlab, list, head); + + DRMLISTDELINIT(list); + driSetKernelBOFree(fMan, slab->kbo); + free(slab->buffers); + free(slab); + } + } +} + +static void +driSlabCheckFreeLocked(struct _DriSlabSizeHeader *header, int wait) +{ + drmMMListHead *list, *prev, *first; + struct _DriSlabBuffer *buf; + struct _DriSlab *slab; + int firstWasSignaled = 1; + int signaled; + int i; + int ret; + + /* + * 
Rerun the freeing test if the youngest tested buffer + * was signaled, since there might be more idle buffers + * in the delay list. + */ + + while (firstWasSignaled) { + firstWasSignaled = 0; + signaled = 0; + first = header->delayedBuffers.next; + + /* Only examine the oldest 1/3 of delayed buffers: + */ + if (header->numDelayed > 3) { + for (i = 0; i < header->numDelayed; i += 3) { + first = first->next; + } + } + + for (list = first, prev = list->prev; + list != &header->delayedBuffers; + list = prev, prev = list->prev) { + buf = DRMLISTENTRY(struct _DriSlabBuffer, list, head); + slab = buf->parent; + + if (!signaled) { + if (wait) { + ret = driFenceFinish(buf->fence, buf->fenceType, 0); + if (ret) + break; + signaled = 1; + wait = 0; + } else { + signaled = driFenceSignaled(buf->fence, buf->fenceType); + } + if (signaled) { + if (list == first) + firstWasSignaled = 1; + driFenceUnReference(&buf->fence); + header->numDelayed--; + driSlabFreeBufferLocked(buf); + } + } else if (driFenceSignaledCached(buf->fence, buf->fenceType)) { + driFenceUnReference(&buf->fence); + header->numDelayed--; + driSlabFreeBufferLocked(buf); + } + } + } +} + + +static struct _DriSlabBuffer * +driSlabAllocBuffer(struct _DriSlabSizeHeader *header) +{ + static struct _DriSlabBuffer *buf; + struct _DriSlab *slab; + drmMMListHead *list; + int count = DRI_SLABPOOL_ALLOC_RETRIES; + + pipe_mutex_lock(header->mutex); + while(header->slabs.next == &header->slabs && count > 0) { + driSlabCheckFreeLocked(header, 0); + if (header->slabs.next != &header->slabs) + break; + + pipe_mutex_unlock(header->mutex); + if (count != DRI_SLABPOOL_ALLOC_RETRIES) + usleep(1); + pipe_mutex_lock(header->mutex); + (void) driAllocSlab(header); + count--; + } + + list = header->slabs.next; + if (list == &header->slabs) { + pipe_mutex_unlock(header->mutex); + return NULL; + } + slab = DRMLISTENTRY(struct _DriSlab, list, head); + if (--slab->numFree == 0) + DRMLISTDELINIT(list); + + list = slab->freeBuffers.next; + 
DRMLISTDELINIT(list); + + pipe_mutex_unlock(header->mutex); + buf = DRMLISTENTRY(struct _DriSlabBuffer, list, head); + return buf; +} + +static void * +pool_create(struct _DriBufferPool *driPool, unsigned long size, + uint64_t flags, unsigned hint, unsigned alignment) +{ + struct _DriSlabPool *pool = (struct _DriSlabPool *) driPool->data; + struct _DriSlabSizeHeader *header; + struct _DriSlabBuffer *buf; + void *dummy; + int i; + int ret; + + /* + * FIXME: Check for compatibility. + */ + + header = pool->headers; + for (i=0; i<pool->numBuckets; ++i) { + if (header->bufSize >= size) + break; + header++; + } + + if (i < pool->numBuckets) + return driSlabAllocBuffer(header); + + + /* + * Fall back to allocate a buffer object directly from DRM. + * and wrap it in a driBO structure. + */ + + + buf = calloc(1, sizeof(*buf)); + + if (!buf) + return NULL; + + buf->bo = calloc(1, sizeof(*buf->bo)); + if (!buf->bo) + goto out_err0; + + if (alignment) { + if ((alignment < pool->pageSize) && (pool->pageSize % alignment)) + goto out_err1; + if ((alignment > pool->pageSize) && (alignment % pool->pageSize)) + goto out_err1; + } + + ret = drmBOCreate(pool->fd, size, alignment / pool->pageSize, NULL, + flags, hint, buf->bo); + if (ret) + goto out_err1; + + ret = drmBOMap(pool->fd, buf->bo, DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, + 0, &dummy); + if (ret) + goto out_err2; + + ret = drmBOUnmap(pool->fd, buf->bo); + if (ret) + goto out_err2; + + return buf; + out_err2: + drmBOUnreference(pool->fd, buf->bo); + out_err1: + free(buf->bo); + out_err0: + free(buf); + return NULL; +} + +static int +pool_destroy(struct _DriBufferPool *driPool, void *private) +{ + struct _DriSlabBuffer *buf = + (struct _DriSlabBuffer *) private; + struct _DriSlab *slab; + struct _DriSlabSizeHeader *header; + + if (!buf->isSlabBuffer) { + struct _DriSlabPool *pool = (struct _DriSlabPool *) driPool->data; + int ret; + + ret = drmBOUnreference(pool->fd, buf->bo); + free(buf->bo); + free(buf); + return ret; + } + + 
slab = buf->parent; + header = slab->header; + + pipe_mutex_lock(header->mutex); + buf->unFenced = 0; + buf->mapCount = 0; + + if (buf->fence && !driFenceSignaledCached(buf->fence, buf->fenceType)) { + DRMLISTADDTAIL(&buf->head, &header->delayedBuffers); + header->numDelayed++;/* fence attached and not yet signaled: defer the free by queueing on delayedBuffers */ + } else { + if (buf->fence) + driFenceUnReference(&buf->fence); + driSlabFreeBufferLocked(buf); + } + + pipe_mutex_unlock(header->mutex); + return 0; +} +/* pool_waitIdle: blocks on buf->event, using the caller-supplied mutex, for as long as buf->unFenced is set; then, if a fence is attached, finishes it with driFenceFinish(..., lazy) and drops the reference. Always returns 0 - the driFenceFinish() result is discarded. The driPool argument is unused. */ +static int +pool_waitIdle(struct _DriBufferPool *driPool, void *private, + pipe_mutex *mutex, int lazy) +{ + struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; + + while(buf->unFenced) + pipe_condvar_wait(buf->event, *mutex); + + if (!buf->fence) + return 0; + + driFenceFinish(buf->fence, buf->fenceType, lazy); + driFenceUnReference(&buf->fence); + + return 0; +} +/* pool_map: "map" callback. The buffer counts as busy while it is unfenced (slab buffers only) or has a fence that is not yet signaled (cached query for slab buffers, driFenceSignaled() otherwise). When busy, returns -EBUSY if hint contains DRM_BO_HINT_DONT_BLOCK, else waits through pool_waitIdle() with lazy = 0. On success increments mapCount, stores the CPU address in *virtual and returns 0. The flags argument is not consulted. */ +static int +pool_map(struct _DriBufferPool *pool, void *private, unsigned flags, + int hint, pipe_mutex *mutex, void **virtual) +{ + struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; + int busy; + + if (buf->isSlabBuffer) + busy = buf->unFenced || (buf->fence && !driFenceSignaledCached(buf->fence, buf->fenceType)); + else + busy = buf->fence && !driFenceSignaled(buf->fence, buf->fenceType); + + + if (busy) { + if (hint & DRM_BO_HINT_DONT_BLOCK) + return -EBUSY; + else { + (void) pool_waitIdle(pool, private, mutex, 0); + } + } + + ++buf->mapCount; + *virtual = (buf->isSlabBuffer) ?
+ (void *) ((uint8_t *) buf->parent->kbo->virtual + buf->start) : + (void *) buf->bo->virtual; + + return 0; +} +/* pool_unmap: drops one map reference. When a slab buffer's mapCount reaches 0 the event condvar is broadcast, which is what pool_validate() waits on. Always returns 0. */ +static int +pool_unmap(struct _DriBufferPool *pool, void *private) +{ + struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; + + --buf->mapCount; + if (buf->mapCount == 0 && buf->isSlabBuffer) + pipe_condvar_broadcast(buf->event); + + return 0; +} +/* pool_offset: returns bo->offset for a standalone buffer, or the parent slab's kernel BO offset plus buf->start for a slab buffer. Both paths assert that DRM_BO_FLAG_NO_MOVE is among the proposed flags. The (void) header statement is presumably there to silence an unused-variable warning when assert() compiles to nothing - confirm. */ +static unsigned long +pool_offset(struct _DriBufferPool *pool, void *private) +{ + struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; + struct _DriSlab *slab; + struct _DriSlabSizeHeader *header; + + if (!buf->isSlabBuffer) { + assert(buf->bo->proposedFlags & DRM_BO_FLAG_NO_MOVE); + return buf->bo->offset; + } + + slab = buf->parent; + header = slab->header; + + (void) header; + assert(header->slabPool->proposedFlags & DRM_BO_FLAG_NO_MOVE); + return slab->kbo->bo.offset + buf->start; +} +/* pool_poolOffset: returns buf->start with no isSlabBuffer distinction. NOTE(review): pool_create()'s fallback path never assigns start, so for standalone buffers this looks like the calloc'ed 0 - confirm. */ +static unsigned long +pool_poolOffset(struct _DriBufferPool *pool, void *private) +{ + struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; + + return buf->start; +} +/* pool_flags: returns the flags field of the backing kernel BO - the buffer's own bo, or the parent slab's kbo. */ +static uint64_t +pool_flags(struct _DriBufferPool *pool, void *private) +{ + struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; + + if (!buf->isSlabBuffer) + return buf->bo->flags; + + return buf->parent->kbo->bo.flags; +} +/* pool_size: returns bo->size for a standalone buffer; for a slab buffer returns the bucket size header->bufSize, which can exceed the size originally passed to pool_create(). */ +static unsigned long +pool_size(struct _DriBufferPool *pool, void *private) +{ + struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; + if (!buf->isSlabBuffer) + return buf->bo->size; + + return buf->parent->header->bufSize; +} +/* pool_fence: attaches fence to the buffer. Drops any fence already held, takes a reference on the new one, copies fenceFlags from the backing kernel BO into fenceType, clears unFenced and wakes every waiter on buf->event. Always returns 0. NOTE(review): the broadcast also runs for standalone buffers, whose event is not visibly initialised in pool_create()'s fallback path - confirm. */ +static int +pool_fence(struct _DriBufferPool *pool, void *private, + struct _DriFenceObject *fence) +{ + struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; + drmBO *bo; + + if (buf->fence) + driFenceUnReference(&buf->fence); + + buf->fence = driFenceReference(fence); + bo = (buf->isSlabBuffer) ?
+ &buf->parent->kbo->bo: + buf->bo; + buf->fenceType = bo->fenceFlags; + + buf->unFenced = 0; + pipe_condvar_broadcast(buf->event); + + return 0; +} +/* pool_kernel: returns the backing drmBO - the parent slab's kernel BO for slab buffers, buf->bo otherwise. */ +static drmBO * +pool_kernel(struct _DriBufferPool *pool, void *private) +{ + struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; + + return (buf->isSlabBuffer) ? &buf->parent->kbo->bo : buf->bo; +} +/* pool_validate: no-op for standalone buffers. For a slab buffer, waits on buf->event (with the caller-supplied mutex) until mapCount is 0, then sets unFenced = 1; pool_fence() clears it again. Always returns 0. */ +static int +pool_validate(struct _DriBufferPool *pool, void *private, + pipe_mutex *mutex) +{ + struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; + + if (!buf->isSlabBuffer) + return 0; + + while(buf->mapCount != 0) + pipe_condvar_wait(buf->event, *mutex); + + buf->unFenced = 1; + return 0; +} + +/* driInitFreeSlabManager: allocates a zeroed manager, converts both millisecond arguments into normalised struct timeval values (tv_sec plus tv_usec below 1000000), sets nextCheck to the current time plus checkInterval and initialises the timeoutList, unCached and cached list heads. Returns NULL if calloc() fails. NOTE(review): msec*1000 is evaluated in uint32_t, so arguments above roughly 4294967 ms wrap before the seconds split. */ +struct _DriFreeSlabManager * +driInitFreeSlabManager(uint32_t checkIntervalMsec, uint32_t slabTimeoutMsec) +{ + struct _DriFreeSlabManager *tmp; + + tmp = calloc(1, sizeof(*tmp)); + if (!tmp) + return NULL; + + pipe_mutex_init(tmp->mutex); + pipe_mutex_lock(tmp->mutex); + tmp->slabTimeout.tv_usec = slabTimeoutMsec*1000; + tmp->slabTimeout.tv_sec = tmp->slabTimeout.tv_usec / 1000000; + tmp->slabTimeout.tv_usec -= tmp->slabTimeout.tv_sec*1000000; + + tmp->checkInterval.tv_usec = checkIntervalMsec*1000; + tmp->checkInterval.tv_sec = tmp->checkInterval.tv_usec / 1000000; + tmp->checkInterval.tv_usec -= tmp->checkInterval.tv_sec*1000000; + + gettimeofday(&tmp->nextCheck, NULL); + driTimeAdd(&tmp->nextCheck, &tmp->checkInterval); + DRMINITLISTHEAD(&tmp->timeoutList); + DRMINITLISTHEAD(&tmp->unCached); + DRMINITLISTHEAD(&tmp->cached); + pipe_mutex_unlock(tmp->mutex); + + return tmp; +} +/* driFinishFreeSlabManager: runs driFreeTimeoutKBOsLocked() with a cut-off of nextCheck plus one checkInterval, asserts that all three lists are empty and frees the manager. NOTE(review): nextCheck and checkInterval are read before the mutex is taken, and no mutex teardown is visible here. */ +void +driFinishFreeSlabManager(struct _DriFreeSlabManager *fMan) +{ + struct timeval time; + + time = fMan->nextCheck; + driTimeAdd(&time, &fMan->checkInterval); + + pipe_mutex_lock(fMan->mutex); + driFreeTimeoutKBOsLocked(fMan, &time); + pipe_mutex_unlock(fMan->mutex); + + assert(fMan->timeoutList.next == &fMan->timeoutList); + assert(fMan->unCached.next == &fMan->unCached); + assert(fMan->cached.next == &fMan->cached);
+ + free(fMan); +} +/* driInitSizeHeader: initialises one size bucket - its mutex, the slabs / freeSlabs / delayedBuffers list heads, numDelayed = 0, the back-pointer to the owning pool and the bucket's buffer size. */ +static void +driInitSizeHeader(struct _DriSlabPool *pool, uint32_t size, + struct _DriSlabSizeHeader *header) +{ + pipe_mutex_init(header->mutex); + pipe_mutex_lock(header->mutex); + + DRMINITLISTHEAD(&header->slabs); + DRMINITLISTHEAD(&header->freeSlabs); + DRMINITLISTHEAD(&header->delayedBuffers); + + header->numDelayed = 0; + header->slabPool = pool; + header->bufSize = size; + + pipe_mutex_unlock(header->mutex); +} +/* driFinishSizeHeader: under header->mutex, walks delayedBuffers; for each buffer finishes (non-lazy) and drops its fence if any, decrements numDelayed and hands the buffer to driSlabFreeBufferLocked(). next is captured before the body runs, so advancing does not depend on the node just handed to driSlabFreeBufferLocked(). NOTE(review): entries are not explicitly unlinked from delayedBuffers in this loop - confirm driSlabFreeBufferLocked() copes with that. */ +static void +driFinishSizeHeader(struct _DriSlabSizeHeader *header) +{ + drmMMListHead *list, *next; + struct _DriSlabBuffer *buf; + + pipe_mutex_lock(header->mutex); + for (list = header->delayedBuffers.next, next = list->next; + list != &header->delayedBuffers; + list = next, next = list->next) { + + buf = DRMLISTENTRY(struct _DriSlabBuffer, list , head); + if (buf->fence) { + (void) driFenceFinish(buf->fence, buf->fenceType, 0); + driFenceUnReference(&buf->fence); + } + header->numDelayed--; + driSlabFreeBufferLocked(buf); + } + pipe_mutex_unlock(header->mutex); +} +/* pool_takedown: "takeDown" callback. Runs driFinishSizeHeader() on every bucket, then frees the headers array, the bucketSizes array, the slab pool and finally the _DriBufferPool wrapper itself. */ +static void +pool_takedown(struct _DriBufferPool *driPool) +{ + struct _DriSlabPool *pool = driPool->data; + int i; + + for (i=0; i<pool->numBuckets; ++i) { + driFinishSizeHeader(&pool->headers[i]); + } + + free(pool->headers); + free(pool->bucketSizes); + free(pool); + free(driPool); +} +/* driSlabPoolInit: builds a slab buffer pool with numSizes buckets, bucket i using buffer size smallestSize << i, and returns a _DriBufferPool whose callbacks point at the pool_* functions of this file. fMan is stored by pointer, not copied. Returns NULL when any calloc() fails, releasing whatever was already allocated. */ +struct _DriBufferPool * +driSlabPoolInit(int fd, uint64_t flags, + uint64_t validMask, + uint32_t smallestSize, + uint32_t numSizes, + uint32_t desiredNumBuffers, + uint32_t maxSlabSize, + uint32_t pageAlignment, + struct _DriFreeSlabManager *fMan) +{ + struct _DriBufferPool *driPool; + struct _DriSlabPool *pool; + uint32_t i; + + driPool = calloc(1, sizeof(*driPool)); + if (!driPool) + return NULL; + + pool = calloc(1, sizeof(*pool)); + if (!pool) + goto out_err0; + + pool->bucketSizes = calloc(numSizes, sizeof(*pool->bucketSizes)); + if (!pool->bucketSizes) + goto out_err1; + + pool->headers = calloc(numSizes, sizeof(*pool->headers)); + if (!pool->headers) + goto
out_err2; + + pool->fMan = fMan; + pool->proposedFlags = flags; + pool->validMask = validMask; + pool->numBuckets = numSizes; + pool->pageSize = getpagesize(); + pool->fd = fd; + pool->pageAlignment = pageAlignment; + pool->maxSlabSize = maxSlabSize; + pool->desiredNumBuffers = desiredNumBuffers; + + for (i=0; i<pool->numBuckets; ++i) { + pool->bucketSizes[i] = (smallestSize << i);/* NOTE(review): uint32_t shift - undefined for i >= 32 and silently truncating once the size no longer fits in 32 bits; numSizes is not range-checked in this function */ + driInitSizeHeader(pool, pool->bucketSizes[i], + &pool->headers[i]); + } + + driPool->data = (void *) pool; + driPool->map = &pool_map; + driPool->unmap = &pool_unmap; + driPool->destroy = &pool_destroy; + driPool->offset = &pool_offset; + driPool->poolOffset = &pool_poolOffset; + driPool->flags = &pool_flags; + driPool->size = &pool_size; + driPool->create = &pool_create; + driPool->fence = &pool_fence; + driPool->kernel = &pool_kernel; + driPool->validate = &pool_validate; + driPool->waitIdle = &pool_waitIdle; + driPool->takeDown = &pool_takedown; + + return driPool; + + out_err2: + free(pool->bucketSizes); + out_err1: + free(pool); + out_err0: + free(driPool); + + return NULL; +} diff --git a/src/gallium/winsys/drm/intel/dri/Makefile b/src/gallium/winsys/drm/intel/dri/Makefile new file mode 100644 index 0000000000..2046441a22 --- /dev/null +++ b/src/gallium/winsys/drm/intel/dri/Makefile @@ -0,0 +1,33 @@ +TOP = ../../../../../..
+include $(TOP)/configs/current + +LIBNAME = i915_dri.so +LIBNAME_EGL = egl_i915_dri.so + +PIPE_DRIVERS = \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + ../common/libinteldrm.a \ + $(TOP)/src/gallium/drivers/i915simple/libi915simple.a + + +DRIVER_SOURCES = \ + intel_winsys_softpipe.c \ + intel_swapbuffers.c \ + intel_context.c \ + intel_lock.c \ + intel_screen.c + +C_SOURCES = \ + $(COMMON_GALLIUM_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +DRIVER_DEFINES = -I../common $(shell pkg-config libdrm --atleast-version=2.3.1 \ + && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") + +include ../../Makefile.template + +#intel_tex_layout.o: $(TOP)/src/mesa/drivers/dri/intel/intel_tex_layout.c + +symlinks: diff --git a/src/gallium/winsys/drm/intel/dri/SConscript b/src/gallium/winsys/drm/intel/dri/SConscript new file mode 100644 index 0000000000..6a4f50afcc --- /dev/null +++ b/src/gallium/winsys/drm/intel/dri/SConscript @@ -0,0 +1,41 @@ +Import('*') + +if 'mesa' in env['statetrackers']: + + env = drienv.Clone() + + env.Append(CPPPATH = [ + '../intel', + 'server' + ]) + + #MINIGLX_SOURCES = server/intel_dri.c + + DRIVER_SOURCES = [ + 'intel_winsys_pipe.c', + 'intel_winsys_softpipe.c', + 'intel_winsys_i915.c', + 'intel_batchbuffer.c', + 'intel_swapbuffers.c', + 'intel_context.c', + 'intel_lock.c', + 'intel_screen.c', + 'intel_batchpool.c', + ] + + sources = \ + COMMON_GALLIUM_SOURCES + \ + COMMON_BM_SOURCES + \ + DRIVER_SOURCES + + drivers = [ + softpipe, + i915simple + ] + + # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions + env.SharedLibrary( + target ='i915tex_dri.so', + source = sources, + LIBS = drivers + mesa + auxiliaries + env['LIBS'], + ) diff --git a/src/gallium/winsys/drm/intel/dri/intel_batchbuffer.h b/src/gallium/winsys/drm/intel/dri/intel_batchbuffer.h new file mode 100644 index 0000000000..3e95326168 --- /dev/null +++ b/src/gallium/winsys/drm/intel/dri/intel_batchbuffer.h @@ -0,0 +1,24 @@ +#ifndef INTEL_BATCHBUFFER_H +#define 
INTEL_BATCHBUFFER_H + +#include "intel_be_batchbuffer.h" + +/* + * Need to redefine the BATCH defines + */ +/* All three replacements below expand to expressions on a variable named intel, which must be in scope at the point of use and provide base.batch. OUT_RELOC additionally asserts delta >= 0 before recording the relocation. */ +#undef BEGIN_BATCH +#define BEGIN_BATCH(dwords, relocs) \ + (i915_batchbuffer_check(&intel->base.batch->base, dwords, relocs)) + +#undef OUT_BATCH +#define OUT_BATCH(d) \ + i915_batchbuffer_dword(&intel->base.batch->base, d) + +#undef OUT_RELOC +#define OUT_RELOC(buf,flags,mask,delta) do { \ + assert((delta) >= 0); \ + intel_be_offset_relocation(intel->base.batch, delta, buf, flags, mask); \ +} while (0) + +#endif diff --git a/src/gallium/winsys/drm/intel/dri/intel_context.c b/src/gallium/winsys/drm/intel/dri/intel_context.c new file mode 100644 index 0000000000..97ef731aaa --- /dev/null +++ b/src/gallium/winsys/drm/intel/dri/intel_context.c @@ -0,0 +1,337 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "i830_dri.h" + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_swapbuffers.h" +#include "intel_batchbuffer.h" +#include "intel_winsys_softpipe.h" + +#include "i915simple/i915_screen.h" + +#include "state_tracker/st_public.h" +#include "state_tracker/st_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_context.h" + +#include "utils.h" + + +#ifdef DEBUG +int __intel_debug = 0; +#endif + + +#define need_GL_ARB_multisample +#define need_GL_ARB_point_parameters +#define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object +#define need_GL_ARB_vertex_program +#define need_GL_ARB_window_pos +#define need_GL_EXT_blend_color +#define need_GL_EXT_blend_equation_separate +#define need_GL_EXT_blend_func_separate +#define need_GL_EXT_blend_minmax +#define need_GL_EXT_cull_vertex +#define need_GL_EXT_fog_coord +#define need_GL_EXT_framebuffer_object +#define need_GL_EXT_multi_draw_arrays +#define need_GL_EXT_secondary_color +#define need_GL_NV_vertex_program +#include "extension_helper.h" + + +/** + * Extension strings exported by the intel driver. + * + * \note + * It appears that ARB_texture_env_crossbar has "disappeared" compared to the + * old i830-specific driver. 
+ */ +const struct dri_extension card_extensions[] = { + {"GL_ARB_multisample", GL_ARB_multisample_functions}, + {"GL_ARB_multitexture", NULL}, + {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, + {"GL_ARB_texture_border_clamp", NULL}, + {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions}, + {"GL_ARB_texture_cube_map", NULL}, + {"GL_ARB_texture_env_add", NULL}, + {"GL_ARB_texture_env_combine", NULL}, + {"GL_ARB_texture_env_dot3", NULL}, + {"GL_ARB_texture_mirrored_repeat", NULL}, + {"GL_ARB_texture_rectangle", NULL}, + {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions}, + {"GL_ARB_pixel_buffer_object", NULL}, + {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions}, + {"GL_ARB_window_pos", GL_ARB_window_pos_functions}, + {"GL_EXT_blend_color", GL_EXT_blend_color_functions}, + {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions}, + {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, + {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, + {"GL_EXT_blend_subtract", NULL}, + {"GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions}, + {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions}, + {"GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions}, + {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions}, + {"GL_EXT_packed_depth_stencil", NULL}, + {"GL_EXT_pixel_buffer_object", NULL}, + {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, + {"GL_EXT_stencil_wrap", NULL}, + {"GL_EXT_texture_edge_clamp", NULL}, + {"GL_EXT_texture_env_combine", NULL}, + {"GL_EXT_texture_env_dot3", NULL}, + {"GL_EXT_texture_filter_anisotropic", NULL}, + {"GL_EXT_texture_lod_bias", NULL}, + {"GL_3DFX_texture_compression_FXT1", NULL}, + {"GL_APPLE_client_storage", NULL}, + {"GL_MESA_pack_invert", NULL}, + {"GL_MESA_ycbcr_texture", NULL}, + {"GL_NV_blend_square", NULL}, + {"GL_NV_vertex_program", GL_NV_vertex_program_functions}, + {"GL_NV_vertex_program1_1", NULL}, + 
{"GL_SGIS_generate_mipmap", NULL }, + {NULL, NULL} +}; + + +/* Keyword table for the INTEL_DEBUG environment variable; intelCreateContext() feeds it to driParseDebugString() to build __intel_debug. */ +#ifdef DEBUG +static const struct dri_debug_control debug_control[] = { + {"ioctl", DEBUG_IOCTL}, + {"bat", DEBUG_BATCH}, + {"lock", DEBUG_LOCK}, + {"swap", DEBUG_SWAP}, + {NULL, 0} +}; +#endif + + +/* The three helpers below are installed as intel->base.hardware_lock / hardware_unlock / hardware_locked in intelCreateContext(). Each casts the intel_be_context pointer back to the enclosing intel_context, which relies on base being the first member of struct intel_context. intel_lock_hardware: acquires the hardware lock through LOCK_HARDWARE(). */ +static void +intel_lock_hardware(struct intel_be_context *context) +{ + struct intel_context *intel = (struct intel_context *)context; + LOCK_HARDWARE(intel); +} +/* intel_unlock_hardware: releases the hardware lock through UNLOCK_HARDWARE(). */ +static void +intel_unlock_hardware(struct intel_be_context *context) +{ + struct intel_context *intel = (struct intel_context *)context; + UNLOCK_HARDWARE(intel); +} +/* intel_locked_hardware: returns TRUE when intel->locked is non-zero, i.e. between LOCK_HARDWARE() and UNLOCK_HARDWARE() on this context. */ +static boolean +intel_locked_hardware(struct intel_be_context *context) +{ + struct intel_context *intel = (struct intel_context *)context; + return intel->locked ? TRUE : FALSE; +} +/* intelCreateContext: allocates the intel_context and publishes it in driContextPriv->driverPrivate, parses the "i915" driconf options, creates the memory pools while holding the DRM lock, wires up the hardware-lock callbacks and the backend context, then picks a pipe driver - softpipe when INTEL_SP is set or the PCI id is not recognised, i915 otherwise - and creates the state-tracker context (sharing with sharedContextPrivate's st when given) and the GL extensions. Returns GL_TRUE on success and GL_FALSE when intelCreatePools() fails. NOTE(review): the CALLOC_STRUCT() result is used unchecked; the GL_FALSE path neither frees intel nor clears driverPrivate; pipe and intel->st are dereferenced without NULL checks; fthrottle_mode is assigned but never read. */ +GLboolean +intelCreateContext(const __GLcontextModes * visual, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate) +{ + struct intel_context *intel = CALLOC_STRUCT(intel_context); + __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; + struct intel_screen *intelScreen = intel_screen(sPriv); + drmI830Sarea *saPriv = intelScreen->sarea; + int fthrottle_mode; + GLboolean havePools; + struct pipe_context *pipe; + struct st_context *st_share = NULL; + + if (sharedContextPrivate) { + st_share = ((struct intel_context *) sharedContextPrivate)->st; + } + + driContextPriv->driverPrivate = intel; + intel->intelScreen = intelScreen; + intel->driScreen = sPriv; + intel->sarea = saPriv; + + driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache, + intel->driScreen->myNum, "i915"); + + + /* + * memory pools + */ + DRM_LIGHT_LOCK(sPriv->fd, &sPriv->pSAREA->lock, driContextPriv->hHWContext); + // ZZZ JB should be per screen and not be done per context + havePools = intelCreatePools(sPriv); + DRM_UNLOCK(sPriv->fd, &sPriv->pSAREA->lock, driContextPriv->hHWContext); + if (!havePools) + return GL_FALSE; + + + /* Dri stuff */ + intel->hHWContext = driContextPriv->hHWContext; +
intel->driFd = sPriv->fd; + intel->driHwLock = (drmLock *) & sPriv->pSAREA->lock; + + fthrottle_mode = driQueryOptioni(&intel->optionCache, "fthrottle_mode");/* NOTE(review): the value is not used anywhere later in this function */ + intel->iw.irq_seq = -1; + intel->irqsEmitted = 0; + + intel->last_swap_fence = NULL; + intel->first_swap_fence = NULL; + +#ifdef DEBUG + __intel_debug = driParseDebugString(getenv("INTEL_DEBUG"), debug_control); +#endif + intel->base.hardware_lock = intel_lock_hardware; + intel->base.hardware_unlock = intel_unlock_hardware; + intel->base.hardware_locked = intel_locked_hardware; + + intel_be_init_context(&intel->base, &intelScreen->base); + + /* + * Pipe-related setup + */ + if (getenv("INTEL_SP")) { + /* use softpipe driver instead of hw */ + pipe = intel_create_softpipe( intel, &intelScreen->base.base ); + } + else { + switch (intel->intelScreen->deviceID) { + case PCI_CHIP_I945_G: + case PCI_CHIP_I945_GM: + case PCI_CHIP_I945_GME: + case PCI_CHIP_G33_G: + case PCI_CHIP_Q33_G: + case PCI_CHIP_Q35_G: + case PCI_CHIP_I915_G: + case PCI_CHIP_I915_GM: + pipe = i915_create_context(intelScreen->base.screen, + &intelScreen->base.base, + &intel->base.base); + break; + default: + fprintf(stderr, "Unknown PCIID %x in %s, using software driver\n", + intel->intelScreen->deviceID, __FUNCTION__); + + pipe = intel_create_softpipe( intel, &intelScreen->base.base ); + break; + } + } + + pipe->priv = intel;/* NOTE(review): pipe is not checked for NULL after the create calls above */ + + intel->st = st_create_context(pipe, visual, st_share); + + driInitExtensions( intel->st->ctx, card_extensions, GL_TRUE ); + + return GL_TRUE; +} + +/* intelDestroyContext: finishes outstanding state-tracker work, waits for and releases last_swap_fence and first_swap_fence when set, clears intelScreen->dummyContext if it points at this context, then destroys the state-tracker context and the backend context and frees the intel_context. Does nothing beyond the assert when driverPrivate is NULL. */ +void +intelDestroyContext(__DRIcontextPrivate * driContextPriv) +{ + struct intel_context *intel = intel_context(driContextPriv); + + assert(intel); /* should never be null */ + if (intel) { + st_finish(intel->st); + + if (intel->last_swap_fence) { + driFenceFinish(intel->last_swap_fence, DRM_FENCE_TYPE_EXE, GL_TRUE); + driFenceUnReference(&intel->last_swap_fence); + intel->last_swap_fence = NULL; + } + if (intel->first_swap_fence) { +
driFenceFinish(intel->first_swap_fence, DRM_FENCE_TYPE_EXE, GL_TRUE); + driFenceUnReference(&intel->first_swap_fence); + intel->first_swap_fence = NULL; + } + + if (intel->intelScreen->dummyContext == intel) + intel->intelScreen->dummyContext = NULL; + + st_destroy_context(intel->st); + intel_be_destroy_context(&intel->base); + free(intel);/* NOTE(review): driContextPriv->driverPrivate is not cleared in this function and still points at the freed struct */ + } +} + +/* intelUnbindContext: flushes the render cache of the context being unbound through st_flush() and always returns GL_TRUE; the state tracker's current context is left untouched (see the XXX below). */ +GLboolean +intelUnbindContext(__DRIcontextPrivate * driContextPriv) +{ + struct intel_context *intel = intel_context(driContextPriv); + st_flush(intel->st, PIPE_FLUSH_RENDER_CACHE, NULL); + /* XXX make_current(NULL)? */ + return GL_TRUE; +} + +/* intelMakeCurrent: with a non-NULL driContextPriv, records the context as intelScreen->dummyContext, makes it current in the state tracker with the draw and read framebuffers, refreshes the window size when the bound drawable or its lastStamp changed, and separately refreshes a distinct read drawable. With a NULL driContextPriv, unbinds via st_make_current(NULL, NULL, NULL). Always returns GL_TRUE. NOTE(review): driDrawPriv and driReadPriv are dereferenced without NULL checks when a context is given. */ +GLboolean +intelMakeCurrent(__DRIcontextPrivate * driContextPriv, + __DRIdrawablePrivate * driDrawPriv, + __DRIdrawablePrivate * driReadPriv) +{ + if (driContextPriv) { + struct intel_context *intel = intel_context(driContextPriv); + struct intel_framebuffer *draw_fb = intel_framebuffer(driDrawPriv); + struct intel_framebuffer *read_fb = intel_framebuffer(driReadPriv); + + assert(draw_fb->stfb); + assert(read_fb->stfb); + + /* This is for situations in which we need a rendering context but + * there may not be any currently bound. + */ + intel->intelScreen->dummyContext = intel; + + st_make_current(intel->st, draw_fb->stfb, read_fb->stfb); + + if ((intel->driDrawable != driDrawPriv) || + (intel->lastStamp != driDrawPriv->lastStamp)) { + intel->driDrawable = driDrawPriv; + intelUpdateWindowSize(driDrawPriv); + intel->lastStamp = driDrawPriv->lastStamp; + } + + /* The size of the draw buffer will have been updated above. + * If the readbuffer is a different window, check/update its size now.
+ */ + if (driReadPriv != driDrawPriv) { + intelUpdateWindowSize(driReadPriv); + } + + } + else { + st_make_current(NULL, NULL, NULL); + } + + return GL_TRUE; +} diff --git a/src/gallium/winsys/drm/intel/dri/intel_context.h b/src/gallium/winsys/drm/intel/dri/intel_context.h new file mode 100644 index 0000000000..5d22a422af --- /dev/null +++ b/src/gallium/winsys/drm/intel/dri/intel_context.h @@ -0,0 +1,164 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef INTEL_CONTEXT_H +#define INTEL_CONTEXT_H + +#include <stdint.h> +#include "drm.h" + +#include "pipe/p_debug.h" + +#include "intel_screen.h" +#include "i915_drm.h" + +#include "intel_be_context.h" + + +struct pipe_context; +struct intel_context; +struct _DriBufferObject; +struct st_context; + + +#define INTEL_MAX_FIXUP 64 + +/** + * Intel rendering context, contains a state tracker and intel-specific info. + */ +struct intel_context +{ + struct intel_be_context base;/* must stay the first member: intel_context.c casts struct intel_be_context * back to struct intel_context * in its hardware-lock callbacks */ + struct st_context *st; + + struct _DriFenceObject *last_swap_fence; + struct _DriFenceObject *first_swap_fence; + +// struct intel_batchbuffer *batch; + + boolean locked; + char *prevLockFile; + int prevLockLine; + + uint irqsEmitted; + drm_i915_irq_wait_t iw; + + drm_context_t hHWContext; + drmLock *driHwLock; + int driFd; + + __DRIdrawablePrivate *driDrawable; + __DRIscreenPrivate *driScreen; + struct intel_screen *intelScreen; + drmI830Sarea *sarea; + + uint lastStamp; + + /** + * Configuration cache + */ + driOptionCache optionCache; +}; + + + +/** + * Intel framebuffer. + */ +struct intel_framebuffer +{ + struct st_framebuffer *stfb; + + /* other fields TBD */ + int other; +}; + + + + +/* These are functions now: + */ +void LOCK_HARDWARE( struct intel_context *intel ); +void UNLOCK_HARDWARE( struct intel_context *intel ); + +extern char *__progname; + + + +/* ================================================================ + * Debugging: + */ +#ifdef DEBUG +extern int __intel_debug; + +#define DEBUG_SWAP 0x1 +#define DEBUG_LOCK 0x2 +#define DEBUG_IOCTL 0x4 +#define DEBUG_BATCH 0x8 + +#define DBG(flag, ...) do { \ + if (__intel_debug & (DEBUG_##flag)) \ + printf(__VA_ARGS__); \ +} while(0) + +#else +#define DBG(flag, ...)
+#endif + + +/* PCI device ids. intelCreateContext() selects the i915 hardware pipe only for the I915, I945, G33, Q33 and Q35 ids; every other id falls back to softpipe. */ +#define PCI_CHIP_845_G 0x2562 +#define PCI_CHIP_I830_M 0x3577 +#define PCI_CHIP_I855_GM 0x3582 +#define PCI_CHIP_I865_G 0x2572 +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GME 0x27AE +#define PCI_CHIP_G33_G 0x29C2 +#define PCI_CHIP_Q35_G 0x29B2 +#define PCI_CHIP_Q33_G 0x29D2 + + +/** Cast wrapper: returns driContextPriv->driverPrivate as a struct intel_context pointer; driContextPriv itself is dereferenced unchecked. */ +static INLINE struct intel_context * +intel_context(__DRIcontextPrivate *driContextPriv) +{ + return (struct intel_context *) driContextPriv->driverPrivate; +} + + +/** Cast wrapper: returns driDrawPriv->driverPrivate as a struct intel_framebuffer pointer; driDrawPriv itself is dereferenced unchecked. */ +static INLINE struct intel_framebuffer * +intel_framebuffer(__DRIdrawablePrivate * driDrawPriv) +{ + return (struct intel_framebuffer *) driDrawPriv->driverPrivate; +} + + +#endif diff --git a/src/gallium/winsys/drm/intel/dri/intel_lock.c b/src/gallium/winsys/drm/intel/dri/intel_lock.c new file mode 100644 index 0000000000..ad1c202429 --- /dev/null +++ b/src/gallium/winsys/drm/intel/dri/intel_lock.c @@ -0,0 +1,102 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "main/glheader.h" +#include "pipe/p_thread.h" +#include <GL/internal/glcore.h> +#include "state_tracker/st_public.h" +#include "intel_context.h" +#include "i830_dri.h" + + +/* File-scope mutex taken at the top of LOCK_HARDWARE() and released near the end of UNLOCK_HARDWARE(), so it is held for the whole time a context in this process owns the hardware lock. */ +pipe_static_mutex( lockMutex ); + +/* intelContendedLock: slow path used by LOCK_HARDWARE() when its DRM_CAS reports failure. Takes the lock with drmGetLock(), revalidates the drawable info when a drawable is bound, and calls intelUpdateScreenRotation() when the SAREA width or height differs from the cached front-buffer size. */ +static void +intelContendedLock(struct intel_context *intel, uint flags) +{ + __DRIdrawablePrivate *dPriv = intel->driDrawable; + __DRIscreenPrivate *sPriv = intel->driScreen; + struct intel_screen *intelScreen = intel_screen(sPriv); + drmI830Sarea *sarea = intel->sarea; + + drmGetLock(intel->driFd, intel->hHWContext, flags); + + DBG(LOCK, "%s - got contended lock\n", __progname); + + /* If the window moved, may need to set a new cliprect now. + * + * NOTE: This releases and regains the hw lock, so all state + * checking must be done *after* this call: + */ + if (dPriv) + DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv); + + if (sarea->width != intelScreen->front.width || + sarea->height != intelScreen->front.height) { + + intelUpdateScreenRotation(sPriv, sarea); + } +} + + +/* Lock the hardware and validate our state.
+ */ +void LOCK_HARDWARE( struct intel_context *intel ) +{ + char __ret = 0; + + pipe_mutex_lock(lockMutex);/* held until UNLOCK_HARDWARE() releases it, so the function must not be re-entered on the same thread before unlocking */ + assert(!intel->locked); + + DRM_CAS(intel->driHwLock, intel->hHWContext, + (DRM_LOCK_HELD|intel->hHWContext), __ret);/* a non-zero __ret sends us down the contended path below */ + + if (__ret) + intelContendedLock( intel, 0 ); + + DBG(LOCK, "%s - locked\n", __progname); + + intel->locked = 1; +} + + +/* Unlock the hardware for the context passed in (intel): clears intel->locked, drops the DRM lock, then releases lockMutex. + */ +void UNLOCK_HARDWARE( struct intel_context *intel ) +{ + assert(intel->locked); + intel->locked = 0; + + DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext); + + pipe_mutex_unlock(lockMutex); + + DBG(LOCK, "%s - unlocked\n", __progname); +} diff --git a/src/gallium/winsys/drm/intel/dri/intel_reg.h b/src/gallium/winsys/drm/intel/dri/intel_reg.h new file mode 100644 index 0000000000..4f33bee438 --- /dev/null +++ b/src/gallium/winsys/drm/intel/dri/intel_reg.h @@ -0,0 +1,53 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef _INTEL_REG_H_ +#define _INTEL_REG_H_ + + +#define BR00_BITBLT_CLIENT 0x40000000 +#define BR00_OP_COLOR_BLT 0x10000000 +#define BR00_OP_SRC_COPY_BLT 0x10C00000 +#define BR13_SOLID_PATTERN 0x80000000 + +#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|0x4) +#define XY_COLOR_BLT_WRITE_ALPHA (1<<21) +#define XY_COLOR_BLT_WRITE_RGB (1<<20) + +#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) +#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21) +#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20) + +#define MI_WAIT_FOR_EVENT ((0x3<<23)) +#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) +#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) + +#define MI_BATCH_BUFFER_END (0xA<<23) + + +#endif diff --git a/src/gallium/winsys/drm/intel/dri/intel_screen.c b/src/gallium/winsys/drm/intel/dri/intel_screen.c new file mode 100644 index 0000000000..ed75368982 --- /dev/null +++ b/src/gallium/winsys/drm/intel/dri/intel_screen.c @@ -0,0 +1,703 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "utils.h" +#include "vblank.h" +#include "xmlpool.h" + +#include "intel_context.h" +#include "intel_screen.h" +#include "intel_batchbuffer.h" +#include "intel_swapbuffers.h" + +#include "i830_dri.h" +#include "ws_dri_bufpool.h" + +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "pipe/p_inlines.h" +#include "state_tracker/st_public.h" +#include "state_tracker/st_cb_fbo.h" + +static void +intelCreateSurface(struct intel_screen *intelScreen, struct pipe_winsys *winsys, unsigned handle); + +static void +intelCreateSurface(struct intel_screen *intelScreen, struct pipe_winsys *winsys, unsigned handle) +{ + struct pipe_screen *screen = intelScreen->base.screen; + struct pipe_texture *texture; + struct pipe_texture templat; + struct pipe_surface *surface; + struct pipe_buffer *buffer; + unsigned pitch; + + assert(intelScreen->front.cpp == 4); + + buffer = intel_be_buffer_from_handle(&intelScreen->base, + "front", handle); + + if (!buffer) + return; + + intelScreen->front.buffer = dri_bo(buffer); + + memset(&templat, 0, sizeof(templat)); + templat.tex_usage |= PIPE_TEXTURE_USAGE_DISPLAY_TARGET; + templat.target = PIPE_TEXTURE_2D; + templat.last_level = 0; + templat.depth[0] = 1; + templat.format = PIPE_FORMAT_A8R8G8B8_UNORM; + templat.width[0] = intelScreen->front.width; + templat.height[0] = intelScreen->front.height; + pf_get_block(templat.format, &templat.block); + pitch = intelScreen->front.pitch; + + texture = screen->texture_blanket(screen, + &templat, + &pitch, + buffer); + + /* Unref the buffer we don't need it anyways */ + pipe_buffer_reference(screen, &buffer, NULL); + + surface = screen->get_tex_surface(screen, + texture, + 0, + 0, + 0, + PIPE_BUFFER_USAGE_GPU_WRITE); + + intelScreen->front.texture = texture; + intelScreen->front.surface = surface; +} + +PUBLIC const char __driConfigOptions[] = + DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE + 
DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) + DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) + DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY +// DRI_CONF_FORCE_S3TC_ENABLE(false) + DRI_CONF_ALLOW_LARGE_TEXTURES(1) + DRI_CONF_SECTION_END DRI_CONF_END; + +const uint __driNConfigOptions = 3; + +#ifdef USE_NEW_INTERFACE +static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; +#endif /*USE_NEW_INTERFACE */ + +extern const struct dri_extension card_extensions[]; + +static GLboolean +intel_get_param(__DRIscreenPrivate *psp, int param, int *value) +{ + int ret; + struct drm_i915_getparam gp; + + gp.param = param; + gp.value = value; + + ret = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM, &gp, sizeof(gp)); + if (ret) { + fprintf(stderr, "drm_i915_getparam: %d\n", ret); + return GL_FALSE; + } + + return GL_TRUE; +} + +static void +intelSetTexOffset(__DRIcontext *pDRICtx, int texname, + unsigned long long offset, int depth, uint pitch) +{ + abort(); +#if 0 + struct intel_context *intel = (struct intel_context*) + ((__DRIcontextPrivate*)pDRICtx->private)->driverPrivate; + struct gl_texture_object *tObj = _mesa_lookup_texture(&intel->ctx, texname); + struct st_texture_object *stObj = st_texture_object(tObj); + + if (!stObj) + return; + + if (stObj->pt) + st->pipe->texture_release(intel->st->pipe, &stObj->pt); + + stObj->imageOverride = GL_TRUE; + stObj->depthOverride = depth; + stObj->pitchOverride = pitch; + + if (offset) + stObj->textureOffset = offset; +#endif +} + + +#if 0 +static void +intelHandleDrawableConfig(__DRIdrawablePrivate *dPriv, + __DRIcontextPrivate *pcp, + __DRIDrawableConfigEvent *event) +{ + (void) dPriv; + (void) pcp; + (void) event; +} +#endif + +#if 0 +static void +intelHandleBufferAttach(__DRIdrawablePrivate *dPriv, + __DRIcontextPrivate *pcp, + __DRIBufferAttachEvent *ba) +{ + struct intel_screen *intelScreen = intel_screen(dPriv->driScreenPriv); + + switch (ba->buffer.attachment) { + case DRI_DRAWABLE_BUFFER_FRONT_LEFT: + 
intelScreen->front.width = dPriv->w; + intelScreen->front.height = dPriv->h; + intelScreen->front.cpp = ba->buffer.cpp; + intelScreen->front.pitch = ba->buffer.pitch; + driGenBuffers(intelScreen->base.staticPool, "front", 1, &intelScreen->front.buffer, 0, 0, 0); + driBOSetReferenced(intelScreen->front.buffer, ba->buffer.handle); + break; + + case DRI_DRAWABLE_BUFFER_BACK_LEFT: + case DRI_DRAWABLE_BUFFER_DEPTH: + case DRI_DRAWABLE_BUFFER_STENCIL: + case DRI_DRAWABLE_BUFFER_ACCUM: + /* anything ?? */ + break; + + default: + fprintf(stderr, "unhandled buffer attach event, attachment type %d\n", + ba->buffer.attachment); + return; + } +} +#endif + +static const __DRItexOffsetExtension intelTexOffsetExtension = { + { __DRI_TEX_OFFSET }, + intelSetTexOffset, +}; + +#if 0 +static const __DRItexBufferExtension intelTexBufferExtension = { + { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION }, + intelSetTexBuffer, +}; +#endif + +static const __DRIextension *intelScreenExtensions[] = { + &driReadDrawableExtension, + &driCopySubBufferExtension.base, + &driSwapControlExtension.base, + &driFrameTrackingExtension.base, + &driMediaStreamCounterExtension.base, + &intelTexOffsetExtension.base, +// &intelTexBufferExtension.base, + NULL +}; + + +static void +intelPrintDRIInfo(struct intel_screen * intelScreen, + __DRIscreenPrivate * sPriv, I830DRIPtr gDRIPriv) +{ + fprintf(stderr, "*** Front size: 0x%x offset: 0x%x pitch: %d\n", + intelScreen->front.size, intelScreen->front.offset, + intelScreen->front.pitch); + fprintf(stderr, "*** Memory : 0x%x\n", gDRIPriv->mem); +} + + +#if 0 +static void +intelPrintSAREA(const drmI830Sarea * sarea) +{ + fprintf(stderr, "SAREA: sarea width %d height %d\n", sarea->width, + sarea->height); + fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch); + fprintf(stderr, + "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x\n", + sarea->front_offset, sarea->front_size, + (unsigned) sarea->front_handle); + fprintf(stderr, + "SAREA: back offset: 0x%08x size: 0x%x 
handle: 0x%x\n", + sarea->back_offset, sarea->back_size, + (unsigned) sarea->back_handle); + fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x\n", + sarea->depth_offset, sarea->depth_size, + (unsigned) sarea->depth_handle); + fprintf(stderr, "SAREA: tex offset: 0x%08x size: 0x%x handle: 0x%x\n", + sarea->tex_offset, sarea->tex_size, (unsigned) sarea->tex_handle); + fprintf(stderr, "SAREA: rotation: %d\n", sarea->rotation); + fprintf(stderr, + "SAREA: rotated offset: 0x%08x size: 0x%x\n", + sarea->rotated_offset, sarea->rotated_size); + fprintf(stderr, "SAREA: rotated pitch: %d\n", sarea->rotated_pitch); +} +#endif + + +/** + * Use the information in the sarea to update the screen parameters + * related to screen rotation. Needs to be called locked. + */ +void +intelUpdateScreenRotation(__DRIscreenPrivate * sPriv, drmI830Sarea * sarea) +{ + struct intel_screen *intelScreen = intel_screen(sPriv); + + if (intelScreen->front.map) { + drmUnmap(intelScreen->front.map, intelScreen->front.size); + intelScreen->front.map = NULL; + } + + if (intelScreen->front.buffer) + driDeleteBuffers(1, &intelScreen->front.buffer); + + intelScreen->front.width = sarea->width; + intelScreen->front.height = sarea->height; + intelScreen->front.offset = sarea->front_offset; + intelScreen->front.pitch = sarea->pitch * intelScreen->front.cpp; + intelScreen->front.size = sarea->front_size; + intelScreen->front.handle = sarea->front_handle; + + assert( sarea->front_size >= + intelScreen->front.pitch * intelScreen->front.height ); + +#if 0 /* JB not important */ + if (!sarea->front_handle) + return; + + if (drmMap(sPriv->fd, + sarea->front_handle, + intelScreen->front.size, + (drmAddress *) & intelScreen->front.map) != 0) { + fprintf(stderr, "drmMap(frontbuffer) failed!\n"); + return; + } +#endif + +#if 0 /* JB */ + if (intelScreen->staticPool) { + driGenBuffers(intelScreen->staticPool, "static region", 1, + &intelScreen->front.buffer, 64, + DRM_BO_FLAG_MEM_TT | 
DRM_BO_FLAG_NO_MOVE | + DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0); + + driBOSetStatic(intelScreen->front.buffer, + intelScreen->front.offset, + intelScreen->front.pitch * intelScreen->front.height, + intelScreen->front.map, 0); + } +#else + if (intelScreen->base.staticPool) { + if (intelScreen->front.buffer) { + driBOUnReference(intelScreen->front.buffer); + pipe_surface_reference(&intelScreen->front.surface, NULL); + pipe_texture_reference(&intelScreen->front.texture, NULL); + } + intelCreateSurface(intelScreen, &intelScreen->base.base, sarea->front_bo_handle); + } +#endif +} + + +boolean +intelCreatePools(__DRIscreenPrivate * sPriv) +{ + //unsigned batchPoolSize = 1024*1024; + struct intel_screen *intelScreen = intel_screen(sPriv); + + if (intelScreen->havePools) + return GL_TRUE; + + intelScreen->havePools = GL_TRUE; + + if (intelScreen->sarea) + intelUpdateScreenRotation(sPriv, intelScreen->sarea); + + return GL_TRUE; +} + +static const char * +intel_get_name( struct pipe_winsys *winsys ) +{ + return "Intel/DRI/ttm"; +} + +/* + * The state tracker (should!) keep track of whether the fake + * frontbuffer has been touched by any rendering since the last time + * we copied its contents to the real frontbuffer. Our task is easy: + */ +static void +intel_flush_frontbuffer( struct pipe_winsys *winsys, + struct pipe_surface *surf, + void *context_private) +{ + struct intel_context *intel = (struct intel_context *) context_private; + __DRIdrawablePrivate *dPriv = intel->driDrawable; + + intelDisplaySurface(dPriv, surf, NULL); +} + +static boolean +intelInitDriver(__DRIscreenPrivate * sPriv) +{ + struct intel_screen *intelScreen; + I830DRIPtr gDRIPriv = (I830DRIPtr) sPriv->pDevPriv; + + if (sPriv->devPrivSize != sizeof(I830DRIRec)) { + fprintf(stderr, + "\nERROR! 
sizeof(I830DRIRec) does not match passed size from device driver\n"); + return GL_FALSE; + } + + /* Allocate the private area */ + intelScreen = CALLOC_STRUCT(intel_screen); + if (!intelScreen) + return GL_FALSE; + + /* parse information in __driConfigOptions */ + driParseOptionInfo(&intelScreen->optionCache, + __driConfigOptions, __driNConfigOptions); + + sPriv->private = (void *) intelScreen; + intelScreen->sarea = (drmI830Sarea *) (((GLubyte *) sPriv->pSAREA) + + gDRIPriv->sarea_priv_offset); + + intelScreen->deviceID = gDRIPriv->deviceID; + + intelScreen->front.cpp = gDRIPriv->cpp; + intelScreen->drmMinor = sPriv->drm_version.minor; + intelUpdateScreenRotation(sPriv, intelScreen->sarea); + + if (0) + intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv); + + sPriv->extensions = intelScreenExtensions; + + intelScreen->base.base.flush_frontbuffer = intel_flush_frontbuffer; + intelScreen->base.base.get_name = intel_get_name; + intel_be_init_device(&intelScreen->base, sPriv->fd, intelScreen->deviceID); + + return GL_TRUE; +} + + +static void +intelDestroyScreen(__DRIscreenPrivate * sPriv) +{ + struct intel_screen *intelScreen = intel_screen(sPriv); + + intel_be_destroy_device(&intelScreen->base); + /* intelUnmapScreenRegions(intelScreen); */ + + FREE(intelScreen); + sPriv->private = NULL; +} + + +/** + * This is called when we need to set up GL rendering to a new X window. 
+ */
+static boolean
+intelCreateBuffer(__DRIscreenPrivate * driScrnPriv,
+                  __DRIdrawablePrivate * driDrawPriv,
+                  const __GLcontextModes * visual, boolean isPixmap)
+{
+   if (isPixmap) {
+      return GL_FALSE;          /* not implemented */
+   }
+   else {
+      enum pipe_format colorFormat, depthFormat, stencilFormat;
+      struct intel_framebuffer *intelfb = CALLOC_STRUCT(intel_framebuffer);
+
+      if (!intelfb)
+         return GL_FALSE;
+
+      /* Translate the requested visual into pipe formats. */
+      if (visual->redBits == 5)
+         colorFormat = PIPE_FORMAT_R5G6B5_UNORM;
+      else
+         colorFormat = PIPE_FORMAT_A8R8G8B8_UNORM;
+
+      if (visual->depthBits == 16)
+         depthFormat = PIPE_FORMAT_Z16_UNORM;
+      else if (visual->depthBits == 24)
+         depthFormat = PIPE_FORMAT_S8Z24_UNORM;
+      else
+         depthFormat = PIPE_FORMAT_NONE;
+
+      if (visual->stencilBits == 8)
+         stencilFormat = PIPE_FORMAT_S8Z24_UNORM;
+      else
+         stencilFormat = PIPE_FORMAT_NONE;
+
+      intelfb->stfb = st_create_framebuffer(visual,
+                                            colorFormat,
+                                            depthFormat,
+                                            stencilFormat,
+                                            driDrawPriv->w,
+                                            driDrawPriv->h,
+                                            (void*) intelfb);
+      if (!intelfb->stfb) {
+         /* Allocated with CALLOC_STRUCT, so release with the matching FREE
+          * (as intelDestroyScreen does) rather than raw free().
+          */
+         FREE(intelfb);
+         return GL_FALSE;
+      }
+
+      driDrawPriv->driverPrivate = (void *) intelfb;
+      return GL_TRUE;
+   }
+}
+
+/**
+ * Release the framebuffer wrapper that intelCreateBuffer() attached to
+ * the drawable.
+ */
+static void
+intelDestroyBuffer(__DRIdrawablePrivate * driDrawPriv)
+{
+   struct intel_framebuffer *intelfb = intel_framebuffer(driDrawPriv);
+   assert(intelfb->stfb);
+   st_unreference_framebuffer(intelfb->stfb);
+   FREE(intelfb);
+   /* Clear the pointer set by intelCreateBuffer() so nothing can reach the
+    * freed wrapper; intelGetSwapInfo() tests driverPrivate against NULL.
+    */
+   driDrawPriv->driverPrivate = NULL;
+}
+
+
+/**
+ * Get information about previous buffer swaps.
+ */ +static int +intelGetSwapInfo(__DRIdrawablePrivate * dPriv, __DRIswapInfo * sInfo) +{ + if ((dPriv == NULL) || (dPriv->driverPrivate == NULL) + || (sInfo == NULL)) { + return -1; + } + + return 0; +} + +static __DRIconfig ** +intelFillInModes(__DRIscreenPrivate *psp, + unsigned pixel_bits, unsigned depth_bits, + unsigned stencil_bits, GLboolean have_back_buffer) +{ + __DRIconfig **configs; + __GLcontextModes *m; + unsigned num_modes; + unsigned depth_buffer_factor; + unsigned back_buffer_factor; + GLenum fb_format; + GLenum fb_type; + int i; + + /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't + * support pageflipping at all. + */ + static const GLenum back_buffer_modes[] = { + GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML + }; + + uint8_t depth_bits_array[3]; + uint8_t stencil_bits_array[3]; + uint8_t msaa_samples_array[1]; + + + depth_bits_array[0] = 0; + depth_bits_array[1] = depth_bits; + depth_bits_array[2] = depth_bits; + msaa_samples_array[0] = 0; + + /* Just like with the accumulation buffer, always provide some modes + * with a stencil buffer. It will be a sw fallback, but some apps won't + * care about that. + */ + stencil_bits_array[0] = 0; + stencil_bits_array[1] = 0; + if (depth_bits == 24) + stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits; + + stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits; + + depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1; + back_buffer_factor = (have_back_buffer) ? 
3 : 1; + + num_modes = depth_buffer_factor * back_buffer_factor * 4; + + if (pixel_bits == 16) { + fb_format = GL_RGB; + fb_type = GL_UNSIGNED_SHORT_5_6_5; + } + else { + fb_format = GL_BGRA; + fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; + } + + configs = driCreateConfigs(fb_format, fb_type, + depth_bits_array, stencil_bits_array, + depth_buffer_factor, back_buffer_modes, + back_buffer_factor, msaa_samples_array, 1); + if (configs == NULL) { + fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__, + __LINE__); + return NULL; + } + + /* Mark the visual as slow if there are "fake" stencil bits. + */ + for (i = 0; configs[i]; i++) { + m = &configs[i]->modes; + if ((m->stencilBits != 0) && (m->stencilBits != stencil_bits)) { + m->visualRating = GLX_SLOW_CONFIG; + } + } + + return configs; +} + +/** + * This is the driver specific part of the createNewScreen entry point. + * + * \todo maybe fold this into intelInitDriver + * + * \return the __GLcontextModes supported by this driver + */ +static const __DRIconfig **intelInitScreen(__DRIscreenPrivate *psp) +{ +#ifdef I915 + static const __DRIversion ddx_expected = { 1, 5, 0 }; +#else + static const __DRIversion ddx_expected = { 1, 6, 0 }; +#endif + static const __DRIversion dri_expected = { 4, 0, 0 }; + static const __DRIversion drm_expected = { 1, 5, 0 }; + I830DRIPtr dri_priv = (I830DRIPtr) psp->pDevPriv; + + if (!driCheckDriDdxDrmVersions2("i915", + &psp->dri_version, &dri_expected, + &psp->ddx_version, &ddx_expected, + &psp->drm_version, &drm_expected)) { + return NULL; + } + + /* Calling driInitExtensions here, with a NULL context pointer, + * does not actually enable the extensions. It just makes sure + * that all the dispatch offsets for all the extensions that + * *might* be enables are known. This is needed because the + * dispatch offsets need to be known when _mesa_context_create is + * called, but we can't enable the extensions until we have a + * context pointer. + * + * Hello chicken. Hello egg. 
How are you two today? + */ + driInitExtensions( NULL, card_extensions, GL_FALSE ); + //intelInitExtensions(NULL, GL_TRUE); + + if (!intelInitDriver(psp)) + return NULL; + + psp->extensions = intelScreenExtensions; + + return (const __DRIconfig **) + intelFillInModes(psp, dri_priv->cpp * 8, + (dri_priv->cpp == 2) ? 16 : 24, + (dri_priv->cpp == 2) ? 0 : 8, 1); +} + +/** + * This is the driver specific part of the createNewScreen entry point. + * + * \return the __GLcontextModes supported by this driver + */ +static const +__DRIconfig **intelInitScreen2(__DRIscreenPrivate *psp) +{ + struct intel_screen *intelScreen; + + /* Calling driInitExtensions here, with a NULL context pointer, + * does not actually enable the extensions. It just makes sure + * that all the dispatch offsets for all the extensions that + * *might* be enables are known. This is needed because the + * dispatch offsets need to be known when _mesa_context_create is + * called, but we can't enable the extensions until we have a + * context pointer. + * + * Hello chicken. Hello egg. How are you two today? + */ + //intelInitExtensions(NULL, GL_TRUE); + + /* Allocate the private area */ + intelScreen = CALLOC_STRUCT(intel_screen); + if (!intelScreen) { + fprintf(stderr, "\nERROR! Allocating private area failed\n"); + return GL_FALSE; + } + /* parse information in __driConfigOptions */ + driParseOptionInfo(&intelScreen->optionCache, + __driConfigOptions, __driNConfigOptions); + + psp->private = (void *) intelScreen; + + intelScreen->drmMinor = psp->drm_version.minor; + + /* Determine chipset ID? 
*/ + if (!intel_get_param(psp, I915_PARAM_CHIPSET_ID, + &intelScreen->deviceID)) + return GL_FALSE; + + psp->extensions = intelScreenExtensions; + + intel_be_init_device(&intelScreen->base, psp->fd, intelScreen->deviceID); + intelScreen->base.base.flush_frontbuffer = intel_flush_frontbuffer; + intelScreen->base.base.get_name = intel_get_name; + + return driConcatConfigs(intelFillInModes(psp, 16, 16, 0, 1), + intelFillInModes(psp, 32, 24, 8, 1)); +} + +const struct __DriverAPIRec driDriverAPI = { + .InitScreen = intelInitScreen, + .DestroyScreen = intelDestroyScreen, + .CreateContext = intelCreateContext, + .DestroyContext = intelDestroyContext, + .CreateBuffer = intelCreateBuffer, + .DestroyBuffer = intelDestroyBuffer, + .SwapBuffers = intelSwapBuffers, + .MakeCurrent = intelMakeCurrent, + .UnbindContext = intelUnbindContext, + .GetSwapInfo = intelGetSwapInfo, + .GetDrawableMSC = driDrawableGetMSC32, + .WaitForMSC = driWaitForMSC32, + .CopySubBuffer = intelCopySubBuffer, + + //.InitScreen2 = intelInitScreen2, + //.HandleDrawableConfig = intelHandleDrawableConfig, + //.HandleBufferAttach = intelHandleBufferAttach, +}; diff --git a/src/gallium/winsys/drm/intel/dri/intel_screen.h b/src/gallium/winsys/drm/intel/dri/intel_screen.h new file mode 100644 index 0000000000..0bb43a915c --- /dev/null +++ b/src/gallium/winsys/drm/intel/dri/intel_screen.h @@ -0,0 +1,122 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _INTEL_SCREEN_H_ +#define _INTEL_SCREEN_H_ + +#include "dri_util.h" +#include "i830_common.h" +#include "xmlconfig.h" +#include "ws_dri_bufpool.h" + +#include "pipe/p_compiler.h" + +#include "intel_be_device.h" + +struct intel_screen +{ + struct intel_be_device base; + + struct { + drm_handle_t handle; + + /* We create a static dri buffer for the frontbuffer. 
+ */ + struct _DriBufferObject *buffer; + struct pipe_surface *surface; + struct pipe_texture *texture; + + char *map; /* memory map */ + int offset; /* from start of video mem, in bytes */ + int pitch; /* row stride, in bytes */ + int width; + int height; + int size; + int cpp; /* for front and back buffers */ + } front; + + int deviceID; + int drmMinor; + + drmI830Sarea *sarea; + + /** + * Configuration cache with default values for all contexts + */ + driOptionCache optionCache; + + boolean havePools; + + /** + * Temporary(?) context to use for SwapBuffers or other situations in + * which we need a rendering context, but none is currently bound. + */ + struct intel_context *dummyContext; + + /* + * New stuff form the i915tex integration + */ + unsigned batch_id; + + + struct pipe_winsys *winsys; +}; + + + +/** cast wrapper */ +static INLINE struct intel_screen * +intel_screen(__DRIscreenPrivate *sPriv) +{ + return (struct intel_screen *) sPriv->private; +} + + +extern void +intelUpdateScreenRotation(__DRIscreenPrivate * sPriv, drmI830Sarea * sarea); + + +extern void intelDestroyContext(__DRIcontextPrivate * driContextPriv); + +extern boolean intelUnbindContext(__DRIcontextPrivate * driContextPriv); + +extern boolean +intelMakeCurrent(__DRIcontextPrivate * driContextPriv, + __DRIdrawablePrivate * driDrawPriv, + __DRIdrawablePrivate * driReadPriv); + + +extern boolean +intelCreatePools(__DRIscreenPrivate *sPriv); + +extern boolean +intelCreateContext(const __GLcontextModes * visual, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate); + + +#endif diff --git a/src/gallium/winsys/drm/intel/dri/intel_swapbuffers.c b/src/gallium/winsys/drm/intel/dri/intel_swapbuffers.c new file mode 100644 index 0000000000..34ad7eebe1 --- /dev/null +++ b/src/gallium/winsys/drm/intel/dri/intel_swapbuffers.c @@ -0,0 +1,260 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. 
+ * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "intel_screen.h" +#include "intel_context.h" +#include "intel_swapbuffers.h" + +#include "intel_reg.h" + +#include "pipe/p_context.h" +#include "state_tracker/st_public.h" +#include "state_tracker/st_context.h" +#include "state_tracker/st_cb_fbo.h" + +#include "ws_dri_bufmgr.h" +#include "intel_batchbuffer.h" + +/** + * Display a colorbuffer surface in an X window. + * Used for SwapBuffers and flushing front buffer rendering. + * + * \param dPriv the window/drawable to display into + * \param surf the surface to display + * \param rect optional subrect of surface to display (may be NULL). 
+ */ +void +intelDisplaySurface(__DRIdrawablePrivate *dPriv, + struct pipe_surface *surf, + const drm_clip_rect_t *rect) +{ + struct intel_screen *intelScreen = intel_screen(dPriv->driScreenPriv); + struct intel_context *intel = intelScreen->dummyContext; + + DBG(SWAP, "%s\n", __FUNCTION__); + + if (!intel) { + /* XXX this is where some kind of extra/meta context could be useful */ + return; + } + + if (intel->last_swap_fence) { + driFenceFinish(intel->last_swap_fence, DRM_FENCE_TYPE_EXE, TRUE); + driFenceUnReference(&intel->last_swap_fence); + intel->last_swap_fence = NULL; + } + intel->last_swap_fence = intel->first_swap_fence; + intel->first_swap_fence = NULL; + + /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets + * should work regardless. + */ + LOCK_HARDWARE(intel); + /* if this drawable isn't currently bound the LOCK_HARDWARE done on the + * current context (which is what intelScreenContext should return) might + * not get a contended lock and thus cliprects not updated (tests/manywin) + */ + if (intel_context(dPriv->driContextPriv) != intel) + DRI_VALIDATE_DRAWABLE_INFO(intel->driScreen, dPriv); + + + if (dPriv && dPriv->numClipRects) { + const int srcWidth = surf->width; + const int srcHeight = surf->height; + const int nbox = dPriv->numClipRects; + const drm_clip_rect_t *pbox = dPriv->pClipRects; + const int pitch = intelScreen->front.pitch / intelScreen->front.cpp; + const int cpp = intelScreen->front.cpp; + const int srcpitch = surf->stride / cpp; + int BR13, CMD; + int i; + + ASSERT(surf->buffer); + + DBG(SWAP, "screen pitch %d src surface pitch %d\n", + pitch, surf->stride); + + if (cpp == 2) { + BR13 = (pitch * cpp) | (0xCC << 16) | (1 << 24); + CMD = XY_SRC_COPY_BLT_CMD; + } + else { + BR13 = (pitch * cpp) | (0xCC << 16) | (1 << 24) | (1 << 25); + CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB); + } + + for (i = 0; i < nbox; i++, pbox++) { + drm_clip_rect_t box; + drm_clip_rect_t sbox; + + if 
(pbox->x1 > pbox->x2 || + pbox->y1 > pbox->y2 || + pbox->x2 > intelScreen->front.width || + pbox->y2 > intelScreen->front.height) { + /* invalid cliprect, skip it */ + continue; + } + + box = *pbox; + + if (rect) { + /* intersect cliprect with user-provided src rect */ + drm_clip_rect_t rrect; + + rrect.x1 = dPriv->x + rect->x1; + rrect.y1 = (dPriv->h - rect->y1 - rect->y2) + dPriv->y; + rrect.x2 = rect->x2 + rrect.x1; + rrect.y2 = rect->y2 + rrect.y1; + if (rrect.x1 > box.x1) + box.x1 = rrect.x1; + if (rrect.y1 > box.y1) + box.y1 = rrect.y1; + if (rrect.x2 < box.x2) + box.x2 = rrect.x2; + if (rrect.y2 < box.y2) + box.y2 = rrect.y2; + + if (box.x1 > box.x2 || box.y1 > box.y2) + continue; + } + + /* restrict blit to size of actually rendered area */ + if (box.x2 - box.x1 > srcWidth) + box.x2 = srcWidth + box.x1; + if (box.y2 - box.y1 > srcHeight) + box.y2 = srcHeight + box.y1; + + DBG(SWAP, "box x1 x2 y1 y2 %d %d %d %d\n", + box.x1, box.x2, box.y1, box.y2); + + sbox.x1 = box.x1 - dPriv->x; + sbox.y1 = box.y1 - dPriv->y; + + assert(box.x1 < box.x2); + assert(box.y1 < box.y2); + + /* XXX this could be done with pipe->surface_copy() */ + /* XXX should have its own batch buffer */ + if (!BEGIN_BATCH(8, 2)) { + /* + * Since we share this batch buffer with a context + * we can't flush it since that risks a GPU lockup + */ + assert(0); + continue; + } + + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((box.y1 << 16) | box.x1); + OUT_BATCH((box.y2 << 16) | box.x2); + + OUT_RELOC(intelScreen->front.buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE, 0); + OUT_BATCH((sbox.y1 << 16) | sbox.x1); + OUT_BATCH((srcpitch * cpp) & 0xffff); + OUT_RELOC(dri_bo(surf->buffer), + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + DRM_BO_MASK_MEM | DRM_BO_FLAG_READ, 0); + + } + + if (intel->first_swap_fence) + driFenceUnReference(&intel->first_swap_fence); + intel->first_swap_fence = intel_be_batchbuffer_flush(intel->base.batch); + } + + UNLOCK_HARDWARE(intel); 
+ + if (intel->lastStamp != dPriv->lastStamp) { + intelUpdateWindowSize(dPriv); + intel->lastStamp = dPriv->lastStamp; + } +} + + + +/** + * This will be called whenever the currently bound window is moved/resized. + */ +void +intelUpdateWindowSize(__DRIdrawablePrivate *dPriv) +{ + struct intel_framebuffer *intelfb = intel_framebuffer(dPriv); + assert(intelfb->stfb); + st_resize_framebuffer(intelfb->stfb, dPriv->w, dPriv->h); +} + + + +void +intelSwapBuffers(__DRIdrawablePrivate * dPriv) +{ + struct intel_framebuffer *intel_fb = intel_framebuffer(dPriv); + struct pipe_surface *back_surf; + + assert(intel_fb); + assert(intel_fb->stfb); + + back_surf = st_get_framebuffer_surface(intel_fb->stfb, + ST_SURFACE_BACK_LEFT); + if (back_surf) { + st_notify_swapbuffers(intel_fb->stfb); + intelDisplaySurface(dPriv, back_surf, NULL); + st_notify_swapbuffers_complete(intel_fb->stfb); + } +} + + +/** + * Called via glXCopySubBufferMESA() to copy a subrect of the back + * buffer to the front buffer/screen. + */ +void +intelCopySubBuffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h) +{ + struct intel_framebuffer *intel_fb = intel_framebuffer(dPriv); + struct pipe_surface *back_surf; + + assert(intel_fb); + assert(intel_fb->stfb); + + back_surf = st_get_framebuffer_surface(intel_fb->stfb, + ST_SURFACE_BACK_LEFT); + if (back_surf) { + drm_clip_rect_t rect; + rect.x1 = x; + rect.y1 = y; + rect.x2 = w; + rect.y2 = h; + + st_notify_swapbuffers(intel_fb->stfb); + intelDisplaySurface(dPriv, back_surf, &rect); + } +} diff --git a/src/gallium/winsys/drm/intel/dri/intel_swapbuffers.h b/src/gallium/winsys/drm/intel/dri/intel_swapbuffers.h new file mode 100644 index 0000000000..46c9bab3af --- /dev/null +++ b/src/gallium/winsys/drm/intel/dri/intel_swapbuffers.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef INTEL_SWAPBUFFERS_H +#define INTEL_SWAPBUFFERS_H + + +struct pipe_surface; + + +extern void intelDisplaySurface(__DRIdrawablePrivate * dPriv, + struct pipe_surface *surf, + const drm_clip_rect_t * rect); + +extern void intelSwapBuffers(__DRIdrawablePrivate * dPriv); + +extern void intelCopySubBuffer(__DRIdrawablePrivate * dPriv, + int x, int y, int w, int h); + +extern void intelUpdateWindowSize(__DRIdrawablePrivate *dPriv); + + +#endif /* INTEL_SWAPBUFFERS_H */ diff --git a/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c b/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c new file mode 100644 index 0000000000..20920a2052 --- /dev/null +++ b/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c @@ -0,0 +1,82 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ +/* + * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com> + */ + +#include "intel_context.h" +#include "intel_winsys_softpipe.h" +#include "pipe/p_defines.h" +#include "pipe/p_format.h" +#include "util/u_memory.h" +#include "softpipe/sp_winsys.h" + + +struct intel_softpipe_winsys { + struct softpipe_winsys sws; + struct intel_context *intel; +}; + +/** + * Return list of surface formats supported by this driver. + */ +static boolean +intel_is_format_supported(struct softpipe_winsys *sws, + enum pipe_format format) +{ + switch(format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + return TRUE; + default: + return FALSE; + } +} + + +/** + * Create rendering context which uses software rendering. + */ +struct pipe_context * +intel_create_softpipe( struct intel_context *intel, + struct pipe_winsys *winsys ) +{ + struct intel_softpipe_winsys *isws = CALLOC_STRUCT( intel_softpipe_winsys ); + struct pipe_screen *screen = softpipe_create_screen(winsys); + + /* Fill in this struct with callbacks that softpipe will need to + * communicate with the window system, buffer manager, etc. 
+ */ + isws->sws.is_format_supported = intel_is_format_supported; + isws->intel = intel; + + /* Create the softpipe context: + */ + return softpipe_create( screen, winsys, &isws->sws ); +} diff --git a/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.h b/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.h new file mode 100644 index 0000000000..5fa14cb749 --- /dev/null +++ b/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.h @@ -0,0 +1,39 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef INTEL_SOFTPIPE_H +#define INTEL_SOFTPIPE_H + +struct pipe_winsys; +struct pipe_context; +struct intel_context; + +struct pipe_context * +intel_create_softpipe( struct intel_context *intel, + struct pipe_winsys *winsys ); + +#endif diff --git a/src/gallium/winsys/drm/intel/dri/server/i830_common.h b/src/gallium/winsys/drm/intel/dri/server/i830_common.h new file mode 100644 index 0000000000..3452ddb3c9 --- /dev/null +++ b/src/gallium/winsys/drm/intel/dri/server/i830_common.h @@ -0,0 +1,255 @@ +/************************************************************************** + +Copyright 2001 VA Linux Systems Inc., Fremont, California. +Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +**************************************************************************/ + + +#ifndef _I830_COMMON_H_ +#define _I830_COMMON_H_ + + +#define I830_NR_TEX_REGIONS 255 /* maximum due to use of chars for next/prev */ +#define I830_LOG_MIN_TEX_REGION_SIZE 14 + + +/* Driver specific DRM command indices + * NOTE: these are not OS specific, but they are driver specific + */ +#define DRM_I830_INIT 0x00 +#define DRM_I830_FLUSH 0x01 +#define DRM_I830_FLIP 0x02 +#define DRM_I830_BATCHBUFFER 0x03 +#define DRM_I830_IRQ_EMIT 0x04 +#define DRM_I830_IRQ_WAIT 0x05 +#define DRM_I830_GETPARAM 0x06 +#define DRM_I830_SETPARAM 0x07 +#define DRM_I830_ALLOC 0x08 +#define DRM_I830_FREE 0x09 +#define DRM_I830_INIT_HEAP 0x0a +#define DRM_I830_CMDBUFFER 0x0b +#define DRM_I830_DESTROY_HEAP 0x0c +#define DRM_I830_SET_VBLANK_PIPE 0x0d +#define DRM_I830_GET_VBLANK_PIPE 0x0e +#define DRM_I830_MMIO 0x10 + +typedef struct { + enum { + I830_INIT_DMA = 0x01, + I830_CLEANUP_DMA = 0x02, + I830_RESUME_DMA = 0x03 + } func; + unsigned int mmio_offset; + int sarea_priv_offset; + unsigned int ring_start; + unsigned int ring_end; + unsigned int ring_size; + unsigned int front_offset; + unsigned int back_offset; + unsigned int depth_offset; + unsigned int w; + unsigned int h; + unsigned int pitch; + unsigned int pitch_bits; + unsigned int back_pitch; + unsigned int depth_pitch; + unsigned int cpp; + unsigned int chipset; +} drmI830Init; + +typedef struct { + drmTextureRegion texList[I830_NR_TEX_REGIONS+1]; + int last_upload; /* last time texture was uploaded */ + int last_enqueue; /* last time a buffer was enqueued */ + int last_dispatch; /* age of the most recently dispatched buffer */ + int ctxOwner; /* last context to upload state */ + /** Last context that used the buffer manager. */ + int texAge; + int pf_enabled; /* is pageflipping allowed? */ + int pf_active; + int pf_current_page; /* which buffer is being displayed? 
*/ + int perf_boxes; /* performance boxes to be displayed */ + int width, height; /* screen size in pixels */ + + drm_handle_t front_handle; + int front_offset; + int front_size; + + drm_handle_t back_handle; + int back_offset; + int back_size; + + drm_handle_t depth_handle; + int depth_offset; + int depth_size; + + drm_handle_t tex_handle; + int tex_offset; + int tex_size; + int log_tex_granularity; + int pitch; + int rotation; /* 0, 90, 180 or 270 */ + int rotated_offset; + int rotated_size; + int rotated_pitch; + int virtualX, virtualY; + + unsigned int front_tiled; + unsigned int back_tiled; + unsigned int depth_tiled; + unsigned int rotated_tiled; + unsigned int rotated2_tiled; + + int planeA_x; + int planeA_y; + int planeA_w; + int planeA_h; + int planeB_x; + int planeB_y; + int planeB_w; + int planeB_h; + + /* Triple buffering */ + drm_handle_t third_handle; + int third_offset; + int third_size; + unsigned int third_tiled; + + /* buffer object handles for the static buffers. May change + * over the lifetime of the client, though it doesn't in our current + * implementation. + */ + unsigned int front_bo_handle; + unsigned int back_bo_handle; + unsigned int third_bo_handle; + unsigned int depth_bo_handle; +} drmI830Sarea; + +/* Flags for perf_boxes + */ +#define I830_BOX_RING_EMPTY 0x1 /* populated by kernel */ +#define I830_BOX_FLIP 0x2 /* populated by kernel */ +#define I830_BOX_WAIT 0x4 /* populated by kernel & client */ +#define I830_BOX_TEXTURE_LOAD 0x8 /* populated by kernel */ +#define I830_BOX_LOST_CONTEXT 0x10 /* populated by client */ + + +typedef struct { + int start; /* agp offset */ + int used; /* nr bytes in use */ + int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */ + int DR4; /* window origin for GFX_OP_DRAWRECT_INFO*/ + int num_cliprects; /* multipass with multiple cliprects? 
*/ + drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */ +} drmI830BatchBuffer; + +typedef struct { + char *buf; /* pointer to userspace command buffer */ + int sz; /* nr bytes in use */ + int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */ + int DR4; /* window origin for GFX_OP_DRAWRECT_INFO*/ + int num_cliprects; /* multipass with multiple cliprects? */ + drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */ +} drmI830CmdBuffer; + +typedef struct { + int *irq_seq; +} drmI830IrqEmit; + +typedef struct { + int irq_seq; +} drmI830IrqWait; + +typedef struct { + int param; + int *value; +} drmI830GetParam; + +#define I830_PARAM_IRQ_ACTIVE 1 +#define I830_PARAM_ALLOW_BATCHBUFFER 2 + +typedef struct { + int param; + int value; +} drmI830SetParam; + +#define I830_SETPARAM_USE_MI_BATCHBUFFER_START 1 +#define I830_SETPARAM_TEX_LRU_LOG_GRANULARITY 2 +#define I830_SETPARAM_ALLOW_BATCHBUFFER 3 + + +/* A memory manager for regions of shared memory: + */ +#define I830_MEM_REGION_AGP 1 + +typedef struct { + int region; + int alignment; + int size; + int *region_offset; /* offset from start of fb or agp */ +} drmI830MemAlloc; + +typedef struct { + int region; + int region_offset; +} drmI830MemFree; + +typedef struct { + int region; + int size; + int start; +} drmI830MemInitHeap; + +typedef struct { + int region; +} drmI830MemDestroyHeap; + +#define DRM_I830_VBLANK_PIPE_A 1 +#define DRM_I830_VBLANK_PIPE_B 2 + +typedef struct { + int pipe; +} drmI830VBlankPipe; + +#define MMIO_READ 0 +#define MMIO_WRITE 1 + +#define MMIO_REGS_IA_PRIMATIVES_COUNT 0 +#define MMIO_REGS_IA_VERTICES_COUNT 1 +#define MMIO_REGS_VS_INVOCATION_COUNT 2 +#define MMIO_REGS_GS_PRIMITIVES_COUNT 3 +#define MMIO_REGS_GS_INVOCATION_COUNT 4 +#define MMIO_REGS_CL_PRIMITIVES_COUNT 5 +#define MMIO_REGS_CL_INVOCATION_COUNT 6 +#define MMIO_REGS_PS_INVOCATION_COUNT 7 +#define MMIO_REGS_PS_DEPTH_COUNT 8 + +typedef struct { + unsigned int read_write:1; + unsigned int reg:31; + void __user *data; +} drmI830MMIO; + +#endif /* 
_I830_COMMON_H_ */ diff --git a/src/gallium/winsys/drm/intel/dri/server/i830_dri.h b/src/gallium/winsys/drm/intel/dri/server/i830_dri.h new file mode 100644 index 0000000000..0d514b6c38 --- /dev/null +++ b/src/gallium/winsys/drm/intel/dri/server/i830_dri.h @@ -0,0 +1,62 @@ + +#ifndef _I830_DRI_H +#define _I830_DRI_H + +#include "xf86drm.h" +#include "i830_common.h" + +#define I830_MAX_DRAWABLES 256 + +#define I830_MAJOR_VERSION 1 +#define I830_MINOR_VERSION 7 +#define I830_PATCHLEVEL 2 + +#define I830_REG_SIZE 0x80000 + +typedef struct _I830DRIRec { + drm_handle_t regs; + drmSize regsSize; + + drmSize unused1; /* backbufferSize */ + drm_handle_t unused2; /* backbuffer */ + + drmSize unused3; /* depthbufferSize */ + drm_handle_t unused4; /* depthbuffer */ + + drmSize unused5; /* rotatedSize */ + drm_handle_t unused6; /* rotatedbuffer */ + + drm_handle_t unused7; /* textures */ + int unused8; /* textureSize */ + + drm_handle_t unused9; /* agp_buffers */ + drmSize unused10; /* agp_buf_size */ + + int deviceID; + int width; + int height; + int mem; + int cpp; + int bitsPerPixel; + + int unused11[8]; /* was front/back/depth/rotated offset/pitch */ + + int unused12; /* logTextureGranularity */ + int unused13; /* textureOffset */ + + int irq; + int sarea_priv_offset; +} I830DRIRec, *I830DRIPtr; + +typedef struct { + /* Nothing here yet */ + int dummy; +} I830ConfigPrivRec, *I830ConfigPrivPtr; + +typedef struct { + /* Nothing here yet */ + int dummy; +} I830DRIContextRec, *I830DRIContextPtr; + + +#endif diff --git a/src/gallium/winsys/drm/intel/egl/Makefile b/src/gallium/winsys/drm/intel/egl/Makefile new file mode 100644 index 0000000000..f0b5a44389 --- /dev/null +++ b/src/gallium/winsys/drm/intel/egl/Makefile @@ -0,0 +1,28 @@ +TOP = ../../../../../.. 
+include $(TOP)/configs/current + +LIBNAME = EGL_i915.so + +PIPE_DRIVERS = \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/i915simple/libi915simple.a \ + ../common/libinteldrm.a + +DRIVER_SOURCES = \ + intel_swapbuffers.c \ + intel_context.c \ + intel_device.c \ + intel_egl.c + +C_SOURCES = \ + $(COMMON_GALLIUM_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +DRIVER_DEFINES = -I../common $(shell pkg-config libdrm --atleast-version=2.3.1 \ + && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") + +include ../../Makefile.template + +symlinks: diff --git a/src/gallium/winsys/drm/intel/egl/SConscript b/src/gallium/winsys/drm/intel/egl/SConscript new file mode 100644 index 0000000000..0ad19d42a8 --- /dev/null +++ b/src/gallium/winsys/drm/intel/egl/SConscript @@ -0,0 +1,39 @@ +Import('*') + +env = drienv.Clone() + +env.Append(CPPPATH = [ + '../intel', + 'server' +]) + +#MINIGLX_SOURCES = server/intel_dri.c + +DRIVER_SOURCES = [ + 'intel_winsys_pipe.c', + 'intel_winsys_softpipe.c', + 'intel_winsys_i915.c', + 'intel_batchbuffer.c', + 'intel_swapbuffers.c', + 'intel_context.c', + 'intel_lock.c', + 'intel_screen.c', + 'intel_batchpool.c', +] + +sources = \ + COMMON_GALLIUM_SOURCES + \ + COMMON_BM_SOURCES + \ + DRIVER_SOURCES + +drivers = [ + softpipe, + i915simple +] + +# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions +env.SharedLibrary( + target ='i915tex_dri.so', + source = sources, + LIBS = drivers + mesa + auxiliaries + env['LIBS'], +)
\ No newline at end of file diff --git a/src/gallium/winsys/drm/intel/egl/intel_batchbuffer.h b/src/gallium/winsys/drm/intel/egl/intel_batchbuffer.h new file mode 100644 index 0000000000..3e95326168 --- /dev/null +++ b/src/gallium/winsys/drm/intel/egl/intel_batchbuffer.h @@ -0,0 +1,24 @@ +#ifndef INTEL_BATCHBUFFER_H +#define INTEL_BATCHBUFFER_H + +#include "intel_be_batchbuffer.h" + +/* + * Need to redefine the BATCH defines + */ + +#undef BEGIN_BATCH +#define BEGIN_BATCH(dwords, relocs) \ + (i915_batchbuffer_check(&intel->base.batch->base, dwords, relocs)) + +#undef OUT_BATCH +#define OUT_BATCH(d) \ + i915_batchbuffer_dword(&intel->base.batch->base, d) + +#undef OUT_RELOC +#define OUT_RELOC(buf,flags,mask,delta) do { \ + assert((delta) >= 0); \ + intel_be_offset_relocation(intel->base.batch, delta, buf, flags, mask); \ +} while (0) + +#endif diff --git a/src/gallium/winsys/drm/intel/egl/intel_context.c b/src/gallium/winsys/drm/intel/egl/intel_context.c new file mode 100644 index 0000000000..927addb834 --- /dev/null +++ b/src/gallium/winsys/drm/intel/egl/intel_context.c @@ -0,0 +1,242 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "i915simple/i915_screen.h" + +#include "intel_device.h" +#include "intel_context.h" +#include "intel_batchbuffer.h" + +#include "state_tracker/st_public.h" +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "intel_egl.h" +#include "utils.h" + +#ifdef DEBUG +int __intel_debug = 0; +#endif + + +#define need_GL_ARB_multisample +#define need_GL_ARB_point_parameters +#define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object +#define need_GL_ARB_vertex_program +#define need_GL_ARB_window_pos +#define need_GL_EXT_blend_color +#define need_GL_EXT_blend_equation_separate +#define need_GL_EXT_blend_func_separate +#define need_GL_EXT_blend_minmax +#define need_GL_EXT_cull_vertex +#define need_GL_EXT_fog_coord +#define need_GL_EXT_framebuffer_object +#define need_GL_EXT_multi_draw_arrays +#define need_GL_EXT_secondary_color +#define need_GL_NV_vertex_program +#include "extension_helper.h" + + +/** + * Extension strings exported by the intel driver. + * + * \note + * It appears that ARB_texture_env_crossbar has "disappeared" compared to the + * old i830-specific driver. 
+ */ +const struct dri_extension card_extensions[] = { + {"GL_ARB_multisample", GL_ARB_multisample_functions}, + {"GL_ARB_multitexture", NULL}, + {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, + {"GL_ARB_texture_border_clamp", NULL}, + {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions}, + {"GL_ARB_texture_cube_map", NULL}, + {"GL_ARB_texture_env_add", NULL}, + {"GL_ARB_texture_env_combine", NULL}, + {"GL_ARB_texture_env_dot3", NULL}, + {"GL_ARB_texture_mirrored_repeat", NULL}, + {"GL_ARB_texture_rectangle", NULL}, + {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions}, + {"GL_ARB_pixel_buffer_object", NULL}, + {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions}, + {"GL_ARB_window_pos", GL_ARB_window_pos_functions}, + {"GL_EXT_blend_color", GL_EXT_blend_color_functions}, + {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions}, + {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, + {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, + {"GL_EXT_blend_subtract", NULL}, + {"GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions}, + {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions}, + {"GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions}, + {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions}, + {"GL_EXT_packed_depth_stencil", NULL}, + {"GL_EXT_pixel_buffer_object", NULL}, + {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, + {"GL_EXT_stencil_wrap", NULL}, + {"GL_EXT_texture_edge_clamp", NULL}, + {"GL_EXT_texture_env_combine", NULL}, + {"GL_EXT_texture_env_dot3", NULL}, + {"GL_EXT_texture_filter_anisotropic", NULL}, + {"GL_EXT_texture_lod_bias", NULL}, + {"GL_3DFX_texture_compression_FXT1", NULL}, + {"GL_APPLE_client_storage", NULL}, + {"GL_MESA_pack_invert", NULL}, + {"GL_MESA_ycbcr_texture", NULL}, + {"GL_NV_blend_square", NULL}, + {"GL_NV_vertex_program", GL_NV_vertex_program_functions}, + {"GL_NV_vertex_program1_1", NULL}, + 
{"GL_SGIS_generate_mipmap", NULL }, + {NULL, NULL} +}; + + +/* + * Hardware lock functions. + * Doesn't do anything in EGL + */ + +static void +intel_lock_hardware(struct intel_be_context *context) +{ + (void)context; +} + +static void +intel_unlock_hardware(struct intel_be_context *context) +{ + (void)context; +} + +static boolean +intel_locked_hardware(struct intel_be_context *context) +{ + (void)context; + return FALSE; +} + + +/* + * Misc functions. + */ + +int +intel_create_context(struct egl_drm_context *egl_context, const __GLcontextModes *visual, void *sharedContextPrivate) +{ + struct intel_context *intel = CALLOC_STRUCT(intel_context); + struct intel_device *device = (struct intel_device *)egl_context->device->priv; + struct pipe_context *pipe; + struct st_context *st_share = NULL; + + egl_context->priv = intel; + + intel->intel_device = device; + intel->egl_context = egl_context; + intel->egl_device = egl_context->device; + + intel->base.hardware_lock = intel_lock_hardware; + intel->base.hardware_unlock = intel_unlock_hardware; + intel->base.hardware_locked = intel_locked_hardware; + + intel_be_init_context(&intel->base, &device->base); + +#if 0 + pipe = intel_create_softpipe(intel, screen->winsys); +#else + pipe = i915_create_context(device->pipe, &device->base.base, &intel->base.base); +#endif + + pipe->priv = intel; + + intel->st = st_create_context(pipe, visual, st_share); + + device->dummy = intel; + + return TRUE; +} + +int +intel_destroy_context(struct egl_drm_context *egl_context) +{ + struct intel_context *intel = egl_context->priv; + + if (intel->intel_device->dummy == intel) + intel->intel_device->dummy = NULL; + + st_destroy_context(intel->st); + intel_be_destroy_context(&intel->base); + free(intel); + return TRUE; +} + +void +intel_make_current(struct egl_drm_context *context, struct egl_drm_drawable *draw, struct egl_drm_drawable *read) +{ + if (context) { + struct intel_context *intel = (struct intel_context *)context->priv; + struct 
intel_framebuffer *draw_fb = (struct intel_framebuffer *)draw->priv; + struct intel_framebuffer *read_fb = (struct intel_framebuffer *)read->priv; + + assert(draw_fb->stfb); + assert(read_fb->stfb); + + st_make_current(intel->st, draw_fb->stfb, read_fb->stfb); + + intel->egl_drawable = draw; + + st_resize_framebuffer(draw_fb->stfb, draw->w, draw->h); + + if (draw != read) + st_resize_framebuffer(read_fb->stfb, read->w, read->h); + + } else { + st_make_current(NULL, NULL, NULL); + } +} + +void +intel_bind_frontbuffer(struct egl_drm_drawable *draw, struct egl_drm_frontbuffer *front) +{ + struct intel_device *device = (struct intel_device *)draw->device->priv; + struct intel_framebuffer *draw_fb = (struct intel_framebuffer *)draw->priv; + + if (draw_fb->front_buffer) + driBOUnReference(draw_fb->front_buffer); + + draw_fb->front_buffer = NULL; + draw_fb->front = NULL; + + /* to unbind just call this function with front == NULL */ + if (!front) + return; + + draw_fb->front = front; + + driGenBuffers(device->base.staticPool, "front", 1, &draw_fb->front_buffer, 0, 0, 0); + driBOSetReferenced(draw_fb->front_buffer, front->handle); + + st_resize_framebuffer(draw_fb->stfb, draw->w, draw->h); +} diff --git a/src/gallium/winsys/drm/intel/egl/intel_context.h b/src/gallium/winsys/drm/intel/egl/intel_context.h new file mode 100644 index 0000000000..477fdec7f7 --- /dev/null +++ b/src/gallium/winsys/drm/intel/egl/intel_context.h @@ -0,0 +1,118 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INTEL_CONTEXT_H +#define INTEL_CONTEXT_H + +#include "pipe/p_debug.h" +#include "intel_be_context.h" + + +struct st_context; +struct egl_drm_device; +struct egl_drm_context; +struct egl_drm_frontbuffer; + + +/** + * Intel rendering context, contains a state tracker and intel-specific info. + */ +struct intel_context +{ + struct intel_be_context base; + + struct st_context *st; + + struct intel_device *intel_device; + + /* new egl stuff */ + struct egl_drm_device *egl_device; + struct egl_drm_context *egl_context; + struct egl_drm_drawable *egl_drawable; +}; + + + +/** + * Intel framebuffer. 
+ */ +struct intel_framebuffer +{ + struct st_framebuffer *stfb; + + struct intel_device *device; + struct _DriBufferObject *front_buffer; + struct egl_drm_frontbuffer *front; +}; + + + + +/* These are functions now: + */ +void LOCK_HARDWARE( struct intel_context *intel ); +void UNLOCK_HARDWARE( struct intel_context *intel ); + +extern char *__progname; + + + +/* ================================================================ + * Debugging: + */ +#ifdef DEBUG +extern int __intel_debug; + +#define DEBUG_SWAP 0x1 +#define DEBUG_LOCK 0x2 +#define DEBUG_IOCTL 0x4 +#define DEBUG_BATCH 0x8 + +#define DBG(flag, ...) do { \ + if (__intel_debug & (DEBUG_##flag)) \ + printf(__VA_ARGS__); \ +} while(0) + +#else +#define DBG(flag, ...) +#endif + + +#define PCI_CHIP_845_G 0x2562 +#define PCI_CHIP_I830_M 0x3577 +#define PCI_CHIP_I855_GM 0x3582 +#define PCI_CHIP_I865_G 0x2572 +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GME 0x27AE +#define PCI_CHIP_G33_G 0x29C2 +#define PCI_CHIP_Q35_G 0x29B2 +#define PCI_CHIP_Q33_G 0x29D2 + +#endif diff --git a/src/gallium/winsys/drm/intel/egl/intel_device.c b/src/gallium/winsys/drm/intel/egl/intel_device.c new file mode 100644 index 0000000000..1964745c99 --- /dev/null +++ b/src/gallium/winsys/drm/intel/egl/intel_device.c @@ -0,0 +1,137 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "utils.h" + +#include "state_tracker/st_public.h" +#include "i915simple/i915_screen.h" + +#include "intel_context.h" +#include "intel_device.h" +#include "intel_batchbuffer.h" +#include "intel_egl.h" + + +extern const struct dri_extension card_extensions[]; + + +int +intel_create_device(struct egl_drm_device *device) +{ + struct intel_device *intel_device; + + /* Allocate the private area */ + intel_device = CALLOC_STRUCT(intel_device); + if (!intel_device) + return FALSE; + + device->priv = (void *)intel_device; + intel_device->device = device; + + intel_device->deviceID = device->deviceID; + + intel_be_init_device(&intel_device->base, device->drmFD, intel_device->deviceID); + + intel_device->pipe = i915_create_screen(&intel_device->base.base, intel_device->deviceID); + + /* hack */ + driInitExtensions(NULL, card_extensions, GL_FALSE); + + return TRUE; +} + +int +intel_destroy_device(struct egl_drm_device *device) +{ + struct intel_device *intel_device = (struct intel_device *)device->priv; + + intel_be_destroy_device(&intel_device->base); + + free(intel_device); + device->priv = NULL; + + return TRUE; +} + +int +intel_create_drawable(struct egl_drm_drawable *drawable, + const __GLcontextModes * visual) +{ + enum pipe_format colorFormat, depthFormat, stencilFormat; + struct intel_framebuffer *intelfb = CALLOC_STRUCT(intel_framebuffer); + + if (!intelfb) + return GL_FALSE; + + intelfb->device = drawable->device->priv; + + if (visual->redBits == 5) + colorFormat = PIPE_FORMAT_R5G6B5_UNORM; + else + colorFormat = PIPE_FORMAT_A8R8G8B8_UNORM; + + if (visual->depthBits == 16) + depthFormat = PIPE_FORMAT_Z16_UNORM; + else if (visual->depthBits == 24) + depthFormat = PIPE_FORMAT_S8Z24_UNORM; + else + depthFormat = PIPE_FORMAT_NONE; + + if (visual->stencilBits == 8) + stencilFormat = PIPE_FORMAT_S8Z24_UNORM; + else + stencilFormat = PIPE_FORMAT_NONE; + + intelfb->stfb = 
st_create_framebuffer(visual, + colorFormat, + depthFormat, + stencilFormat, + drawable->w, + drawable->h, + (void*) intelfb); + + if (!intelfb->stfb) { + free(intelfb); + return GL_FALSE; + } + + drawable->priv = (void *) intelfb; + return GL_TRUE; +} + +int +intel_destroy_drawable(struct egl_drm_drawable *drawable) +{ + struct intel_framebuffer *intelfb = (struct intel_framebuffer *)drawable->priv; + drawable->priv = NULL; + + assert(intelfb->stfb); + st_unreference_framebuffer(intelfb->stfb); + free(intelfb); + return TRUE; +} diff --git a/src/gallium/winsys/drm/intel/egl/intel_device.h b/src/gallium/winsys/drm/intel/egl/intel_device.h new file mode 100644 index 0000000000..323a7c2aef --- /dev/null +++ b/src/gallium/winsys/drm/intel/egl/intel_device.h @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _INTEL_SCREEN_H_ +#define _INTEL_SCREEN_H_ + +#include "intel_be_device.h" + +#include "pipe/p_compiler.h" + +struct pipe_screen; +struct egl_drm_device; +struct intel_context; + +struct intel_device +{ + struct intel_be_device base; + struct pipe_screen *pipe; + + int deviceID; + struct egl_drm_device *device; + + struct intel_context *dummy; +}; + +#endif diff --git a/src/gallium/winsys/drm/intel/egl/intel_egl.c b/src/gallium/winsys/drm/intel/egl/intel_egl.c new file mode 100644 index 0000000000..3204ed3131 --- /dev/null +++ b/src/gallium/winsys/drm/intel/egl/intel_egl.c @@ -0,0 +1,796 @@ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <stdint.h> + +#include "eglconfig.h" +#include "eglcontext.h" +#include "egldisplay.h" +#include "egldriver.h" +#include "eglglobals.h" +#include "eglmode.h" +#include "eglscreen.h" +#include "eglsurface.h" +#include "egllog.h" + +#include "intel_egl.h" + +#include "xf86drm.h" +#include "xf86drmMode.h" + +#include "intel_context.h" + +#include "state_tracker/st_public.h" + +#define MAX_SCREENS 16 + +static void +drm_get_device_id(struct egl_drm_device *device) +{ + char path[512]; + FILE *file; + + /* TODO get the real minor */ + int minor = 0; + + snprintf(path, sizeof(path), "/sys/class/drm/card%d/device/device", minor); + file = fopen(path, "r"); + if (!file) { + _eglLog(_EGL_WARNING, "Could not retrive device ID\n"); + return; + } + + fgets(path, sizeof( path ), file); + sscanf(path, "%x", &device->deviceID); + fclose(file); +} + +static struct egl_drm_device* +egl_drm_create_device(int drmFD) +{ + struct 
egl_drm_device *device = malloc(sizeof(*device)); + memset(device, 0, sizeof(*device)); + device->drmFD = drmFD; + + device->version = drmGetVersion(device->drmFD); + + drm_get_device_id(device); + + if (!intel_create_device(device)) { + free(device); + return NULL; + } + + return device; +} + +static void +_egl_context_modes_destroy(__GLcontextModes *modes) +{ + _eglLog(_EGL_DEBUG, "%s", __FUNCTION__); + + while (modes) { + __GLcontextModes * const next = modes->next; + free(modes); + modes = next; + } +} +/** + * Create a linked list of 'count' GLcontextModes. + * These are used during the client/server visual negotiation phase, + * then discarded. + */ +static __GLcontextModes * +_egl_context_modes_create(unsigned count, size_t minimum_size) +{ + /* This code copied from libGLX, and modified */ + const size_t size = (minimum_size > sizeof(__GLcontextModes)) + ? minimum_size : sizeof(__GLcontextModes); + __GLcontextModes * head = NULL; + __GLcontextModes ** next; + unsigned i; + + _eglLog(_EGL_DEBUG, "%s %d %d", __FUNCTION__, count, minimum_size); + + next = & head; + for (i = 0 ; i < count ; i++) { + *next = (__GLcontextModes *) calloc(1, size); + if (*next == NULL) { + _egl_context_modes_destroy(head); + head = NULL; + break; + } + + (*next)->doubleBufferMode = 1; + (*next)->visualID = GLX_DONT_CARE; + (*next)->visualType = GLX_DONT_CARE; + (*next)->visualRating = GLX_NONE; + (*next)->transparentPixel = GLX_NONE; + (*next)->transparentRed = GLX_DONT_CARE; + (*next)->transparentGreen = GLX_DONT_CARE; + (*next)->transparentBlue = GLX_DONT_CARE; + (*next)->transparentAlpha = GLX_DONT_CARE; + (*next)->transparentIndex = GLX_DONT_CARE; + (*next)->xRenderable = GLX_DONT_CARE; + (*next)->fbconfigID = GLX_DONT_CARE; + (*next)->swapMethod = GLX_SWAP_UNDEFINED_OML; + (*next)->bindToTextureRgb = GLX_DONT_CARE; + (*next)->bindToTextureRgba = GLX_DONT_CARE; + (*next)->bindToMipmapTexture = GLX_DONT_CARE; + (*next)->bindToTextureTargets = 0; + (*next)->yInverted = 
GLX_DONT_CARE; + + next = & ((*next)->next); + } + + return head; +} + +struct drm_screen; + +struct drm_driver +{ + _EGLDriver base; /* base class/object */ + + drmModeResPtr res; + + struct drm_screen *screens[MAX_SCREENS]; + size_t count_screens; + + struct egl_drm_device *device; +}; + +struct drm_surface +{ + _EGLSurface base; /* base class/object */ + + struct egl_drm_drawable *drawable; +}; + +struct drm_context +{ + _EGLContext base; /* base class/object */ + + struct egl_drm_context *context; +}; + +struct drm_screen +{ + _EGLScreen base; + + /* currently only support one connector */ + drmModeConnectorPtr connector; + + /* Has this screen been shown */ + int shown; + + /* Surface that is currently attached to this screen */ + struct drm_surface *surf; + + /* backing buffer */ + drmBO buffer; + + /* framebuffer */ + drmModeFBPtr fb; + uint32_t fbID; + + /* crtc and mode used */ + drmModeCrtcPtr crtc; + uint32_t crtcID; + + struct drm_mode_modeinfo *mode; + + /* geometry of the screen */ + struct egl_drm_frontbuffer front; +}; + +static void +drm_update_res(struct drm_driver *drm_drv) +{ + drmModeFreeResources(drm_drv->res); + drm_drv->res = drmModeGetResources(drm_drv->device->drmFD); +} + +static void +drm_add_modes_from_connector(_EGLScreen *screen, drmModeConnectorPtr connector) +{ + struct drm_mode_modeinfo *m; + int i; + + for (i = 0; i < connector->count_modes; i++) { + m = &connector->modes[i]; + _eglAddNewMode(screen, m->hdisplay, m->vdisplay, m->vrefresh, m->name); + } +} + + +static EGLBoolean +drm_initialize(_EGLDriver *drv, EGLDisplay dpy, EGLint *major, EGLint *minor) +{ + _EGLDisplay *disp = _eglLookupDisplay(dpy); + struct drm_driver *drm_drv = (struct drm_driver *)drv; + struct drm_screen *screen = NULL; + drmModeConnectorPtr connector = NULL; + drmModeResPtr res = NULL; + unsigned count_connectors = 0; + int num_screens = 0; + + EGLint i; + int fd; + + fd = drmOpen("i915", NULL); + if (fd < 0) { + return EGL_FALSE; + } + + drm_drv->device 
= egl_drm_create_device(fd); + if (!drm_drv->device) { + drmClose(fd); + return EGL_FALSE; + } + + drm_update_res(drm_drv); + res = drm_drv->res; + if (res) + count_connectors = res->count_connectors; + + for(i = 0; i < count_connectors && i < MAX_SCREENS; i++) { + connector = drmModeGetConnector(fd, res->connectors[i]); + + if (!connector) + continue; + + if (connector->connection != DRM_MODE_CONNECTED) { + drmModeFreeConnector(connector); + continue; + } + + screen = malloc(sizeof(struct drm_screen)); + memset(screen, 0, sizeof(*screen)); + screen->connector = connector; + _eglInitScreen(&screen->base); + _eglAddScreen(disp, &screen->base); + drm_add_modes_from_connector(&screen->base, connector); + drm_drv->screens[num_screens++] = screen; + } + drm_drv->count_screens = num_screens; + + /* for now we only have one config */ + _EGLConfig *config = calloc(1, sizeof(*config)); + memset(config, 1, sizeof(*config)); + _eglInitConfig(config, 1); + _eglSetConfigAttrib(config, EGL_RED_SIZE, 8); + _eglSetConfigAttrib(config, EGL_GREEN_SIZE, 8); + _eglSetConfigAttrib(config, EGL_BLUE_SIZE, 8); + _eglSetConfigAttrib(config, EGL_ALPHA_SIZE, 8); + _eglSetConfigAttrib(config, EGL_BUFFER_SIZE, 32); + _eglSetConfigAttrib(config, EGL_DEPTH_SIZE, 24); + _eglSetConfigAttrib(config, EGL_STENCIL_SIZE, 8); + _eglSetConfigAttrib(config, EGL_SURFACE_TYPE, EGL_PBUFFER_BIT); + _eglAddConfig(disp, config); + + drv->Initialized = EGL_TRUE; + + *major = 1; + *minor = 4; + + return EGL_TRUE; +} + +static void +drm_takedown_shown_screen(_EGLDriver *drv, struct drm_screen *screen) +{ + struct drm_driver *drm_drv = (struct drm_driver *)drv; + unsigned int i; + + intel_bind_frontbuffer(screen->surf->drawable, NULL); + screen->surf = NULL; + + for (i = 0; i < drm_drv->res->count_crtcs; i++) { + drmModeSetCrtc( + drm_drv->device->drmFD, + drm_drv->res->crtcs[i], + 0, // FD + 0, 0, + NULL, 0, // List of output ids + NULL); + } + + drmModeRmFB(drm_drv->device->drmFD, screen->fbID); + 
drmModeFreeFB(screen->fb); + screen->fb = NULL; + + drmBOUnreference(drm_drv->device->drmFD, &screen->buffer); + + screen->shown = 0; +} + +static EGLBoolean +drm_terminate(_EGLDriver *drv, EGLDisplay dpy) +{ + struct drm_driver *drm_drv = (struct drm_driver *)drv; + struct drm_screen *screen; + int i = 0; + + intel_destroy_device(drm_drv->device); + drmFreeVersion(drm_drv->device->version); + + for (i = 0; i < drm_drv->count_screens; i++) { + screen = drm_drv->screens[i]; + + if (screen->shown) + drm_takedown_shown_screen(drv, screen); + + drmModeFreeConnector(screen->connector); + _eglDestroyScreen(&screen->base); + drm_drv->screens[i] = NULL; + } + + drmClose(drm_drv->device->drmFD); + + free(drm_drv->device); + + _eglCleanupDisplay(_eglLookupDisplay(dpy)); + free(drm_drv); + + return EGL_TRUE; +} + + +static struct drm_context * +lookup_drm_context(EGLContext context) +{ + _EGLContext *c = _eglLookupContext(context); + return (struct drm_context *) c; +} + + +static struct drm_surface * +lookup_drm_surface(EGLSurface surface) +{ + _EGLSurface *s = _eglLookupSurface(surface); + return (struct drm_surface *) s; +} + +static struct drm_screen * +lookup_drm_screen(EGLDisplay dpy, EGLScreenMESA screen) +{ + _EGLScreen *s = _eglLookupScreen(dpy, screen); + return (struct drm_screen *) s; +} + +static __GLcontextModes* +visual_from_config(_EGLConfig *conf) +{ + __GLcontextModes *visual; + (void)conf; + + visual = _egl_context_modes_create(1, sizeof(*visual)); + visual->redBits = 8; + visual->greenBits = 8; + visual->blueBits = 8; + visual->alphaBits = 8; + + visual->rgbBits = 32; + visual->doubleBufferMode = 1; + + visual->depthBits = 24; + visual->haveDepthBuffer = visual->depthBits > 0; + visual->stencilBits = 8; + visual->haveStencilBuffer = visual->stencilBits > 0; + + return visual; +} + + + +static EGLContext +drm_create_context(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, EGLContext share_list, const EGLint *attrib_list) +{ + struct drm_driver *drm_drv = 
(struct drm_driver *)drv; + struct drm_context *c; + struct drm_egl_context *share = NULL; + _EGLConfig *conf; + int i; + int ret; + __GLcontextModes *visual; + struct egl_drm_context *context; + + conf = _eglLookupConfig(drv, dpy, config); + if (!conf) { + _eglError(EGL_BAD_CONFIG, "eglCreateContext"); + return EGL_NO_CONTEXT; + } + + for (i = 0; attrib_list && attrib_list[i] != EGL_NONE; i++) { + switch (attrib_list[i]) { + /* no attribs defined for now */ + default: + _eglError(EGL_BAD_ATTRIBUTE, "eglCreateContext"); + return EGL_NO_CONTEXT; + } + } + + c = (struct drm_context *) calloc(1, sizeof(struct drm_context)); + if (!c) + return EGL_NO_CONTEXT; + + _eglInitContext(drv, dpy, &c->base, config, attrib_list); + + context = malloc(sizeof(*context)); + memset(context, 0, sizeof(*context)); + + if (!context) + goto err_c; + + context->device = drm_drv->device; + visual = visual_from_config(conf); + + ret = intel_create_context(context, visual, share); + free(visual); + + if (!ret) + goto err_gl; + + c->context = context; + + /* generate handle and insert into hash table */ + _eglSaveContext(&c->base); + assert(_eglGetContextHandle(&c->base)); + + return _eglGetContextHandle(&c->base); +err_gl: + free(context); +err_c: + free(c); + return EGL_NO_CONTEXT; +} + +static EGLBoolean +drm_destroy_context(_EGLDriver *drv, EGLDisplay dpy, EGLContext context) +{ + struct drm_context *fc = lookup_drm_context(context); + _eglRemoveContext(&fc->base); + if (fc->base.IsBound) { + fc->base.DeletePending = EGL_TRUE; + } else { + intel_destroy_context(fc->context); + free(fc->context); + free(fc); + } + return EGL_TRUE; +} + + +static EGLSurface +drm_create_window_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, NativeWindowType window, const EGLint *attrib_list) +{ + return EGL_NO_SURFACE; +} + + +static EGLSurface +drm_create_pixmap_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, NativePixmapType pixmap, const EGLint *attrib_list) +{ + return 
EGL_NO_SURFACE; +} + + +static EGLSurface +drm_create_pbuffer_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, + const EGLint *attrib_list) +{ + struct drm_driver *drm_drv = (struct drm_driver *)drv; + int i; + int ret; + int width = -1; + int height = -1; + struct drm_surface *surf = NULL; + struct egl_drm_drawable *drawable = NULL; + __GLcontextModes *visual; + _EGLConfig *conf; + + conf = _eglLookupConfig(drv, dpy, config); + if (!conf) { + _eglError(EGL_BAD_CONFIG, "eglCreatePbufferSurface"); + return EGL_NO_CONTEXT; + } + + for (i = 0; attrib_list && attrib_list[i] != EGL_NONE; i++) { + switch (attrib_list[i]) { + case EGL_WIDTH: + width = attrib_list[++i]; + break; + case EGL_HEIGHT: + height = attrib_list[++i]; + break; + default: + _eglError(EGL_BAD_ATTRIBUTE, "eglCreatePbufferSurface"); + return EGL_NO_SURFACE; + } + } + + if (width < 1 || height < 1) { + _eglError(EGL_BAD_ATTRIBUTE, "eglCreatePbufferSurface"); + return EGL_NO_SURFACE; + } + + surf = (struct drm_surface *) calloc(1, sizeof(struct drm_surface)); + if (!surf) + goto err; + + if (!_eglInitSurface(drv, dpy, &surf->base, EGL_PBUFFER_BIT, config, attrib_list)) + goto err_surf; + + drawable = malloc(sizeof(*drawable)); + memset(drawable, 0, sizeof(*drawable)); + + drawable->w = width; + drawable->h = height; + + visual = visual_from_config(conf); + + drawable->device = drm_drv->device; + ret = intel_create_drawable(drawable, visual); + free(visual); + + if (!ret) + goto err_draw; + + surf->drawable = drawable; + + _eglSaveSurface(&surf->base); + return surf->base.Handle; + +err_draw: + free(drawable); +err_surf: + free(surf); +err: + return EGL_NO_SURFACE; +} + +static EGLSurface +drm_create_screen_surface_mesa(_EGLDriver *drv, EGLDisplay dpy, EGLConfig cfg, + const EGLint *attrib_list) +{ + EGLSurface surf = drm_create_pbuffer_surface(drv, dpy, cfg, attrib_list); + + return surf; +} + +static struct drm_mode_modeinfo * +drm_find_mode(drmModeConnectorPtr connector, _EGLMode *mode) +{ + 
int i; + struct drm_mode_modeinfo *m; + + for (i = 0; i < connector->count_modes; i++) { + m = &connector->modes[i]; + if (m->hdisplay == mode->Width && m->vdisplay == mode->Height && m->vrefresh == mode->RefreshRate) + break; + m = &connector->modes[0]; /* if we can't find one, return first */ + } + + return m; +} +static void +draw(size_t x, size_t y, size_t w, size_t h, size_t pitch, size_t v, unsigned int *ptr) +{ + int i, j; + + for (i = x; i < x + w; i++) + for(j = y; j < y + h; j++) + ptr[(i * pitch / 4) + j] = v; + +} + +static void +prettyColors(int fd, unsigned int handle, size_t pitch) +{ + drmBO bo; + unsigned int *ptr; + void *p; + int i; + + drmBOReference(fd, handle, &bo); + drmBOMap(fd, &bo, DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0, &p); + ptr = (unsigned int*)p; + + for (i = 0; i < (bo.size / 4); i++) + ptr[i] = 0xFFFFFFFF; + + for (i = 0; i < 4; i++) + draw(i * 40, i * 40, 40, 40, pitch, 0, ptr); + + + draw(200, 100, 40, 40, pitch, 0xff00ff, ptr); + draw(100, 200, 40, 40, pitch, 0xff00ff, ptr); + + drmBOUnmap(fd, &bo); +} + +static EGLBoolean +drm_show_screen_surface_mesa(_EGLDriver *drv, EGLDisplay dpy, + EGLScreenMESA screen, + EGLSurface surface, EGLModeMESA m) +{ + struct drm_driver *drm_drv = (struct drm_driver *)drv; + struct drm_surface *surf = lookup_drm_surface(surface); + struct drm_screen *scrn = lookup_drm_screen(dpy, screen); + _EGLMode *mode = _eglLookupMode(dpy, m); + size_t pitch = mode->Width * 4; + size_t size = mode->Height * pitch; + int ret; + unsigned int i,j,k; + + if (scrn->shown) + drm_takedown_shown_screen(drv, scrn); + + ret = drmBOCreate(drm_drv->device->drmFD, size, 0, 0, + DRM_BO_FLAG_READ | + DRM_BO_FLAG_WRITE | + DRM_BO_FLAG_MEM_TT | + DRM_BO_FLAG_MEM_VRAM | + DRM_BO_FLAG_NO_EVICT, + DRM_BO_HINT_DONT_FENCE, &scrn->buffer); + + if (ret) + return EGL_FALSE; + + prettyColors(drm_drv->device->drmFD, scrn->buffer.handle, pitch); + + ret = drmModeAddFB(drm_drv->device->drmFD, mode->Width, mode->Height, + 32, 32, pitch, + 
scrn->buffer.handle, + &scrn->fbID); + + if (ret) + goto err_bo; + + scrn->fb = drmModeGetFB(drm_drv->device->drmFD, scrn->fbID); + if (!scrn->fb) + goto err_bo; + + for (j = 0; j < drm_drv->res->count_connectors; j++) { + drmModeConnector *con = drmModeGetConnector(drm_drv->device->drmFD, drm_drv->res->connectors[j]); + scrn->mode = drm_find_mode(con, mode); + if (!scrn->mode) + goto err_fb; + + for (k = 0; k < con->count_encoders; k++) { + drmModeEncoder *enc = drmModeGetEncoder(drm_drv->device->drmFD, con->encoders[k]); + for (i = 0; i < drm_drv->res->count_crtcs; i++) { + if (enc->possible_crtcs & (1<<i)) { + ret = drmModeSetCrtc( + drm_drv->device->drmFD, + drm_drv->res->crtcs[i], + scrn->fbID, + 0, 0, + &drm_drv->res->connectors[j], 1, + scrn->mode); + /* skip the other crtcs now */ + i = drm_drv->res->count_crtcs; + } + } + } + } + + scrn->front.handle = scrn->buffer.handle; + scrn->front.pitch = pitch; + scrn->front.width = mode->Width; + scrn->front.height = mode->Height; + + scrn->surf = surf; + intel_bind_frontbuffer(surf->drawable, &scrn->front); + + scrn->shown = 1; + + return EGL_TRUE; + +err_fb: + /* TODO remove fb */ + +err_bo: + drmBOUnreference(drm_drv->device->drmFD, &scrn->buffer); + return EGL_FALSE; +} + +static EGLBoolean +drm_destroy_surface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface) +{ + struct drm_surface *fs = lookup_drm_surface(surface); + _eglRemoveSurface(&fs->base); + if (fs->base.IsBound) { + fs->base.DeletePending = EGL_TRUE; + } else { + intel_bind_frontbuffer(fs->drawable, NULL); + intel_destroy_drawable(fs->drawable); + free(fs->drawable); + free(fs); + } + return EGL_TRUE; +} + + +static EGLBoolean +drm_make_current(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext context) +{ + struct drm_surface *readSurf = lookup_drm_surface(read); + struct drm_surface *drawSurf = lookup_drm_surface(draw); + struct drm_context *ctx = lookup_drm_context(context); + EGLBoolean b; + + b = 
_eglMakeCurrent(drv, dpy, draw, read, context); + if (!b) + return EGL_FALSE; + + if (ctx) { + if (!drawSurf || !readSurf) + return EGL_FALSE; + + intel_make_current(ctx->context, drawSurf->drawable, readSurf->drawable); + } else { + intel_make_current(NULL, NULL, NULL); + } + + return EGL_TRUE; +} + +static EGLBoolean +drm_swap_buffers(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw) +{ + struct drm_surface *surf = lookup_drm_surface(draw); + if (!surf) + return EGL_FALSE; + + /* error checking */ + if (!_eglSwapBuffers(drv, dpy, draw)) + return EGL_FALSE; + + intel_swap_buffers(surf->drawable); + return EGL_TRUE; +} + + +/** + * The bootstrap function. Return a new drm_driver object and + * plug in API functions. + */ +_EGLDriver * +_eglMain(_EGLDisplay *dpy, const char *args) +{ + struct drm_driver *drm; + + drm = (struct drm_driver *) calloc(1, sizeof(struct drm_driver)); + if (!drm) { + return NULL; + } + + /* First fill in the dispatch table with defaults */ + _eglInitDriverFallbacks(&drm->base); + /* then plug in our Drm-specific functions */ + drm->base.API.Initialize = drm_initialize; + drm->base.API.Terminate = drm_terminate; + drm->base.API.CreateContext = drm_create_context; + drm->base.API.MakeCurrent = drm_make_current; + drm->base.API.CreateWindowSurface = drm_create_window_surface; + drm->base.API.CreatePixmapSurface = drm_create_pixmap_surface; + drm->base.API.CreatePbufferSurface = drm_create_pbuffer_surface; + drm->base.API.DestroySurface = drm_destroy_surface; + drm->base.API.DestroyContext = drm_destroy_context; + drm->base.API.CreateScreenSurfaceMESA = drm_create_screen_surface_mesa; + drm->base.API.ShowScreenSurfaceMESA = drm_show_screen_surface_mesa; + drm->base.API.SwapBuffers = drm_swap_buffers; + + drm->base.ClientAPIsMask = EGL_OPENGL_BIT /*| EGL_OPENGL_ES_BIT*/; + drm->base.Name = "DRM/Gallium"; + + /* enable supported extensions */ + drm->base.Extensions.MESA_screen_surface = EGL_TRUE; + drm->base.Extensions.MESA_copy_context = 
EGL_TRUE; + + return &drm->base; +} diff --git a/src/gallium/winsys/drm/intel/egl/intel_egl.h b/src/gallium/winsys/drm/intel/egl/intel_egl.h new file mode 100644 index 0000000000..1ee27e0847 --- /dev/null +++ b/src/gallium/winsys/drm/intel/egl/intel_egl.h @@ -0,0 +1,53 @@ + +#ifndef _INTEL_EGL_H_ +#define _INTEL_EGL_H_ + +#include <xf86drm.h> + +struct egl_drm_device +{ + void *priv; + int drmFD; + + drmVersionPtr version; + int deviceID; +}; + +struct egl_drm_context +{ + void *priv; + struct egl_drm_device *device; +}; + +struct egl_drm_drawable +{ + void *priv; + struct egl_drm_device *device; + size_t h; + size_t w; +}; + +struct egl_drm_frontbuffer +{ + uint32_t handle; + uint32_t pitch; + uint32_t width; + uint32_t height; +}; + +#include "GL/internal/glcore.h" + +int intel_create_device(struct egl_drm_device *device); +int intel_destroy_device(struct egl_drm_device *device); + +int intel_create_context(struct egl_drm_context *context, const __GLcontextModes *visual, void *sharedContextPrivate); +int intel_destroy_context(struct egl_drm_context *context); + +int intel_create_drawable(struct egl_drm_drawable *drawable, const __GLcontextModes * visual); +int intel_destroy_drawable(struct egl_drm_drawable *drawable); + +void intel_make_current(struct egl_drm_context *context, struct egl_drm_drawable *draw, struct egl_drm_drawable *read); +void intel_swap_buffers(struct egl_drm_drawable *draw); +void intel_bind_frontbuffer(struct egl_drm_drawable *draw, struct egl_drm_frontbuffer *front); + +#endif diff --git a/src/gallium/winsys/drm/intel/egl/intel_reg.h b/src/gallium/winsys/drm/intel/egl/intel_reg.h new file mode 100644 index 0000000000..4f33bee438 --- /dev/null +++ b/src/gallium/winsys/drm/intel/egl/intel_reg.h @@ -0,0 +1,53 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef _INTEL_REG_H_ +#define _INTEL_REG_H_ + + +#define BR00_BITBLT_CLIENT 0x40000000 +#define BR00_OP_COLOR_BLT 0x10000000 +#define BR00_OP_SRC_COPY_BLT 0x10C00000 +#define BR13_SOLID_PATTERN 0x80000000 + +#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|0x4) +#define XY_COLOR_BLT_WRITE_ALPHA (1<<21) +#define XY_COLOR_BLT_WRITE_RGB (1<<20) + +#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) +#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21) +#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20) + +#define MI_WAIT_FOR_EVENT ((0x3<<23)) +#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) +#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) + +#define MI_BATCH_BUFFER_END (0xA<<23) + + +#endif diff --git a/src/gallium/winsys/drm/intel/egl/intel_swapbuffers.c b/src/gallium/winsys/drm/intel/egl/intel_swapbuffers.c new file mode 100644 index 0000000000..2edcbc79ff --- /dev/null +++ b/src/gallium/winsys/drm/intel/egl/intel_swapbuffers.c @@ -0,0 +1,111 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "intel_device.h" +#include "intel_context.h" +#include "intel_batchbuffer.h" +#include "intel_reg.h" + +#include "pipe/p_context.h" +#include "state_tracker/st_public.h" +#include "state_tracker/st_context.h" +#include "state_tracker/st_cb_fbo.h" +#include "intel_egl.h" + + +static void +intel_display_surface(struct egl_drm_drawable *draw, + struct pipe_surface *surf); + +void intel_swap_buffers(struct egl_drm_drawable *draw) +{ + struct intel_framebuffer *intel_fb = (struct intel_framebuffer *)draw->priv; + struct pipe_surface *back_surf; + + assert(intel_fb); + assert(intel_fb->stfb); + + back_surf = st_get_framebuffer_surface(intel_fb->stfb, ST_SURFACE_BACK_LEFT); + if (back_surf) { + st_notify_swapbuffers(intel_fb->stfb); + if (intel_fb->front) + intel_display_surface(draw, back_surf); + st_notify_swapbuffers_complete(intel_fb->stfb); + } +} + +static void +intel_display_surface(struct egl_drm_drawable *draw, + struct pipe_surface *surf) +{ + struct intel_context *intel = NULL; + struct intel_framebuffer *intel_fb = (struct intel_framebuffer *)draw->priv; + struct _DriFenceObject *fence; + + //const int srcWidth = surf->width; + //const int srcHeight = surf->height; + + intel = intel_fb->device->dummy; + if (!intel) { + printf("No dummy context\n"); + return; + } + + const int dstWidth = intel_fb->front->width; + const int dstHeight = intel_fb->front->height; + const int 
dstPitch = intel_fb->front->pitch / 4;//draw->front.cpp; + + const int cpp = 4;//intel_fb->front->cpp; + const int srcPitch = surf->stride / cpp; + + int BR13, CMD; + //int i; + + BR13 = (dstPitch * cpp) | (0xCC << 16) | (1 << 24) | (1 << 25); + CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB); + + BEGIN_BATCH(8, 2); + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((0 << 16) | 0); + OUT_BATCH((dstHeight << 16) | dstWidth); + + OUT_RELOC(intel_fb->front_buffer, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, + DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE, 0); + + OUT_BATCH((0 << 16) | 0); + OUT_BATCH((srcPitch * cpp) & 0xffff); + OUT_RELOC(dri_bo(surf->buffer), + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + DRM_BO_MASK_MEM | DRM_BO_FLAG_READ, 0); + + fence = intel_be_batchbuffer_flush(intel->base.batch); + driFenceUnReference(&fence); + intel_be_batchbuffer_finish(intel->base.batch); +} diff --git a/src/gallium/winsys/egl_xlib/Makefile b/src/gallium/winsys/egl_xlib/Makefile new file mode 100644 index 0000000000..76f1b56da4 --- /dev/null +++ b/src/gallium/winsys/egl_xlib/Makefile @@ -0,0 +1,89 @@ +# src/gallium/winsys/egl_xlib/Makefile + +# Build softpipe/xlib/EGL driver library/object: "egl_softpipe.so" + + +TOP = ../../../.. 
+include $(TOP)/configs/current + + +DRIVER_NAME = egl_softpipe.so + + +INCLUDE_DIRS = \ + -I$(TOP)/include \ + -I$(TOP)/src/egl/main \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/auxiliary + +WINSYS_SOURCES = \ + egl_xlib.c \ + sw_winsys.c + +WINSYS_OBJECTS = $(WINSYS_SOURCES:.c=.o) + + +LIBS = \ + $(GALLIUM_DRIVERS) \ + $(GALLIUM_AUXILIARIES) + +# XXX temporary (should create a separate lib with the GL API funcs and +# mesa code, as done for ES 1.x, 2.x, OpenVG, etc) +UNUSED_LIBS = \ + $(TOP)/src/mesa/libglapi.a \ + $(TOP)/src/mesa/libmesa.a \ + + +LOCAL_CFLAGS = -D_EGL_PLATFORM_X=1 + + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $(LOCAL_CFLAGS) $< -o $@ + + +.PHONY: library + + +default: depend library Makefile + + +library: $(TOP)/$(LIB_DIR)/$(DRIVER_NAME) + + +# Make the egl_softpipe.so library +$(TOP)/$(LIB_DIR)/$(DRIVER_NAME): $(WINSYS_OBJECTS) $(LIBS) + $(TOP)/bin/mklib -o $(DRIVER_NAME) \ + -linker "$(CC)" \ + -noprefix \ + -install $(TOP)/$(LIB_DIR) \ + $(MKLIB_OPTIONS) $(WINSYS_OBJECTS) \ + --whole-archive $(LIBS) --no-whole-archive + + +depend: $(ALL_SOURCES) + @ echo "running $(MKDEP)" + @ rm -f depend # workaround oops on gutsy?!? + @ touch depend + @ $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(ALL_SOURCES) \ + > /dev/null 2>/dev/null + + +install: default + $(INSTALL) -d $(INSTALL_DIR)/$(LIB_DIR) + @if [ -e $(TOP)/$(LIB_DIR) ]; then \ + $(INSTALL) $(TOP)/$(LIB_DIR)/$(DRIVER_NAME) $(INSTALL_DIR)/$(LIB_DIR); \ + fi + + +# Emacs tags +tags: + etags `find . 
-name \*.[ch]` $(TOP)/include/GL/*.h + +clean: + -rm -f *.o *~ *.bak + + +include depend diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c new file mode 100644 index 0000000000..477d766925 --- /dev/null +++ b/src/gallium/winsys/egl_xlib/egl_xlib.c @@ -0,0 +1,655 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * EGL / softpipe / xlib winsys module + * + * Authors: Brian Paul + */ + + +#include <dlfcn.h> +#include <X11/Xutil.h> + +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" +#include "pipe/p_state.h" +#include "pipe/p_winsys.h" +#include "util/u_memory.h" +#include "softpipe/sp_winsys.h" + +#include "eglconfig.h" +#include "eglconfigutil.h" +#include "eglcontext.h" +#include "egldisplay.h" +#include "egldriver.h" +#include "eglglobals.h" +#include "egllog.h" +#include "eglsurface.h" + +#include "state_tracker/st_public.h" + +#include "sw_winsys.h" + + +/** subclass of _EGLDriver */ +struct xlib_egl_driver +{ + _EGLDriver Base; /**< base class */ + + struct pipe_winsys *winsys; + struct pipe_screen *screen; +}; + + +/** subclass of _EGLContext */ +struct xlib_egl_context +{ + _EGLContext Base; /**< base class */ + + struct pipe_context *pipe; /**< Gallium driver context */ + struct st_context *Context; /**< Mesa/gallium state tracker context */ +}; + + +/** subclass of _EGLSurface */ +struct xlib_egl_surface +{ + _EGLSurface Base; /**< base class */ + + Display *Dpy; /**< The X Display of the window */ + Window Win; /**< The user-created window ID */ + GC Gc; + XVisualInfo VisInfo; + + struct pipe_winsys *winsys; + + struct st_framebuffer *Framebuffer; +}; + + +/** cast wrapper */ +static INLINE struct xlib_egl_driver * +xlib_egl_driver(_EGLDriver *drv) +{ + return (struct xlib_egl_driver *) drv; +} + + +static struct xlib_egl_surface * +lookup_surface(EGLSurface surf) +{ + _EGLSurface *surface = _eglLookupSurface(surf); + return (struct xlib_egl_surface *) surface; +} + + +static struct xlib_egl_context * +lookup_context(EGLContext surf) +{ + _EGLContext *context = _eglLookupContext(surf); + return (struct xlib_egl_context *) context; +} + + +static unsigned int +bitcount(unsigned int n) +{ + unsigned int bits; + for (bits = 0; n > 0; n = n >> 1) { + bits += (n & 1); + 
} + return bits; +} + + +/** + * Create the EGLConfigs. (one per X visual) + */ +static void +create_configs(_EGLDriver *drv, EGLDisplay dpy) +{ + static const EGLint all_apis = (EGL_OPENGL_ES_BIT | + EGL_OPENGL_ES2_BIT | + EGL_OPENVG_BIT | + EGL_OPENGL_BIT); + _EGLDisplay *disp = _eglLookupDisplay(dpy); + XVisualInfo *visInfo, visTemplate; + int num_visuals, i; + + /* get list of all X visuals, create an EGL config for each */ + visTemplate.screen = DefaultScreen(disp->Xdpy); + visInfo = XGetVisualInfo(disp->Xdpy, VisualScreenMask, + &visTemplate, &num_visuals); + if (!visInfo) { + printf("egl_xlib.c: couldn't get any X visuals\n"); + abort(); + } + + for (i = 0; i < num_visuals; i++) { + _EGLConfig *config = calloc(1, sizeof(_EGLConfig)); + int id = i + 1; + int rbits = bitcount(visInfo[i].red_mask); + int gbits = bitcount(visInfo[i].green_mask); + int bbits = bitcount(visInfo[i].blue_mask); + int abits = bbits == 8 ? 8 : 0; + int zbits = 24; + int sbits = 8; + int visid = visInfo[i].visualid; +#if defined(__cplusplus) || defined(c_plusplus) + int vistype = visInfo[i].c_class; +#else + int vistype = visInfo[i].class; +#endif + + _eglInitConfig(config, id); + SET_CONFIG_ATTRIB(config, EGL_BUFFER_SIZE, rbits + gbits + bbits + abits); + SET_CONFIG_ATTRIB(config, EGL_RED_SIZE, rbits); + SET_CONFIG_ATTRIB(config, EGL_GREEN_SIZE, gbits); + SET_CONFIG_ATTRIB(config, EGL_BLUE_SIZE, bbits); + SET_CONFIG_ATTRIB(config, EGL_ALPHA_SIZE, abits); + SET_CONFIG_ATTRIB(config, EGL_DEPTH_SIZE, zbits); + SET_CONFIG_ATTRIB(config, EGL_STENCIL_SIZE, sbits); + SET_CONFIG_ATTRIB(config, EGL_NATIVE_VISUAL_ID, visid); + SET_CONFIG_ATTRIB(config, EGL_NATIVE_VISUAL_TYPE, vistype); + SET_CONFIG_ATTRIB(config, EGL_NATIVE_RENDERABLE, EGL_FALSE); + SET_CONFIG_ATTRIB(config, EGL_CONFORMANT, all_apis); + SET_CONFIG_ATTRIB(config, EGL_RENDERABLE_TYPE, all_apis); + SET_CONFIG_ATTRIB(config, EGL_SURFACE_TYPE, EGL_WINDOW_BIT); + + _eglAddConfig(disp, config); + } +} + + +/** + * Called via 
eglInitialize(), drv->API.Initialize(). + */ +static EGLBoolean +xlib_eglInitialize(_EGLDriver *drv, EGLDisplay dpy, + EGLint *minor, EGLint *major) +{ + create_configs(drv, dpy); + + drv->Initialized = EGL_TRUE; + + /* we're supporting EGL 1.4 */ + *minor = 1; + *major = 4; + + return EGL_TRUE; +} + + +/** + * Called via eglTerminate(), drv->API.Terminate(). + */ +static EGLBoolean +xlib_eglTerminate(_EGLDriver *drv, EGLDisplay dpy) +{ + return EGL_TRUE; +} + + +static _EGLProc +xlib_eglGetProcAddress(const char *procname) +{ + return (_EGLProc) st_get_proc_address(procname); +} + + +static void +get_drawable_visual_info(Display *dpy, Drawable d, XVisualInfo *visInfo) +{ + XWindowAttributes attr; + XVisualInfo visTemp, *vis; + int num_visuals; + + XGetWindowAttributes(dpy, d, &attr); + + visTemp.screen = DefaultScreen(dpy); + visTemp.visualid = attr.visual->visualid; + vis = XGetVisualInfo(dpy, + (VisualScreenMask | VisualIDMask), + &visTemp, &num_visuals); + if (vis) + *visInfo = *vis; + + XFree(vis); +} + + + +/** Get size of given window */ +static Status +get_drawable_size(Display *dpy, Drawable d, uint *width, uint *height) +{ + Window root; + Status stat; + int xpos, ypos; + unsigned int w, h, bw, depth; + stat = XGetGeometry(dpy, d, &root, &xpos, &ypos, &w, &h, &bw, &depth); + *width = w; + *height = h; + return stat; +} + + +static void +check_and_update_buffer_size(struct xlib_egl_surface *surface) +{ + uint width, height; + get_drawable_size(surface->Dpy, surface->Win, &width, &height); + st_resize_framebuffer(surface->Framebuffer, width, height); + surface->Base.Width = width; + surface->Base.Height = height; +} + + + +static void +display_surface(struct pipe_winsys *pws, + struct pipe_surface *psurf, + struct xlib_egl_surface *xsurf) +{ + XImage *ximage; + void *data; + + ximage = XCreateImage(xsurf->Dpy, + xsurf->VisInfo.visual, + xsurf->VisInfo.depth, + ZPixmap, 0, /* format, offset */ + NULL, /* data */ + 0, 0, /* size */ + 32, /* bitmap_pad */ + 
0); /* bytes_per_line */ + + + assert(ximage->format); + assert(ximage->bitmap_unit); + + data = pws->buffer_map(pws, psurf->buffer, 0); + + /* update XImage's fields */ + ximage->data = data; + ximage->width = psurf->width; + ximage->height = psurf->height; + ximage->bytes_per_line = psurf->stride; + + XPutImage(xsurf->Dpy, xsurf->Win, xsurf->Gc, + ximage, 0, 0, 0, 0, psurf->width, psurf->height); + + XSync(xsurf->Dpy, 0); + + ximage->data = NULL; + XDestroyImage(ximage); + + pws->buffer_unmap(pws, psurf->buffer); +} + + + +/** Display gallium surface in X window */ +static void +flush_frontbuffer(struct pipe_winsys *pws, + struct pipe_surface *psurf, + void *context_private) +{ + struct xlib_egl_surface *xsurf = (struct xlib_egl_surface *) context_private; + display_surface(pws, psurf, xsurf); +} + + + +/** + * Called via eglCreateContext(), drv->API.CreateContext(). + */ +static EGLContext +xlib_eglCreateContext(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, + EGLContext share_list, const EGLint *attrib_list) +{ + struct xlib_egl_driver *xdrv = xlib_egl_driver(drv); + _EGLConfig *conf = _eglLookupConfig(drv, dpy, config); + struct xlib_egl_context *ctx; + struct st_context *share_ctx = NULL; /* XXX fix */ + __GLcontextModes visual; + + ctx = CALLOC_STRUCT(xlib_egl_context); + if (!ctx) + return EGL_NO_CONTEXT; + + /* let EGL lib init the common stuff */ + if (!_eglInitContext(drv, dpy, &ctx->Base, config, attrib_list)) { + free(ctx); + return EGL_NO_CONTEXT; + } + + /* API-dependent context creation */ + switch (ctx->Base.ClientAPI) { + case EGL_OPENVG_API: + case EGL_OPENGL_ES_API: + _eglLog(_EGL_DEBUG, "Create Context for ES version %d\n", + ctx->Base.ClientVersion); + /* fall-through */ + case EGL_OPENGL_API: + /* create a softpipe context */ + ctx->pipe = softpipe_create(xdrv->screen, xdrv->winsys, NULL); + /* Now do xlib / state tracker inits here */ + _eglConfigToContextModesRec(conf, &visual); + ctx->Context = st_create_context(ctx->pipe, &visual, 
share_ctx); + break; + default: + _eglError(EGL_BAD_MATCH, "eglCreateContext(unsupported API)"); + free(ctx); + return EGL_NO_CONTEXT; + } + + _eglSaveContext(&ctx->Base); + + return _eglGetContextHandle(&ctx->Base); +} + + +static EGLBoolean +xlib_eglDestroyContext(_EGLDriver *drv, EGLDisplay dpy, EGLContext ctx) +{ + struct xlib_egl_context *context = lookup_context(ctx); + if (context) { + if (context->Base.IsBound) { + context->Base.DeletePending = EGL_TRUE; + } + else { + /* API-dependent clean-up */ + switch (context->Base.ClientAPI) { + case EGL_OPENGL_ES_API: + /* fall-through */ + case EGL_OPENGL_API: + st_destroy_context(context->Context); + break; + default: + assert(0); + } + free(context); + } + return EGL_TRUE; + } + else { + _eglError(EGL_BAD_CONTEXT, "eglDestroyContext"); + return EGL_TRUE; + } +} + + +/** + * Called via eglMakeCurrent(), drv->API.MakeCurrent(). + */ +static EGLBoolean +xlib_eglMakeCurrent(_EGLDriver *drv, EGLDisplay dpy, + EGLSurface draw, EGLSurface read, EGLContext ctx) +{ + struct xlib_egl_context *context = lookup_context(ctx); + struct xlib_egl_surface *draw_surf = lookup_surface(draw); + struct xlib_egl_surface *read_surf = lookup_surface(read); + + if (!_eglMakeCurrent(drv, dpy, draw, read, context)) + return EGL_FALSE; + + st_make_current((context ? context->Context : NULL), + (draw_surf ? draw_surf->Framebuffer : NULL), + (read_surf ? 
read_surf->Framebuffer : NULL)); + + if (draw_surf) + check_and_update_buffer_size(draw_surf); + if (read_surf && read_surf != draw_surf) + check_and_update_buffer_size(draw_surf); + + return EGL_TRUE; +} + + +static enum pipe_format +choose_color_format(const __GLcontextModes *visual) +{ + if (visual->redBits == 8 && + visual->greenBits == 8 && + visual->blueBits == 8 && + visual->alphaBits == 8) { + /* XXX this really also depends on the ordering of R,G,B,A */ + return PIPE_FORMAT_A8R8G8B8_UNORM; + } + else { + assert(0); + return PIPE_FORMAT_NONE; + } +} + + +static enum pipe_format +choose_depth_format(const __GLcontextModes *visual) +{ + if (visual->depthBits > 0) + return PIPE_FORMAT_S8Z24_UNORM; + else + return PIPE_FORMAT_NONE; +} + + +static enum pipe_format +choose_stencil_format(const __GLcontextModes *visual) +{ + if (visual->stencilBits > 0) + return PIPE_FORMAT_S8Z24_UNORM; + else + return PIPE_FORMAT_NONE; +} + + +/** + * Called via eglCreateWindowSurface(), drv->API.CreateWindowSurface(). 
+ */ +static EGLSurface +xlib_eglCreateWindowSurface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, + NativeWindowType window, const EGLint *attrib_list) +{ + struct xlib_egl_driver *xdrv = xlib_egl_driver(drv); + _EGLDisplay *disp = _eglLookupDisplay(dpy); + _EGLConfig *conf = _eglLookupConfig(drv, dpy, config); + + struct xlib_egl_surface *surf; + __GLcontextModes visual; + uint width, height; + + surf = CALLOC_STRUCT(xlib_egl_surface); + if (!surf) + return EGL_NO_SURFACE; + + /* Let EGL lib init the common stuff */ + if (!_eglInitSurface(drv, dpy, &surf->Base, EGL_WINDOW_BIT, + config, attrib_list)) { + free(surf); + return EGL_NO_SURFACE; + } + + _eglSaveSurface(&surf->Base); + + /* + * Now init the Xlib and gallium stuff + */ + surf->Win = (Window) window; /* The X window ID */ + surf->Dpy = disp->Xdpy; /* The X display */ + surf->Gc = XCreateGC(surf->Dpy, surf->Win, 0, NULL); + + surf->winsys = xdrv->winsys; + + _eglConfigToContextModesRec(conf, &visual); + get_drawable_size(surf->Dpy, surf->Win, &width, &height); + get_drawable_visual_info(surf->Dpy, surf->Win, &surf->VisInfo); + + surf->Base.Width = width; + surf->Base.Height = height; + + /* Create GL statetracker framebuffer */ + surf->Framebuffer = st_create_framebuffer(&visual, + choose_color_format(&visual), + choose_depth_format(&visual), + choose_stencil_format(&visual), + width, height, + (void *) surf); + + st_resize_framebuffer(surf->Framebuffer, width, height); + + return _eglGetSurfaceHandle(&surf->Base); +} + + +static EGLBoolean +xlib_eglDestroySurface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface) +{ + struct xlib_egl_surface *surf = lookup_surface(surface); + if (surf) { + _eglHashRemove(_eglGlobal.Surfaces, (EGLuint) surface); + if (surf->Base.IsBound) { + surf->Base.DeletePending = EGL_TRUE; + } + else { + XFreeGC(surf->Dpy, surf->Gc); + st_unreference_framebuffer(surf->Framebuffer); + free(surf); + } + return EGL_TRUE; + } + else { + _eglError(EGL_BAD_SURFACE, 
"eglDestroySurface"); + return EGL_FALSE; + } +} + + +static EGLBoolean +xlib_eglSwapBuffers(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw) +{ + /* error checking step: */ + if (!_eglSwapBuffers(drv, dpy, draw)) + return EGL_FALSE; + + { + struct xlib_egl_surface *xsurf = lookup_surface(draw); + struct pipe_winsys *pws = xsurf->winsys; + struct pipe_surface *psurf = + st_get_framebuffer_surface(xsurf->Framebuffer, ST_SURFACE_BACK_LEFT); + + st_notify_swapbuffers(xsurf->Framebuffer); + + display_surface(pws, psurf, xsurf); + + check_and_update_buffer_size(xsurf); + } + + return EGL_TRUE; +} + + +/** + * Determine which API(s) is(are) present by looking for some specific + * global symbols. + */ +static EGLint +find_supported_apis(void) +{ + EGLint mask = 0; + void *handle; + + handle = dlopen(NULL, 0); + + if (dlsym(handle, "st_api_OpenGL_ES1")) + mask |= EGL_OPENGL_ES_BIT; + + if (dlsym(handle, "st_api_OpenGL_ES2")) + mask |= EGL_OPENGL_ES2_BIT; + + if (dlsym(handle, "st_api_OpenGL")) + mask |= EGL_OPENGL_BIT; + + if (dlsym(handle, "st_api_OpenVG")) + mask |= EGL_OPENVG_BIT; + + dlclose(handle); + + return mask; +} + + +/** + * This is the main entrypoint into the driver. + * Called by libEGL to instantiate an _EGLDriver object. 
+ */ +_EGLDriver * +_eglMain(_EGLDisplay *dpy, const char *args) +{ + struct xlib_egl_driver *xdrv; + + _eglLog(_EGL_INFO, "Entering EGL/Xlib _eglMain(%s)", args); + + xdrv = CALLOC_STRUCT(xlib_egl_driver); + if (!xdrv) + return NULL; + + if (!dpy->Xdpy) { + dpy->Xdpy = XOpenDisplay(NULL); + } + + _eglInitDriverFallbacks(&xdrv->Base); + xdrv->Base.API.Initialize = xlib_eglInitialize; + xdrv->Base.API.Terminate = xlib_eglTerminate; + xdrv->Base.API.GetProcAddress = xlib_eglGetProcAddress; + xdrv->Base.API.CreateContext = xlib_eglCreateContext; + xdrv->Base.API.DestroyContext = xlib_eglDestroyContext; + xdrv->Base.API.CreateWindowSurface = xlib_eglCreateWindowSurface; + xdrv->Base.API.DestroySurface = xlib_eglDestroySurface; + xdrv->Base.API.MakeCurrent = xlib_eglMakeCurrent; + xdrv->Base.API.SwapBuffers = xlib_eglSwapBuffers; + + xdrv->Base.ClientAPIsMask = find_supported_apis(); + if (xdrv->Base.ClientAPIsMask == 0x0) { + /* the app isn't directly linked with any EGL-supprted APIs + * (such as libGLESv2.so) so use an EGL utility to see what + * APIs might be loaded dynamically on this system. + */ + xdrv->Base.ClientAPIsMask = _eglFindAPIs(); + } + + xdrv->Base.Name = "Xlib/softpipe"; + + /* create one winsys and use it for all contexts/surfaces */ + xdrv->winsys = create_sw_winsys(); + xdrv->winsys->flush_frontbuffer = flush_frontbuffer; + + xdrv->screen = softpipe_create_screen(xdrv->winsys); + + return &xdrv->Base; +} + diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.c b/src/gallium/winsys/egl_xlib/sw_winsys.c new file mode 100644 index 0000000000..2fd190da52 --- /dev/null +++ b/src/gallium/winsys/egl_xlib/sw_winsys.c @@ -0,0 +1,284 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Totally software-based winsys layer. + * Note that the one winsys function that we can't implement here + * is flush_frontbuffer(). + * Whoever uses this code will have to provide that. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_winsys.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "sw_winsys.h" + + + +/** Subclass of pipe_winsys */ +struct sw_pipe_winsys +{ + struct pipe_winsys Base; + /* no extra fields for now */ +}; + + +/** subclass of pipe_buffer */ +struct sw_pipe_buffer +{ + struct pipe_buffer Base; + boolean UserBuffer; /** Is this a user-space buffer? 
*/ + void *Data; + void *Mapped; +}; + + +/** cast wrapper */ +static INLINE struct sw_pipe_buffer * +sw_pipe_buffer(struct pipe_buffer *b) +{ + return (struct sw_pipe_buffer *) b; +} + + +/** + * Round n up to next multiple. + */ +static INLINE unsigned +round_up(unsigned n, unsigned multiple) +{ + return (n + multiple - 1) & ~(multiple - 1); +} + + +static const char * +get_name(struct pipe_winsys *pws) +{ + return "software"; +} + + +/** Create new pipe_buffer and allocate storage of given size */ +static struct pipe_buffer * +buffer_create(struct pipe_winsys *pws, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct sw_pipe_buffer *buffer = CALLOC_STRUCT(sw_pipe_buffer); + if (!buffer) + return NULL; + + buffer->Base.refcount = 1; + buffer->Base.alignment = alignment; + buffer->Base.usage = usage; + buffer->Base.size = size; + + /* align to 16-byte multiple for Cell */ + buffer->Data = align_malloc(size, MAX2(alignment, 16)); + + return &buffer->Base; +} + + +/** + * Create buffer which wraps user-space data. 
+ */ +static struct pipe_buffer * +user_buffer_create(struct pipe_winsys *pws, void *ptr, unsigned bytes) +{ + struct sw_pipe_buffer *buffer = CALLOC_STRUCT(sw_pipe_buffer); + if (!buffer) + return NULL; + + buffer->Base.refcount = 1; + buffer->Base.size = bytes; + buffer->UserBuffer = TRUE; + buffer->Data = ptr; + + return &buffer->Base; +} + + +static void * +buffer_map(struct pipe_winsys *pws, struct pipe_buffer *buf, unsigned flags) +{ + struct sw_pipe_buffer *buffer = sw_pipe_buffer(buf); + buffer->Mapped = buffer->Data; + return buffer->Mapped; +} + + +static void +buffer_unmap(struct pipe_winsys *pws, struct pipe_buffer *buf) +{ + struct sw_pipe_buffer *buffer = sw_pipe_buffer(buf); + buffer->Mapped = NULL; +} + + +static void +buffer_destroy(struct pipe_winsys *pws, struct pipe_buffer *buf) +{ + struct sw_pipe_buffer *buffer = sw_pipe_buffer(buf); + + if (buffer->Data && !buffer->UserBuffer) { + align_free(buffer->Data); + buffer->Data = NULL; + } + + free(buffer); +} + + +/** + * Called via winsys->surface_alloc() to create new surfaces. 
+ */ +static struct pipe_surface * +surface_alloc(struct pipe_winsys *ws) +{ + struct pipe_surface *surf = CALLOC_STRUCT(pipe_surface); + if (!surf) + return NULL; + + surf->refcount = 1; + surf->winsys = ws; + + return surf; +} + + +static int +surface_alloc_storage(struct pipe_winsys *winsys, + struct pipe_surface *surf, + unsigned width, unsigned height, + enum pipe_format format, + unsigned flags, + unsigned tex_usage) +{ + const unsigned alignment = 64; + + surf->width = width; + surf->height = height; + surf->format = format; + pf_get_block(surf->format, &surf->block); + surf->nblocksx = pf_get_nblocksx(&surf->block, width); + surf->nblocksy = pf_get_nblocksy(&surf->block, height); + surf->stride = round_up(surf->nblocksx * surf->block.size, alignment); + surf->usage = flags; + + assert(!surf->buffer); + surf->buffer = winsys->buffer_create(winsys, alignment, + PIPE_BUFFER_USAGE_PIXEL, + surf->stride * height); + if(!surf->buffer) + return -1; + + return 0; +} + + +static void +surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) +{ + struct pipe_surface *surf = *s; + assert(!surf->texture); + surf->refcount--; + if (surf->refcount == 0) { + if (surf->buffer) + winsys_buffer_reference(winsys, &surf->buffer, NULL); + free(surf); + } + *s = NULL; +} + + +static void +fence_reference(struct pipe_winsys *sws, struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + /* no-op */ +} + + +static int +fence_signalled(struct pipe_winsys *sws, struct pipe_fence_handle *fence, + unsigned flag) +{ + /* no-op */ + return 0; +} + + +static int +fence_finish(struct pipe_winsys *sws, struct pipe_fence_handle *fence, + unsigned flag) +{ + /* no-op */ + return 0; +} + + +/** + * Create/return a new pipe_winsys object. 
+ */ +struct pipe_winsys * +create_sw_winsys(void) +{ + struct sw_pipe_winsys *ws = CALLOC_STRUCT(sw_pipe_winsys); + if (!ws) + return NULL; + + /* Fill in this struct with callbacks that pipe will need to + * communicate with the window system, buffer manager, etc. + */ + ws->Base.buffer_create = buffer_create; + ws->Base.user_buffer_create = user_buffer_create; + ws->Base.buffer_map = buffer_map; + ws->Base.buffer_unmap = buffer_unmap; + ws->Base.buffer_destroy = buffer_destroy; + + ws->Base.surface_alloc = surface_alloc; + ws->Base.surface_alloc_storage = surface_alloc_storage; + ws->Base.surface_release = surface_release; + + ws->Base.fence_reference = fence_reference; + ws->Base.fence_signalled = fence_signalled; + ws->Base.fence_finish = fence_finish; + + ws->Base.flush_frontbuffer = NULL; /* not implemented here! */ + + ws->Base.get_name = get_name; + + return &ws->Base; +} diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.h b/src/gallium/winsys/egl_xlib/sw_winsys.h new file mode 100644 index 0000000000..f96c5a14b0 --- /dev/null +++ b/src/gallium/winsys/egl_xlib/sw_winsys.h @@ -0,0 +1,40 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef SW_WINSYS_H +#define SW_WINSYS_H + + +struct pipe_winsys; + + +extern struct pipe_winsys * +create_sw_winsys(void); + + +#endif /* SW_WINSYS_H */ diff --git a/src/gallium/winsys/gdi/SConscript b/src/gallium/winsys/gdi/SConscript new file mode 100644 index 0000000000..170fdf5127 --- /dev/null +++ b/src/gallium/winsys/gdi/SConscript @@ -0,0 +1,33 @@ +####################################################################### +# SConscript for gdi winsys + +Import('*') + +if env['platform'] == 'windows': + + env = env.Clone() + + env.Append(CPPPATH = [ + '#src/mesa/glapi', + '#src/mesa', + '#src/mesa/main', + ]) + + sources = [ + 'opengl32.def', + 'wgl.c', + 'wmesa.c', + ] + + drivers = [ + softpipe, + ] + + env.Append(LIBS = ['gdi32', 'user32']) + + # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions + env.SharedLibrary( + target ='opengl32', + source = sources, + LIBS = glapi + mesa + drivers + auxiliaries + env['LIBS'], + ) diff --git a/src/gallium/winsys/gdi/colors.h b/src/gallium/winsys/gdi/colors.h new file mode 100644 index 0000000000..03e512c1fa --- /dev/null +++ b/src/gallium/winsys/gdi/colors.h @@ -0,0 +1,29 @@ +/* Values for wmesa->pixelformat: */ + +#define PF_8A8B8G8R 3 /* 32-bit TrueColor: 8-A, 8-B, 8-G, 8-R */ +#define PF_8R8G8B 4 /* 32-bit TrueColor: 8-R, 8-G, 8-B */ +#define PF_5R6G5B 5 /* 16-bit TrueColor: 5-R, 6-G, 5-B bits */ +#define PF_DITHER8 
6 /* Dithered RGB using a lookup table */ +#define PF_LOOKUP 7 /* Undithered RGB using a lookup table */ +#define PF_GRAYSCALE 10 /* Grayscale or StaticGray */ +#define PF_BADFORMAT 11 +#define PF_INDEX8 12 + + +#define BGR8(r,g,b) (unsigned)(((BYTE)((b & 0xc0) | ((g & 0xe0)>>2) | \ + ((r & 0xe0)>>5)))) + +/* Windows uses 5,5,5 for 16-bit */ +#define BGR16(r,g,b) ( (((unsigned short)b ) >> 3) | \ + (((unsigned short)g & 0xf8) << 2) | \ + (((unsigned short)r & 0xf8) << 7) ) + +#define BGR24(r,g,b) (unsigned long)((DWORD)(((BYTE)(b)| \ + ((WORD)((BYTE)(g))<<8))| \ + (((DWORD)(BYTE)(r))<<16))) + +#define BGR32(r,g,b) (unsigned long)((DWORD)(((BYTE)(b)| \ + ((WORD)((BYTE)(g))<<8))| \ + (((DWORD)(BYTE)(r))<<16))) + + diff --git a/src/gallium/winsys/gdi/opengl32.def b/src/gallium/winsys/gdi/opengl32.def new file mode 100644 index 0000000000..54e72f57b1 --- /dev/null +++ b/src/gallium/winsys/gdi/opengl32.def @@ -0,0 +1,859 @@ +; DO NOT EDIT - This file generated automatically by mesadef.py script +;DESCRIPTION 'Mesa (OpenGL work-alike) for Win32' +VERSION 6.5 +; +; Module definition file for Mesa (OPENGL32.DLL) +; +; Note: The OpenGL functions use the STDCALL +; function calling convention. Microsoft's +; OPENGL32 uses this convention and so must the +; Mesa OPENGL32 so that the Mesa DLL can be used +; as a drop-in replacement. +; +; The linker exports STDCALL entry points with +; 'decorated' names; e.g., _glBegin@0, where the +; trailing number is the number of bytes of +; parameter data pushed onto the stack. The +; callee is responsible for popping this data +; off the stack, usually via a RETF n instruction. +; +; However, the Microsoft OPENGL32.DLL does not export +; the decorated names, even though the calling convention +; is STDCALL. So, this module definition file is +; needed to force the Mesa OPENGL32.DLL to export the +; symbols in the same manner as the Microsoft DLL. 
+; Were it not for this problem, this file would not +; be needed (for the gl* functions) since the entry +; points are compiled with dllexport declspec. +; +; However, this file is still needed to export "internal" +; Mesa symbols for the benefit of the OSMESA32.DLL. +; +EXPORTS + glNewList + glEndList + glCallList + glCallLists + glDeleteLists + glGenLists + glListBase + glBegin + glBitmap + glColor3b + glColor3bv + glColor3d + glColor3dv + glColor3f + glColor3fv + glColor3i + glColor3iv + glColor3s + glColor3sv + glColor3ub + glColor3ubv + glColor3ui + glColor3uiv + glColor3us + glColor3usv + glColor4b + glColor4bv + glColor4d + glColor4dv + glColor4f + glColor4fv + glColor4i + glColor4iv + glColor4s + glColor4sv + glColor4ub + glColor4ubv + glColor4ui + glColor4uiv + glColor4us + glColor4usv + glEdgeFlag + glEdgeFlagv + glEnd + glIndexd + glIndexdv + glIndexf + glIndexfv + glIndexi + glIndexiv + glIndexs + glIndexsv + glNormal3b + glNormal3bv + glNormal3d + glNormal3dv + glNormal3f + glNormal3fv + glNormal3i + glNormal3iv + glNormal3s + glNormal3sv + glRasterPos2d + glRasterPos2dv + glRasterPos2f + glRasterPos2fv + glRasterPos2i + glRasterPos2iv + glRasterPos2s + glRasterPos2sv + glRasterPos3d + glRasterPos3dv + glRasterPos3f + glRasterPos3fv + glRasterPos3i + glRasterPos3iv + glRasterPos3s + glRasterPos3sv + glRasterPos4d + glRasterPos4dv + glRasterPos4f + glRasterPos4fv + glRasterPos4i + glRasterPos4iv + glRasterPos4s + glRasterPos4sv + glRectd + glRectdv + glRectf + glRectfv + glRecti + glRectiv + glRects + glRectsv + glTexCoord1d + glTexCoord1dv + glTexCoord1f + glTexCoord1fv + glTexCoord1i + glTexCoord1iv + glTexCoord1s + glTexCoord1sv + glTexCoord2d + glTexCoord2dv + glTexCoord2f + glTexCoord2fv + glTexCoord2i + glTexCoord2iv + glTexCoord2s + glTexCoord2sv + glTexCoord3d + glTexCoord3dv + glTexCoord3f + glTexCoord3fv + glTexCoord3i + glTexCoord3iv + glTexCoord3s + glTexCoord3sv + glTexCoord4d + glTexCoord4dv + glTexCoord4f + glTexCoord4fv + glTexCoord4i + 
glTexCoord4iv + glTexCoord4s + glTexCoord4sv + glVertex2d + glVertex2dv + glVertex2f + glVertex2fv + glVertex2i + glVertex2iv + glVertex2s + glVertex2sv + glVertex3d + glVertex3dv + glVertex3f + glVertex3fv + glVertex3i + glVertex3iv + glVertex3s + glVertex3sv + glVertex4d + glVertex4dv + glVertex4f + glVertex4fv + glVertex4i + glVertex4iv + glVertex4s + glVertex4sv + glClipPlane + glColorMaterial + glCullFace + glFogf + glFogfv + glFogi + glFogiv + glFrontFace + glHint + glLightf + glLightfv + glLighti + glLightiv + glLightModelf + glLightModelfv + glLightModeli + glLightModeliv + glLineStipple + glLineWidth + glMaterialf + glMaterialfv + glMateriali + glMaterialiv + glPointSize + glPolygonMode + glPolygonStipple + glScissor + glShadeModel + glTexParameterf + glTexParameterfv + glTexParameteri + glTexParameteriv + glTexImage1D + glTexImage2D + glTexEnvf + glTexEnvfv + glTexEnvi + glTexEnviv + glTexGend + glTexGendv + glTexGenf + glTexGenfv + glTexGeni + glTexGeniv + glFeedbackBuffer + glSelectBuffer + glRenderMode + glInitNames + glLoadName + glPassThrough + glPopName + glPushName + glDrawBuffer + glClear + glClearAccum + glClearIndex + glClearColor + glClearStencil + glClearDepth + glStencilMask + glColorMask + glDepthMask + glIndexMask + glAccum + glDisable + glEnable + glFinish + glFlush + glPopAttrib + glPushAttrib + glMap1d + glMap1f + glMap2d + glMap2f + glMapGrid1d + glMapGrid1f + glMapGrid2d + glMapGrid2f + glEvalCoord1d + glEvalCoord1dv + glEvalCoord1f + glEvalCoord1fv + glEvalCoord2d + glEvalCoord2dv + glEvalCoord2f + glEvalCoord2fv + glEvalMesh1 + glEvalPoint1 + glEvalMesh2 + glEvalPoint2 + glAlphaFunc + glBlendFunc + glLogicOp + glStencilFunc + glStencilOp + glDepthFunc + glPixelZoom + glPixelTransferf + glPixelTransferi + glPixelStoref + glPixelStorei + glPixelMapfv + glPixelMapuiv + glPixelMapusv + glReadBuffer + glCopyPixels + glReadPixels + glDrawPixels + glGetBooleanv + glGetClipPlane + glGetDoublev + glGetError + glGetFloatv + glGetIntegerv + 
glGetLightfv + glGetLightiv + glGetMapdv + glGetMapfv + glGetMapiv + glGetMaterialfv + glGetMaterialiv + glGetPixelMapfv + glGetPixelMapuiv + glGetPixelMapusv + glGetPolygonStipple + glGetString + glGetTexEnvfv + glGetTexEnviv + glGetTexGendv + glGetTexGenfv + glGetTexGeniv + glGetTexImage + glGetTexParameterfv + glGetTexParameteriv + glGetTexLevelParameterfv + glGetTexLevelParameteriv + glIsEnabled + glIsList + glDepthRange + glFrustum + glLoadIdentity + glLoadMatrixf + glLoadMatrixd + glMatrixMode + glMultMatrixf + glMultMatrixd + glOrtho + glPopMatrix + glPushMatrix + glRotated + glRotatef + glScaled + glScalef + glTranslated + glTranslatef + glViewport + glArrayElement + glColorPointer + glDisableClientState + glDrawArrays + glDrawElements + glEdgeFlagPointer + glEnableClientState + glGetPointerv + glIndexPointer + glInterleavedArrays + glNormalPointer + glTexCoordPointer + glVertexPointer + glPolygonOffset + glCopyTexImage1D + glCopyTexImage2D + glCopyTexSubImage1D + glCopyTexSubImage2D + glTexSubImage1D + glTexSubImage2D + glAreTexturesResident + glBindTexture + glDeleteTextures + glGenTextures + glIsTexture + glPrioritizeTextures + glIndexub + glIndexubv + glPopClientAttrib + glPushClientAttrib + glBlendColor + glBlendEquation + glDrawRangeElements + glColorTable + glColorTableParameterfv + glColorTableParameteriv + glCopyColorTable + glGetColorTable + glGetColorTableParameterfv + glGetColorTableParameteriv + glColorSubTable + glCopyColorSubTable + glConvolutionFilter1D + glConvolutionFilter2D + glConvolutionParameterf + glConvolutionParameterfv + glConvolutionParameteri + glConvolutionParameteriv + glCopyConvolutionFilter1D + glCopyConvolutionFilter2D + glGetConvolutionFilter + glGetConvolutionParameterfv + glGetConvolutionParameteriv + glGetSeparableFilter + glSeparableFilter2D + glGetHistogram + glGetHistogramParameterfv + glGetHistogramParameteriv + glGetMinmax + glGetMinmaxParameterfv + glGetMinmaxParameteriv + glHistogram + glMinmax + glResetHistogram 
+ glResetMinmax + glTexImage3D + glTexSubImage3D + glCopyTexSubImage3D + glActiveTextureARB + glClientActiveTextureARB + glMultiTexCoord1dARB + glMultiTexCoord1dvARB + glMultiTexCoord1fARB + glMultiTexCoord1fvARB + glMultiTexCoord1iARB + glMultiTexCoord1ivARB + glMultiTexCoord1sARB + glMultiTexCoord1svARB + glMultiTexCoord2dARB + glMultiTexCoord2dvARB + glMultiTexCoord2fARB + glMultiTexCoord2fvARB + glMultiTexCoord2iARB + glMultiTexCoord2ivARB + glMultiTexCoord2sARB + glMultiTexCoord2svARB + glMultiTexCoord3dARB + glMultiTexCoord3dvARB + glMultiTexCoord3fARB + glMultiTexCoord3fvARB + glMultiTexCoord3iARB + glMultiTexCoord3ivARB + glMultiTexCoord3sARB + glMultiTexCoord3svARB + glMultiTexCoord4dARB + glMultiTexCoord4dvARB + glMultiTexCoord4fARB + glMultiTexCoord4fvARB + glMultiTexCoord4iARB + glMultiTexCoord4ivARB + glMultiTexCoord4sARB + glMultiTexCoord4svARB + glLoadTransposeMatrixfARB + glLoadTransposeMatrixdARB + glMultTransposeMatrixfARB + glMultTransposeMatrixdARB + glSampleCoverageARB + glCompressedTexImage3DARB + glCompressedTexImage2DARB + glCompressedTexImage1DARB + glCompressedTexSubImage3DARB + glCompressedTexSubImage2DARB + glCompressedTexSubImage1DARB + glGetCompressedTexImageARB + glActiveTexture + glClientActiveTexture + glMultiTexCoord1d + glMultiTexCoord1dv + glMultiTexCoord1f + glMultiTexCoord1fv + glMultiTexCoord1i + glMultiTexCoord1iv + glMultiTexCoord1s + glMultiTexCoord1sv + glMultiTexCoord2d + glMultiTexCoord2dv + glMultiTexCoord2f + glMultiTexCoord2fv + glMultiTexCoord2i + glMultiTexCoord2iv + glMultiTexCoord2s + glMultiTexCoord2sv + glMultiTexCoord3d + glMultiTexCoord3dv + glMultiTexCoord3f + glMultiTexCoord3fv + glMultiTexCoord3i + glMultiTexCoord3iv + glMultiTexCoord3s + glMultiTexCoord3sv + glMultiTexCoord4d + glMultiTexCoord4dv + glMultiTexCoord4f + glMultiTexCoord4fv + glMultiTexCoord4i + glMultiTexCoord4iv + glMultiTexCoord4s + glMultiTexCoord4sv + glLoadTransposeMatrixf + glLoadTransposeMatrixd + glMultTransposeMatrixf + 
glMultTransposeMatrixd + glSampleCoverage + glCompressedTexImage3D + glCompressedTexImage2D + glCompressedTexImage1D + glCompressedTexSubImage3D + glCompressedTexSubImage2D + glCompressedTexSubImage1D + glGetCompressedTexImage + glBlendColorEXT + glPolygonOffsetEXT + glTexImage3DEXT + glTexSubImage3DEXT + glTexSubImage1DEXT + glTexSubImage2DEXT + glCopyTexImage1DEXT + glCopyTexImage2DEXT + glCopyTexSubImage1DEXT + glCopyTexSubImage2DEXT + glCopyTexSubImage3DEXT + glAreTexturesResidentEXT + glBindTextureEXT + glDeleteTexturesEXT + glGenTexturesEXT + glIsTextureEXT + glPrioritizeTexturesEXT + glArrayElementEXT + glColorPointerEXT + glDrawArraysEXT + glEdgeFlagPointerEXT + glGetPointervEXT + glIndexPointerEXT + glNormalPointerEXT + glTexCoordPointerEXT + glVertexPointerEXT + glBlendEquationEXT + glPointParameterfEXT + glPointParameterfvEXT + glPointParameterfARB + glPointParameterfvARB + glColorTableEXT + glGetColorTableEXT + glGetColorTableParameterivEXT + glGetColorTableParameterfvEXT + glLockArraysEXT + glUnlockArraysEXT + glDrawRangeElementsEXT + glSecondaryColor3bEXT + glSecondaryColor3bvEXT + glSecondaryColor3dEXT + glSecondaryColor3dvEXT + glSecondaryColor3fEXT + glSecondaryColor3fvEXT + glSecondaryColor3iEXT + glSecondaryColor3ivEXT + glSecondaryColor3sEXT + glSecondaryColor3svEXT + glSecondaryColor3ubEXT + glSecondaryColor3ubvEXT + glSecondaryColor3uiEXT + glSecondaryColor3uivEXT + glSecondaryColor3usEXT + glSecondaryColor3usvEXT + glSecondaryColorPointerEXT + glMultiDrawArraysEXT + glMultiDrawElementsEXT + glFogCoordfEXT + glFogCoordfvEXT + glFogCoorddEXT + glFogCoorddvEXT + glFogCoordPointerEXT + glBlendFuncSeparateEXT + glFlushVertexArrayRangeNV + glVertexArrayRangeNV + glCombinerParameterfvNV + glCombinerParameterfNV + glCombinerParameterivNV + glCombinerParameteriNV + glCombinerInputNV + glCombinerOutputNV + glFinalCombinerInputNV + glGetCombinerInputParameterfvNV + glGetCombinerInputParameterivNV + glGetCombinerOutputParameterfvNV + 
glGetCombinerOutputParameterivNV + glGetFinalCombinerInputParameterfvNV + glGetFinalCombinerInputParameterivNV + glResizeBuffersMESA + glWindowPos2dMESA + glWindowPos2dvMESA + glWindowPos2fMESA + glWindowPos2fvMESA + glWindowPos2iMESA + glWindowPos2ivMESA + glWindowPos2sMESA + glWindowPos2svMESA + glWindowPos3dMESA + glWindowPos3dvMESA + glWindowPos3fMESA + glWindowPos3fvMESA + glWindowPos3iMESA + glWindowPos3ivMESA + glWindowPos3sMESA + glWindowPos3svMESA + glWindowPos4dMESA + glWindowPos4dvMESA + glWindowPos4fMESA + glWindowPos4fvMESA + glWindowPos4iMESA + glWindowPos4ivMESA + glWindowPos4sMESA + glWindowPos4svMESA + glWindowPos2dARB + glWindowPos2fARB + glWindowPos2iARB + glWindowPos2sARB + glWindowPos2dvARB + glWindowPos2fvARB + glWindowPos2ivARB + glWindowPos2svARB + glWindowPos3dARB + glWindowPos3fARB + glWindowPos3iARB + glWindowPos3sARB + glWindowPos3dvARB + glWindowPos3fvARB + glWindowPos3ivARB + glWindowPos3svARB + glAreProgramsResidentNV + glBindProgramNV + glDeleteProgramsNV + glExecuteProgramNV + glGenProgramsNV + glGetProgramParameterdvNV + glGetProgramParameterfvNV + glGetProgramivNV + glGetProgramStringNV + glGetTrackMatrixivNV + glGetVertexAttribdvNV + glGetVertexAttribfvNV + glGetVertexAttribivNV + glGetVertexAttribPointervNV + glIsProgramNV + glLoadProgramNV + glProgramParameter4dNV + glProgramParameter4dvNV + glProgramParameter4fNV + glProgramParameter4fvNV + glProgramParameters4dvNV + glProgramParameters4fvNV + glRequestResidentProgramsNV + glTrackMatrixNV + glVertexAttribPointerNV + glVertexAttrib1dNV + glVertexAttrib1dvNV + glVertexAttrib1fNV + glVertexAttrib1fvNV + glVertexAttrib1sNV + glVertexAttrib1svNV + glVertexAttrib2dNV + glVertexAttrib2dvNV + glVertexAttrib2fNV + glVertexAttrib2fvNV + glVertexAttrib2sNV + glVertexAttrib2svNV + glVertexAttrib3dNV + glVertexAttrib3dvNV + glVertexAttrib3fNV + glVertexAttrib3fvNV + glVertexAttrib3sNV + glVertexAttrib3svNV + glVertexAttrib4dNV + glVertexAttrib4dvNV + glVertexAttrib4fNV + 
glVertexAttrib4fvNV + glVertexAttrib4sNV + glVertexAttrib4svNV + glVertexAttrib4ubNV + glVertexAttrib4ubvNV + glVertexAttribs1dvNV + glVertexAttribs1fvNV + glVertexAttribs1svNV + glVertexAttribs2dvNV + glVertexAttribs2fvNV + glVertexAttribs2svNV + glVertexAttribs3dvNV + glVertexAttribs3fvNV + glVertexAttribs3svNV + glVertexAttribs4dvNV + glVertexAttribs4fvNV + glVertexAttribs4svNV + glVertexAttribs4ubvNV + glPointParameteriNV + glPointParameterivNV + glFogCoordf + glFogCoordfv + glFogCoordd + glFogCoorddv + glFogCoordPointer + glMultiDrawArrays + glMultiDrawElements + glPointParameterf + glPointParameterfv + glPointParameteri + glPointParameteriv + glSecondaryColor3b + glSecondaryColor3bv + glSecondaryColor3d + glSecondaryColor3dv + glSecondaryColor3f + glSecondaryColor3fv + glSecondaryColor3i + glSecondaryColor3iv + glSecondaryColor3s + glSecondaryColor3sv + glSecondaryColor3ub + glSecondaryColor3ubv + glSecondaryColor3ui + glSecondaryColor3uiv + glSecondaryColor3us + glSecondaryColor3usv + glSecondaryColorPointer + glWindowPos2d + glWindowPos2dv + glWindowPos2f + glWindowPos2fv + glWindowPos2i + glWindowPos2iv + glWindowPos2s + glWindowPos2sv + glWindowPos3d + glWindowPos3dv + glWindowPos3f + glWindowPos3fv + glWindowPos3i + glWindowPos3iv + glWindowPos3s + glWindowPos3sv + glVertexAttrib1sARB + glVertexAttrib1fARB + glVertexAttrib1dARB + glVertexAttrib2sARB + glVertexAttrib2fARB + glVertexAttrib2dARB + glVertexAttrib3sARB + glVertexAttrib3fARB + glVertexAttrib3dARB + glVertexAttrib4sARB + glVertexAttrib4fARB + glVertexAttrib4dARB + glVertexAttrib4NubARB + glVertexAttrib1svARB + glVertexAttrib1fvARB + glVertexAttrib1dvARB + glVertexAttrib2svARB + glVertexAttrib2fvARB + glVertexAttrib2dvARB + glVertexAttrib3svARB + glVertexAttrib3fvARB + glVertexAttrib3dvARB + glVertexAttrib4bvARB + glVertexAttrib4svARB + glVertexAttrib4ivARB + glVertexAttrib4ubvARB + glVertexAttrib4usvARB + glVertexAttrib4uivARB + glVertexAttrib4fvARB + glVertexAttrib4dvARB + 
glVertexAttrib4NbvARB + glVertexAttrib4NsvARB + glVertexAttrib4NivARB + glVertexAttrib4NubvARB + glVertexAttrib4NusvARB + glVertexAttrib4NuivARB + glVertexAttribPointerARB + glEnableVertexAttribArrayARB + glDisableVertexAttribArrayARB + glProgramStringARB + glBindProgramARB + glDeleteProgramsARB + glGenProgramsARB + glIsProgramARB + glProgramEnvParameter4dARB + glProgramEnvParameter4dvARB + glProgramEnvParameter4fARB + glProgramEnvParameter4fvARB + glProgramLocalParameter4dARB + glProgramLocalParameter4dvARB + glProgramLocalParameter4fARB + glProgramLocalParameter4fvARB + glGetProgramEnvParameterdvARB + glGetProgramEnvParameterfvARB + glGetProgramLocalParameterdvARB + glGetProgramLocalParameterfvARB + glGetProgramivARB + glGetProgramStringARB + glGetVertexAttribdvARB + glGetVertexAttribfvARB + glGetVertexAttribivARB + glGetVertexAttribPointervARB + glProgramNamedParameter4fNV + glProgramNamedParameter4dNV + glProgramNamedParameter4fvNV + glProgramNamedParameter4dvNV + glGetProgramNamedParameterfvNV + glGetProgramNamedParameterdvNV + glBindBufferARB + glBufferDataARB + glBufferSubDataARB + glDeleteBuffersARB + glGenBuffersARB + glGetBufferParameterivARB + glGetBufferPointervARB + glGetBufferSubDataARB + glIsBufferARB + glMapBufferARB + glUnmapBufferARB + glGenQueriesARB + glDeleteQueriesARB + glIsQueryARB + glBeginQueryARB + glEndQueryARB + glGetQueryivARB + glGetQueryObjectivARB + glGetQueryObjectuivARB + glBindBuffer + glBufferData + glBufferSubData + glDeleteBuffers + glGenBuffers + glGetBufferParameteriv + glGetBufferPointerv + glGetBufferSubData + glIsBuffer + glMapBuffer + glUnmapBuffer + glGenQueries + glDeleteQueries + glIsQuery + glBeginQuery + glEndQuery + glGetQueryiv + glGetQueryObjectiv + glGetQueryObjectuiv +; +; WGL API + wglChoosePixelFormat + wglCopyContext + wglCreateContext + wglCreateLayerContext + wglDeleteContext + wglDescribeLayerPlane + wglDescribePixelFormat + wglGetCurrentContext + wglGetCurrentDC + wglGetLayerPaletteEntries + 
wglGetPixelFormat + wglGetProcAddress + wglMakeCurrent + wglRealizeLayerPalette + wglSetLayerPaletteEntries + wglSetPixelFormat + wglShareLists + wglSwapBuffers + wglSwapLayerBuffers + wglUseFontBitmapsA + wglUseFontBitmapsW + wglUseFontOutlinesA + wglUseFontOutlinesW + wglGetExtensionsStringARB diff --git a/src/gallium/winsys/gdi/wgl.c b/src/gallium/winsys/gdi/wgl.c new file mode 100644 index 0000000000..3ce470480d --- /dev/null +++ b/src/gallium/winsys/gdi/wgl.c @@ -0,0 +1,701 @@ +/* + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +/* + * File name : wgl.c + * WGL stuff. Added by Oleg Letsinsky, ajl@ultersys.ru + * Some things originated from the 3Dfx WGL functions + */ + +/* + * This file contains the implementation of the wgl* functions for + * Mesa on Windows. Since these functions are provided by Windows in + * GDI/OpenGL, we must supply our versions that work with Mesa here. + */ + + +/* We're essentially building part of GDI here, so define this so that + * we get the right export linkage. 
/*
 * One entry of the driver's pixel-format table: the Win32
 * PIXELFORMATDESCRIPTOR handed back to applications, extended with a
 * double-buffering (DB) flag.
 */
struct __pixelformat__
{
    PIXELFORMATDESCRIPTOR pfd;  /* copied out by wglDescribePixelFormat() */
    GLboolean doubleBuffered;   /* passed by wglCreateContext() to
                                 * WMesaCreateContext() as db_flag */
};
wgl_ctx[MESAWGL_CTX_MAX_COUNT]; + +static unsigned ctx_count = 0; +static int ctx_current = -1; +static unsigned curPFD = 0; + +static HDC CurrentHDC = 0; + + +WINGDIAPI HGLRC GLAPIENTRY wglCreateContext(HDC hdc) +{ + int i = 0; + if (!ctx_count) { + for(i=0;i<MESAWGL_CTX_MAX_COUNT;i++) { + wgl_ctx[i].ctx = NULL; + } + } + for( i = 0; i < MESAWGL_CTX_MAX_COUNT; i++ ) { + if ( wgl_ctx[i].ctx == NULL ) { + wgl_ctx[i].ctx = + WMesaCreateContext(hdc, NULL, (GLboolean)GL_TRUE, + (GLboolean) (pfd[curPFD-1].doubleBuffered ? + GL_TRUE : GL_FALSE), + (GLboolean)(pfd[curPFD-1].pfd.cAlphaBits ? + GL_TRUE : GL_FALSE) ); + if (wgl_ctx[i].ctx == NULL) + break; + ctx_count++; + return ((HGLRC)wgl_ctx[i].ctx); + } + } + SetLastError(0); + return(NULL); +} + +WINGDIAPI BOOL GLAPIENTRY wglDeleteContext(HGLRC hglrc) +{ + int i; + for ( i = 0; i < MESAWGL_CTX_MAX_COUNT; i++ ) { + if ( wgl_ctx[i].ctx == (WMesaContext) hglrc ){ + WMesaMakeCurrent((WMesaContext) hglrc, NULL); + WMesaDestroyContext(wgl_ctx[i].ctx); + wgl_ctx[i].ctx = NULL; + ctx_count--; + return(TRUE); + } + } + SetLastError(0); + return(FALSE); +} + +WINGDIAPI HGLRC GLAPIENTRY wglGetCurrentContext(VOID) +{ + if (ctx_current < 0) + return 0; + else + return (HGLRC) wgl_ctx[ctx_current].ctx; +} + +WINGDIAPI HDC GLAPIENTRY wglGetCurrentDC(VOID) +{ + return CurrentHDC; +} + +WINGDIAPI BOOL GLAPIENTRY wglMakeCurrent(HDC hdc, HGLRC hglrc) +{ + int i; + + CurrentHDC = hdc; + + if (!hdc || !hglrc) { + WMesaMakeCurrent(NULL, NULL); + ctx_current = -1; + return TRUE; + } + + for ( i = 0; i < MESAWGL_CTX_MAX_COUNT; i++ ) { + if ( wgl_ctx[i].ctx == (WMesaContext) hglrc ) { + WMesaMakeCurrent( (WMesaContext) hglrc, hdc ); + ctx_current = i; + return TRUE; + } + } + return FALSE; +} + + +WINGDIAPI int GLAPIENTRY wglChoosePixelFormat(HDC hdc, + CONST + PIXELFORMATDESCRIPTOR *ppfd) +{ + int i,best = -1,bestdelta = 0x7FFFFFFF,delta; + (void) hdc; + + if(ppfd->nSize != sizeof(PIXELFORMATDESCRIPTOR) || ppfd->nVersion != 1) + { + 
SetLastError(0); + return(0); + } + for(i = 0; i < npfd;i++) + { + delta = 0; + if( + (ppfd->dwFlags & PFD_DRAW_TO_WINDOW) && + !(pfd[i].pfd.dwFlags & PFD_DRAW_TO_WINDOW)) + continue; + if( + (ppfd->dwFlags & PFD_DRAW_TO_BITMAP) && + !(pfd[i].pfd.dwFlags & PFD_DRAW_TO_BITMAP)) + continue; + if( + (ppfd->dwFlags & PFD_SUPPORT_GDI) && + !(pfd[i].pfd.dwFlags & PFD_SUPPORT_GDI)) + continue; + if( + (ppfd->dwFlags & PFD_SUPPORT_OPENGL) && + !(pfd[i].pfd.dwFlags & PFD_SUPPORT_OPENGL)) + continue; + if( + !(ppfd->dwFlags & PFD_DOUBLEBUFFER_DONTCARE) && + ((ppfd->dwFlags & PFD_DOUBLEBUFFER) != + (pfd[i].pfd.dwFlags & PFD_DOUBLEBUFFER))) + continue; + if( + !(ppfd->dwFlags & PFD_STEREO_DONTCARE) && + ((ppfd->dwFlags & PFD_STEREO) != + (pfd[i].pfd.dwFlags & PFD_STEREO))) + continue; + if(ppfd->iPixelType != pfd[i].pfd.iPixelType) + delta++; + if(ppfd->cAlphaBits != pfd[i].pfd.cAlphaBits) + delta++; + if(delta < bestdelta) + { + best = i + 1; + bestdelta = delta; + if(bestdelta == 0) + break; + } + } + if(best == -1) + { + SetLastError(0); + return(0); + } + return(best); +} + +WINGDIAPI int GLAPIENTRY wglDescribePixelFormat(HDC hdc, + int iPixelFormat, + UINT nBytes, + LPPIXELFORMATDESCRIPTOR ppfd) +{ + (void) hdc; + + if(ppfd == NULL) + return(npfd); + if(iPixelFormat < 1 || iPixelFormat > npfd || + nBytes != sizeof(PIXELFORMATDESCRIPTOR)) + { + SetLastError(0); + return(0); + } + *ppfd = pfd[iPixelFormat - 1].pfd; + return(npfd); +} + +WINGDIAPI PROC GLAPIENTRY wglGetProcAddress(LPCSTR lpszProc) +{ + PROC p = (PROC) _glapi_get_proc_address((const char *) lpszProc); + if (p) + return p; + + SetLastError(0); + return(NULL); +} + +WINGDIAPI int GLAPIENTRY wglGetPixelFormat(HDC hdc) +{ + (void) hdc; + if(curPFD == 0) { + SetLastError(0); + return(0); + } + return(curPFD); +} + +WINGDIAPI BOOL GLAPIENTRY wglSetPixelFormat(HDC hdc,int iPixelFormat, + const PIXELFORMATDESCRIPTOR *ppfd) +{ + (void) hdc; + + if(iPixelFormat < 1 || iPixelFormat > npfd || + ppfd->nSize != 
sizeof(PIXELFORMATDESCRIPTOR)) { + SetLastError(0); + return(FALSE); + } + curPFD = iPixelFormat; + return(TRUE); +} + +WINGDIAPI BOOL GLAPIENTRY wglSwapBuffers(HDC hdc) +{ + WMesaSwapBuffers(hdc); + return TRUE; +} + +static FIXED FixedFromDouble(double d) +{ + long l = (long) (d * 65536L); + return *(FIXED *) (void *) &l; +} + + +/* +** This is cribbed from FX/fxwgl.c, and seems to implement support +** for bitmap fonts where the wglUseFontBitmapsA() code implements +** support for outline fonts. In combination they hopefully give +** fairly generic support for fonts. +*/ +static BOOL wglUseFontBitmaps_FX(HDC fontDevice, DWORD firstChar, + DWORD numChars, DWORD listBase) +{ +#define VERIFY(a) a + + TEXTMETRIC metric; + BITMAPINFO *dibInfo; + HDC bitDevice; + COLORREF tempColor; + int i; + + VERIFY(GetTextMetrics(fontDevice, &metric)); + + dibInfo = (BITMAPINFO *) calloc(sizeof(BITMAPINFO) + sizeof(RGBQUAD), 1); + dibInfo->bmiHeader.biSize = sizeof(BITMAPINFOHEADER); + dibInfo->bmiHeader.biPlanes = 1; + dibInfo->bmiHeader.biBitCount = 1; + dibInfo->bmiHeader.biCompression = BI_RGB; + + bitDevice = CreateCompatibleDC(fontDevice); + + /* Swap fore and back colors so the bitmap has the right polarity */ + tempColor = GetBkColor(bitDevice); + SetBkColor(bitDevice, GetTextColor(bitDevice)); + SetTextColor(bitDevice, tempColor); + + /* Place chars based on base line */ + VERIFY(SetTextAlign(bitDevice, TA_BASELINE) != GDI_ERROR ? 
1 : 0); + + for(i = 0; i < (int)numChars; i++) { + SIZE size; + char curChar; + int charWidth,charHeight,bmapWidth,bmapHeight,numBytes,res; + HBITMAP bitObject; + HGDIOBJ origBmap; + unsigned char *bmap; + + curChar = (char)(i + firstChar); + + /* Find how high/wide this character is */ + VERIFY(GetTextExtentPoint32(bitDevice, (LPCWSTR)&curChar, 1, &size)); + + /* Create the output bitmap */ + charWidth = size.cx; + charHeight = size.cy; + /* Round up to the next multiple of 32 bits */ + bmapWidth = ((charWidth + 31) / 32) * 32; + bmapHeight = charHeight; + bitObject = CreateCompatibleBitmap(bitDevice, + bmapWidth, + bmapHeight); + /* VERIFY(bitObject); */ + + /* Assign the output bitmap to the device */ + origBmap = SelectObject(bitDevice, bitObject); + (void) VERIFY(origBmap); + + VERIFY( PatBlt( bitDevice, 0, 0, bmapWidth, bmapHeight,BLACKNESS ) ); + + /* Use our source font on the device */ + VERIFY(SelectObject(bitDevice, GetCurrentObject(fontDevice,OBJ_FONT))); + + /* Draw the character */ + VERIFY(TextOut(bitDevice, 0, metric.tmAscent, (LPCWSTR)&curChar, 1)); + + /* Unselect our bmap object */ + VERIFY(SelectObject(bitDevice, origBmap)); + + /* Convert the display dependant representation to a 1 bit deep DIB */ + numBytes = (bmapWidth * bmapHeight) / 8; + bmap = (unsigned char *)malloc(numBytes); + dibInfo->bmiHeader.biWidth = bmapWidth; + dibInfo->bmiHeader.biHeight = bmapHeight; + res = GetDIBits(bitDevice, bitObject, 0, bmapHeight, bmap, + dibInfo, + DIB_RGB_COLORS); + /* VERIFY(res); */ + + /* Create the GL object */ + glNewList(i + listBase, GL_COMPILE); + glBitmap(bmapWidth, bmapHeight, 0.0, (GLfloat)metric.tmDescent, + (GLfloat)charWidth, 0.0, + bmap); + glEndList(); + /* CheckGL(); */ + + /* Destroy the bmap object */ + DeleteObject(bitObject); + + /* Deallocate the bitmap data */ + free(bmap); + } + + /* Destroy the DC */ + VERIFY(DeleteDC(bitDevice)); + + free(dibInfo); + + return TRUE; +#undef VERIFY +} + +WINGDIAPI BOOL GLAPIENTRY 
wglUseFontBitmapsA(HDC hdc, DWORD first, + DWORD count, DWORD listBase) +{ + int i; + GLuint font_list; + DWORD size; + GLYPHMETRICS gm; + HANDLE hBits; + LPSTR lpBits; + MAT2 mat; + int success = TRUE; + + if (count == 0) + return FALSE; + + font_list = listBase; + + mat.eM11 = FixedFromDouble(1); + mat.eM12 = FixedFromDouble(0); + mat.eM21 = FixedFromDouble(0); + mat.eM22 = FixedFromDouble(-1); + + memset(&gm,0,sizeof(gm)); + + /* + ** If we can't get the glyph outline, it may be because this is a fixed + ** font. Try processing it that way. + */ + if( GetGlyphOutline(hdc, first, GGO_BITMAP, &gm, 0, NULL, &mat) + == GDI_ERROR ) { + return wglUseFontBitmaps_FX( hdc, first, count, listBase ); + } + + /* + ** Otherwise process all desired characters. + */ + for (i = 0; i < (int)count; i++) { + DWORD err; + + glNewList( font_list+i, GL_COMPILE ); + + /* allocate space for the bitmap/outline */ + size = GetGlyphOutline(hdc, first + i, GGO_BITMAP, + &gm, 0, NULL, &mat); + if (size == GDI_ERROR) { + glEndList( ); + err = GetLastError(); + success = FALSE; + continue; + } + + hBits = GlobalAlloc(GHND, size+1); + lpBits = GlobalLock(hBits); + + err = + GetGlyphOutline(hdc, /* handle to device context */ + first + i, /* character to query */ + GGO_BITMAP, /* format of data to return */ + &gm, /* ptr to structure for metrics*/ + size, /* size of buffer for data */ + lpBits, /* pointer to buffer for data */ + &mat /* pointer to transformation */ + /* matrix structure */ + ); + + if (err == GDI_ERROR) { + GlobalUnlock(hBits); + GlobalFree(hBits); + + glEndList( ); + err = GetLastError(); + success = FALSE; + continue; + } + + glBitmap(gm.gmBlackBoxX,gm.gmBlackBoxY, + (GLfloat)-gm.gmptGlyphOrigin.x, + (GLfloat)gm.gmptGlyphOrigin.y, + (GLfloat)gm.gmCellIncX, + (GLfloat)gm.gmCellIncY, + (const GLubyte * )lpBits); + + GlobalUnlock(hBits); + GlobalFree(hBits); + + glEndList( ); + } + + return success; +} + + + +/* NOT IMPLEMENTED YET */ +WINGDIAPI BOOL GLAPIENTRY 
/*
 * Stub for sharing display lists between two rendering contexts.
 *
 * Both arguments are ignored and nothing is shared.
 *
 * NOTE(review): this returns TRUE (success), whereas the other
 * unimplemented entry points in this file return a failure value.
 * Confirm that callers are really meant to be told the lists were shared.
 */
WINGDIAPI BOOL GLAPIENTRY wglShareLists(HGLRC hglrc1,
                                        HGLRC hglrc2)
{
    (void) hglrc1; (void) hglrc2;
    return(TRUE);
}
iLayerPlane; (void) iStart; (void) cEntries; (void) pcr; + SetLastError(0); + return(0); +} + +WINGDIAPI BOOL GLAPIENTRY wglRealizeLayerPalette(HDC hdc, + int iLayerPlane, + BOOL bRealize) +{ + (void) hdc; (void) iLayerPlane; (void) bRealize; + SetLastError(0); + return(FALSE); +} + +WINGDIAPI BOOL GLAPIENTRY wglSwapLayerBuffers(HDC hdc, + UINT fuPlanes) +{ + (void) hdc; (void) fuPlanes; + SetLastError(0); + return(FALSE); +} + +WINGDIAPI const char * GLAPIENTRY wglGetExtensionsStringARB(HDC hdc) +{ + return "WGL_ARB_extensions_string"; +} diff --git a/src/gallium/winsys/gdi/wmesa.c b/src/gallium/winsys/gdi/wmesa.c new file mode 100644 index 0000000000..ed3dd2b927 --- /dev/null +++ b/src/gallium/winsys/gdi/wmesa.c @@ -0,0 +1,823 @@ +/* + * Windows (Win32/Win64) device driver for Mesa + * + */ + +#include "mtypes.h" +#include <GL/wmesa.h> +#include "wmesadef.h" + +#undef Elements + +#include "pipe/p_winsys.h" +#include "pipe/p_format.h" +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" +#include "util/u_memory.h" +#include "softpipe/sp_winsys.h" +#include "glapi/glapi.h" +#include "colors.h" + +extern GLvisual * +_mesa_create_visual( GLboolean rgbFlag, + GLboolean dbFlag, + GLboolean stereoFlag, + GLint redBits, + GLint greenBits, + GLint blueBits, + GLint alphaBits, + GLint indexBits, + GLint depthBits, + GLint stencilBits, + GLint accumRedBits, + GLint accumGreenBits, + GLint accumBlueBits, + GLint accumAlphaBits, + GLint numSamples ); + +/* linked list of our Framebuffers (windows) */ +WMesaFramebuffer FirstFramebuffer = NULL; + +struct wmesa_pipe_winsys +{ + struct pipe_winsys base; +}; + +/** + * Choose the pixel format for the given visual. + * This will tell the gallium driver how to pack pixel data into + * drawing surfaces. 
+ */ +static GLuint +choose_pixel_format(GLvisual *v) +{ +#if 1 + return PIPE_FORMAT_A8R8G8B8_UNORM; +#else + if ( GET_REDMASK(v) == 0x0000ff + && GET_GREENMASK(v) == 0x00ff00 + && GET_BLUEMASK(v) == 0xff0000 + && v->BitsPerPixel == 32) { + if (CHECK_BYTE_ORDER(v)) { + /* no byteswapping needed */ + return 0 /* PIXEL_FORMAT_U_A8_B8_G8_R8 */; + } + else { + return PIPE_FORMAT_R8G8B8A8_UNORM; + } + } + else if ( GET_REDMASK(v) == 0xff0000 + && GET_GREENMASK(v) == 0x00ff00 + && GET_BLUEMASK(v) == 0x0000ff + && v->BitsPerPixel == 32) { + if (CHECK_BYTE_ORDER(v)) { + /* no byteswapping needed */ + return PIPE_FORMAT_A8R8G8B8_UNORM; + } + else { + return PIPE_FORMAT_B8G8R8A8_UNORM; + } + } + else if ( GET_REDMASK(v) == 0xf800 + && GET_GREENMASK(v) == 0x07e0 + && GET_BLUEMASK(v) == 0x001f + && CHECK_BYTE_ORDER(v) + && v->BitsPerPixel == 16) { + /* 5-6-5 RGB */ + return PIPE_FORMAT_R5G6B5_UNORM; + } + +printf("BITS %d\n",v->BitsPerPixel); + assert(0); + return 0; +#endif +} + +/* + * Determine the pixel format based on the pixel size. + */ +static void wmSetPixelFormat(WMesaFramebuffer pwfb, HDC hDC) +{ + /* Only 16 and 32 bit targets are supported now */ + assert(pwfb->cColorBits == 0 || + pwfb->cColorBits == 16 || + pwfb->cColorBits == 32); + + switch(pwfb->cColorBits){ + case 8: + pwfb->pixelformat = PF_INDEX8; + break; + case 16: + pwfb->pixelformat = PF_5R6G5B; + break; + case 32: + pwfb->pixelformat = PF_8R8G8B; + break; + default: + pwfb->pixelformat = PF_BADFORMAT; + } +} + +/** + * Create a new WMesaFramebuffer object which will correspond to the + * given HDC (Window handle). 
+ */ +WMesaFramebuffer +wmesa_new_framebuffer(HDC hdc, GLvisual *visual, GLuint width, GLuint height) +{ + WMesaFramebuffer pwfb + = (WMesaFramebuffer) malloc(sizeof(struct wmesa_framebuffer)); + if (pwfb) { + enum pipe_format colorFormat, depthFormat, stencilFormat; + + /* determine PIPE_FORMATs for buffers */ + colorFormat = choose_pixel_format(visual); + + if (visual->depthBits == 0) + depthFormat = PIPE_FORMAT_NONE; + else if (visual->depthBits <= 16) + depthFormat = PIPE_FORMAT_Z16_UNORM; + else if (visual->depthBits <= 24) + depthFormat = PIPE_FORMAT_S8Z24_UNORM; + else + depthFormat = PIPE_FORMAT_Z32_UNORM; + + if (visual->stencilBits == 8) { + if (depthFormat == PIPE_FORMAT_S8Z24_UNORM) + stencilFormat = depthFormat; + else + stencilFormat = PIPE_FORMAT_S8_UNORM; + } + else { + stencilFormat = PIPE_FORMAT_NONE; + } + + pwfb->stfb = st_create_framebuffer(visual, + colorFormat, depthFormat, stencilFormat, + width, height, + (void *) pwfb); + + pwfb->cColorBits = GetDeviceCaps(hdc, BITSPIXEL); + + pwfb->hDC = hdc; + /* insert at head of list */ + pwfb->next = FirstFramebuffer; + FirstFramebuffer = pwfb; + } + return pwfb; +} + +/** + * Given an hdc, free the corresponding WMesaFramebuffer + */ +void +wmesa_free_framebuffer(HDC hdc) +{ + WMesaFramebuffer pwfb, prev; + for (pwfb = FirstFramebuffer; pwfb; pwfb = pwfb->next) { + if (pwfb->hDC == hdc) + break; + prev = pwfb; + } + if (pwfb) { + if (pwfb == FirstFramebuffer) + FirstFramebuffer = pwfb->next; + else + prev->next = pwfb->next; + free(pwfb); + } +} + +/** + * Given an hdc, return the corresponding WMesaFramebuffer + */ +WMesaFramebuffer +wmesa_lookup_framebuffer(HDC hdc) +{ + WMesaFramebuffer pwfb; + for (pwfb = FirstFramebuffer; pwfb; pwfb = pwfb->next) { + if (pwfb->hDC == hdc) + return pwfb; + } + return NULL; +} + + +/** + * Given a GLframebuffer, return the corresponding WMesaFramebuffer. 
+ */ +static WMesaFramebuffer wmesa_framebuffer(GLframebuffer *fb) +{ + return (WMesaFramebuffer) fb; +} + + +/** + * Given a GLcontext, return the corresponding WMesaContext. + */ +static WMesaContext wmesa_context(const GLcontext *ctx) +{ + return (WMesaContext) ctx; +} + +/** + * Find the width and height of the window named by hdc. + */ +static void +get_window_size(HDC hdc, GLuint *width, GLuint *height) +{ + if (WindowFromDC(hdc)) { + RECT rect; + GetClientRect(WindowFromDC(hdc), &rect); + *width = rect.right - rect.left; + *height = rect.bottom - rect.top; + } + else { /* Memory context */ + /* From contributed code - use the size of the desktop + * for the size of a memory context (?) */ + *width = GetDeviceCaps(hdc, HORZRES); + *height = GetDeviceCaps(hdc, VERTRES); + } +} + +/** + * Low-level OS/window system memory buffer + */ +struct wm_buffer +{ + struct pipe_buffer base; + boolean userBuffer; /** Is this a user-space buffer? */ + void *data; + void *mapped; +}; + +struct wmesa_surface +{ + struct pipe_surface surface; + + int no_swap; +}; + + +/** Cast wrapper */ +static INLINE struct wmesa_surface * +wmesa_surface(struct pipe_surface *ps) +{ +// assert(0); + return (struct wmesa_surface *) ps; +} + +/** + * Turn the softpipe opaque buffer pointer into a dri_bufmgr opaque + * buffer pointer... 
+ */ +static INLINE struct wm_buffer * +wm_buffer( struct pipe_buffer *buf ) +{ + return (struct wm_buffer *)buf; +} + + + +/* Most callbacks map direcly onto dri_bufmgr operations: + */ +static void * +wm_buffer_map(struct pipe_winsys *pws, struct pipe_buffer *buf, + unsigned flags) +{ + struct wm_buffer *wm_buf = wm_buffer(buf); + wm_buf->mapped = wm_buf->data; + return wm_buf->mapped; +} + +static void +wm_buffer_unmap(struct pipe_winsys *pws, struct pipe_buffer *buf) +{ + struct wm_buffer *wm_buf = wm_buffer(buf); + wm_buf->mapped = NULL; +} + +static void +wm_buffer_destroy(struct pipe_winsys *pws, + struct pipe_buffer *buf) +{ + struct wm_buffer *oldBuf = wm_buffer(buf); + + if (oldBuf->data) { + { + if (!oldBuf->userBuffer) { + align_free(oldBuf->data); + } + } + + oldBuf->data = NULL; + } + + free(oldBuf); +} + + +static void +wm_flush_frontbuffer(struct pipe_winsys *pws, + struct pipe_surface *surf, + void *context_private) +{ + WMesaContext pwc = context_private; + WMesaFramebuffer pwfb = wmesa_lookup_framebuffer(pwc->hDC); + struct wm_buffer *wm_buf; + BITMAPINFO bmi, *pbmi; + + wm_buf = wm_buffer(surf->buffer); + + pbmi = &bmi; + memset(pbmi, 0, sizeof(BITMAPINFO)); + pbmi->bmiHeader.biSize = sizeof(BITMAPINFOHEADER); + pbmi->bmiHeader.biWidth = pwfb->stfb->Base.Width; + pbmi->bmiHeader.biHeight= -((long)pwfb->stfb->Base.Height); + pbmi->bmiHeader.biPlanes = 1; + pbmi->bmiHeader.biBitCount = pwfb->cColorBits; + pbmi->bmiHeader.biCompression = BI_RGB; + pbmi->bmiHeader.biSizeImage = 0; + pbmi->bmiHeader.biXPelsPerMeter = 0; + pbmi->bmiHeader.biYPelsPerMeter = 0; + pbmi->bmiHeader.biClrUsed = 0; + pbmi->bmiHeader.biClrImportant = 0; + + StretchDIBits(pwfb->hDC, 0, 0, pwfb->stfb->Base.Width, pwfb->stfb->Base.Height, 0, 0, pwfb->stfb->Base.Width, pwfb->stfb->Base.Height, wm_buf->data, pbmi, 0, SRCCOPY); +} + + + +static const char * +wm_get_name(struct pipe_winsys *pws) +{ + return "gdi"; +} + +static struct pipe_buffer * +wm_buffer_create(struct 
/**
 * Round n up to the next multiple of 'multiple'.
 *
 * Works for any non-zero multiple, not only powers of two.  A multiple of
 * zero cannot be rounded to, so n is returned unchanged in that case.
 */
static INLINE unsigned
round_up(unsigned n, unsigned multiple)
{
   unsigned rem;

   if (multiple == 0)
      return n;

   rem = n % multiple;
   return rem ? n + (multiple - rem) : n;
}
+ */ +static struct pipe_surface * +wm_surface_alloc(struct pipe_winsys *ws) +{ + struct wmesa_surface *wms = CALLOC_STRUCT(wmesa_surface); + static boolean no_swap = 0; + static boolean firsttime = 1; + + if (firsttime) { + no_swap = getenv("SP_NO_RAST") != NULL; + firsttime = 0; + } + + assert(ws); + + wms->surface.refcount = 1; + wms->surface.winsys = ws; + + wms->no_swap = no_swap; + + return &wms->surface; +} + +static void +wm_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) +{ + struct pipe_surface *surf = *s; + surf->refcount--; + if (surf->refcount == 0) { + if (surf->buffer) + winsys_buffer_reference(winsys, &surf->buffer, NULL); + free(surf); + } + *s = NULL; +} + + +/* + * Fence functions - basically nothing to do, as we don't create any actual + * fence objects. + */ + +static void +wm_fence_reference(struct pipe_winsys *sws, struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ +} + + +static int +wm_fence_signalled(struct pipe_winsys *sws, struct pipe_fence_handle *fence, + unsigned flag) +{ + return 0; +} + + +static int +wm_fence_finish(struct pipe_winsys *sws, struct pipe_fence_handle *fence, + unsigned flag) +{ + return 0; +} + + + +struct pipe_winsys * +wmesa_get_pipe_winsys(GLvisual *visual) +{ + static struct wmesa_pipe_winsys *ws = NULL; + + if (!ws) { + ws = CALLOC_STRUCT(wmesa_pipe_winsys); + + /* Fill in this struct with callbacks that pipe will need to + * communicate with the window system, buffer manager, etc. 
+ */ + ws->base.buffer_create = wm_buffer_create; + ws->base.user_buffer_create = wm_user_buffer_create; + ws->base.buffer_map = wm_buffer_map; + ws->base.buffer_unmap = wm_buffer_unmap; + ws->base.buffer_destroy = wm_buffer_destroy; + + ws->base.surface_alloc = wm_surface_alloc; + ws->base.surface_alloc_storage = wm_surface_alloc_storage; + ws->base.surface_release = wm_surface_release; + + ws->base.fence_reference = wm_fence_reference; + ws->base.fence_signalled = wm_fence_signalled; + ws->base.fence_finish = wm_fence_finish; + + ws->base.flush_frontbuffer = wm_flush_frontbuffer; + ws->base.get_name = wm_get_name; + } + + return &ws->base; +} + + + +/**********************************************************************/ +/***** WMESA Functions *****/ +/**********************************************************************/ + +WMesaContext WMesaCreateContext(HDC hDC, + HPALETTE* Pal, + GLboolean rgb_flag, + GLboolean db_flag, + GLboolean alpha_flag) +{ + WMesaContext c; + struct pipe_winsys *pws; + struct pipe_context *pipe; + struct pipe_screen *screen; + GLint red_bits, green_bits, blue_bits, alpha_bits; + GLvisual *visual; + + (void) Pal; + + /* Indexed mode not supported */ + if (!rgb_flag) + return NULL; + + /* Allocate wmesa context */ + c = CALLOC_STRUCT(wmesa_context); + if (!c) + return NULL; + + c->hDC = hDC; + + /* Get data for visual */ + /* Dealing with this is actually a bit of overkill because Mesa will end + * up treating all color component size requests less than 8 by using + * a single byte per channel. In addition, the interface to the span + * routines passes colors as an entire byte per channel anyway, so there + * is nothing to be saved by telling the visual to be 16 bits if the device + * is 16 bits. That is, Mesa is going to compute colors down to 8 bits per + * channel anyway. + * But we go through the motions here anyway. 
+ */ + c->cColorBits = GetDeviceCaps(c->hDC, BITSPIXEL); + + switch (c->cColorBits) { + case 16: + red_bits = green_bits = blue_bits = 5; + alpha_bits = 0; + break; + default: + red_bits = green_bits = blue_bits = 8; + alpha_bits = 8; + break; + } + /* Create visual based on flags */ + visual = _mesa_create_visual(rgb_flag, + db_flag, /* db_flag */ + GL_FALSE, /* stereo */ + red_bits, green_bits, blue_bits, /* color RGB */ + alpha_flag ? alpha_bits : 0, /* color A */ + 0, /* index bits */ + DEFAULT_SOFTWARE_DEPTH_BITS, /* depth_bits */ + 8, /* stencil_bits */ + 16,16,16, /* accum RGB */ + alpha_flag ? 16 : 0, /* accum A */ + 1); /* num samples */ + + if (!visual) { + _mesa_free(c); + return NULL; + } + + pws = wmesa_get_pipe_winsys(visual); + + screen = softpipe_create_screen(pws); + + if (!screen) { + _mesa_free(c); + return NULL; + } + + pipe = softpipe_create(screen, pws, NULL); + + if (!pipe) { + /* FIXME - free screen */ + _mesa_free(c); + return NULL; + } + + pipe->priv = c; + + c->st = st_create_context(pipe, visual, NULL); + + c->st->ctx->DriverCtx = c; + + return c; +} + + +void WMesaDestroyContext( WMesaContext pwc ) +{ + GLcontext *ctx = pwc->st->ctx; + WMesaFramebuffer pwfb; + GET_CURRENT_CONTEXT(cur_ctx); + + if (cur_ctx == ctx) { + /* unbind current if deleting current context */ + WMesaMakeCurrent(NULL, NULL); + } + + /* clean up frame buffer resources */ + pwfb = wmesa_lookup_framebuffer(pwc->hDC); + if (pwfb) { + wmesa_free_framebuffer(pwc->hDC); + } + + /* Release for device, not memory contexts */ + if (WindowFromDC(pwc->hDC) != NULL) + { + ReleaseDC(WindowFromDC(pwc->hDC), pwc->hDC); + } + + st_destroy_context(pwc->st); + _mesa_free(pwc); +} + + +void WMesaMakeCurrent(WMesaContext c, HDC hdc) +{ + GLuint width = 0, height = 0; + WMesaFramebuffer pwfb; + + { + /* return if already current */ + GET_CURRENT_CONTEXT(ctx); + WMesaContext pwc = wmesa_context(ctx); + if (pwc && c == pwc && pwc->hDC == hdc) + return; + } + + pwfb = 
wmesa_lookup_framebuffer(hdc); + + if (hdc) { + get_window_size(hdc, &width, &height); + } + + /* Lazy creation of framebuffers */ + if (c && !pwfb && (hdc != 0)) { + GLvisual *visual = &c->st->ctx->Visual; + + pwfb = wmesa_new_framebuffer(hdc, visual, width, height); + } + + if (c && pwfb) { + st_make_current(c->st, pwfb->stfb, pwfb->stfb); + + st_resize_framebuffer(pwfb->stfb, width, height); + } + else { + /* Detach */ + st_make_current( NULL, NULL, NULL ); + } +} + + +void WMesaSwapBuffers( HDC hdc ) +{ + struct pipe_surface *surf; + struct wm_buffer *wm_buf; + WMesaFramebuffer pwfb = wmesa_lookup_framebuffer(hdc); + BITMAPINFO bmi, *pbmi; + + if (!pwfb) { + _mesa_problem(NULL, "wmesa: swapbuffers on unknown hdc"); + return; + } + + + /* If we're swapping the buffer associated with the current context + * we have to flush any pending rendering commands first. + */ + st_notify_swapbuffers(pwfb->stfb); + + surf = st_get_framebuffer_surface(pwfb->stfb, ST_SURFACE_BACK_LEFT); + wm_buf = wm_buffer(surf->buffer); + + pbmi = &bmi; + memset(pbmi, 0, sizeof(BITMAPINFO)); + pbmi->bmiHeader.biSize = sizeof(BITMAPINFOHEADER); + pbmi->bmiHeader.biWidth = pwfb->stfb->Base.Width; + pbmi->bmiHeader.biHeight= -((long)pwfb->stfb->Base.Height); + pbmi->bmiHeader.biPlanes = 1; + pbmi->bmiHeader.biBitCount = pwfb->cColorBits; + pbmi->bmiHeader.biCompression = BI_RGB; + pbmi->bmiHeader.biSizeImage = 0; + pbmi->bmiHeader.biXPelsPerMeter = 0; + pbmi->bmiHeader.biYPelsPerMeter = 0; + pbmi->bmiHeader.biClrUsed = 0; + pbmi->bmiHeader.biClrImportant = 0; + + StretchDIBits(pwfb->hDC, 0, 0, pwfb->stfb->Base.Width, pwfb->stfb->Base.Height, 0, 0, pwfb->stfb->Base.Width, pwfb->stfb->Base.Height, wm_buf->data, pbmi, 0, SRCCOPY); + + { + GLuint width = 0, height = 0; + + get_window_size(pwfb->hDC, &width, &height); + + st_resize_framebuffer(pwfb->stfb, width, height); + } +} + +/* This is hopefully a temporary hack to define some needed dispatch + * table entries. 
Hopefully, I'll find a better solution. The + * dispatch table generation scripts ought to be making these dummy + * stubs as well. */ +#if !defined(__MINGW32__) || !defined(GL_NO_STDCALL) +void gl_dispatch_stub_543(void){} +void gl_dispatch_stub_544(void){} +void gl_dispatch_stub_545(void){} +void gl_dispatch_stub_546(void){} +void gl_dispatch_stub_547(void){} +void gl_dispatch_stub_548(void){} +void gl_dispatch_stub_549(void){} +void gl_dispatch_stub_550(void){} +void gl_dispatch_stub_551(void){} +void gl_dispatch_stub_552(void){} +void gl_dispatch_stub_553(void){} +void gl_dispatch_stub_554(void){} +void gl_dispatch_stub_555(void){} +void gl_dispatch_stub_556(void){} +void gl_dispatch_stub_557(void){} +void gl_dispatch_stub_558(void){} +void gl_dispatch_stub_559(void){} +void gl_dispatch_stub_560(void){} +void gl_dispatch_stub_561(void){} +void gl_dispatch_stub_565(void){} +void gl_dispatch_stub_566(void){} +void gl_dispatch_stub_577(void){} +void gl_dispatch_stub_578(void){} +void gl_dispatch_stub_603(void){} +void gl_dispatch_stub_645(void){} +void gl_dispatch_stub_646(void){} +void gl_dispatch_stub_647(void){} +void gl_dispatch_stub_648(void){} +void gl_dispatch_stub_649(void){} +void gl_dispatch_stub_650(void){} +void gl_dispatch_stub_651(void){} +void gl_dispatch_stub_652(void){} +void gl_dispatch_stub_653(void){} +void gl_dispatch_stub_733(void){} +void gl_dispatch_stub_734(void){} +void gl_dispatch_stub_735(void){} +void gl_dispatch_stub_736(void){} +void gl_dispatch_stub_737(void){} +void gl_dispatch_stub_738(void){} +void gl_dispatch_stub_744(void){} +void gl_dispatch_stub_745(void){} +void gl_dispatch_stub_746(void){} +void gl_dispatch_stub_760(void){} +void gl_dispatch_stub_761(void){} +void gl_dispatch_stub_763(void){} +void gl_dispatch_stub_765(void){} +void gl_dispatch_stub_766(void){} +void gl_dispatch_stub_767(void){} +void gl_dispatch_stub_768(void){} + +void gl_dispatch_stub_562(void){} +void gl_dispatch_stub_563(void){} +void 
gl_dispatch_stub_564(void){} +void gl_dispatch_stub_567(void){} +void gl_dispatch_stub_568(void){} +void gl_dispatch_stub_569(void){} +void gl_dispatch_stub_580(void){} +void gl_dispatch_stub_581(void){} +void gl_dispatch_stub_606(void){} +void gl_dispatch_stub_654(void){} +void gl_dispatch_stub_655(void){} +void gl_dispatch_stub_656(void){} +void gl_dispatch_stub_739(void){} +void gl_dispatch_stub_740(void){} +void gl_dispatch_stub_741(void){} +void gl_dispatch_stub_748(void){} +void gl_dispatch_stub_749(void){} +void gl_dispatch_stub_769(void){} +void gl_dispatch_stub_770(void){} +void gl_dispatch_stub_771(void){} +void gl_dispatch_stub_772(void){} +void gl_dispatch_stub_773(void){} + +#endif diff --git a/src/gallium/winsys/gdi/wmesadef.h b/src/gallium/winsys/gdi/wmesadef.h new file mode 100644 index 0000000000..fb8ce30a08 --- /dev/null +++ b/src/gallium/winsys/gdi/wmesadef.h @@ -0,0 +1,40 @@ +#ifndef WMESADEF_H +#define WMESADEF_H +#ifdef __MINGW32__ +#include <windows.h> +#endif +#if 0 +#include "context.h" +#endif +#include "state_tracker/st_context.h" +#include "state_tracker/st_public.h" + + +/** + * The Windows Mesa rendering context, derived from GLcontext. 
+ */ +struct wmesa_context { + struct st_context *st; + HDC hDC; + BYTE cColorBits; +}; + +/** + * Windows framebuffer, derived from gl_framebuffer + */ +struct wmesa_framebuffer +{ + struct st_framebuffer *stfb; + HDC hDC; + int pixelformat; + BYTE cColorBits; + HDC dib_hDC; + HBITMAP hbmDIB; + HBITMAP hOldBitmap; + PBYTE pbPixels; + struct wmesa_framebuffer *next; +}; + +typedef struct wmesa_framebuffer *WMesaFramebuffer; + +#endif /* WMESADEF_H */ diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile new file mode 100644 index 0000000000..11c7632411 --- /dev/null +++ b/src/gallium/winsys/xlib/Makefile @@ -0,0 +1,94 @@ +# src/gallium/winsys/xlib/Makefile + +# This makefile produces a "stand-alone" libGL.so which is based on +# Xlib (no DRI HW acceleration) + + +TOP = ../../../.. +include $(TOP)/configs/current + + +GL_MAJOR = 1 +GL_MINOR = 5 +GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) + + +INCLUDE_DIRS = \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/auxiliary + +XLIB_WINSYS_SOURCES = \ + glxapi.c \ + fakeglx.c \ + xfonts.c \ + xm_api.c \ + xm_winsys.c \ + xm_winsys_aub.c \ + brw_aub.c + +XLIB_WINSYS_OBJECTS = $(XLIB_WINSYS_SOURCES:.c=.o) + + +# Note: CELL_SPU_LIB is only defined for cell configs + +LIBS = \ + $(GALLIUM_DRIVERS) \ + $(TOP)/src/mesa/libglapi.a \ + $(TOP)/src/mesa/libmesa.a \ + $(GALLIUM_AUXILIARIES) \ + $(CELL_SPU_LIB) \ + + +.SUFFIXES : .cpp + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDE_DIRS) $(CXXFLAGS) $< -o $@ + + + +default: $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) + + +# Make the libGL.so library +$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(XLIB_WINSYS_OBJECTS) $(LIBS) + $(TOP)/bin/mklib -o $(GL_LIB) \ + -linker "$(CC)" \ + -major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \ + -install $(TOP)/$(LIB_DIR) \ + $(MKLIB_OPTIONS) $(XLIB_WINSYS_OBJECTS) \ + 
--start-group $(LIBS) --end-group $(GL_LIB_DEPS) + + +depend: $(ALL_SOURCES) + @ echo "running $(MKDEP)" + @ rm -f depend # workaround oops on gutsy?!? + @ touch depend + @ $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(ALL_SOURCES) \ + > /dev/null 2>/dev/null + + +install: default + $(INSTALL) -d $(INSTALL_DIR)/include/GL + $(INSTALL) -d $(INSTALL_DIR)/$(LIB_DIR) + $(INSTALL) -m 644 $(TOP)/include/GL/*.h $(INSTALL_DIR)/include/GL + @if [ -e $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) ]; then \ + $(INSTALL) $(TOP)/$(LIB_DIR)/libGL* $(INSTALL_DIR)/$(LIB_DIR); \ + fi + + +# Emacs tags +tags: + etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h + +clean: + -rm -f *.o + + +include depend diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript new file mode 100644 index 0000000000..3aef3b6ced --- /dev/null +++ b/src/gallium/winsys/xlib/SConscript @@ -0,0 +1,49 @@ +####################################################################### +# SConscript for xlib winsys + +Import('*') + +if env['platform'] == 'linux' \ + and 'mesa' in env['statetrackers'] \ + and ('softpipe' or 'i915simple' or 'trace') in env['drivers'] \ + and not env['dri']: + + env = env.Clone() + + env.Append(CPPPATH = [ + '#/src/mesa', + '#/src/mesa/main', + ]) + + sources = [ + 'glxapi.c', + 'fakeglx.c', + 'xfonts.c', + 'xm_api.c', + 'xm_winsys.c', + ] + + drivers = []; + + if 'softpipe' in env['drivers']: + drivers += [softpipe] + + if 'i965simple' in env['drivers']: + drivers += [i965simple] + sources += [ + 'brw_aub.c', + 'xm_winsys_aub.c', + ] + + if 'trace' in env['drivers']: + env.Append(CPPDEFINES = 'GALLIUM_TRACE') + drivers += [trace] + + # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions + libgl = env.SharedLibrary( + target ='GL', + source = sources, + LIBS = glapi + mesa + drivers + auxiliaries + env['LIBS'], + ) + + env.InstallSharedLibrary(libgl, version=(1, 5)) diff --git a/src/gallium/winsys/xlib/brw_aub.c 
b/src/gallium/winsys/xlib/brw_aub.c new file mode 100644 index 0000000000..9e96efaa53 --- /dev/null +++ b/src/gallium/winsys/xlib/brw_aub.c @@ -0,0 +1,397 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include <stdio.h> +#include <stdlib.h> +#include "brw_aub.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" +#include "pipe/p_debug.h" +#include "util/u_memory.h" + + +struct brw_aubfile { + FILE *file; + unsigned next_free_page; +}; + + +extern char *__progname; + + +struct aub_file_header { + unsigned int instruction_type; + unsigned int pad0:16; + unsigned int minor:8; + unsigned int major:8; + unsigned char application[8*4]; + unsigned int day:8; + unsigned int month:8; + unsigned int year:16; + unsigned int timezone:8; + unsigned int second:8; + unsigned int minute:8; + unsigned int hour:8; + unsigned int comment_length:16; + unsigned int pad1:16; +}; + +struct aub_block_header { + unsigned int instruction_type; + unsigned int operation:8; + unsigned int type:8; + unsigned int address_space:8; + unsigned int pad0:8; + unsigned int general_state_type:8; + unsigned int surface_state_type:8; + unsigned int pad1:16; + unsigned int address; + unsigned int length; +}; + +struct aub_dump_bmp { + unsigned int instruction_type; + unsigned int xmin:16; + unsigned int ymin:16; + unsigned int pitch:16; + unsigned int bpp:8; + unsigned int format:8; + unsigned int xsize:16; + unsigned int ysize:16; + unsigned int addr; + unsigned int unknown; +}; + +enum bh_operation { + BH_COMMENT, + BH_DATA_WRITE, + BH_COMMAND_WRITE, + BH_MMI0_WRITE32, + BH_END_SCENE, + BH_CONFIG_MEMORY_MAP, + BH_MAX_OPERATION +}; + +enum command_write_type { + CW_HWB_RING = 1, + CW_PRIMARY_RING_A, + CW_PRIMARY_RING_B, /* XXX - disagreement with listaub! 
*/ + CW_PRIMARY_RING_C, + CW_MAX_TYPE +}; + +enum memory_map_type { + MM_DEFAULT, + MM_DYNAMIC, + MM_MAX_TYPE +}; + +enum address_space { + ADDR_GTT, + ADDR_LOCAL, + ADDR_MAIN, + ADDR_MAX +}; + + +#define AUB_FILE_HEADER 0xe085000b +#define AUB_BLOCK_HEADER 0xe0c10003 +#define AUB_DUMP_BMP 0xe09e0004 + +/* Registers to control page table + */ +#define PGETBL_CTL 0x2020 +#define PGETBL_ENABLED 0x1 + +#define NR_GTT_ENTRIES 65536 /* 256 mb */ + +#define FAIL \ +do { \ + fprintf(stderr, "failed to write aub data at %s/%d\n", __FUNCTION__, __LINE__); \ + exit(1); \ +} while (0) + + +/* Emit the headers at the top of each aubfile. Initialize the GTT. + * + * Writes the file header, then a BH_MMI0_WRITE32 block that stores + * PGETBL_ENABLED into PGETBL_CTL. A failed write terminates the process + * through FAIL. + */ +static void init_aubfile( FILE *aub_file ) +{ + struct aub_file_header fh; + struct aub_block_header bh; + unsigned int data; + + static int nr; + + nr++; + + /* Emit the aub header: + */ + memset(&fh, 0, sizeof(fh)); + + fh.instruction_type = AUB_FILE_HEADER; + fh.minor = 0x0; + fh.major = 0x7; + /* fh is already zeroed; strncpy stops at the end of __progname instead + * of reading a fixed sizeof(fh.application) bytes from a possibly + * shorter string. + */ + strncpy((char *) fh.application, __progname, sizeof(fh.application)); + fh.day = (nr>>24) & 0xff; + fh.month = 0x0; + fh.year = 0x0; + fh.timezone = 0x0; + fh.second = nr & 0xff; + fh.minute = (nr>>8) & 0xff; + fh.hour = (nr>>16) & 0xff; + fh.comment_length = 0x0; + + /* fwrite returns the number of items written (never negative), so + * compare against the item count to detect a short write. + */ + if (fwrite(&fh, sizeof(fh), 1, aub_file) != 1) + FAIL; + + /* Setup the GTT starting at main memory address zero (!): + */ + memset(&bh, 0, sizeof(bh)); + + bh.instruction_type = AUB_BLOCK_HEADER; + bh.operation = BH_MMI0_WRITE32; + bh.type = 0x0; + bh.address_space = ADDR_GTT; /* ??? 
*/ + bh.general_state_type = 0x0; + bh.surface_state_type = 0x0; + bh.address = PGETBL_CTL; + bh.length = 0x4; + + if (fwrite(&bh, sizeof(bh), 1, aub_file) != 1) + FAIL; + + data = 0x0 | PGETBL_ENABLED; + + if (fwrite(&data, sizeof(data), 1, aub_file) != 1) + FAIL; +} + + +/* Emit a BH_DATA_WRITE block holding one 32-bit page-table entry per + * 4096-byte page of [start_offset, start_offset + size). Each entry is the + * current aubfile->next_free_page with bit 0 set; next_free_page advances + * by 4096 per entry. + */ +static void init_aub_gtt( struct brw_aubfile *aubfile, + unsigned start_offset, + unsigned size ) +{ + FILE *aub_file = aubfile->file; + struct aub_block_header bh; + unsigned int i; + + assert(start_offset + size < NR_GTT_ENTRIES * 4096); + + + memset(&bh, 0, sizeof(bh)); + + bh.instruction_type = AUB_BLOCK_HEADER; + bh.operation = BH_DATA_WRITE; + bh.type = 0x0; + bh.address_space = ADDR_MAIN; + bh.general_state_type = 0x0; + bh.surface_state_type = 0x0; + bh.address = start_offset / 4096 * 4; + bh.length = size / 4096 * 4; + + if (fwrite(&bh, sizeof(bh), 1, aub_file) != 1) + FAIL; + + for (i = 0; i < size / 4096; i++) { + unsigned data = aubfile->next_free_page | 1; + + aubfile->next_free_page += 4096; + + if (fwrite(&data, sizeof(data), 1, aub_file) != 1) + FAIL; + } + +} + +/* Write a block header followed by its payload, then flush the stream. + * sz is rounded up to a multiple of 4 before the payload is written. + */ +static void write_block_header( FILE *aub_file, + struct aub_block_header *bh, + const unsigned *data, + unsigned sz ) +{ + sz = (sz + 3) & ~3; + + if (fwrite(bh, sizeof(*bh), 1, aub_file) != 1) + FAIL; + + /* fwrite returns 0 for a zero-sized item, which is not an error */ + if (sz && fwrite(data, sz, 1, aub_file) != 1) + FAIL; + + fflush(aub_file); +} + + +/* Write one aub_dump_bmp record and flush the stream. */ +static void write_dump_bmp( FILE *aub_file, + struct aub_dump_bmp *db ) +{ + if (fwrite(db, sizeof(*db), 1, aub_file) != 1) + FAIL; + + fflush(aub_file); +} + + + +void brw_aub_gtt_data( struct brw_aubfile *aubfile, + unsigned offset, + const void *data, + unsigned sz, + unsigned type, + unsigned state_type ) +{ + struct aub_block_header bh; + + bh.instruction_type = AUB_BLOCK_HEADER; + bh.operation = BH_DATA_WRITE; + bh.type = type; + bh.address_space = ADDR_GTT; + bh.pad0 = 0; + + if (type == DW_GENERAL_STATE) { + bh.general_state_type = state_type; + bh.surface_state_type = 0; + } + else { + bh.general_state_type = 0; + bh.surface_state_type = 
state_type; + } + + bh.pad1 = 0; + bh.address = offset; + bh.length = sz; + + write_block_header(aubfile->file, &bh, data, sz); +} + + + +void brw_aub_gtt_cmds( struct brw_aubfile *aubfile, + unsigned offset, + const void *data, + unsigned sz ) +{ + struct aub_block_header bh; + unsigned type = CW_PRIMARY_RING_A; + + + bh.instruction_type = AUB_BLOCK_HEADER; + bh.operation = BH_COMMAND_WRITE; + bh.type = type; + bh.address_space = ADDR_GTT; + bh.pad0 = 0; + bh.general_state_type = 0; + bh.surface_state_type = 0; + bh.pad1 = 0; + bh.address = offset; + bh.length = sz; + + write_block_header(aubfile->file, &bh, data, sz); +} + +void brw_aub_dump_bmp( struct brw_aubfile *aubfile, + struct pipe_surface *surface, + unsigned gtt_offset ) +{ + struct aub_dump_bmp db; + unsigned format; + + assert(surface->block.width == 1); + assert(surface->block.height == 1); + + if (surface->block.size == 4) + format = 0x7; + else + format = 0x3; + + db.instruction_type = AUB_DUMP_BMP; + db.xmin = 0; + db.ymin = 0; + db.format = format; + db.bpp = surface->block.size * 8; + db.pitch = surface->stride/surface->block.size; + db.xsize = surface->width; + db.ysize = surface->height; + db.addr = gtt_offset; + db.unknown = /* surface->tiled ? 
0x4 : */ 0x0; + + write_dump_bmp(aubfile->file, &db); +} + + + +/* Create and initialise an aub dump file. + * + * The file name is "<INTEL_AUBFILE><n>.aub" (n = call count modulo 4) when + * the INTEL_AUBFILE environment variable is set, otherwise + * "<__progname>.aub", falling back to "default.aub" if that name does not + * fit in the buffer. Exits the process if the allocation or fopen fails. + */ +struct brw_aubfile *brw_aubfile_create( void ) +{ + struct brw_aubfile *aubfile = CALLOC_STRUCT(brw_aubfile); + char filename[80]; + int val; + static int i = 0; + + if (!aubfile) { + debug_printf("couldn't allocate aubfile\n"); + exit(1); + } + + i++; + + if (getenv("INTEL_AUBFILE")) { + val = snprintf(filename, sizeof(filename), "%s%d.aub", getenv("INTEL_AUBFILE"), i%4); + debug_printf("--> Aub file: %s\n", filename); + aubfile->file = fopen(filename, "w"); + } + else { + val = snprintf(filename, sizeof(filename), "%s.aub", __progname); + /* snprintf returns the untruncated length, so a value equal to + * sizeof(filename) already means the name was cut short. + */ + if (val < 0 || (size_t) val >= sizeof(filename)) + strcpy(filename, "default.aub"); + + debug_printf("--> Aub file: %s\n", filename); + aubfile->file = fopen(filename, "w"); + } + + if (!aubfile->file) { + debug_printf("couldn't open aubfile\n"); + exit(1); + } + + init_aubfile(aubfile->file); + + /* The GTT is located starting at address zero in main memory. Pages + * to populate the gtt start after this point. + */ + aubfile->next_free_page = (NR_GTT_ENTRIES * 4 + 4095) & ~4095; + + /* More or less correspond with all the agp regions mapped by the + * driver: + */ + init_aub_gtt(aubfile, 0, 4096*4); + init_aub_gtt(aubfile, AUB_BUF_START, AUB_BUF_SIZE); + + return aubfile; +} + +/* Close the dump file and free the aubfile object. */ +void brw_aub_destroy( struct brw_aubfile *aubfile ) +{ + fclose(aubfile->file); + FREE(aubfile); +} diff --git a/src/gallium/winsys/xlib/brw_aub.h b/src/gallium/winsys/xlib/brw_aub.h new file mode 100644 index 0000000000..f5c60c7be2 --- /dev/null +++ b/src/gallium/winsys/xlib/brw_aub.h @@ -0,0 +1,114 @@ +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **********************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef BRW_AUB_H +#define BRW_AUB_H + +/* We set up this region, buffers may be allocated here: + */ +#define AUB_BUF_START (4096*4) +#define AUB_BUF_SIZE (8*1024*1024) + +struct intel_context; +struct pipe_surface; + +struct brw_aubfile *brw_aubfile_create( void ); + +void brw_aub_destroy( struct brw_aubfile *aubfile ); + +void brw_aub_gtt_data( struct brw_aubfile *aubfile, + unsigned offset, + const void *data, + unsigned sz, + unsigned type, + unsigned state_type ); + +void brw_aub_gtt_cmds( struct brw_aubfile *aubfile, + unsigned offset, + const void *data, + unsigned sz ); + +void brw_aub_dump_bmp( struct brw_aubfile *aubfile, + struct pipe_surface *surface, + unsigned gtt_offset ); + + +enum data_write_type { + DW_NOTYPE, + DW_BATCH_BUFFER, + DW_BIN_BUFFER, + DW_BIN_POINTER_LIST, + DW_SLOW_STATE_BUFFER, + DW_VERTEX_BUFFER, + DW_2D_MAP, + DW_CUBE_MAP, + DW_INDIRECT_STATE_BUFFER, + DW_VOLUME_MAP, + DW_1D_MAP, + DW_CONSTANT_BUFFER, + DW_CONSTANT_URB_ENTRY, + DW_INDEX_BUFFER, + DW_GENERAL_STATE, + DW_SURFACE_STATE, + DW_MEDIA_OBJECT_INDIRECT_DATA, + DW_MAX_TYPE +}; + +enum data_write_general_state_type { + DWGS_NOTYPE, + DWGS_VERTEX_SHADER_STATE, + DWGS_GEOMETRY_SHADER_STATE , + DWGS_CLIPPER_STATE, + DWGS_STRIPS_FANS_STATE, + DWGS_WINDOWER_IZ_STATE, + DWGS_COLOR_CALC_STATE, + DWGS_CLIPPER_VIEWPORT_STATE, /* was 0x7 */ + DWGS_STRIPS_FANS_VIEWPORT_STATE, + DWGS_COLOR_CALC_VIEWPORT_STATE, /* was 0x9 */ + DWGS_SAMPLER_STATE, + DWGS_KERNEL_INSTRUCTIONS, + DWGS_SCRATCH_SPACE, + DWGS_SAMPLER_DEFAULT_COLOR, + DWGS_INTERFACE_DESCRIPTOR, + DWGS_VLD_STATE, + DWGS_VFE_STATE, + DWGS_MAX_TYPE +}; + +enum data_write_surface_state_type { + DWSS_NOTYPE, + DWSS_BINDING_TABLE_STATE, + DWSS_SURFACE_STATE, + DWSS_MAX_TYPE +}; + + +#endif diff --git a/src/gallium/winsys/xlib/fakeglx.c b/src/gallium/winsys/xlib/fakeglx.c new file mode 
100644 index 0000000000..2c0075e934 --- /dev/null +++ b/src/gallium/winsys/xlib/fakeglx.c @@ -0,0 +1,3212 @@ +/* + * Mesa 3-D graphics library + * Version: 7.1 + * + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +/* + * This is an emulation of the GLX API which allows Mesa/GLX-based programs + * to run on X servers which do not have the real GLX extension. + * + * Thanks to the contributors: + * + * Initial version: Philip Brown (phil@bolthole.com) + * Better glXGetConfig() support: Armin Liebchen (liebchen@asylum.cs.utah.edu) + * Further visual-handling refinements: Wolfram Gloger + * (wmglo@Dent.MED.Uni-Muenchen.DE). + * + * Notes: + * Don't be fooled, stereo isn't supported yet. 
+ */ + + + +#include "glxheader.h" +#include "glxapi.h" +#include "GL/xmesa.h" +#include "context.h" +#include "config.h" +#include "macros.h" +#include "imports.h" +#include "mtypes.h" +#include "version.h" +#include "xfonts.h" +#include "xmesaP.h" +#include "state_tracker/st_context.h" +#include "state_tracker/st_public.h" + + +#ifdef __VMS +#define _mesa_sprintf sprintf +#endif + +/* This indicates the client-side GLX API and GLX encoder version. */ +#define CLIENT_MAJOR_VERSION 1 +#define CLIENT_MINOR_VERSION 4 /* but don't have 1.3's pbuffers, etc yet */ + +/* This indicates the server-side GLX decoder version. + * GLX 1.4 indicates OpenGL 1.3 support + */ +#define SERVER_MAJOR_VERSION 1 +#define SERVER_MINOR_VERSION 4 + +/* This is appended onto the glXGetClient/ServerString version strings. */ +#define MESA_GLX_VERSION "Mesa " MESA_VERSION_STRING + +/* Who implemented this GLX? */ +#define VENDOR "Brian Paul" + +#define EXTENSIONS \ + "GLX_MESA_set_3dfx_mode " \ + "GLX_MESA_copy_sub_buffer " \ + "GLX_MESA_pixmap_colormap " \ + "GLX_MESA_release_buffers " \ + "GLX_ARB_get_proc_address " \ + "GLX_EXT_texture_from_pixmap " \ + "GLX_EXT_visual_info " \ + "GLX_EXT_visual_rating " \ + /*"GLX_SGI_video_sync "*/ \ + "GLX_SGIX_fbconfig " \ + "GLX_SGIX_pbuffer " + +/* + * Our fake GLX context will contain a "real" GLX context and an XMesa context. + * + * Note that a pointer to a __GLXcontext is a pointer to a fake_glx_context, + * and vice versa. + * + * We really just need this structure in order to make the libGL functions + * glXGetCurrentContext(), glXGetCurrentDrawable() and glXGetCurrentDisplay() + * work correctly. + */ +struct fake_glx_context { + __GLXcontext glxContext; /* this MUST be first! 
*/ + XMesaContext xmesaContext; +}; + + + +/**********************************************************************/ +/*** GLX Visual Code ***/ +/**********************************************************************/ + +#define DONT_CARE -1 + + +static XMesaVisual *VisualTable = NULL; +static int NumVisuals = 0; + + +/* + * This struct and some code fragments borrowed + * from Mark Kilgard's GLUT library. + */ +typedef struct _OverlayInfo { + /* Avoid 64-bit portability problems by being careful to use + longs due to the way XGetWindowProperty is specified. Note + that these parameters are passed as CARD32s over X + protocol. */ + unsigned long overlay_visual; + long transparent_type; + long value; + long layer; +} OverlayInfo; + + + +/* Macro to handle c_class vs class field name in XVisualInfo struct */ +#if defined(__cplusplus) || defined(c_plusplus) +#define CLASS c_class +#else +#define CLASS class +#endif + + + +/* + * Test if the given XVisualInfo is usable for Mesa rendering. + */ +static GLboolean +is_usable_visual( XVisualInfo *vinfo ) +{ + switch (vinfo->CLASS) { + case StaticGray: + case GrayScale: + /* Any StaticGray/GrayScale visual works in RGB or CI mode */ + return GL_TRUE; + case StaticColor: + case PseudoColor: + /* Any StaticColor/PseudoColor visual of at least 4 bits */ + if (vinfo->depth>=4) { + return GL_TRUE; + } + else { + return GL_FALSE; + } + case TrueColor: + case DirectColor: + /* Any depth of TrueColor or DirectColor works in RGB mode */ + return GL_TRUE; + default: + /* This should never happen */ + return GL_FALSE; + } +} + + + +/** + * Get an array OverlayInfo records for specified screen. 
+ * \param dpy the display + * \param screen screen number + * \param numOverlays returns number of OverlayInfo records (0 on failure) + * \return pointer to OverlayInfo array, free with XFree(); NULL if the + * property is missing or malformed + */ +static OverlayInfo * +GetOverlayInfo(Display *dpy, int screen, int *numOverlays) +{ + Atom overlayVisualsAtom; + Atom actualType; + Status status; + unsigned char *ovInfo = NULL; + unsigned long sizeData, bytesLeft; + int actualFormat; + + /* every failure path reports zero records */ + *numOverlays = 0; + + /* + * The SERVER_OVERLAY_VISUALS property on the root window contains + * a list of overlay visuals. Get that list now. + */ + overlayVisualsAtom = XInternAtom(dpy,"SERVER_OVERLAY_VISUALS", True); + if (overlayVisualsAtom == None) { + return NULL; + } + + status = XGetWindowProperty(dpy, RootWindow(dpy, screen), + overlayVisualsAtom, 0L, (long) 10000, False, + overlayVisualsAtom, &actualType, &actualFormat, + &sizeData, &bytesLeft, + &ovInfo); + + if (status != Success || actualType != overlayVisualsAtom || + actualFormat != 32 || sizeData < 4) { + /* something went wrong; ovInfo may never have been set by the call, + * so only free it when it holds a real pointer. + */ + if (ovInfo) + XFree((void *) ovInfo); + return NULL; + } + + /* each OverlayInfo record is four 32-bit items */ + *numOverlays = sizeData / 4; + return (OverlayInfo *) ovInfo; +} + + + +/** + * Return the level (overlay, normal, underlay) of a given XVisualInfo. 
+ * Input: dpy - the X display + * vinfo - the XVisualInfo to test + * Return: level of the visual: + * 0 = normal planes + * >0 = overlay planes + * <0 = underlay planes + */ +static int +level_of_visual( Display *dpy, XVisualInfo *vinfo ) +{ + OverlayInfo *overlay_info; + int numOverlaysPerScreen, i; + + overlay_info = GetOverlayInfo(dpy, vinfo->screen, &numOverlaysPerScreen); + if (!overlay_info) { + return 0; + } + + /* search the overlay visual list for the visual ID of interest */ + for (i = 0; i < numOverlaysPerScreen; i++) { + const OverlayInfo *ov = overlay_info + i; + if (ov->overlay_visual == vinfo->visualid) { + /* found the visual */ + if (/*ov->transparent_type==1 &&*/ ov->layer!=0) { + int level = ov->layer; + XFree((void *) overlay_info); + return level; + } + else { + XFree((void *) overlay_info); + return 0; + } + } + } + + /* The visual ID was not found in the overlay list. */ + XFree((void *) overlay_info); + return 0; +} + + + + +/* + * Given an XVisualInfo and RGB, Double, and Depth buffer flags, save the + * configuration in our list of GLX visuals. 
+ */ +static XMesaVisual +save_glx_visual( Display *dpy, XVisualInfo *vinfo, + GLboolean rgbFlag, GLboolean alphaFlag, GLboolean dbFlag, + GLboolean stereoFlag, + GLint depth_size, GLint stencil_size, + GLint accumRedSize, GLint accumGreenSize, + GLint accumBlueSize, GLint accumAlphaSize, + GLint level, GLint numAuxBuffers ) +{ + GLboolean ximageFlag = GL_TRUE; + XMesaVisual xmvis; + GLint i; + GLboolean comparePointers; + + if (dbFlag) { + /* Check if the MESA_BACK_BUFFER env var is set */ + char *backbuffer = _mesa_getenv("MESA_BACK_BUFFER"); + if (backbuffer) { + if (backbuffer[0]=='p' || backbuffer[0]=='P') { + ximageFlag = GL_FALSE; + } + else if (backbuffer[0]=='x' || backbuffer[0]=='X') { + ximageFlag = GL_TRUE; + } + else { + _mesa_warning(NULL, "Mesa: invalid value for MESA_BACK_BUFFER environment variable, using an XImage."); + } + } + } + + if (stereoFlag) { + /* stereo not supported */ + return NULL; + } + + /* Comparing IDs uses less memory but sometimes fails. */ + /* XXX revisit this after 3.0 is finished. 
*/ + if (_mesa_getenv("MESA_GLX_VISUAL_HACK")) + comparePointers = GL_TRUE; + else + comparePointers = GL_FALSE; + + /* Force the visual to have an alpha channel */ + if (rgbFlag && _mesa_getenv("MESA_GLX_FORCE_ALPHA")) + alphaFlag = GL_TRUE; + + /* First check if a matching visual is already in the list */ + for (i=0; i<NumVisuals; i++) { + XMesaVisual v = VisualTable[i]; + if (v->display == dpy + && v->mesa_visual.level == level + && v->mesa_visual.numAuxBuffers == numAuxBuffers + && v->ximage_flag == ximageFlag + && v->mesa_visual.rgbMode == rgbFlag + && v->mesa_visual.doubleBufferMode == dbFlag + && v->mesa_visual.stereoMode == stereoFlag + && (v->mesa_visual.alphaBits > 0) == alphaFlag + && (v->mesa_visual.depthBits >= depth_size || depth_size == 0) + && (v->mesa_visual.stencilBits >= stencil_size || stencil_size == 0) + && (v->mesa_visual.accumRedBits >= accumRedSize || accumRedSize == 0) + && (v->mesa_visual.accumGreenBits >= accumGreenSize || accumGreenSize == 0) + && (v->mesa_visual.accumBlueBits >= accumBlueSize || accumBlueSize == 0) + && (v->mesa_visual.accumAlphaBits >= accumAlphaSize || accumAlphaSize == 0)) { + /* now either compare XVisualInfo pointers or visual IDs */ + if ((!comparePointers && v->visinfo->visualid == vinfo->visualid) + || (comparePointers && v->vishandle == vinfo)) { + return v; + } + } + } + + /* Create a new visual and add it to the list. */ + + xmvis = XMesaCreateVisual( dpy, vinfo, rgbFlag, alphaFlag, dbFlag, + stereoFlag, ximageFlag, + depth_size, stencil_size, + accumRedSize, accumBlueSize, + accumBlueSize, accumAlphaSize, 0, level, + GLX_NONE_EXT ); + if (xmvis) { + /* Save a copy of the pointer now so we can find this visual again + * if we need to search for it in find_glx_visual(). 
+ */ + xmvis->vishandle = vinfo; + /* Allocate more space for additional visual */ + VisualTable = (XMesaVisual *) _mesa_realloc( VisualTable, + sizeof(XMesaVisual) * NumVisuals, + sizeof(XMesaVisual) * (NumVisuals + 1)); + /* add xmvis to the list */ + VisualTable[NumVisuals] = xmvis; + NumVisuals++; + /* XXX minor hack, because XMesaCreateVisual doesn't support an + * aux buffers parameter. + */ + xmvis->mesa_visual.numAuxBuffers = numAuxBuffers; + } + return xmvis; +} + + +/** + * Return the default number of bits for the Z buffer. + * If defined, use the MESA_GLX_DEPTH_BITS env var value. + * Otherwise, use the DEFAULT_SOFTWARE_DEPTH_BITS constant. + * XXX probably do the same thing for stencil, accum, etc. + */ +static GLint +default_depth_bits(void) +{ + int zBits; + const char *zEnv = _mesa_getenv("MESA_GLX_DEPTH_BITS"); + if (zEnv) + zBits = _mesa_atoi(zEnv); + else + zBits = DEFAULT_SOFTWARE_DEPTH_BITS; + return zBits; +} + +static GLint +default_alpha_bits(void) +{ + int aBits; + const char *aEnv = _mesa_getenv("MESA_GLX_ALPHA_BITS"); + if (aEnv) + aBits = _mesa_atoi(aEnv); + else + aBits = 0; + return aBits; +} + +static GLint +default_accum_bits(void) +{ + return 16; +} + + + +/* + * Create a GLX visual from a regular XVisualInfo. + * This is called when Fake GLX is given an XVisualInfo which wasn't + * returned by glXChooseVisual. Since this is the first time we're + * considering this visual we'll take a guess at reasonable values + * for depth buffer size, stencil size, accum size, etc. + * This is the best we can do with a client-side emulation of GLX. 
+ */ +static XMesaVisual +create_glx_visual( Display *dpy, XVisualInfo *visinfo ) +{ + int vislevel; + GLint zBits = 24; /*default_depth_bits();*/ + GLint accBits = default_accum_bits(); + GLboolean alphaFlag = default_alpha_bits() > 0; + + vislevel = level_of_visual( dpy, visinfo ); + if (vislevel) { + /* Configure this visual as a CI, single-buffered overlay */ + return save_glx_visual( dpy, visinfo, + GL_FALSE, /* rgb */ + GL_FALSE, /* alpha */ + GL_FALSE, /* double */ + GL_FALSE, /* stereo */ + 0, /* depth bits */ + 0, /* stencil bits */ + 0,0,0,0, /* accum bits */ + vislevel, /* level */ + 0 /* numAux */ + ); + } + else if (is_usable_visual( visinfo )) { + if (_mesa_getenv("MESA_GLX_FORCE_CI")) { + /* Configure this visual as a COLOR INDEX visual. */ + return save_glx_visual( dpy, visinfo, + GL_FALSE, /* rgb */ + GL_FALSE, /* alpha */ + GL_TRUE, /* double */ + GL_FALSE, /* stereo */ + zBits, + STENCIL_BITS, + 0, 0, 0, 0, /* accum bits */ + 0, /* level */ + 0 /* numAux */ + ); + } + else { + /* Configure this visual as RGB, double-buffered, depth-buffered. */ + /* This is surely wrong for some people's needs but what else */ + /* can be done? They should use glXChooseVisual(). */ + return save_glx_visual( dpy, visinfo, + GL_TRUE, /* rgb */ + alphaFlag, /* alpha */ + GL_TRUE, /* double */ + GL_FALSE, /* stereo */ + zBits, + STENCIL_BITS, + accBits, /* r */ + accBits, /* g */ + accBits, /* b */ + accBits, /* a */ + 0, /* level */ + 0 /* numAux */ + ); + } + } + else { + _mesa_warning(NULL, "Mesa: error in glXCreateContext: bad visual\n"); + return NULL; + } +} + + + +/* + * Find the GLX visual associated with an XVisualInfo. 
+ */ +static XMesaVisual +find_glx_visual( Display *dpy, XVisualInfo *vinfo ) +{ + int i; + + /* try to match visual id */ + for (i=0;i<NumVisuals;i++) { + if (VisualTable[i]->display==dpy + && VisualTable[i]->visinfo->visualid == vinfo->visualid) { + return VisualTable[i]; + } + } + + /* if that fails, try to match pointers */ + for (i=0;i<NumVisuals;i++) { + if (VisualTable[i]->display==dpy && VisualTable[i]->vishandle==vinfo) { + return VisualTable[i]; + } + } + + return NULL; +} + + + +/** + * Return the transparent pixel value for a GLX visual. + * Input: glxvis - the glx_visual + * Return: a pixel value or -1 if no transparent pixel + */ +static int +transparent_pixel( XMesaVisual glxvis ) +{ + Display *dpy = glxvis->display; + XVisualInfo *vinfo = glxvis->visinfo; + OverlayInfo *overlay_info; + int numOverlaysPerScreen, i; + + overlay_info = GetOverlayInfo(dpy, vinfo->screen, &numOverlaysPerScreen); + if (!overlay_info) { + return -1; + } + + for (i = 0; i < numOverlaysPerScreen; i++) { + const OverlayInfo *ov = overlay_info + i; + if (ov->overlay_visual == vinfo->visualid) { + /* found it! */ + if (ov->transparent_type == 0) { + /* type 0 indicates no transparency */ + XFree((void *) overlay_info); + return -1; + } + else { + /* ov->value is the transparent pixel */ + XFree((void *) overlay_info); + return ov->value; + } + } + } + + /* The visual ID was not found in the overlay list. */ + XFree((void *) overlay_info); + return -1; +} + + + +/** + * Try to get an X visual which matches the given arguments. 
+ */ +static XVisualInfo * +get_visual( Display *dpy, int scr, unsigned int depth, int xclass ) +{ + XVisualInfo temp, *vis; + long mask; + int n; + unsigned int default_depth; + int default_class; + + mask = VisualScreenMask | VisualDepthMask | VisualClassMask; + temp.screen = scr; + temp.depth = depth; + temp.CLASS = xclass; + + default_depth = DefaultDepth(dpy,scr); + default_class = DefaultVisual(dpy,scr)->CLASS; + + if (depth==default_depth && xclass==default_class) { + /* try to get root window's visual */ + temp.visualid = DefaultVisual(dpy,scr)->visualid; + mask |= VisualIDMask; + } + + vis = XGetVisualInfo( dpy, mask, &temp, &n ); + + /* In case bits/pixel > 24, make sure color channels are still <=8 bits. + * An SGI Infinite Reality system, for example, can have 30bpp pixels: + * 10 bits per color channel. Mesa's limited to a max of 8 bits/channel. + */ + if (vis && depth > 24 && (xclass==TrueColor || xclass==DirectColor)) { + if (_mesa_bitcount((GLuint) vis->red_mask ) <= 8 && + _mesa_bitcount((GLuint) vis->green_mask) <= 8 && + _mesa_bitcount((GLuint) vis->blue_mask ) <= 8) { + return vis; + } + else { + XFree((void *) vis); + return NULL; + } + } + + return vis; +} + + + +/* + * Retrieve the value of the given environment variable and find + * the X visual which matches it. + * Input: dpy - the display + * screen - the screen number + * varname - the name of the environment variable + * Return: an XVisualInfo pointer to NULL if error. 
+ */ +static XVisualInfo * +get_env_visual(Display *dpy, int scr, const char *varname) +{ + char value[100], type[100]; + int depth, xclass = -1; + XVisualInfo *vis; + + if (!_mesa_getenv( varname )) { + return NULL; + } + + _mesa_strncpy( value, _mesa_getenv(varname), 100 ); + value[99] = 0; + + sscanf( value, "%s %d", type, &depth ); + + if (_mesa_strcmp(type,"TrueColor")==0) xclass = TrueColor; + else if (_mesa_strcmp(type,"DirectColor")==0) xclass = DirectColor; + else if (_mesa_strcmp(type,"PseudoColor")==0) xclass = PseudoColor; + else if (_mesa_strcmp(type,"StaticColor")==0) xclass = StaticColor; + else if (_mesa_strcmp(type,"GrayScale")==0) xclass = GrayScale; + else if (_mesa_strcmp(type,"StaticGray")==0) xclass = StaticGray; + + if (xclass>-1 && depth>0) { + vis = get_visual( dpy, scr, depth, xclass ); + if (vis) { + return vis; + } + } + + _mesa_warning(NULL, "GLX unable to find visual class=%s, depth=%d.", + type, depth); + + return NULL; +} + + + +/* + * Select an X visual which satisfies the RGBA/CI flag and minimum depth. + * Input: dpy, screen - X display and screen number + * rgba - GL_TRUE = RGBA mode, GL_FALSE = CI mode + * min_depth - minimum visual depth + * preferred_class - preferred GLX visual class or DONT_CARE + * Return: pointer to an XVisualInfo or NULL. 
+ */ +static XVisualInfo * +choose_x_visual( Display *dpy, int screen, GLboolean rgba, int min_depth, + int preferred_class ) +{ + XVisualInfo *vis; + int xclass, visclass = 0; + int depth; + + if (rgba) { + Atom hp_cr_maps = XInternAtom(dpy, "_HP_RGB_SMOOTH_MAP_LIST", True); + /* First see if the MESA_RGB_VISUAL env var is defined */ + vis = get_env_visual( dpy, screen, "MESA_RGB_VISUAL" ); + if (vis) { + return vis; + } + /* Otherwise, search for a suitable visual */ + if (preferred_class==DONT_CARE) { + for (xclass=0;xclass<6;xclass++) { + switch (xclass) { + case 0: visclass = TrueColor; break; + case 1: visclass = DirectColor; break; + case 2: visclass = PseudoColor; break; + case 3: visclass = StaticColor; break; + case 4: visclass = GrayScale; break; + case 5: visclass = StaticGray; break; + } + if (min_depth==0) { + /* start with shallowest */ + for (depth=0;depth<=32;depth++) { + if (visclass==TrueColor && depth==8 && !hp_cr_maps) { + /* Special case: try to get 8-bit PseudoColor before */ + /* 8-bit TrueColor */ + vis = get_visual( dpy, screen, 8, PseudoColor ); + if (vis) { + return vis; + } + } + vis = get_visual( dpy, screen, depth, visclass ); + if (vis) { + return vis; + } + } + } + else { + /* start with deepest */ + for (depth=32;depth>=min_depth;depth--) { + if (visclass==TrueColor && depth==8 && !hp_cr_maps) { + /* Special case: try to get 8-bit PseudoColor before */ + /* 8-bit TrueColor */ + vis = get_visual( dpy, screen, 8, PseudoColor ); + if (vis) { + return vis; + } + } + vis = get_visual( dpy, screen, depth, visclass ); + if (vis) { + return vis; + } + } + } + } + } + else { + /* search for a specific visual class */ + switch (preferred_class) { + case GLX_TRUE_COLOR_EXT: visclass = TrueColor; break; + case GLX_DIRECT_COLOR_EXT: visclass = DirectColor; break; + case GLX_PSEUDO_COLOR_EXT: visclass = PseudoColor; break; + case GLX_STATIC_COLOR_EXT: visclass = StaticColor; break; + case GLX_GRAY_SCALE_EXT: visclass = GrayScale; break; + case 
GLX_STATIC_GRAY_EXT: visclass = StaticGray; break; + default: return NULL; + } + if (min_depth==0) { + /* start with shallowest */ + for (depth=0;depth<=32;depth++) { + vis = get_visual( dpy, screen, depth, visclass ); + if (vis) { + return vis; + } + } + } + else { + /* start with deepest */ + for (depth=32;depth>=min_depth;depth--) { + vis = get_visual( dpy, screen, depth, visclass ); + if (vis) { + return vis; + } + } + } + } + } + else { + /* First see if the MESA_CI_VISUAL env var is defined */ + vis = get_env_visual( dpy, screen, "MESA_CI_VISUAL" ); + if (vis) { + return vis; + } + /* Otherwise, search for a suitable visual, starting with shallowest */ + if (preferred_class==DONT_CARE) { + for (xclass=0;xclass<4;xclass++) { + switch (xclass) { + case 0: visclass = PseudoColor; break; + case 1: visclass = StaticColor; break; + case 2: visclass = GrayScale; break; + case 3: visclass = StaticGray; break; + } + /* try 8-bit up through 16-bit */ + for (depth=8;depth<=16;depth++) { + vis = get_visual( dpy, screen, depth, visclass ); + if (vis) { + return vis; + } + } + /* try min_depth up to 8-bit */ + for (depth=min_depth;depth<8;depth++) { + vis = get_visual( dpy, screen, depth, visclass ); + if (vis) { + return vis; + } + } + } + } + else { + /* search for a specific visual class */ + switch (preferred_class) { + case GLX_TRUE_COLOR_EXT: visclass = TrueColor; break; + case GLX_DIRECT_COLOR_EXT: visclass = DirectColor; break; + case GLX_PSEUDO_COLOR_EXT: visclass = PseudoColor; break; + case GLX_STATIC_COLOR_EXT: visclass = StaticColor; break; + case GLX_GRAY_SCALE_EXT: visclass = GrayScale; break; + case GLX_STATIC_GRAY_EXT: visclass = StaticGray; break; + default: return NULL; + } + /* try 8-bit up through 16-bit */ + for (depth=8;depth<=16;depth++) { + vis = get_visual( dpy, screen, depth, visclass ); + if (vis) { + return vis; + } + } + /* try min_depth up to 8-bit */ + for (depth=min_depth;depth<8;depth++) { + vis = get_visual( dpy, screen, depth, visclass 
); + if (vis) { + return vis; + } + } + } + } + + /* didn't find a visual */ + return NULL; +} + + + +/* + * Find the deepest X over/underlay visual of at least min_depth. + * Input: dpy, screen - X display and screen number + * level - the over/underlay level + * trans_type - transparent pixel type: GLX_NONE_EXT, + * GLX_TRANSPARENT_RGB_EXT, GLX_TRANSPARENT_INDEX_EXT, + * or DONT_CARE + * trans_value - transparent pixel value or DONT_CARE + * min_depth - minimum visual depth + * preferred_class - preferred GLX visual class or DONT_CARE + * Return: pointer to an XVisualInfo or NULL. + */ +static XVisualInfo * +choose_x_overlay_visual( Display *dpy, int scr, GLboolean rgbFlag, + int level, int trans_type, int trans_value, + int min_depth, int preferred_class ) +{ + OverlayInfo *overlay_info; + int numOverlaysPerScreen; + int i; + XVisualInfo *deepvis; + int deepest; + + /*DEBUG int tt, tv; */ + + switch (preferred_class) { + case GLX_TRUE_COLOR_EXT: preferred_class = TrueColor; break; + case GLX_DIRECT_COLOR_EXT: preferred_class = DirectColor; break; + case GLX_PSEUDO_COLOR_EXT: preferred_class = PseudoColor; break; + case GLX_STATIC_COLOR_EXT: preferred_class = StaticColor; break; + case GLX_GRAY_SCALE_EXT: preferred_class = GrayScale; break; + case GLX_STATIC_GRAY_EXT: preferred_class = StaticGray; break; + default: preferred_class = DONT_CARE; + } + + overlay_info = GetOverlayInfo(dpy, scr, &numOverlaysPerScreen); + if (!overlay_info) { + return NULL; + } + + /* Search for the deepest overlay which satisifies all criteria. 
*/ + deepest = min_depth; + deepvis = NULL; + + for (i = 0; i < numOverlaysPerScreen; i++) { + const OverlayInfo *ov = overlay_info + i; + XVisualInfo *vislist, vistemplate; + int count; + + if (ov->layer!=level) { + /* failed overlay level criteria */ + continue; + } + if (!(trans_type==DONT_CARE + || (trans_type==GLX_TRANSPARENT_INDEX_EXT + && ov->transparent_type>0) + || (trans_type==GLX_NONE_EXT && ov->transparent_type==0))) { + /* failed transparent pixel type criteria */ + continue; + } + if (trans_value!=DONT_CARE && trans_value!=ov->value) { + /* failed transparent pixel value criteria */ + continue; + } + + /* get XVisualInfo and check the depth */ + vistemplate.visualid = ov->overlay_visual; + vistemplate.screen = scr; + vislist = XGetVisualInfo( dpy, VisualIDMask | VisualScreenMask, + &vistemplate, &count ); + + if (count!=1) { + /* something went wrong */ + continue; + } + if (preferred_class!=DONT_CARE && preferred_class!=vislist->CLASS) { + /* wrong visual class */ + continue; + } + + /* if RGB was requested, make sure we have True/DirectColor */ + if (rgbFlag && vislist->CLASS != TrueColor + && vislist->CLASS != DirectColor) + continue; + + /* if CI was requested, make sure we have a color indexed visual */ + if (!rgbFlag + && (vislist->CLASS == TrueColor || vislist->CLASS == DirectColor)) + continue; + + if (deepvis==NULL || vislist->depth > deepest) { + /* YES! 
found a satisfactory visual */ + if (deepvis) { + XFree( deepvis ); + } + deepest = vislist->depth; + deepvis = vislist; + /* DEBUG tt = ov->transparent_type;*/ + /* DEBUG tv = ov->value; */ + } + } + +/*DEBUG + if (deepvis) { + printf("chose 0x%x: layer=%d depth=%d trans_type=%d trans_value=%d\n", + deepvis->visualid, level, deepvis->depth, tt, tv ); + } +*/ + return deepvis; +} + + +/**********************************************************************/ +/*** Display-related functions ***/ +/**********************************************************************/ + + +/** + * Free all XMesaVisuals which are associated with the given display. + */ +static void +destroy_visuals_on_display(Display *dpy) +{ + int i; + for (i = 0; i < NumVisuals; i++) { + if (VisualTable[i]->display == dpy) { + /* remove this visual */ + int j; + free(VisualTable[i]); + for (j = i; j < NumVisuals - 1; j++) + VisualTable[j] = VisualTable[j + 1]; + NumVisuals--; + } + } +} + + +/** + * Called from XCloseDisplay() to let us free our display-related data. + */ +static int +close_display_callback(Display *dpy, XExtCodes *codes) +{ + destroy_visuals_on_display(dpy); + xmesa_destroy_buffers_on_display(dpy); + return 0; +} + + +/** + * Look for the named extension on given display and return a pointer + * to the _XExtension data, or NULL if extension not found. + */ +static _XExtension * +lookup_extension(Display *dpy, const char *extName) +{ + _XExtension *ext; + for (ext = dpy->ext_procs; ext; ext = ext->next) { + if (ext->name && strcmp(ext->name, extName) == 0) { + return ext; + } + } + return NULL; +} + + +/** + * Whenever we're given a new Display pointer, call this function to + * register our close_display_callback function. 
+ */ +static void +register_with_display(Display *dpy) +{ + const char *extName = "MesaGLX"; + _XExtension *ext; + + ext = lookup_extension(dpy, extName); + if (!ext) { + XExtCodes *c = XAddExtension(dpy); + ext = dpy->ext_procs; /* new extension is at head of list */ + assert(c->extension == ext->codes.extension); + ext->name = _mesa_strdup(extName); + ext->close_display = close_display_callback; + } +} + + +/**********************************************************************/ +/*** Begin Fake GLX API Functions ***/ +/**********************************************************************/ + + +/** + * Helper used by glXChooseVisual and glXChooseFBConfig. + * The fbConfig parameter must be GL_FALSE for the former and GL_TRUE for + * the later. + * In either case, the attribute list is terminated with the value 'None'. + */ +static XMesaVisual +choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) +{ + const GLboolean rgbModeDefault = fbConfig; + const int *parselist; + XVisualInfo *vis; + int min_ci = 0; + int min_red=0, min_green=0, min_blue=0; + GLboolean rgb_flag = rgbModeDefault; + GLboolean alpha_flag = GL_FALSE; + GLboolean double_flag = GL_FALSE; + GLboolean stereo_flag = GL_FALSE; + GLint depth_size = 0; + GLint stencil_size = 0; + GLint accumRedSize = 0; + GLint accumGreenSize = 0; + GLint accumBlueSize = 0; + GLint accumAlphaSize = 0; + int level = 0; + int visual_type = DONT_CARE; + int trans_type = DONT_CARE; + int trans_value = DONT_CARE; + GLint caveat = DONT_CARE; + XMesaVisual xmvis = NULL; + int desiredVisualID = -1; + int numAux = 0; + + parselist = list; + + while (*parselist) { + + switch (*parselist) { + case GLX_USE_GL: + if (fbConfig) { + /* invalid token */ + return NULL; + } + else { + /* skip */ + parselist++; + } + break; + case GLX_BUFFER_SIZE: + parselist++; + min_ci = *parselist++; + break; + case GLX_LEVEL: + parselist++; + level = *parselist++; + break; + case GLX_RGBA: + if (fbConfig) { + /* invalid token 
*/ + return NULL; + } + else { + rgb_flag = GL_TRUE; + parselist++; + } + break; + case GLX_DOUBLEBUFFER: + parselist++; + if (fbConfig) { + double_flag = *parselist++; + } + else { + double_flag = GL_TRUE; + } + break; + case GLX_STEREO: + parselist++; + if (fbConfig) { + stereo_flag = *parselist++; + } + else { + stereo_flag = GL_TRUE; + } + break; + case GLX_AUX_BUFFERS: + parselist++; + numAux = *parselist++; + if (numAux > MAX_AUX_BUFFERS) + return NULL; + break; + case GLX_RED_SIZE: + parselist++; + min_red = *parselist++; + break; + case GLX_GREEN_SIZE: + parselist++; + min_green = *parselist++; + break; + case GLX_BLUE_SIZE: + parselist++; + min_blue = *parselist++; + break; + case GLX_ALPHA_SIZE: + parselist++; + { + GLint size = *parselist++; + alpha_flag = size ? GL_TRUE : GL_FALSE; + } + break; + case GLX_DEPTH_SIZE: + parselist++; + depth_size = *parselist++; + break; + case GLX_STENCIL_SIZE: + parselist++; + stencil_size = *parselist++; + break; + case GLX_ACCUM_RED_SIZE: + parselist++; + { + GLint size = *parselist++; + accumRedSize = MAX2( accumRedSize, size ); + } + break; + case GLX_ACCUM_GREEN_SIZE: + parselist++; + { + GLint size = *parselist++; + accumGreenSize = MAX2( accumGreenSize, size ); + } + break; + case GLX_ACCUM_BLUE_SIZE: + parselist++; + { + GLint size = *parselist++; + accumBlueSize = MAX2( accumBlueSize, size ); + } + break; + case GLX_ACCUM_ALPHA_SIZE: + parselist++; + { + GLint size = *parselist++; + accumAlphaSize = MAX2( accumAlphaSize, size ); + } + break; + + /* + * GLX_EXT_visual_info extension + */ + case GLX_X_VISUAL_TYPE_EXT: + parselist++; + visual_type = *parselist++; + break; + case GLX_TRANSPARENT_TYPE_EXT: + parselist++; + trans_type = *parselist++; + break; + case GLX_TRANSPARENT_INDEX_VALUE_EXT: + parselist++; + trans_value = *parselist++; + break; + case GLX_TRANSPARENT_RED_VALUE_EXT: + case GLX_TRANSPARENT_GREEN_VALUE_EXT: + case GLX_TRANSPARENT_BLUE_VALUE_EXT: + case GLX_TRANSPARENT_ALPHA_VALUE_EXT: + /* ignore 
*/ + parselist++; + parselist++; + break; + + /* + * GLX_EXT_visual_info extension + */ + case GLX_VISUAL_CAVEAT_EXT: + parselist++; + caveat = *parselist++; /* ignored for now */ + break; + + /* + * GLX_ARB_multisample + */ + case GLX_SAMPLE_BUFFERS_ARB: + /* ms not supported */ + return NULL; + case GLX_SAMPLES_ARB: + /* ms not supported */ + return NULL; + + /* + * FBConfig attribs. + */ + case GLX_RENDER_TYPE: + if (!fbConfig) + return NULL; + parselist++; + if (*parselist == GLX_RGBA_BIT) { + rgb_flag = GL_TRUE; + } + else if (*parselist == GLX_COLOR_INDEX_BIT) { + rgb_flag = GL_FALSE; + } + else if (*parselist == 0) { + rgb_flag = GL_TRUE; + } + parselist++; + break; + case GLX_DRAWABLE_TYPE: + if (!fbConfig) + return NULL; + parselist++; + if (*parselist & ~(GLX_WINDOW_BIT | GLX_PIXMAP_BIT | GLX_PBUFFER_BIT)) { + return NULL; /* bad bit */ + } + parselist++; + break; + case GLX_FBCONFIG_ID: + if (!fbConfig) + return NULL; + parselist++; + desiredVisualID = *parselist++; + break; + case GLX_X_RENDERABLE: + if (!fbConfig) + return NULL; + parselist += 2; + /* ignore */ + break; + +#ifdef GLX_EXT_texture_from_pixmap + case GLX_BIND_TO_TEXTURE_RGB_EXT: + parselist++; /*skip*/ + break; + case GLX_BIND_TO_TEXTURE_RGBA_EXT: + parselist++; /*skip*/ + break; + case GLX_BIND_TO_MIPMAP_TEXTURE_EXT: + parselist++; /*skip*/ + break; + case GLX_BIND_TO_TEXTURE_TARGETS_EXT: + parselist++; + if (*parselist & ~(GLX_TEXTURE_1D_BIT_EXT | + GLX_TEXTURE_2D_BIT_EXT | + GLX_TEXTURE_RECTANGLE_BIT_EXT)) { + /* invalid bit */ + return NULL; + } + break; + case GLX_Y_INVERTED_EXT: + parselist++; /*skip*/ + break; +#endif + + case None: + /* end of list */ + break; + + default: + /* undefined attribute */ + _mesa_warning(NULL, "unexpected attrib 0x%x in choose_visual()", + *parselist); + return NULL; + } + } + + (void) caveat; + + /* + * Since we're only simulating the GLX extension this function will never + * find any real GL visuals. 
Instead, all we can do is try to find an RGB + * or CI visual of appropriate depth. Other requested attributes such as + * double buffering, depth buffer, etc. will be associated with the X + * visual and stored in the VisualTable[]. + */ + if (desiredVisualID != -1) { + /* try to get a specific visual, by visualID */ + XVisualInfo temp; + int n; + temp.visualid = desiredVisualID; + temp.screen = screen; + vis = XGetVisualInfo(dpy, VisualIDMask | VisualScreenMask, &temp, &n); + if (vis) { + /* give the visual some useful GLX attributes */ + double_flag = GL_TRUE; + if (vis->depth > 8) + rgb_flag = GL_TRUE; + depth_size = 24; /*default_depth_bits();*/ + stencil_size = STENCIL_BITS; + /* XXX accum??? */ + } + } + else if (level==0) { + /* normal color planes */ + if (rgb_flag) { + /* Get an RGB visual */ + int min_rgb = min_red + min_green + min_blue; + if (min_rgb>1 && min_rgb<8) { + /* a special case to be sure we can get a monochrome visual */ + min_rgb = 1; + } + vis = choose_x_visual( dpy, screen, rgb_flag, min_rgb, visual_type ); + } + else { + /* Get a color index visual */ + vis = choose_x_visual( dpy, screen, rgb_flag, min_ci, visual_type ); + accumRedSize = accumGreenSize = accumBlueSize = accumAlphaSize = 0; + } + } + else { + /* over/underlay planes */ + if (rgb_flag) { + /* rgba overlay */ + int min_rgb = min_red + min_green + min_blue; + if (min_rgb>1 && min_rgb<8) { + /* a special case to be sure we can get a monochrome visual */ + min_rgb = 1; + } + vis = choose_x_overlay_visual( dpy, screen, rgb_flag, level, + trans_type, trans_value, min_rgb, visual_type ); + } + else { + /* color index overlay */ + vis = choose_x_overlay_visual( dpy, screen, rgb_flag, level, + trans_type, trans_value, min_ci, visual_type ); + } + } + + if (vis) { + /* Note: we're not exactly obeying the glXChooseVisual rules here. + * When GLX_DEPTH_SIZE = 1 is specified we're supposed to choose the + * largest depth buffer size, which is 32bits/value. 
Instead, we + * return 16 to maintain performance with earlier versions of Mesa. + */ + if (stencil_size > 0) + depth_size = 24; /* if Z and stencil, always use 24+8 format */ + else if (depth_size > 24) + depth_size = 32; + else if (depth_size > 16) + depth_size = 24; + else if (depth_size > 0) { + depth_size = default_depth_bits(); + } + + if (!alpha_flag) { + alpha_flag = default_alpha_bits() > 0; + } + + /* we only support one size of stencil and accum buffers. */ + if (stencil_size > 0) + stencil_size = STENCIL_BITS; + if (accumRedSize > 0 || accumGreenSize > 0 || accumBlueSize > 0 || + accumAlphaSize > 0) { + accumRedSize = + accumGreenSize = + accumBlueSize = default_accum_bits(); + accumAlphaSize = alpha_flag ? accumRedSize : 0; + } + + xmvis = save_glx_visual( dpy, vis, rgb_flag, alpha_flag, double_flag, + stereo_flag, depth_size, stencil_size, + accumRedSize, accumGreenSize, + accumBlueSize, accumAlphaSize, level, numAux ); + } + + return xmvis; +} + + +static XVisualInfo * +Fake_glXChooseVisual( Display *dpy, int screen, int *list ) +{ + XMesaVisual xmvis; + + /* register ourselves as an extension on this display */ + register_with_display(dpy); + + xmvis = choose_visual(dpy, screen, list, GL_FALSE); + if (xmvis) { +#if 0 + return xmvis->vishandle; +#else + /* create a new vishandle - the cached one may be stale */ + xmvis->vishandle = (XVisualInfo *) _mesa_malloc(sizeof(XVisualInfo)); + if (xmvis->vishandle) { + _mesa_memcpy(xmvis->vishandle, xmvis->visinfo, sizeof(XVisualInfo)); + } + return xmvis->vishandle; +#endif + } + else + return NULL; +} + + +static GLXContext +Fake_glXCreateContext( Display *dpy, XVisualInfo *visinfo, + GLXContext share_list, Bool direct ) +{ + XMesaVisual xmvis; + struct fake_glx_context *glxCtx; + struct fake_glx_context *shareCtx = (struct fake_glx_context *) share_list; + + if (!dpy || !visinfo) + return 0; + + glxCtx = CALLOC_STRUCT(fake_glx_context); + if (!glxCtx) + return 0; + + /* deallocate unused windows/buffers */ 
+#if 0
+   XMesaGarbageCollect();
+#endif
+
+   xmvis = find_glx_visual( dpy, visinfo );
+   if (!xmvis) {
+      /* This visual wasn't found with glXChooseVisual() */
+      xmvis = create_glx_visual( dpy, visinfo );
+      if (!xmvis) {
+         /* unusable visual */
+         _mesa_free(glxCtx);
+         return NULL;
+      }
+   }
+
+   glxCtx->xmesaContext = XMesaCreateContext(xmvis,
+                                   shareCtx ? shareCtx->xmesaContext : NULL);
+   if (!glxCtx->xmesaContext) {
+      _mesa_free(glxCtx);
+      return NULL;
+   }
+
+   glxCtx->glxContext.isDirect = GL_FALSE;
+   glxCtx->glxContext.currentDpy = dpy;
+   glxCtx->glxContext.xid = (XID) glxCtx;  /* self pointer */
+
+   assert((void *) glxCtx == (void *) &(glxCtx->glxContext));
+
+   return (GLXContext) glxCtx;
+}
+
+
+/* XXX these may have to be removed due to thread-safety issues.
+ *
+ * One-entry cache of the context, drawables and XMesaBuffers last handed
+ * to XMesaMakeCurrent2() by Fake_glXMakeContextCurrent().  It is reset to
+ * zero when the current context is released and whenever a context is
+ * destroyed.
+ */
+static GLXContext MakeCurrent_PrevContext = 0;
+static GLXDrawable MakeCurrent_PrevDrawable = 0;
+static GLXDrawable MakeCurrent_PrevReadable = 0;
+static XMesaBuffer MakeCurrent_PrevDrawBuffer = 0;
+static XMesaBuffer MakeCurrent_PrevReadBuffer = 0;
+
+
+/* GLX 1.3 and later
+ *
+ * Make 'ctx' current with 'draw' as the draw buffer and 'read' as the
+ * read buffer.  If XMesaFindBuffer() does not know a drawable, a window
+ * buffer is created for it on the fly.  Passing zero for all three of
+ * ctx/draw/read releases the current context; any other mix of zero and
+ * non-zero arguments is rejected.
+ *
+ * Returns True on success, False on failure.
+ *
+ * NOTE(review): the MakeCurrent_Prev* cache is written before the result
+ * of XMesaMakeCurrent2() is known, so it can describe a binding that
+ * failed.
+ */
+static Bool
+Fake_glXMakeContextCurrent( Display *dpy, GLXDrawable draw,
+                            GLXDrawable read, GLXContext ctx )
+{
+   struct fake_glx_context *glxCtx = (struct fake_glx_context *) ctx;
+   static boolean firsttime = 1, no_rast = 0;
+
+   if (firsttime) {
+      no_rast = getenv("SP_NO_RAST") != NULL;  /* read once, on the first call */
+      firsttime = 0;
+   }
+
+
+   if (ctx && draw && read) {
+      XMesaBuffer drawBuffer, readBuffer;
+      XMesaContext xmctx = glxCtx->xmesaContext;
+
+      /* Find the XMesaBuffer which corresponds to the GLXDrawable 'draw' */
+      if (ctx == MakeCurrent_PrevContext
+          && draw == MakeCurrent_PrevDrawable) {
+         drawBuffer = MakeCurrent_PrevDrawBuffer;
+      }
+      else {
+         drawBuffer = XMesaFindBuffer( dpy, draw );
+      }
+      if (!drawBuffer) {
+         /* drawable must be a new window! */
+         drawBuffer = XMesaCreateWindowBuffer( xmctx->xm_visual, draw );
+         if (!drawBuffer) {
+            /* Out of memory, or context/drawable depth mismatch */
+            return False;
+         }
+#ifdef FX
+         FXcreateContext( xmctx->xm_visual, draw, xmctx, drawBuffer );
+#endif
+      }
+
+      /* Find the XMesaBuffer which corresponds to the GLXDrawable 'read' */
+      if (ctx == MakeCurrent_PrevContext
+          && read == MakeCurrent_PrevReadable) {
+         readBuffer = MakeCurrent_PrevReadBuffer;
+      }
+      else {
+         readBuffer = XMesaFindBuffer( dpy, read );
+      }
+      if (!readBuffer) {
+         /* drawable must be a new window! */
+         readBuffer = XMesaCreateWindowBuffer( xmctx->xm_visual, read );
+         if (!readBuffer) {
+            /* Out of memory, or context/drawable depth mismatch */
+            return False;
+         }
+#ifdef FX
+         FXcreateContext( xmctx->xm_visual, read, xmctx, readBuffer );
+#endif
+      }
+
+      if (no_rast &&
+          MakeCurrent_PrevContext == ctx &&
+          MakeCurrent_PrevDrawable == draw &&
+          MakeCurrent_PrevReadable == read &&
+          MakeCurrent_PrevDrawBuffer == drawBuffer &&
+          MakeCurrent_PrevReadBuffer == readBuffer)
+         return True;  /* SP_NO_RAST set and nothing changed: skip the rebind */
+
+      MakeCurrent_PrevContext = ctx;
+      MakeCurrent_PrevDrawable = draw;
+      MakeCurrent_PrevReadable = read;
+      MakeCurrent_PrevDrawBuffer = drawBuffer;
+      MakeCurrent_PrevReadBuffer = readBuffer;
+
+      /* Now make current! */
+      if (XMesaMakeCurrent2(xmctx, drawBuffer, readBuffer)) {
+         ((__GLXcontext *) ctx)->currentDpy = dpy;
+         ((__GLXcontext *) ctx)->currentDrawable = draw;
+         ((__GLXcontext *) ctx)->currentReadable = read;
+         return True;
+      }
+      else {
+         return False;
+      }
+   }
+   else if (!ctx && !draw && !read) {
+      /* release current context w/out assigning new one. */
+      XMesaMakeCurrent( NULL, NULL );
+      MakeCurrent_PrevContext = 0;
+      MakeCurrent_PrevDrawable = 0;
+      MakeCurrent_PrevReadable = 0;
+      MakeCurrent_PrevDrawBuffer = 0;
+      MakeCurrent_PrevReadBuffer = 0;
+      return True;
+   }
+   else {
+      /* The args must either all be non-zero or all zero.
+       * This is an error.
+       */
+      return False;
+   }
+}
+
+/* GLX 1.0 entry point: the same drawable is used for drawing and reading. */
+static Bool
+Fake_glXMakeCurrent( Display *dpy, GLXDrawable drawable, GLXContext ctx )
+{
+   return Fake_glXMakeContextCurrent( dpy, drawable, drawable, ctx );
+}
+
+/* Wrap 'pixmap' in an XMesa pixmap buffer (colormap argument 0) using the
+ * XMesaVisual for 'visinfo', creating that visual if it is not yet known.
+ * Returns the buffer's drawable, or 0 on failure.
+ */
+static GLXPixmap
+Fake_glXCreateGLXPixmap( Display *dpy, XVisualInfo *visinfo, Pixmap pixmap )
+{
+   XMesaVisual v;
+   XMesaBuffer b;
+
+   v = find_glx_visual( dpy, visinfo );
+   if (!v) {
+      v = create_glx_visual( dpy, visinfo );
+      if (!v) {
+         /* unusable visual */
+         return 0;
+      }
+   }
+
+   b = XMesaCreatePixmapBuffer( v, pixmap, 0 );
+   if (!b) {
+      return 0;
+   }
+   return b->drawable;
+}
+
+
+/*** GLX_MESA_pixmap_colormap ***/
+/* Same as Fake_glXCreateGLXPixmap() but passes 'cmap' on to
+ * XMesaCreatePixmapBuffer().
+ */
+static GLXPixmap
+Fake_glXCreateGLXPixmapMESA( Display *dpy, XVisualInfo *visinfo,
+                             Pixmap pixmap, Colormap cmap )
+{
+   XMesaVisual v;
+   XMesaBuffer b;
+
+   v = find_glx_visual( dpy, visinfo );
+   if (!v) {
+      v = create_glx_visual( dpy, visinfo );
+      if (!v) {
+         /* unusable visual */
+         return 0;
+      }
+   }
+
+   b = XMesaCreatePixmapBuffer( v, pixmap, cmap );
+   if (!b) {
+      return 0;
+   }
+   return b->drawable;
+}
+
+/* Destroy the XMesaBuffer registered for 'pixmap'.  An unknown pixmap is
+ * only reported when the MESA_DEBUG environment variable is set.
+ */
+static void
+Fake_glXDestroyGLXPixmap( Display *dpy, GLXPixmap pixmap )
+{
+   XMesaBuffer b = XMesaFindBuffer(dpy, pixmap);
+   if (b) {
+      XMesaDestroyBuffer(b);
+   }
+   else if (_mesa_getenv("MESA_DEBUG")) {
+      _mesa_warning(NULL, "Mesa: glXDestroyGLXPixmap: invalid pixmap\n");
+   }
+}
+
+/* Copy the state groups selected by 'mask' from 'src' to 'dst'.  If 'src'
+ * is the cached current context it is flushed first.
+ * NOTE(review): neither 'src' nor 'dst' is checked for NULL before use.
+ */
+static void
+Fake_glXCopyContext( Display *dpy, GLXContext src, GLXContext dst,
+                     unsigned long mask )
+{
+   struct fake_glx_context *fakeSrc = (struct fake_glx_context *) src;
+   struct fake_glx_context *fakeDst = (struct fake_glx_context *) dst;
+   XMesaContext xm_src = fakeSrc->xmesaContext;
+   XMesaContext xm_dst = fakeDst->xmesaContext;
+   (void) dpy;
+   if (MakeCurrent_PrevContext == src) {
+      _mesa_Flush();
+   }
+   st_copy_context_state( xm_src->st, xm_dst->st, (GLuint) mask );
+}
+
+
+static Bool
+Fake_glXQueryExtension( Display *dpy, int *errorb, int *event )
+{
+   /* Mesa's GLX isn't really an X extension but we try to act like one.
*/
+   (void) dpy;
+   (void) errorb;
+   (void) event;
+   return True;
+}
+
+
+/* Release any active pointer and keyboard grabs held on 'dpy'. */
+extern void _kw_ungrab_all( Display *dpy );
+void _kw_ungrab_all( Display *dpy )
+{
+   XUngrabPointer( dpy, CurrentTime );
+   XUngrabKeyboard( dpy, CurrentTime );
+}
+
+
+/*
+ * Destroy a context created by one of the Fake_glXCreate*Context()
+ * functions: drop the make-current cache, destroy the XMesa context,
+ * garbage-collect unused buffers and free the wrapper.
+ * A NULL 'ctx' is ignored instead of being dereferenced.
+ */
+static void
+Fake_glXDestroyContext( Display *dpy, GLXContext ctx )
+{
+   struct fake_glx_context *glxCtx = (struct fake_glx_context *) ctx;
+   (void) dpy;
+   if (!glxCtx)
+      return;
+   /* the cache may reference this context or its buffers; invalidate it */
+   MakeCurrent_PrevContext = 0;
+   MakeCurrent_PrevDrawable = 0;
+   MakeCurrent_PrevReadable = 0;
+   MakeCurrent_PrevDrawBuffer = 0;
+   MakeCurrent_PrevReadBuffer = 0;
+   XMesaDestroyContext( glxCtx->xmesaContext );
+   XMesaGarbageCollect();
+   _mesa_free(glxCtx);
+}
+
+
+/* Contexts created here are never direct-rendering contexts. */
+static Bool
+Fake_glXIsDirect( Display *dpy, GLXContext ctx )
+{
+   (void) dpy;
+   (void) ctx;
+   return False;
+}
+
+
+
+/*
+ * Swap the buffers of 'drawable'.  When the SP_NO_RAST environment
+ * variable is set (checked once, on the first call) the swap is skipped.
+ * An unknown drawable is only reported when MESA_DEBUG is set.
+ */
+static void
+Fake_glXSwapBuffers( Display *dpy, GLXDrawable drawable )
+{
+   XMesaBuffer buffer = XMesaFindBuffer( dpy, drawable );
+   static boolean firsttime = 1, no_rast = 0;
+
+   if (firsttime) {
+      no_rast = getenv("SP_NO_RAST") != NULL;
+      firsttime = 0;
+   }
+
+   if (no_rast)
+      return;
+
+   if (buffer) {
+      XMesaSwapBuffers(buffer);
+   }
+   else if (_mesa_getenv("MESA_DEBUG")) {
+      _mesa_warning(NULL, "glXSwapBuffers: invalid drawable 0x%x\n",
+                    (int) drawable);
+   }
+}
+
+
+
+/*** GLX_MESA_copy_sub_buffer ***/
+
+/* Copy the given sub-rectangle of 'drawable' via XMesaCopySubBuffer(). */
+static void
+Fake_glXCopySubBufferMESA( Display *dpy, GLXDrawable drawable,
+                           int x, int y, int width, int height )
+{
+   XMesaBuffer buffer = XMesaFindBuffer( dpy, drawable );
+   if (buffer) {
+      XMesaCopySubBuffer(buffer, x, y, width, height);
+   }
+   else if (_mesa_getenv("MESA_DEBUG")) {
+      _mesa_warning(NULL, "Mesa: glXCopySubBufferMESA: invalid drawable\n");
+   }
+}
+
+
+/* Report the GLX version: the client major number and the smaller of the
+ * client and server minor numbers.  Always returns True.
+ */
+static Bool
+Fake_glXQueryVersion( Display *dpy, int *maj, int *min )
+{
+   (void) dpy;
+   /* Return GLX version, not Mesa version */
+   assert(CLIENT_MAJOR_VERSION == SERVER_MAJOR_VERSION);
+   *maj = CLIENT_MAJOR_VERSION;
+   *min = MIN2( CLIENT_MINOR_VERSION, SERVER_MINOR_VERSION );
+   return True;
+}
+
+
+/*
+ * Query the GLX
attributes of the given XVisualInfo.
+ *
+ * \param xmvis     visual (or FBConfig) being queried; must not be NULL
+ * \param attrib    GLX attribute token
+ * \param value     receives the attribute value
+ * \param fbconfig  GL_TRUE when called for a GLXFBConfig query, GL_FALSE
+ *                  for a plain visual query; some attributes are valid
+ *                  for only one of the two
+ * \return 0 (Success) if the attribute was handled, GLX_BAD_ATTRIBUTE
+ *         otherwise.  For the "undefined" transparent-value attributes
+ *         0 is returned without writing *value.
+ */
+static int
+get_config( XMesaVisual xmvis, int attrib, int *value, GLboolean fbconfig )
+{
+   ASSERT(xmvis);
+   switch(attrib) {
+      case GLX_USE_GL:
+         if (fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         *value = (int) True;
+         return 0;
+      case GLX_BUFFER_SIZE:
+         *value = xmvis->visinfo->depth;
+         return 0;
+      case GLX_LEVEL:
+         *value = xmvis->mesa_visual.level;
+         return 0;
+      case GLX_RGBA:
+         if (fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         if (xmvis->mesa_visual.rgbMode) {
+            *value = True;
+         }
+         else {
+            *value = False;
+         }
+         return 0;
+      case GLX_DOUBLEBUFFER:
+         *value = (int) xmvis->mesa_visual.doubleBufferMode;
+         return 0;
+      case GLX_STEREO:
+         *value = (int) xmvis->mesa_visual.stereoMode;
+         return 0;
+      case GLX_AUX_BUFFERS:
+         *value = xmvis->mesa_visual.numAuxBuffers;
+         return 0;
+      case GLX_RED_SIZE:
+         *value = xmvis->mesa_visual.redBits;
+         return 0;
+      case GLX_GREEN_SIZE:
+         *value = xmvis->mesa_visual.greenBits;
+         return 0;
+      case GLX_BLUE_SIZE:
+         *value = xmvis->mesa_visual.blueBits;
+         return 0;
+      case GLX_ALPHA_SIZE:
+         *value = xmvis->mesa_visual.alphaBits;
+         return 0;
+      case GLX_DEPTH_SIZE:
+         *value = xmvis->mesa_visual.depthBits;
+         return 0;
+      case GLX_STENCIL_SIZE:
+         *value = xmvis->mesa_visual.stencilBits;
+         return 0;
+      case GLX_ACCUM_RED_SIZE:
+         *value = xmvis->mesa_visual.accumRedBits;
+         return 0;
+      case GLX_ACCUM_GREEN_SIZE:
+         *value = xmvis->mesa_visual.accumGreenBits;
+         return 0;
+      case GLX_ACCUM_BLUE_SIZE:
+         *value = xmvis->mesa_visual.accumBlueBits;
+         return 0;
+      case GLX_ACCUM_ALPHA_SIZE:
+         *value = xmvis->mesa_visual.accumAlphaBits;
+         return 0;
+
+      /*
+       * GLX_EXT_visual_info extension
+       */
+      case GLX_X_VISUAL_TYPE_EXT:
+         /* map the X visual class onto the matching GLX token */
+         switch (xmvis->visinfo->CLASS) {
+            case StaticGray:   *value = GLX_STATIC_GRAY_EXT;   return 0;
+            case GrayScale:    *value = GLX_GRAY_SCALE_EXT;    return 0;
+            case StaticColor:  *value = GLX_STATIC_COLOR_EXT;  return 0;
+            case PseudoColor:  *value = GLX_PSEUDO_COLOR_EXT;  return 0;
+            case TrueColor:    *value = GLX_TRUE_COLOR_EXT;    return 0;
+            case DirectColor:  *value = GLX_DIRECT_COLOR_EXT;  return 0;
+         }
+         return 0;
+      case GLX_TRANSPARENT_TYPE_EXT:
+         if (xmvis->mesa_visual.level==0) {
+            /* normal planes */
+            *value = GLX_NONE_EXT;
+         }
+         else if (xmvis->mesa_visual.level>0) {
+            /* overlay */
+            if (xmvis->mesa_visual.rgbMode) {
+               *value = GLX_TRANSPARENT_RGB_EXT;
+            }
+            else {
+               *value = GLX_TRANSPARENT_INDEX_EXT;
+            }
+         }
+         else if (xmvis->mesa_visual.level<0) {
+            /* underlay */
+            *value = GLX_NONE_EXT;
+         }
+         return 0;
+      case GLX_TRANSPARENT_INDEX_VALUE_EXT:
+         {
+            int pixel = transparent_pixel( xmvis );
+            if (pixel>=0) {
+               *value = pixel;
+            }
+            /* else undefined */
+         }
+         return 0;
+      case GLX_TRANSPARENT_RED_VALUE_EXT:
+         /* undefined */
+         return 0;
+      case GLX_TRANSPARENT_GREEN_VALUE_EXT:
+         /* undefined */
+         return 0;
+      case GLX_TRANSPARENT_BLUE_VALUE_EXT:
+         /* undefined */
+         return 0;
+      case GLX_TRANSPARENT_ALPHA_VALUE_EXT:
+         /* undefined */
+         return 0;
+
+      /*
+       * GLX_EXT_visual_rating extension
+       */
+      case GLX_VISUAL_CAVEAT_EXT:
+         /* test for zero, just in case */
+         if (xmvis->mesa_visual.visualRating > 0)
+            *value = xmvis->mesa_visual.visualRating;
+         else
+            *value = GLX_NONE_EXT;
+         return 0;
+
+      /*
+       * GLX_ARB_multisample
+       */
+      case GLX_SAMPLE_BUFFERS_ARB:
+         *value = 0;
+         return 0;
+      case GLX_SAMPLES_ARB:
+         *value = 0;
+         return 0;
+
+      /*
+       * For FBConfigs:
+       */
+      case GLX_SCREEN_EXT:
+         if (!fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         *value = xmvis->visinfo->screen;
+         break;
+      case GLX_DRAWABLE_TYPE: /*SGIX too */
+         if (!fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         *value = GLX_WINDOW_BIT | GLX_PIXMAP_BIT | GLX_PBUFFER_BIT;
+         break;
+      case GLX_RENDER_TYPE_SGIX:
+         if (!fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         if (xmvis->mesa_visual.rgbMode)
+            *value = GLX_RGBA_BIT;
+         else
+            *value = GLX_COLOR_INDEX_BIT;
+         break;
+      case GLX_X_RENDERABLE_SGIX:
+         if (!fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         *value = True; /* XXX really? */
+         break;
+      case GLX_FBCONFIG_ID_SGIX:
+         if (!fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         *value = xmvis->visinfo->visualid;
+         break;
+      case GLX_MAX_PBUFFER_WIDTH:
+         if (!fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         /* XXX or MAX_WIDTH? */
+         *value = DisplayWidth(xmvis->display, xmvis->visinfo->screen);
+         break;
+      case GLX_MAX_PBUFFER_HEIGHT:
+         if (!fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         *value = DisplayHeight(xmvis->display, xmvis->visinfo->screen);
+         break;
+      case GLX_MAX_PBUFFER_PIXELS:
+         if (!fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         *value = DisplayWidth(xmvis->display, xmvis->visinfo->screen) *
+                  DisplayHeight(xmvis->display, xmvis->visinfo->screen);
+         break;
+      case GLX_VISUAL_ID:
+         if (!fbconfig)
+            return GLX_BAD_ATTRIBUTE;
+         *value = xmvis->visinfo->visualid;
+         break;
+
+#ifdef GLX_EXT_texture_from_pixmap
+      case GLX_BIND_TO_TEXTURE_RGB_EXT:
+         *value = True; /*XXX*/
+         break;
+      case GLX_BIND_TO_TEXTURE_RGBA_EXT:
+         /* XXX review */
+         *value = xmvis->mesa_visual.alphaBits > 0 ? True : False;
+         break;
+      case GLX_BIND_TO_MIPMAP_TEXTURE_EXT:
+         *value = True; /*XXX*/
+         break;
+      case GLX_BIND_TO_TEXTURE_TARGETS_EXT:
+         *value = (GLX_TEXTURE_1D_BIT_EXT |
+                   GLX_TEXTURE_2D_BIT_EXT |
+                   GLX_TEXTURE_RECTANGLE_BIT_EXT); /*XXX*/
+         break;
+      case GLX_Y_INVERTED_EXT:
+         *value = True; /*XXX*/
+         break;
+#endif
+
+      default:
+         return GLX_BAD_ATTRIBUTE;
+   }
+   return Success;
+}
+
+
+/*
+ * glXGetConfig(): look up (or create) the XMesaVisual for 'visinfo' and
+ * query it with get_config() in non-FBConfig mode.
+ * If the visual cannot be used for GL, GLX_USE_GL reports False and every
+ * other attribute returns GLX_BAD_VISUAL.
+ */
+static int
+Fake_glXGetConfig( Display *dpy, XVisualInfo *visinfo,
+                   int attrib, int *value )
+{
+   XMesaVisual xmvis;
+   int k;
+   if (!dpy || !visinfo)
+      return GLX_BAD_ATTRIBUTE;
+
+   xmvis = find_glx_visual( dpy, visinfo );
+   if (!xmvis) {
+      /* this visual wasn't obtained with glXChooseVisual */
+      xmvis = create_glx_visual( dpy, visinfo );
+      if (!xmvis) {
+         /* this visual can't be used for GL rendering */
+         if (attrib==GLX_USE_GL) {
+            *value = (int) False;
+            return 0;
+         }
+         else {
+            return GLX_BAD_VISUAL;
+         }
+      }
+   }
+
+   k = get_config(xmvis, attrib, value, GL_FALSE);
+   return k;
+}
+
+
+static void
+Fake_glXWaitGL( void )
+{
+   XMesaContext xmesa = XMesaGetCurrentContext();
+   XMesaFlush( xmesa );  /* implemented as a flush of the current context */
+}
+
+
+/* glXWaitX(): implemented exactly like Fake_glXWaitGL(). */
+static void
+Fake_glXWaitX( void )
+{
+   XMesaContext xmesa = XMesaGetCurrentContext();
+   XMesaFlush( xmesa );
+}
+
+/* Return the extension string advertised to clients.  The leading
+ * GLX_MESA_set_3dfx_mode entry is only kept in FX builds when the
+ * MESA_GLX_FX variable is set and does not start with 'd'.
+ */
+static const char *
+get_extensions( void )
+{
+#ifdef FX
+   const char *fx = _mesa_getenv("MESA_GLX_FX");
+   if (fx && fx[0] != 'd') {
+      return EXTENSIONS;
+   }
+#endif
+   return EXTENSIONS + 23;  /* skip "GLX_MESA_set_3dfx_mode"; presumably 23 is
+                             * that name's 22 characters plus one separator
+                             * -- TODO confirm against EXTENSIONS */
+}
+
+
+
+/* GLX 1.1 and later */
+static const char *
+Fake_glXQueryExtensionsString( Display *dpy, int screen )
+{
+   (void) dpy;
+   (void) screen;
+   return get_extensions();
+}
+
+
+
+/* GLX 1.1 and later
+ * The version string is formatted into a static buffer on every call, so
+ * the returned pointer is shared between callers.  Unknown names yield
+ * NULL.
+ */
+static const char *
+Fake_glXQueryServerString( Display *dpy, int screen, int name )
+{
+   static char version[1000];
+   _mesa_sprintf(version, "%d.%d %s",
+                 SERVER_MAJOR_VERSION, SERVER_MINOR_VERSION, MESA_GLX_VERSION);
+
+   (void) dpy;
+   (void) screen;
+
+   switch (name) {
+      case GLX_EXTENSIONS:
+         return get_extensions();
+      case GLX_VENDOR:
+         return VENDOR;
+      case GLX_VERSION:
+         return version;
+      default:
+         return NULL;
+   }
+}
+
+
+
+/* GLX 1.1 and later
+ * Client-side counterpart of Fake_glXQueryServerString(); it uses the
+ * CLIENT_* version numbers and its own static buffer.
+ */
+static const char *
+Fake_glXGetClientString( Display *dpy, int name )
+{
+   static char version[1000];
+   _mesa_sprintf(version, "%d.%d %s", CLIENT_MAJOR_VERSION,
+                 CLIENT_MINOR_VERSION, MESA_GLX_VERSION);
+
+   (void) dpy;
+
+   switch (name) {
+      case GLX_EXTENSIONS:
+         return get_extensions();
+      case GLX_VENDOR:
+         return VENDOR;
+      case GLX_VERSION:
+         return version;
+      default:
+         return NULL;
+   }
+}
+
+
+
+/*
+ * GLX 1.3 and later
+ */
+
+/* Query one attribute of an FBConfig (an XMesaVisual in disguise).
+ * Returns -1 for NULL arguments, otherwise the result of get_config().
+ */
+static int
+Fake_glXGetFBConfigAttrib( Display *dpy, GLXFBConfig config,
+                           int attribute, int *value )
+{
+   XMesaVisual v = (XMesaVisual) config;
+   (void) dpy;
+   (void) config;
+
+   if (!dpy || !config || !value)
+      return -1;
+
+   return get_config(v, attribute, value, GL_TRUE);
+}
+
+/* Return a malloc'ed array holding one XMesaVisual per X visual of
+ * 'screen'; *nelements receives the count.  Returns NULL when there are
+ * no visuals or the array cannot be allocated.
+ * NOTE(review): the list returned by XGetVisualInfo() is never XFree'd in
+ * this function; whether create_glx_visual() keeps pointers into it is
+ * not visible here -- confirm before freeing it.
+ * NOTE(review): entries are NULL where create_glx_visual() fails.
+ */
+static GLXFBConfig *
+Fake_glXGetFBConfigs( Display *dpy, int screen, int *nelements )
+{
+   XVisualInfo *visuals, visTemplate;
+   const long visMask = VisualScreenMask;
+   int i;
+
+   /* Get list of all X visuals */
+   visTemplate.screen = screen;
+   visuals = XGetVisualInfo(dpy, visMask, &visTemplate, nelements);
+   if (*nelements > 0) {
+      XMesaVisual *results;
+      results = (XMesaVisual *) _mesa_malloc(*nelements * sizeof(XMesaVisual));
+      if (!results) {
+         *nelements = 0;
+         return NULL;
+      }
+      for (i = 0; i < *nelements; i++) {
+         results[i] = create_glx_visual(dpy, visuals + i);
+      }
+      return (GLXFBConfig *) results;
+   }
+   return NULL;
+}
+
+/* With a NULL or empty attribute list every config is returned; otherwise
+ * at most one config, the visual picked by choose_visual(), is returned in
+ * a malloc'ed one-element array.  *nitems receives the count.
+ */
+static GLXFBConfig *
+Fake_glXChooseFBConfig( Display *dpy, int screen,
+                        const int *attribList, int *nitems )
+{
+   XMesaVisual xmvis;
+
+   if (!attribList || !attribList[0]) {
+      /* return list of all configs (per GLX_SGIX_fbconfig spec) */
+      return Fake_glXGetFBConfigs(dpy, screen, nitems);
+   }
+
+   xmvis = choose_visual(dpy, screen, attribList, GL_TRUE);
+   if (xmvis) {
+      GLXFBConfig *config = (GLXFBConfig *) _mesa_malloc(sizeof(XMesaVisual));
+      if (!config) {
+         *nitems = 0;
+         return NULL;
+      }
+      *nitems = 1;
+      config[0] = (GLXFBConfig) xmvis;
+      return (GLXFBConfig *) config;
+   }
+   else {
+      *nitems = 0;
+      return NULL;
+   }
+}
+
+/* Return a freshly malloc'ed copy of the config's XVisualInfo, also stored
+ * in xmvis->vishandle; NULL if an argument is NULL or allocation fails.
+ */
+static XVisualInfo *
+Fake_glXGetVisualFromFBConfig( Display *dpy, GLXFBConfig config )
+{
+   if (dpy && config) {
+      XMesaVisual xmvis = (XMesaVisual) config;
+#if 0
+      return xmvis->vishandle;
+#else
+      /* create a new vishandle - the cached one may be stale */
+      xmvis->vishandle = (XVisualInfo *) _mesa_malloc(sizeof(XVisualInfo));
+      if (xmvis->vishandle) {
+         _mesa_memcpy(xmvis->vishandle, xmvis->visinfo, sizeof(XVisualInfo));
+      }
+      return xmvis->vishandle;
+#endif
+   }
+   else {
+      return NULL;
+   }
+}
+
+/* Create the XMesa window buffer for 'win' and return 'win' itself as the
+ * GLXWindow handle; 0 on failure.
+ */
+static GLXWindow
+Fake_glXCreateWindow( Display *dpy, GLXFBConfig config, Window win,
+                      const int *attribList )
+{
+   XMesaVisual xmvis = (XMesaVisual) config;
+   XMesaBuffer xmbuf;
+   if (!xmvis)
+      return 0;
+
+   xmbuf = XMesaCreateWindowBuffer(xmvis, win);
+   if (!xmbuf)
+      return 0;
+
+#ifdef FX
+   /* XXX this will segfault if actually called */
+
FXcreateContext(xmvis, win, NULL, xmbuf);
+#endif
+
+   (void) dpy;
+   (void) attribList;  /* Ignored in GLX 1.3 */
+
+   return win;  /* A hack for now */
+}
+
+
+/* Destroy the XMesaBuffer associated with 'window'; the X window itself
+ * is left alone.
+ */
+static void
+Fake_glXDestroyWindow( Display *dpy, GLXWindow window )
+{
+   XMesaBuffer b = XMesaFindBuffer(dpy, (XMesaDrawable) window);
+   if (b)
+      XMesaDestroyBuffer(b);
+   /* don't destroy X window */
+}
+
+
+/* XXX untested */
+/*
+ * glXCreatePixmap(): wrap 'pixmap' in an XMesa buffer for 'config'.
+ *
+ * 'attribList' may be NULL or a zero-terminated list of
+ * GLX_EXT_texture_from_pixmap (attribute, value) pairs.  Any unknown
+ * attribute or unsupported value makes the call fail.  When a texture
+ * format, target or mipmap flag is requested, the config must advertise
+ * support for it and a texture-capable pixmap buffer is created.
+ *
+ * Returns 'pixmap' on success, 0 on failure.
+ */
+static GLXPixmap
+Fake_glXCreatePixmap( Display *dpy, GLXFBConfig config, Pixmap pixmap,
+                      const int *attribList )
+{
+   XMesaVisual v = (XMesaVisual) config;
+   XMesaBuffer b;
+   const int *attr;
+   int target = 0, format = 0, mipmap = 0;
+   int value;
+
+   if (!dpy || !config || !pixmap)
+      return 0;
+
+   /* a NULL list is legal and means "no attributes" */
+   for (attr = attribList; attr && *attr; attr++) {
+      switch (*attr) {
+      case GLX_TEXTURE_FORMAT_EXT:
+         attr++;
+         switch (*attr) {
+         case GLX_TEXTURE_FORMAT_NONE_EXT:
+         case GLX_TEXTURE_FORMAT_RGB_EXT:
+         case GLX_TEXTURE_FORMAT_RGBA_EXT:
+            format = *attr;
+            break;
+         default:
+            /* error */
+            return 0;
+         }
+         break;
+      case GLX_TEXTURE_TARGET_EXT:
+         attr++;
+         switch (*attr) {
+         case GLX_TEXTURE_1D_EXT:
+         case GLX_TEXTURE_2D_EXT:
+         case GLX_TEXTURE_RECTANGLE_EXT:
+            target = *attr;
+            break;
+         default:
+            /* error */
+            return 0;
+         }
+         break;
+      case GLX_MIPMAP_TEXTURE_EXT:
+         attr++;
+         if (*attr)
+            mipmap = 1;
+         break;
+      default:
+         /* error */
+         return 0;
+      }
+   }
+
+   /* the config must be able to bind the requested texture format ... */
+   if (format == GLX_TEXTURE_FORMAT_RGB_EXT) {
+      if (get_config(v, GLX_BIND_TO_TEXTURE_RGB_EXT,
+                     &value, GL_TRUE) != Success
+          || !value) {
+         return 0; /* error! */
+      }
+   }
+   else if (format == GLX_TEXTURE_FORMAT_RGBA_EXT) {
+      if (get_config(v, GLX_BIND_TO_TEXTURE_RGBA_EXT,
+                     &value, GL_TRUE) != Success
+          || !value) {
+         return 0; /* error! */
+      }
+   }
+   /* ... mipmapping ... */
+   if (mipmap) {
+      if (get_config(v, GLX_BIND_TO_MIPMAP_TEXTURE_EXT,
+                     &value, GL_TRUE) != Success
+          || !value) {
+         return 0; /* error! */
+      }
+   }
+   /* ... and the requested texture target */
+   if (target == GLX_TEXTURE_1D_EXT) {
+      if (get_config(v, GLX_BIND_TO_TEXTURE_TARGETS_EXT,
+                     &value, GL_TRUE) != Success
+          || (value & GLX_TEXTURE_1D_BIT_EXT) == 0) {
+         return 0; /* error! */
+      }
+   }
+   else if (target == GLX_TEXTURE_2D_EXT) {
+      if (get_config(v, GLX_BIND_TO_TEXTURE_TARGETS_EXT,
+                     &value, GL_TRUE) != Success
+          || (value & GLX_TEXTURE_2D_BIT_EXT) == 0) {
+         return 0; /* error! */
+      }
+   }
+   else if (target == GLX_TEXTURE_RECTANGLE_EXT) {
+      if (get_config(v, GLX_BIND_TO_TEXTURE_TARGETS_EXT,
+                     &value, GL_TRUE) != Success
+          || (value & GLX_TEXTURE_RECTANGLE_BIT_EXT) == 0) {
+         return 0; /* error! */
+      }
+   }
+
+   if (format || target || mipmap) {
+      /* texture from pixmap */
+      b = XMesaCreatePixmapTextureBuffer(v, pixmap, 0, format, target, mipmap);
+   }
+   else {
+      b = XMesaCreatePixmapBuffer( v, pixmap, 0 );
+   }
+   if (!b) {
+      return 0;
+   }
+
+   return pixmap;
+}
+
+
+/* Destroy the XMesaBuffer associated with 'pixmap'; the X pixmap itself
+ * is left alone.
+ */
+static void
+Fake_glXDestroyPixmap( Display *dpy, GLXPixmap pixmap )
+{
+   XMesaBuffer b = XMesaFindBuffer(dpy, (XMesaDrawable)pixmap);
+   if (b)
+      XMesaDestroyBuffer(b);
+   /* don't destroy X pixmap */
+}
+
+
+/*
+ * glXCreatePbuffer(): create a pbuffer of the size given by the
+ * GLX_PBUFFER_WIDTH / GLX_PBUFFER_HEIGHT attributes.
+ * 'attribList' may be NULL or a zero-terminated (attribute, value) list;
+ * GLX_PRESERVED_CONTENTS and GLX_LARGEST_PBUFFER are parsed but ignored,
+ * and any other attribute fails the call.
+ * Returns the new buffer's drawable, or 0 on error (including a missing
+ * or zero width/height).
+ */
+static GLXPbuffer
+Fake_glXCreatePbuffer( Display *dpy, GLXFBConfig config,
+                       const int *attribList )
+{
+   XMesaVisual xmvis = (XMesaVisual) config;
+   XMesaBuffer xmbuf;
+   const int *attrib;
+   int width = 0, height = 0;
+   GLboolean useLargest = GL_FALSE, preserveContents = GL_FALSE;
+
+   (void) dpy;
+
+   /* guard against a NULL list, as Fake_glXCreateGLXPbufferSGIX() does */
+   for (attrib = attribList; attrib && *attrib; attrib++) {
+      switch (*attrib) {
+         case GLX_PBUFFER_WIDTH:
+            attrib++;
+            width = *attrib;
+            break;
+         case GLX_PBUFFER_HEIGHT:
+            attrib++;
+            height = *attrib;
+            break;
+         case GLX_PRESERVED_CONTENTS:
+            attrib++;
+            preserveContents = *attrib; /* ignored */
+            break;
+         case GLX_LARGEST_PBUFFER:
+            attrib++;
+            useLargest = *attrib; /* ignored */
+            break;
+         default:
+            return 0;
+      }
+   }
+
+   /* not used at this time */
+   (void) useLargest;
+   (void) preserveContents;
+
+   if (width == 0 || height == 0)
+      return 0;
+
+   xmbuf = XMesaCreatePBuffer( xmvis, 0, width,
height);
+   /* A GLXPbuffer handle must be an X Drawable because that's what
+    * glXMakeCurrent takes.
+    */
+   if (xmbuf)
+      return (GLXPbuffer) xmbuf->drawable;
+   else
+      return 0;
+}
+
+
+/* Destroy the XMesaBuffer behind 'pbuf'; unknown handles are ignored. */
+static void
+Fake_glXDestroyPbuffer( Display *dpy, GLXPbuffer pbuf )
+{
+   XMesaBuffer b = XMesaFindBuffer(dpy, pbuf);
+   if (b) {
+      XMesaDestroyBuffer(b);
+   }
+}
+
+
+/*
+ * glXQueryDrawable(): report one attribute of the buffer behind 'draw'.
+ * *value is left untouched when the drawable is unknown or the attribute
+ * is not recognised.
+ */
+static void
+Fake_glXQueryDrawable( Display *dpy, GLXDrawable draw, int attribute,
+                       unsigned int *value )
+{
+   XMesaBuffer xmbuf = XMesaFindBuffer(dpy, draw);
+   if (!xmbuf)
+      return;
+
+   switch (attribute) {
+      case GLX_WIDTH:
+         *value = xmesa_buffer_width(xmbuf);
+         break;
+      case GLX_HEIGHT:
+         /* was xmesa_buffer_width(): height queries returned the width */
+         *value = xmesa_buffer_height(xmbuf);
+         break;
+      case GLX_PRESERVED_CONTENTS:
+         *value = True;
+         break;
+      case GLX_LARGEST_PBUFFER:
+         *value = xmesa_buffer_width(xmbuf) * xmesa_buffer_height(xmbuf);
+         break;
+      case GLX_FBCONFIG_ID:
+         *value = xmbuf->xm_visual->visinfo->visualid;
+         break;
+#ifdef GLX_EXT_texture_from_pixmap
+      case GLX_TEXTURE_FORMAT_EXT:
+         *value = xmbuf->TextureFormat;
+         break;
+      case GLX_TEXTURE_TARGET_EXT:
+         *value = xmbuf->TextureTarget;
+         break;
+      case GLX_MIPMAP_TEXTURE_EXT:
+         *value = xmbuf->TextureMipmap;
+         break;
+#endif
+
+      default:
+         return; /* raise BadValue error */
+   }
+}
+
+
+/*
+ * glXCreateNewContext(): create a context for FBConfig 'config',
+ * optionally sharing state with 'shareList'.  'renderType' must be
+ * GLX_RGBA_TYPE or GLX_COLOR_INDEX_TYPE; 'direct' is not used.
+ * Returns the new context, or 0/NULL on failure.
+ */
+static GLXContext
+Fake_glXCreateNewContext( Display *dpy, GLXFBConfig config,
+                          int renderType, GLXContext shareList, Bool direct )
+{
+   struct fake_glx_context *glxCtx;
+   struct fake_glx_context *shareCtx = (struct fake_glx_context *) shareList;
+   XMesaVisual xmvis = (XMesaVisual) config;
+
+   if (!dpy || !config ||
+       (renderType != GLX_RGBA_TYPE && renderType != GLX_COLOR_INDEX_TYPE))
+      return 0;
+
+   glxCtx = CALLOC_STRUCT(fake_glx_context);
+   if (!glxCtx)
+      return 0;
+
+   /* deallocate unused windows/buffers */
+   XMesaGarbageCollect();
+
+   glxCtx->xmesaContext = XMesaCreateContext(xmvis,
+                                   shareCtx ? shareCtx->xmesaContext : NULL);
+   if (!glxCtx->xmesaContext) {
+      _mesa_free(glxCtx);
+      return NULL;
+   }
+
+   glxCtx->glxContext.isDirect = GL_FALSE;
+   glxCtx->glxContext.currentDpy = dpy;
+   glxCtx->glxContext.xid = (XID) glxCtx;  /* self pointer */
+
+   assert((void *) glxCtx == (void *) &(glxCtx->glxContext));
+
+   return (GLXContext) glxCtx;
+}
+
+
+/*
+ * glXQueryContext(): report GLX_FBCONFIG_ID, GLX_RENDER_TYPE or
+ * GLX_SCREEN for 'ctx'.
+ * GLX_RENDER_TYPE yields a GLX_*_TYPE value, the same kind of value that
+ * Fake_glXCreateNewContext() accepts, not a GLX_*_BIT mask.
+ * Returns 0 (Success) or GLX_BAD_ATTRIBUTE.
+ */
+static int
+Fake_glXQueryContext( Display *dpy, GLXContext ctx, int attribute, int *value )
+{
+   struct fake_glx_context *glxCtx = (struct fake_glx_context *) ctx;
+   XMesaContext xmctx = glxCtx->xmesaContext;
+
+   (void) dpy;
+   (void) ctx;
+
+   switch (attribute) {
+   case GLX_FBCONFIG_ID:
+      *value = xmctx->xm_visual->visinfo->visualid;
+      break;
+   case GLX_RENDER_TYPE:
+      if (xmctx->xm_visual->mesa_visual.rgbMode)
+         *value = GLX_RGBA_TYPE;
+      else
+         *value = GLX_COLOR_INDEX_TYPE;
+      break;
+   case GLX_SCREEN:
+      /* report the screen of the context's visual instead of always 0 */
+      *value = xmctx->xm_visual->visinfo->screen;
+      break;
+   default:
+      return GLX_BAD_ATTRIBUTE;
+   }
+   return 0;
+}
+
+
+/* Remember the event mask selected for 'drawable' (unknown drawables are
+ * ignored).
+ */
+static void
+Fake_glXSelectEvent( Display *dpy, GLXDrawable drawable, unsigned long mask )
+{
+   XMesaBuffer xmbuf = XMesaFindBuffer(dpy, drawable);
+   if (xmbuf)
+      xmbuf->selectedEvents = mask;
+}
+
+
+/* Return the event mask stored for 'drawable', or 0 if it is unknown. */
+static void
+Fake_glXGetSelectedEvent( Display *dpy, GLXDrawable drawable,
+                          unsigned long *mask )
+{
+   XMesaBuffer xmbuf = XMesaFindBuffer(dpy, drawable);
+   if (xmbuf)
+      *mask = xmbuf->selectedEvents;
+   else
+      *mask = 0;
+}
+
+
+
+/*** GLX_SGI_swap_control ***/
+
+/* Stub: the interval is ignored and 0 is returned. */
+static int
+Fake_glXSwapIntervalSGI(int interval)
+{
+   (void) interval;
+   return 0;
+}
+
+
+
+/*** GLX_SGI_video_sync ***/
+
+/* fake frame counter, advanced by the two functions below */
+static unsigned int FrameCounter = 0;
+
+static int
+Fake_glXGetVideoSyncSGI(unsigned int *count)
+{
+   /* this is a bogus implementation */
+   *count = FrameCounter++;
+   return 0;
+}
+
+/*
+ * Advance the fake frame counter to the next value congruent to
+ * 'remainder' modulo 'divisor' and return it in *count.
+ * GLX_BAD_VALUE is returned for divisor <= 0, remainder < 0 or
+ * remainder >= divisor; the last case could never be satisfied and used
+ * to spin forever.
+ */
+static int
+Fake_glXWaitVideoSyncSGI(int divisor, int remainder, unsigned int *count)
+{
+   if (divisor <= 0 || remainder < 0 || remainder >= divisor)
+      return GLX_BAD_VALUE;
+   /* this is a bogus implementation */
+   FrameCounter++;
+   while (FrameCounter % (unsigned int) divisor != (unsigned int) remainder)
+      FrameCounter++;
+   *count = FrameCounter;
+   return 0;
+}
+
+
+
+/*** GLX_SGI_make_current_read ***/
+/* Forwards to Fake_glXMakeContextCurrent() with the same arguments. */
+static Bool
+Fake_glXMakeCurrentReadSGI(Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx)
+{
+   return Fake_glXMakeContextCurrent( dpy, draw, read, ctx );
+}
+
+/* not used
+static GLXDrawable
+Fake_glXGetCurrentReadDrawableSGI(void)
+{
+   return 0;
+}
+*/
+
+
+/*** GLX_SGIX_video_source (stubs, only built when _VL_H is defined) ***/
+#if defined(_VL_H)
+
+static GLXVideoSourceSGIX
+Fake_glXCreateGLXVideoSourceSGIX(Display *dpy, int screen, VLServer server, VLPath path, int nodeClass, VLNode drainNode)
+{
+   (void) dpy;
+   (void) screen;
+   (void) server;
+   (void) path;
+   (void) nodeClass;
+   (void) drainNode;
+   return 0;  /* stub: no video source is created */
+}
+
+static void
+Fake_glXDestroyGLXVideoSourceSGIX(Display *dpy, GLXVideoSourceSGIX src)
+{
+   (void) dpy;
+   (void) src;
+}
+
+#endif
+
+
+/*** GLX_EXT_import_context (stubs: arguments ignored, 0 returned) ***/
+
+static void
+Fake_glXFreeContextEXT(Display *dpy, GLXContext context)
+{
+   (void) dpy;
+   (void) context;
+}
+
+static GLXContextID
+Fake_glXGetContextIDEXT(const GLXContext context)
+{
+   (void) context;
+   return 0;
+}
+
+static GLXContext
+Fake_glXImportContextEXT(Display *dpy, GLXContextID contextID)
+{
+   (void) dpy;
+   (void) contextID;
+   return 0;
+}
+
+static int
+Fake_glXQueryContextInfoEXT(Display *dpy, GLXContext context, int attribute, int *value)
+{
+   (void) dpy;
+   (void) context;
+   (void) attribute;
+   (void) value;
+   return 0;  /* stub: *value is not written */
+}
+
+
+
+/*** GLX_SGIX_fbconfig ***/
+/* Thin wrapper around the GLX 1.3 Fake_glXGetFBConfigAttrib(). */
+static int
+Fake_glXGetFBConfigAttribSGIX(Display *dpy, GLXFBConfigSGIX config, int attribute, int *value)
+{
+   return Fake_glXGetFBConfigAttrib(dpy, config, attribute, value);
+}
+
+static GLXFBConfigSGIX *
+Fake_glXChooseFBConfigSGIX(Display *dpy, int screen, int *attrib_list, int *nelements)
+{
+   return (GLXFBConfig *) Fake_glXChooseFBConfig(dpy, screen, attrib_list, nelements);
+}
+
+
+static GLXPixmap
+Fake_glXCreateGLXPixmapWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, Pixmap pixmap)
+{
+   XMesaVisual xmvis = (XMesaVisual) config;
+
XMesaBuffer xmbuf = XMesaCreatePixmapBuffer(xmvis, pixmap, 0);
+   /* creation can fail; report 0 as Fake_glXCreateGLXPixmap() does */
+   if (!xmbuf)
+      return 0;
+   return xmbuf->drawable; /* need to return an X ID */
+}
+
+
+/*
+ * GLX_SGIX_fbconfig context creation: same steps as
+ * Fake_glXCreateNewContext(), but without validating the arguments.
+ * 'render_type' and 'direct' are not used.
+ * Returns the new context, or 0/NULL on failure.
+ */
+static GLXContext
+Fake_glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, int render_type, GLXContext share_list, Bool direct)
+{
+   XMesaVisual xmvis = (XMesaVisual) config;
+   struct fake_glx_context *glxCtx;
+   struct fake_glx_context *shareCtx = (struct fake_glx_context *) share_list;
+
+   glxCtx = CALLOC_STRUCT(fake_glx_context);
+   if (!glxCtx)
+      return 0;
+
+   /* deallocate unused windows/buffers */
+   XMesaGarbageCollect();
+
+   glxCtx->xmesaContext = XMesaCreateContext(xmvis,
+                                   shareCtx ? shareCtx->xmesaContext : NULL);
+   if (!glxCtx->xmesaContext) {
+      _mesa_free(glxCtx);
+      return NULL;
+   }
+
+   glxCtx->glxContext.isDirect = GL_FALSE;
+   glxCtx->glxContext.currentDpy = dpy;
+   glxCtx->glxContext.xid = (XID) glxCtx;  /* self pointer */
+
+   assert((void *) glxCtx == (void *) &(glxCtx->glxContext));
+
+   return (GLXContext) glxCtx;
+}
+
+
+/* Thin wrapper around the GLX 1.3 Fake_glXGetVisualFromFBConfig(). */
+static XVisualInfo *
+Fake_glXGetVisualFromFBConfigSGIX(Display *dpy, GLXFBConfigSGIX config)
+{
+   return Fake_glXGetVisualFromFBConfig(dpy, config);
+}
+
+
+/* Return the XMesaVisual for 'vis' as an FBConfig, creating it if it is
+ * not yet known.  The result is whatever create_glx_visual() returns when
+ * creation is needed.
+ */
+static GLXFBConfigSGIX
+Fake_glXGetFBConfigFromVisualSGIX(Display *dpy, XVisualInfo *vis)
+{
+   XMesaVisual xmvis = find_glx_visual(dpy, vis);
+   if (!xmvis) {
+      /* This visual wasn't found with glXChooseVisual() */
+      xmvis = create_glx_visual(dpy, vis);
+   }
+
+   return (GLXFBConfigSGIX) xmvis;
+}
+
+
+
+/*** GLX_SGIX_pbuffer ***/
+
+/*
+ * Create a width x height pbuffer.  'attribList' may be NULL or a
+ * zero-terminated (attribute, value) list; both recognised attributes
+ * are parsed but ignored, and any other attribute fails the call.
+ * Returns the buffer's drawable, or 0 on failure.
+ */
+static GLXPbufferSGIX
+Fake_glXCreateGLXPbufferSGIX(Display *dpy, GLXFBConfigSGIX config,
+                             unsigned int width, unsigned int height,
+                             int *attribList)
+{
+   XMesaVisual xmvis = (XMesaVisual) config;
+   XMesaBuffer xmbuf;
+   const int *attrib;
+   GLboolean useLargest = GL_FALSE, preserveContents = GL_FALSE;
+
+   (void) dpy;
+
+   for (attrib = attribList; attrib && *attrib; attrib++) {
+      switch (*attrib) {
+         case GLX_PRESERVED_CONTENTS_SGIX:
+            attrib++;
+            preserveContents = *attrib; /* ignored */
+            break;
+         case GLX_LARGEST_PBUFFER_SGIX:
+            attrib++;
+            useLargest = *attrib; /* ignored */
+            break;
+         default:
+            return 0;
+      }
+   }
+
+   /* not used at this time */
+   (void) useLargest;
+   (void) preserveContents;
+
+   xmbuf = XMesaCreatePBuffer( xmvis, 0, width, height);
+   /* creation can fail; report 0 as Fake_glXCreatePbuffer() does */
+   if (!xmbuf)
+      return 0;
+   /* A GLXPbuffer handle must be an X Drawable because that's what
+    * glXMakeCurrent takes.
+    */
+   return (GLXPbuffer) xmbuf->drawable;
+}
+
+
+/* Destroy the XMesaBuffer behind 'pbuf'; unknown handles are ignored. */
+static void
+Fake_glXDestroyGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf)
+{
+   XMesaBuffer xmbuf = XMesaFindBuffer(dpy, pbuf);
+   if (xmbuf) {
+      XMesaDestroyBuffer(xmbuf);
+   }
+}
+
+
+/* Report one attribute of 'pbuf'.  Unknown attributes yield 0 in *value;
+ * an unknown pbuffer leaves *value untouched.  Always returns 0.
+ */
+static int
+Fake_glXQueryGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf, int attribute, unsigned int *value)
+{
+   const XMesaBuffer xmbuf = XMesaFindBuffer(dpy, pbuf);
+
+   if (!xmbuf) {
+      /* Generate GLXBadPbufferSGIX for bad pbuffer */
+      return 0;
+   }
+
+   switch (attribute) {
+      case GLX_PRESERVED_CONTENTS_SGIX:
+         *value = True;
+         break;
+      case GLX_LARGEST_PBUFFER_SGIX:
+         *value = xmesa_buffer_width(xmbuf) * xmesa_buffer_height(xmbuf);
+         break;
+      case GLX_WIDTH_SGIX:
+         *value = xmesa_buffer_width(xmbuf);
+         break;
+      case GLX_HEIGHT_SGIX:
+         *value = xmesa_buffer_height(xmbuf);
+         break;
+      case GLX_EVENT_MASK_SGIX:
+         *value = 0;  /* XXX might be wrong */
+         break;
+      default:
+         *value = 0;
+   }
+   return 0;
+}
+
+
+/* Remember the event mask selected for 'drawable'. */
+static void
+Fake_glXSelectEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long mask)
+{
+   XMesaBuffer xmbuf = XMesaFindBuffer(dpy, drawable);
+   if (xmbuf) {
+      /* Note: we'll never generate clobber events */
+      xmbuf->selectedEvents = mask;
+   }
+}
+
+
+/* Return the event mask stored for 'drawable', or 0 if it is unknown. */
+static void
+Fake_glXGetSelectedEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long *mask)
+{
+   XMesaBuffer xmbuf = XMesaFindBuffer(dpy, drawable);
+   if (xmbuf) {
+      *mask = xmbuf->selectedEvents;
+   }
+   else {
+      *mask = 0;
+   }
+}
+
+
+
+/*** GLX_SGI_cushion ***/
+
+/* Stub: all arguments are ignored. */
+static void
+Fake_glXCushionSGI(Display *dpy, Window win, float cushion)
+{
+   (void) dpy;
+   (void) win;
+   (void) cushion;
+}
+
+
+
+/*** GLX_SGIX_video_resize ***/
+/* The five functions below are stubs: arguments are ignored, 0 is returned. */
+static int
+Fake_glXBindChannelToWindowSGIX(Display *dpy, int screen, int channel , Window window)
+{
+   (void) dpy;
+   (void) screen;
+   (void) channel;
+   (void) window;
+   return 0;
+}
+
+static int
+Fake_glXChannelRectSGIX(Display *dpy, int screen, int channel, int x, int y, int w, int h)
+{
+   (void) dpy;
+   (void) screen;
+   (void) channel;
+   (void) x;
+   (void) y;
+   (void) w;
+   (void) h;
+   return 0;
+}
+
+static int
+Fake_glXQueryChannelRectSGIX(Display *dpy, int screen, int channel, int *x, int *y, int *w, int *h)
+{
+   (void) dpy;
+   (void) screen;
+   (void) channel;
+   (void) x;
+   (void) y;
+   (void) w;
+   (void) h;
+   return 0;  /* the output pointers are not written */
+}
+
+static int
+Fake_glXQueryChannelDeltasSGIX(Display *dpy, int screen, int channel, int *dx, int *dy, int *dw, int *dh)
+{
+   (void) dpy;
+   (void) screen;
+   (void) channel;
+   (void) dx;
+   (void) dy;
+   (void) dw;
+   (void) dh;
+   return 0;  /* the output pointers are not written */
+}
+
+static int
+Fake_glXChannelRectSyncSGIX(Display *dpy, int screen, int channel, GLenum synctype)
+{
+   (void) dpy;
+   (void) screen;
+   (void) channel;
+   (void) synctype;
+   return 0;
+}
+
+
+
+/*** GLX_SGIX_dmbuffer (stub, only built when _DM_BUFFER_H_ is defined) **/
+
+#if defined(_DM_BUFFER_H_)
+static Bool
+Fake_glXAssociateDMPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuffer, DMparams *params, DMbuffer dmbuffer)
+{
+   (void) dpy;
+   (void) pbuffer;
+   (void) params;
+   (void) dmbuffer;
+   return False;
+}
+#endif
+
+
+/*** GLX_SGIX_swap_group (stub) ***/
+
+static void
+Fake_glXJoinSwapGroupSGIX(Display *dpy, GLXDrawable drawable, GLXDrawable member)
+{
+   (void) dpy;
+   (void) drawable;
+   (void) member;
+}
+
+
+
+/*** GLX_SGIX_swap_barrier (stubs) ***/
+
+static void
+Fake_glXBindSwapBarrierSGIX(Display *dpy, GLXDrawable drawable, int barrier)
+{
+   (void) dpy;
+   (void) drawable;
+   (void) barrier;
+}
+
+static Bool
+Fake_glXQueryMaxSwapBarriersSGIX(Display *dpy, int screen, int *max)
+{
+   (void) dpy;
+   (void) screen;
+   (void) max;
+   return False;  /* *max is not written */
+}
+
+
+
+/*** GLX_SUN_get_transparent_index (stub) ***/
+
+static Status
+Fake_glXGetTransparentIndexSUN(Display *dpy, Window overlay, Window underlay, long *pTransparent)
+{
+   (void) dpy;
+   (void) overlay;
+   (void) underlay;
+   (void) pTransparent;
+   return 0;  /* *pTransparent is not written */
+}
+
+
+
+/*** GLX_MESA_release_buffers ***/
+
+/*
+ * Release the depth, stencil, accum buffers attached to a GLXDrawable
+ * (a window or pixmap) prior to destroying the GLXDrawable.
+ *
+ * Implemented by destroying the whole XMesaBuffer found for 'd'.
+ * Returns True if a buffer was found and destroyed, False otherwise.
+ */
+static Bool
+Fake_glXReleaseBuffersMESA( Display *dpy, GLXDrawable d )
+{
+   XMesaBuffer b = XMesaFindBuffer(dpy, d);
+   if (b) {
+      XMesaDestroyBuffer(b);
+      return True;
+   }
+   return False;
+}
+
+
+
+/*** GLX_MESA_set_3dfx_mode ***/
+/* Forwards 'mode' to XMesaSetFXmode() and returns its result. */
+static Bool
+Fake_glXSet3DfxModeMESA( int mode )
+{
+   return XMesaSetFXmode( mode );
+}
+
+
+
+/*** GLX_NV_vertex_array range (stubs: nothing is ever allocated) ***/
+static void *
+Fake_glXAllocateMemoryNV( GLsizei size,
+                          GLfloat readFrequency,
+                          GLfloat writeFrequency,
+                          GLfloat priority )
+{
+   (void) size;
+   (void) readFrequency;
+   (void) writeFrequency;
+   (void) priority;
+   return NULL;
+}
+
+
+static void
+Fake_glXFreeMemoryNV( GLvoid *pointer )
+{
+   (void) pointer;
+}
+
+
+/*** GLX_MESA_agp_offset ***/
+
+static GLuint
+Fake_glXGetAGPOffsetMESA( const GLvoid *pointer )
+{
+   (void) pointer;
+   return ~0;  /* stub: the pointer is ignored; all bits set is returned */
+}
+
+
+/*** GLX_EXT_texture_from_pixmap ***/
+/* Both functions forward to XMesa when 'drawable' has an XMesaBuffer and
+ * do nothing otherwise.
+ */
+static void
+Fake_glXBindTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer,
+                        const int *attrib_list)
+{
+   XMesaBuffer b = XMesaFindBuffer(dpy, drawable);
+   if (b)
+      XMesaBindTexImage(dpy, b, buffer, attrib_list);
+}
+
+static void
+Fake_glXReleaseTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer)
+{
+   XMesaBuffer b = XMesaFindBuffer(dpy, drawable);
+   if (b)
+      XMesaReleaseTexImage(dpy, b, buffer);
+}
+
+
+/* silence warning */
+extern struct _glxapi_table *_mesa_GetGLXDispatchTable(void);
+
+
+/**
+ * Create a new GLX API dispatch table with its function pointers
+ * initialized to point to Mesa's "fake" GLX API functions.
+ * Note: there's a similar function (_real_GetGLXDispatchTable) that
+ * returns a new dispatch table with all pointers initialized to point
+ * to "real" GLX functions (which understand GLX wire protocol, etc).
+ *
+ * The table is a single static object: it is re-initialized on every
+ * call and the same pointer is returned each time.  Entries that are not
+ * assigned below keep the no-op set by _glxapi_set_no_op_table().
+ */
+struct _glxapi_table *
+_mesa_GetGLXDispatchTable(void)
+{
+   static struct _glxapi_table glx;
+
+   /* be sure our dispatch table size <= libGL's table */
+   {
+      GLuint size = sizeof(struct _glxapi_table) / sizeof(void *);
+      (void) size;
+      assert(_glxapi_get_dispatch_table_size() >= size);
+   }
+
+   /* initialize the whole table to no-ops */
+   _glxapi_set_no_op_table(&glx);
+
+   /* now initialize the table with the functions I implement */
+   glx.ChooseVisual = Fake_glXChooseVisual;
+   glx.CopyContext = Fake_glXCopyContext;
+   glx.CreateContext = Fake_glXCreateContext;
+   glx.CreateGLXPixmap = Fake_glXCreateGLXPixmap;
+   glx.DestroyContext = Fake_glXDestroyContext;
+   glx.DestroyGLXPixmap = Fake_glXDestroyGLXPixmap;
+   glx.GetConfig = Fake_glXGetConfig;
+   /*glx.GetCurrentContext = Fake_glXGetCurrentContext;*/
+   /*glx.GetCurrentDrawable = Fake_glXGetCurrentDrawable;*/
+   glx.IsDirect = Fake_glXIsDirect;
+   glx.MakeCurrent = Fake_glXMakeCurrent;
+   glx.QueryExtension = Fake_glXQueryExtension;
+   glx.QueryVersion = Fake_glXQueryVersion;
+   glx.SwapBuffers = Fake_glXSwapBuffers;
+   glx.UseXFont = Fake_glXUseXFont;
+   glx.WaitGL = Fake_glXWaitGL;
+   glx.WaitX = Fake_glXWaitX;
+
+   /*** GLX_VERSION_1_1 ***/
+   glx.GetClientString = Fake_glXGetClientString;
+   glx.QueryExtensionsString = Fake_glXQueryExtensionsString;
+   glx.QueryServerString = Fake_glXQueryServerString;
+
+   /*** GLX_VERSION_1_2 ***/
+   /*glx.GetCurrentDisplay = Fake_glXGetCurrentDisplay;*/
+
+   /*** GLX_VERSION_1_3 ***/
+   glx.ChooseFBConfig = Fake_glXChooseFBConfig;
+   glx.CreateNewContext = Fake_glXCreateNewContext;
+   glx.CreatePbuffer = Fake_glXCreatePbuffer;
+   glx.CreatePixmap = Fake_glXCreatePixmap;
+   glx.CreateWindow = Fake_glXCreateWindow;
+   glx.DestroyPbuffer = Fake_glXDestroyPbuffer;
+   glx.DestroyPixmap = Fake_glXDestroyPixmap;
+   glx.DestroyWindow = Fake_glXDestroyWindow;
+   /*glx.GetCurrentReadDrawable = Fake_glXGetCurrentReadDrawable;*/
+   glx.GetFBConfigAttrib = Fake_glXGetFBConfigAttrib;
+   glx.GetFBConfigs = Fake_glXGetFBConfigs;
+   glx.GetSelectedEvent = Fake_glXGetSelectedEvent;
+   glx.GetVisualFromFBConfig = Fake_glXGetVisualFromFBConfig;
+   glx.MakeContextCurrent = Fake_glXMakeContextCurrent;
+   glx.QueryContext = Fake_glXQueryContext;
+   glx.QueryDrawable = Fake_glXQueryDrawable;
+   glx.SelectEvent = Fake_glXSelectEvent;
+
+   /*** GLX_SGI_swap_control ***/
+   glx.SwapIntervalSGI = Fake_glXSwapIntervalSGI;
+
+   /*** GLX_SGI_video_sync ***/
+   glx.GetVideoSyncSGI = Fake_glXGetVideoSyncSGI;
+   glx.WaitVideoSyncSGI = Fake_glXWaitVideoSyncSGI;
+
+   /*** GLX_SGI_make_current_read ***/
+   glx.MakeCurrentReadSGI = Fake_glXMakeCurrentReadSGI;
+   /*glx.GetCurrentReadDrawableSGI = Fake_glXGetCurrentReadDrawableSGI;*/
+
+/*** GLX_SGIX_video_source ***/
+#if defined(_VL_H)
+   glx.CreateGLXVideoSourceSGIX = Fake_glXCreateGLXVideoSourceSGIX;
+   glx.DestroyGLXVideoSourceSGIX = Fake_glXDestroyGLXVideoSourceSGIX;
+#endif
+
+   /*** GLX_EXT_import_context ***/
+   glx.FreeContextEXT = Fake_glXFreeContextEXT;
+   glx.GetContextIDEXT = Fake_glXGetContextIDEXT;
+   /*glx.GetCurrentDisplayEXT = Fake_glXGetCurrentDisplayEXT;*/
+   glx.ImportContextEXT = Fake_glXImportContextEXT;
+   glx.QueryContextInfoEXT = Fake_glXQueryContextInfoEXT;
+
+   /*** GLX_SGIX_fbconfig ***/
+   glx.GetFBConfigAttribSGIX = Fake_glXGetFBConfigAttribSGIX;
+   glx.ChooseFBConfigSGIX = Fake_glXChooseFBConfigSGIX;
+   glx.CreateGLXPixmapWithConfigSGIX = Fake_glXCreateGLXPixmapWithConfigSGIX;
+   glx.CreateContextWithConfigSGIX = Fake_glXCreateContextWithConfigSGIX;
+   glx.GetVisualFromFBConfigSGIX = Fake_glXGetVisualFromFBConfigSGIX;
+   glx.GetFBConfigFromVisualSGIX = Fake_glXGetFBConfigFromVisualSGIX;
+
+   /*** GLX_SGIX_pbuffer ***/
+   glx.CreateGLXPbufferSGIX = Fake_glXCreateGLXPbufferSGIX;
+   glx.DestroyGLXPbufferSGIX = Fake_glXDestroyGLXPbufferSGIX;
+   glx.QueryGLXPbufferSGIX = Fake_glXQueryGLXPbufferSGIX;
+   glx.SelectEventSGIX = Fake_glXSelectEventSGIX;
+   glx.GetSelectedEventSGIX = Fake_glXGetSelectedEventSGIX;
+
+   /*** GLX_SGI_cushion ***/
+   glx.CushionSGI = Fake_glXCushionSGI;
+
+   /*** GLX_SGIX_video_resize ***/
+   glx.BindChannelToWindowSGIX = Fake_glXBindChannelToWindowSGIX;
+   glx.ChannelRectSGIX = Fake_glXChannelRectSGIX;
+   glx.QueryChannelRectSGIX = Fake_glXQueryChannelRectSGIX;
+   glx.QueryChannelDeltasSGIX = Fake_glXQueryChannelDeltasSGIX;
+   glx.ChannelRectSyncSGIX = Fake_glXChannelRectSyncSGIX;
+
+   /*** GLX_SGIX_dmbuffer **/
+#if defined(_DM_BUFFER_H_)
+   /* install the stub rather than overwriting the no-op entry with NULL,
+    * which would make the dispatcher call through a null pointer
+    */
+   glx.AssociateDMPbufferSGIX = Fake_glXAssociateDMPbufferSGIX;
+#endif
+
+   /*** GLX_SGIX_swap_group ***/
+   glx.JoinSwapGroupSGIX = Fake_glXJoinSwapGroupSGIX;
+
+   /*** GLX_SGIX_swap_barrier ***/
+   glx.BindSwapBarrierSGIX = Fake_glXBindSwapBarrierSGIX;
+   glx.QueryMaxSwapBarriersSGIX = Fake_glXQueryMaxSwapBarriersSGIX;
+
+   /*** GLX_SUN_get_transparent_index ***/
+   glx.GetTransparentIndexSUN = Fake_glXGetTransparentIndexSUN;
+
+   /*** GLX_MESA_copy_sub_buffer ***/
+   glx.CopySubBufferMESA = Fake_glXCopySubBufferMESA;
+
+   /*** GLX_MESA_release_buffers ***/
+   glx.ReleaseBuffersMESA = Fake_glXReleaseBuffersMESA;
+
+   /*** GLX_MESA_pixmap_colormap ***/
+   glx.CreateGLXPixmapMESA = Fake_glXCreateGLXPixmapMESA;
+
+   /*** GLX_MESA_set_3dfx_mode ***/
+   glx.Set3DfxModeMESA = Fake_glXSet3DfxModeMESA;
+
+   /*** GLX_NV_vertex_array_range ***/
+   glx.AllocateMemoryNV = Fake_glXAllocateMemoryNV;
+   glx.FreeMemoryNV = Fake_glXFreeMemoryNV;
+
+   /*** GLX_MESA_agp_offset ***/
+   glx.GetAGPOffsetMESA = Fake_glXGetAGPOffsetMESA;
+
+   /*** GLX_EXT_texture_from_pixmap ***/
+   glx.BindTexImageEXT = Fake_glXBindTexImageEXT;
+   glx.ReleaseTexImageEXT = Fake_glXReleaseTexImageEXT;
+
+   return &glx;
+}
diff --git a/src/gallium/winsys/xlib/glxapi.c b/src/gallium/winsys/xlib/glxapi.c
new file mode 100644
index 0000000000..c059fc3edb
--- /dev/null
+++ b/src/gallium/winsys/xlib/glxapi.c
@@ -0,0
+1,1390 @@ +/* + * Mesa 3-D graphics library + * Version: 7.1 + * + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +/* + * This is the GLX API dispatcher. Calls to the glX* functions are + * either routed to the real GLX encoders or to Mesa's pseudo-GLX functions. + * See the glxapi.h file for more details. 
+ */ + + +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "main/glheader.h" +#include "glapi/glapi.h" +#include "glxapi.h" +#include "pipe/p_thread.h" + + +extern struct _glxapi_table *_real_GetGLXDispatchTable(void); +extern struct _glxapi_table *_mesa_GetGLXDispatchTable(void); + + +struct display_dispatch { + Display *Dpy; + struct _glxapi_table *Table; + struct display_dispatch *Next; +}; + +static struct display_dispatch *DispatchList = NULL; + + +/* Display -> Dispatch caching */ +static Display *prevDisplay = NULL; +static struct _glxapi_table *prevTable = NULL; + + +static struct _glxapi_table * +get_dispatch(Display *dpy) +{ + if (!dpy) + return NULL; + + /* search list of display/dispatch pairs for this display */ + { + const struct display_dispatch *d = DispatchList; + while (d) { + if (d->Dpy == dpy) { + prevDisplay = dpy; + prevTable = d->Table; + return d->Table; /* done! */ + } + d = d->Next; + } + } + + /* A new display, determine if we should use real GLX + * or Mesa's pseudo-GLX. + */ + { + struct _glxapi_table *t = _mesa_GetGLXDispatchTable(); + + if (t) { + struct display_dispatch *d; + d = (struct display_dispatch *) malloc(sizeof(struct display_dispatch)); + if (d) { + d->Dpy = dpy; + d->Table = t; + /* insert at head of list */ + d->Next = DispatchList; + DispatchList = d; + /* update cache */ + prevDisplay = dpy; + prevTable = t; + return t; + } + } + } + + /* If we get here that means we can't use real GLX on this display + * and the Mesa pseudo-GLX software renderer wasn't compiled in. + * Or, we ran out of memory! + */ + return NULL; +} + + +/* Don't use the GET_DISPATCH defined in glthread.h */ +#undef GET_DISPATCH + +#define GET_DISPATCH(DPY, TABLE) \ + if (DPY == prevDisplay) { \ + TABLE = prevTable; \ + } \ + else if (!DPY) { \ + TABLE = NULL; \ + } \ + else { \ + TABLE = get_dispatch(DPY); \ + } + + + + +/** + * GLX API current context. 
+ */ +pipe_tsd ContextTSD; + + +static void +SetCurrentContext(GLXContext c) +{ + pipe_tsd_set(&ContextTSD, c); +} + + +/* + * GLX API entrypoints + */ + +/*** GLX_VERSION_1_0 ***/ + +XVisualInfo PUBLIC * +glXChooseVisual(Display *dpy, int screen, int *list) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return NULL; + return (t->ChooseVisual)(dpy, screen, list); +} + + +void PUBLIC +glXCopyContext(Display *dpy, GLXContext src, GLXContext dst, unsigned long mask) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; + (t->CopyContext)(dpy, src, dst, mask); +} + + +GLXContext PUBLIC +glXCreateContext(Display *dpy, XVisualInfo *visinfo, GLXContext shareList, Bool direct) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->CreateContext)(dpy, visinfo, shareList, direct); +} + + +GLXPixmap PUBLIC +glXCreateGLXPixmap(Display *dpy, XVisualInfo *visinfo, Pixmap pixmap) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->CreateGLXPixmap)(dpy, visinfo, pixmap); +} + + +void PUBLIC +glXDestroyContext(Display *dpy, GLXContext ctx) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; + if (glXGetCurrentContext() == ctx) + SetCurrentContext(NULL); + (t->DestroyContext)(dpy, ctx); +} + + +void PUBLIC +glXDestroyGLXPixmap(Display *dpy, GLXPixmap pixmap) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; + (t->DestroyGLXPixmap)(dpy, pixmap); +} + + +int PUBLIC +glXGetConfig(Display *dpy, XVisualInfo *visinfo, int attrib, int *value) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return GLX_NO_EXTENSION; + return (t->GetConfig)(dpy, visinfo, attrib, value); +} + + +GLXContext PUBLIC +glXGetCurrentContext(void) +{ + return (GLXContext) pipe_tsd_get(&ContextTSD); +} + + +GLXDrawable PUBLIC +glXGetCurrentDrawable(void) +{ + __GLXcontext *gc = (__GLXcontext *) glXGetCurrentContext(); + return gc ? 
gc->currentDrawable : 0; +} + + +Bool PUBLIC +glXIsDirect(Display *dpy, GLXContext ctx) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return False; + return (t->IsDirect)(dpy, ctx); +} + + +Bool PUBLIC +glXMakeCurrent(Display *dpy, GLXDrawable drawable, GLXContext ctx) +{ + Bool b; + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) { + return False; + } + b = (*t->MakeCurrent)(dpy, drawable, ctx); + if (b) { + SetCurrentContext(ctx); + } + return b; +} + + +Bool PUBLIC +glXQueryExtension(Display *dpy, int *errorb, int *event) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return False; + return (t->QueryExtension)(dpy, errorb, event); +} + + +Bool PUBLIC +glXQueryVersion(Display *dpy, int *maj, int *min) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return False; + return (t->QueryVersion)(dpy, maj, min); +} + + +void PUBLIC +glXSwapBuffers(Display *dpy, GLXDrawable drawable) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; + (t->SwapBuffers)(dpy, drawable); +} + + +void PUBLIC +glXUseXFont(Font font, int first, int count, int listBase) +{ + struct _glxapi_table *t; + Display *dpy = glXGetCurrentDisplay(); + GET_DISPATCH(dpy, t); + if (!t) + return; + (t->UseXFont)(font, first, count, listBase); +} + + +void PUBLIC +glXWaitGL(void) +{ + struct _glxapi_table *t; + Display *dpy = glXGetCurrentDisplay(); + GET_DISPATCH(dpy, t); + if (!t) + return; + (t->WaitGL)(); +} + + +void PUBLIC +glXWaitX(void) +{ + struct _glxapi_table *t; + Display *dpy = glXGetCurrentDisplay(); + GET_DISPATCH(dpy, t); + if (!t) + return; + (t->WaitX)(); +} + + + +/*** GLX_VERSION_1_1 ***/ + +const char PUBLIC * +glXGetClientString(Display *dpy, int name) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return NULL; + return (t->GetClientString)(dpy, name); +} + + +const char PUBLIC * +glXQueryExtensionsString(Display *dpy, int screen) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, 
t); + if (!t) + return NULL; + return (t->QueryExtensionsString)(dpy, screen); +} + + +const char PUBLIC * +glXQueryServerString(Display *dpy, int screen, int name) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return NULL; + return (t->QueryServerString)(dpy, screen, name); +} + + +/*** GLX_VERSION_1_2 ***/ + +Display PUBLIC * +glXGetCurrentDisplay(void) +{ + /* Same code as in libGL's glxext.c */ + __GLXcontext *gc = (__GLXcontext *) glXGetCurrentContext(); + if (NULL == gc) return NULL; + return gc->currentDpy; +} + + + +/*** GLX_VERSION_1_3 ***/ + +GLXFBConfig PUBLIC * +glXChooseFBConfig(Display *dpy, int screen, const int *attribList, int *nitems) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->ChooseFBConfig)(dpy, screen, attribList, nitems); +} + + +GLXContext PUBLIC +glXCreateNewContext(Display *dpy, GLXFBConfig config, int renderType, GLXContext shareList, Bool direct) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->CreateNewContext)(dpy, config, renderType, shareList, direct); +} + + +GLXPbuffer PUBLIC +glXCreatePbuffer(Display *dpy, GLXFBConfig config, const int *attribList) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->CreatePbuffer)(dpy, config, attribList); +} + + +GLXPixmap PUBLIC +glXCreatePixmap(Display *dpy, GLXFBConfig config, Pixmap pixmap, const int *attribList) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->CreatePixmap)(dpy, config, pixmap, attribList); +} + + +GLXWindow PUBLIC +glXCreateWindow(Display *dpy, GLXFBConfig config, Window win, const int *attribList) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->CreateWindow)(dpy, config, win, attribList); +} + + +void PUBLIC +glXDestroyPbuffer(Display *dpy, GLXPbuffer pbuf) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; + 
(t->DestroyPbuffer)(dpy, pbuf); +} + + +void PUBLIC +glXDestroyPixmap(Display *dpy, GLXPixmap pixmap) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; + (t->DestroyPixmap)(dpy, pixmap); +} + + +void PUBLIC +glXDestroyWindow(Display *dpy, GLXWindow window) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; + (t->DestroyWindow)(dpy, window); +} + + +GLXDrawable PUBLIC +glXGetCurrentReadDrawable(void) +{ + __GLXcontext *gc = (__GLXcontext *) glXGetCurrentContext(); + return gc ? gc->currentReadable : 0; +} + + +int PUBLIC +glXGetFBConfigAttrib(Display *dpy, GLXFBConfig config, int attribute, int *value) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return GLX_NO_EXTENSION; + return (t->GetFBConfigAttrib)(dpy, config, attribute, value); +} + + +GLXFBConfig PUBLIC * +glXGetFBConfigs(Display *dpy, int screen, int *nelements) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->GetFBConfigs)(dpy, screen, nelements); +} + +void PUBLIC +glXGetSelectedEvent(Display *dpy, GLXDrawable drawable, unsigned long *mask) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; + (t->GetSelectedEvent)(dpy, drawable, mask); +} + + +XVisualInfo PUBLIC * +glXGetVisualFromFBConfig(Display *dpy, GLXFBConfig config) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return NULL; + return (t->GetVisualFromFBConfig)(dpy, config); +} + + +Bool PUBLIC +glXMakeContextCurrent(Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx) +{ + Bool b; + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return False; + b = (t->MakeContextCurrent)(dpy, draw, read, ctx); + if (b) { + SetCurrentContext(ctx); + } + return b; +} + + +int PUBLIC +glXQueryContext(Display *dpy, GLXContext ctx, int attribute, int *value) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + assert(t); + if (!t) + return 0; /* XXX correct? 
*/ + return (t->QueryContext)(dpy, ctx, attribute, value); +} + + +void PUBLIC +glXQueryDrawable(Display *dpy, GLXDrawable draw, int attribute, unsigned int *value) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; + (t->QueryDrawable)(dpy, draw, attribute, value); +} + + +void PUBLIC +glXSelectEvent(Display *dpy, GLXDrawable drawable, unsigned long mask) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; + (t->SelectEvent)(dpy, drawable, mask); +} + + + +/*** GLX_SGI_swap_control ***/ + +int PUBLIC +glXSwapIntervalSGI(int interval) +{ + struct _glxapi_table *t; + Display *dpy = glXGetCurrentDisplay(); + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->SwapIntervalSGI)(interval); +} + + + +/*** GLX_SGI_video_sync ***/ + +int PUBLIC +glXGetVideoSyncSGI(unsigned int *count) +{ + struct _glxapi_table *t; + Display *dpy = glXGetCurrentDisplay(); + GET_DISPATCH(dpy, t); + if (!t || !glXGetCurrentContext()) + return GLX_BAD_CONTEXT; + return (t->GetVideoSyncSGI)(count); +} + +int PUBLIC +glXWaitVideoSyncSGI(int divisor, int remainder, unsigned int *count) +{ + struct _glxapi_table *t; + Display *dpy = glXGetCurrentDisplay(); + GET_DISPATCH(dpy, t); + if (!t || !glXGetCurrentContext()) + return GLX_BAD_CONTEXT; + return (t->WaitVideoSyncSGI)(divisor, remainder, count); +} + + + +/*** GLX_SGI_make_current_read ***/ + +Bool PUBLIC +glXMakeCurrentReadSGI(Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return False; + return (t->MakeCurrentReadSGI)(dpy, draw, read, ctx); +} + +GLXDrawable PUBLIC +glXGetCurrentReadDrawableSGI(void) +{ + return glXGetCurrentReadDrawable(); +} + + +#if defined(_VL_H) + +GLXVideoSourceSGIX PUBLIC +glXCreateGLXVideoSourceSGIX(Display *dpy, int screen, VLServer server, VLPath path, int nodeClass, VLNode drainNode) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return 
(t->CreateGLXVideoSourceSGIX)(dpy, screen, server, path, nodeClass, drainNode); +} + +void PUBLIC +glXDestroyGLXVideoSourceSGIX(Display *dpy, GLXVideoSourceSGIX src) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; /* void function: no dispatch table means nothing to do */ + (t->DestroyGLXVideoSourceSGIX)(dpy, src); +} + +#endif + + +/*** GLX_EXT_import_context ***/ + +void PUBLIC +glXFreeContextEXT(Display *dpy, GLXContext context) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; + (t->FreeContextEXT)(dpy, context); +} + +GLXContextID PUBLIC +glXGetContextIDEXT(const GLXContext context) +{ + return context ? ((__GLXcontext *) context)->xid : 0; /* NULL context has no XID */ +} + +Display PUBLIC * +glXGetCurrentDisplayEXT(void) +{ + return glXGetCurrentDisplay(); +} + +GLXContext PUBLIC +glXImportContextEXT(Display *dpy, GLXContextID contextID) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->ImportContextEXT)(dpy, contextID); +} + +int PUBLIC +glXQueryContextInfoEXT(Display *dpy, GLXContext context, int attribute,int *value) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; /* XXX ok? 
*/ + return (t->QueryContextInfoEXT)(dpy, context, attribute, value); +} + + + +/*** GLX_SGIX_fbconfig ***/ + +int PUBLIC +glXGetFBConfigAttribSGIX(Display *dpy, GLXFBConfigSGIX config, int attribute, int *value) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->GetFBConfigAttribSGIX)(dpy, config, attribute, value); +} + +GLXFBConfigSGIX PUBLIC * +glXChooseFBConfigSGIX(Display *dpy, int screen, int *attrib_list, int *nelements) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->ChooseFBConfigSGIX)(dpy, screen, attrib_list, nelements); +} + +GLXPixmap PUBLIC +glXCreateGLXPixmapWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, Pixmap pixmap) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->CreateGLXPixmapWithConfigSGIX)(dpy, config, pixmap); +} + +GLXContext PUBLIC +glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, int render_type, GLXContext share_list, Bool direct) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->CreateContextWithConfigSGIX)(dpy, config, render_type, share_list, direct); +} + +XVisualInfo PUBLIC * +glXGetVisualFromFBConfigSGIX(Display *dpy, GLXFBConfigSGIX config) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->GetVisualFromFBConfigSGIX)(dpy, config); +} + +GLXFBConfigSGIX PUBLIC +glXGetFBConfigFromVisualSGIX(Display *dpy, XVisualInfo *vis) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->GetFBConfigFromVisualSGIX)(dpy, vis); +} + + + +/*** GLX_SGIX_pbuffer ***/ + +GLXPbufferSGIX PUBLIC +glXCreateGLXPbufferSGIX(Display *dpy, GLXFBConfigSGIX config, unsigned int width, unsigned int height, int *attrib_list) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->CreateGLXPbufferSGIX)(dpy, config, width, height, attrib_list); +} + +void PUBLIC 
+glXDestroyGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; + (t->DestroyGLXPbufferSGIX)(dpy, pbuf); +} + +int PUBLIC +glXQueryGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf, int attribute, unsigned int *value) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->QueryGLXPbufferSGIX)(dpy, pbuf, attribute, value); +} + +void PUBLIC +glXSelectEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long mask) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; + (t->SelectEventSGIX)(dpy, drawable, mask); +} + +void PUBLIC +glXGetSelectedEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long *mask) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; + (t->GetSelectedEventSGIX)(dpy, drawable, mask); +} + + + +/*** GLX_SGI_cushion ***/ + +void PUBLIC +glXCushionSGI(Display *dpy, Window win, float cushion) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; + (t->CushionSGI)(dpy, win, cushion); +} + + + +/*** GLX_SGIX_video_resize ***/ + +int PUBLIC +glXBindChannelToWindowSGIX(Display *dpy, int screen, int channel , Window window) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->BindChannelToWindowSGIX)(dpy, screen, channel, window); +} + +int PUBLIC +glXChannelRectSGIX(Display *dpy, int screen, int channel, int x, int y, int w, int h) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->ChannelRectSGIX)(dpy, screen, channel, x, y, w, h); +} + +int PUBLIC +glXQueryChannelRectSGIX(Display *dpy, int screen, int channel, int *x, int *y, int *w, int *h) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->QueryChannelRectSGIX)(dpy, screen, channel, x, y, w, h); +} + +int PUBLIC +glXQueryChannelDeltasSGIX(Display *dpy, int screen, int channel, int *dx, int *dy, int *dw, int *dh) +{ + 
struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->QueryChannelDeltasSGIX)(dpy, screen, channel, dx, dy, dw, dh); +} + +int PUBLIC +glXChannelRectSyncSGIX(Display *dpy, int screen, int channel, GLenum synctype) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->ChannelRectSyncSGIX)(dpy, screen, channel, synctype); +} + + + +#if defined(_DM_BUFFER_H_) + +Bool PUBLIC +glXAssociateDMPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuffer, DMparams *params, DMbuffer dmbuffer) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return False; + return (t->AssociateDMPbufferSGIX)(dpy, pbuffer, params, dmbuffer); +} + +#endif + + +/*** GLX_SGIX_swap_group ***/ + +void PUBLIC +glXJoinSwapGroupSGIX(Display *dpy, GLXDrawable drawable, GLXDrawable member) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; + (*t->JoinSwapGroupSGIX)(dpy, drawable, member); +} + + +/*** GLX_SGIX_swap_barrier ***/ + +void PUBLIC +glXBindSwapBarrierSGIX(Display *dpy, GLXDrawable drawable, int barrier) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; + (*t->BindSwapBarrierSGIX)(dpy, drawable, barrier); +} + +Bool PUBLIC +glXQueryMaxSwapBarriersSGIX(Display *dpy, int screen, int *max) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return False; + return (*t->QueryMaxSwapBarriersSGIX)(dpy, screen, max); +} + + + +/*** GLX_SUN_get_transparent_index ***/ + +Status PUBLIC +glXGetTransparentIndexSUN(Display *dpy, Window overlay, Window underlay, long *pTransparent) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return False; + return (*t->GetTransparentIndexSUN)(dpy, overlay, underlay, pTransparent); +} + + + +/*** GLX_MESA_copy_sub_buffer ***/ + +void PUBLIC +glXCopySubBufferMESA(Display *dpy, GLXDrawable drawable, int x, int y, int width, int height) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return; + 
(t->CopySubBufferMESA)(dpy, drawable, x, y, width, height); +} + + + +/*** GLX_MESA_release_buffers ***/ + +Bool PUBLIC +glXReleaseBuffersMESA(Display *dpy, Window w) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return False; + return (t->ReleaseBuffersMESA)(dpy, w); +} + + + +/*** GLX_MESA_pixmap_colormap ***/ + +GLXPixmap PUBLIC +glXCreateGLXPixmapMESA(Display *dpy, XVisualInfo *visinfo, Pixmap pixmap, Colormap cmap) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (!t) + return 0; + return (t->CreateGLXPixmapMESA)(dpy, visinfo, pixmap, cmap); +} + + + +/*** GLX_MESA_set_3dfx_mode ***/ + +Bool PUBLIC +glXSet3DfxModeMESA(int mode) +{ + struct _glxapi_table *t; + Display *dpy = glXGetCurrentDisplay(); + GET_DISPATCH(dpy, t); + if (!t) + return False; + return (t->Set3DfxModeMESA)(mode); +} + + + +/*** GLX_NV_vertex_array_range ***/ + +void PUBLIC * +glXAllocateMemoryNV( GLsizei size, + GLfloat readFrequency, + GLfloat writeFrequency, + GLfloat priority ) +{ + struct _glxapi_table *t; + Display *dpy = glXGetCurrentDisplay(); + GET_DISPATCH(dpy, t); + if (!t) + return NULL; + return (t->AllocateMemoryNV)(size, readFrequency, writeFrequency, priority); +} + + +void PUBLIC +glXFreeMemoryNV( GLvoid *pointer ) +{ + struct _glxapi_table *t; + Display *dpy = glXGetCurrentDisplay(); + GET_DISPATCH(dpy, t); + if (!t) + return; + (t->FreeMemoryNV)(pointer); +} + + + + +/*** GLX_MESA_agp_offset */ + +GLuint PUBLIC +glXGetAGPOffsetMESA( const GLvoid *pointer ) +{ + struct _glxapi_table *t; + Display *dpy = glXGetCurrentDisplay(); + GET_DISPATCH(dpy, t); + if (!t) + return ~0; + return (t->GetAGPOffsetMESA)(pointer); +} + + +/*** GLX_MESA_allocate_memory */ + +void * +glXAllocateMemoryMESA(Display *dpy, int scrn, size_t size, + float readfreq, float writefreq, float priority) +{ + /* dummy */ + return NULL; +} + +void +glXFreeMemoryMESA(Display *dpy, int scrn, void *pointer) +{ + /* dummy */ +} + + +GLuint +glXGetMemoryOffsetMESA(Display *dpy, int 
scrn, const void *pointer) +{ + /* dummy */ + return 0; +} + + +/*** GLX_EXT_texture_from_pixmap */ + +void +glXBindTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer, + const int *attrib_list) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (t) + t->BindTexImageEXT(dpy, drawable, buffer, attrib_list); +} + +void +glXReleaseTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer) +{ + struct _glxapi_table *t; + GET_DISPATCH(dpy, t); + if (t) + t->ReleaseTexImageEXT(dpy, drawable, buffer); +} + + +/**********************************************************************/ +/* GLX API management functions */ +/**********************************************************************/ + + +const char * +_glxapi_get_version(void) +{ + return "1.3"; +} + + +/* + * Return array of extension strings. + */ +const char ** +_glxapi_get_extensions(void) +{ + static const char *extensions[] = { +#ifdef GLX_EXT_import_context + "GLX_EXT_import_context", +#endif +#ifdef GLX_SGI_video_sync + "GLX_SGI_video_sync", +#endif +#ifdef GLX_MESA_copy_sub_buffer + "GLX_MESA_copy_sub_buffer", +#endif +#ifdef GLX_MESA_release_buffers + "GLX_MESA_release_buffers", +#endif +#ifdef GLX_MESA_pixmap_colormap + "GLX_MESA_pixmap_colormap", +#endif +#ifdef GLX_MESA_set_3dfx_mode + "GLX_MESA_set_3dfx_mode", +#endif +#ifdef GLX_SGIX_fbconfig + "GLX_SGIX_fbconfig", +#endif +#ifdef GLX_SGIX_pbuffer + "GLX_SGIX_pbuffer", +#endif +#ifdef GLX_EXT_texture_from_pixmap + "GLX_EXT_texture_from_pixmap", +#endif + NULL + }; + return extensions; +} + + +/* + * Return size of the GLX dispatch table, in entries, not bytes. 
+ */ +GLuint +_glxapi_get_dispatch_table_size(void) +{ + return sizeof(struct _glxapi_table) / sizeof(void *); +} + + +static int +generic_no_op_func(void) +{ + return 0; +} + + +/* + * Initialize all functions in given dispatch table to be no-ops + */ +void +_glxapi_set_no_op_table(struct _glxapi_table *t) +{ + typedef int (*nop_func)(void); + nop_func *dispatch = (nop_func *) t; + GLuint n = _glxapi_get_dispatch_table_size(); + GLuint i; + for (i = 0; i < n; i++) { + dispatch[i] = generic_no_op_func; + } +} + + +struct name_address_pair { + const char *Name; + __GLXextFuncPtr Address; +}; + +static struct name_address_pair GLX_functions[] = { + /*** GLX_VERSION_1_0 ***/ + { "glXChooseVisual", (__GLXextFuncPtr) glXChooseVisual }, + { "glXCopyContext", (__GLXextFuncPtr) glXCopyContext }, + { "glXCreateContext", (__GLXextFuncPtr) glXCreateContext }, + { "glXCreateGLXPixmap", (__GLXextFuncPtr) glXCreateGLXPixmap }, + { "glXDestroyContext", (__GLXextFuncPtr) glXDestroyContext }, + { "glXDestroyGLXPixmap", (__GLXextFuncPtr) glXDestroyGLXPixmap }, + { "glXGetConfig", (__GLXextFuncPtr) glXGetConfig }, + { "glXGetCurrentContext", (__GLXextFuncPtr) glXGetCurrentContext }, + { "glXGetCurrentDrawable", (__GLXextFuncPtr) glXGetCurrentDrawable }, + { "glXIsDirect", (__GLXextFuncPtr) glXIsDirect }, + { "glXMakeCurrent", (__GLXextFuncPtr) glXMakeCurrent }, + { "glXQueryExtension", (__GLXextFuncPtr) glXQueryExtension }, + { "glXQueryVersion", (__GLXextFuncPtr) glXQueryVersion }, + { "glXSwapBuffers", (__GLXextFuncPtr) glXSwapBuffers }, + { "glXUseXFont", (__GLXextFuncPtr) glXUseXFont }, + { "glXWaitGL", (__GLXextFuncPtr) glXWaitGL }, + { "glXWaitX", (__GLXextFuncPtr) glXWaitX }, + + /*** GLX_VERSION_1_1 ***/ + { "glXGetClientString", (__GLXextFuncPtr) glXGetClientString }, + { "glXQueryExtensionsString", (__GLXextFuncPtr) glXQueryExtensionsString }, + { "glXQueryServerString", (__GLXextFuncPtr) glXQueryServerString }, + + /*** GLX_VERSION_1_2 ***/ + { "glXGetCurrentDisplay", 
(__GLXextFuncPtr) glXGetCurrentDisplay }, + + /*** GLX_VERSION_1_3 ***/ + { "glXChooseFBConfig", (__GLXextFuncPtr) glXChooseFBConfig }, + { "glXCreateNewContext", (__GLXextFuncPtr) glXCreateNewContext }, + { "glXCreatePbuffer", (__GLXextFuncPtr) glXCreatePbuffer }, + { "glXCreatePixmap", (__GLXextFuncPtr) glXCreatePixmap }, + { "glXCreateWindow", (__GLXextFuncPtr) glXCreateWindow }, + { "glXDestroyPbuffer", (__GLXextFuncPtr) glXDestroyPbuffer }, + { "glXDestroyPixmap", (__GLXextFuncPtr) glXDestroyPixmap }, + { "glXDestroyWindow", (__GLXextFuncPtr) glXDestroyWindow }, + { "glXGetCurrentReadDrawable", (__GLXextFuncPtr) glXGetCurrentReadDrawable }, + { "glXGetFBConfigAttrib", (__GLXextFuncPtr) glXGetFBConfigAttrib }, + { "glXGetFBConfigs", (__GLXextFuncPtr) glXGetFBConfigs }, + { "glXGetSelectedEvent", (__GLXextFuncPtr) glXGetSelectedEvent }, + { "glXGetVisualFromFBConfig", (__GLXextFuncPtr) glXGetVisualFromFBConfig }, + { "glXMakeContextCurrent", (__GLXextFuncPtr) glXMakeContextCurrent }, + { "glXQueryContext", (__GLXextFuncPtr) glXQueryContext }, + { "glXQueryDrawable", (__GLXextFuncPtr) glXQueryDrawable }, + { "glXSelectEvent", (__GLXextFuncPtr) glXSelectEvent }, + + /*** GLX_VERSION_1_4 ***/ + { "glXGetProcAddress", (__GLXextFuncPtr) glXGetProcAddress }, + + /*** GLX_SGI_swap_control ***/ + { "glXSwapIntervalSGI", (__GLXextFuncPtr) glXSwapIntervalSGI }, + + /*** GLX_SGI_video_sync ***/ + { "glXGetVideoSyncSGI", (__GLXextFuncPtr) glXGetVideoSyncSGI }, + { "glXWaitVideoSyncSGI", (__GLXextFuncPtr) glXWaitVideoSyncSGI }, + + /*** GLX_SGI_make_current_read ***/ + { "glXMakeCurrentReadSGI", (__GLXextFuncPtr) glXMakeCurrentReadSGI }, + { "glXGetCurrentReadDrawableSGI", (__GLXextFuncPtr) glXGetCurrentReadDrawableSGI }, + + /*** GLX_SGIX_video_source ***/ +#if defined(_VL_H) + { "glXCreateGLXVideoSourceSGIX", (__GLXextFuncPtr) glXCreateGLXVideoSourceSGIX }, + { "glXDestroyGLXVideoSourceSGIX", (__GLXextFuncPtr) glXDestroyGLXVideoSourceSGIX }, +#endif + + /*** 
GLX_EXT_import_context ***/ + { "glXFreeContextEXT", (__GLXextFuncPtr) glXFreeContextEXT }, + { "glXGetContextIDEXT", (__GLXextFuncPtr) glXGetContextIDEXT }, + { "glXGetCurrentDisplayEXT", (__GLXextFuncPtr) glXGetCurrentDisplayEXT }, + { "glXImportContextEXT", (__GLXextFuncPtr) glXImportContextEXT }, + { "glXQueryContextInfoEXT", (__GLXextFuncPtr) glXQueryContextInfoEXT }, + + /*** GLX_SGIX_fbconfig ***/ + { "glXGetFBConfigAttribSGIX", (__GLXextFuncPtr) glXGetFBConfigAttribSGIX }, + { "glXChooseFBConfigSGIX", (__GLXextFuncPtr) glXChooseFBConfigSGIX }, + { "glXCreateGLXPixmapWithConfigSGIX", (__GLXextFuncPtr) glXCreateGLXPixmapWithConfigSGIX }, + { "glXCreateContextWithConfigSGIX", (__GLXextFuncPtr) glXCreateContextWithConfigSGIX }, + { "glXGetVisualFromFBConfigSGIX", (__GLXextFuncPtr) glXGetVisualFromFBConfigSGIX }, + { "glXGetFBConfigFromVisualSGIX", (__GLXextFuncPtr) glXGetFBConfigFromVisualSGIX }, + + /*** GLX_SGIX_pbuffer ***/ + { "glXCreateGLXPbufferSGIX", (__GLXextFuncPtr) glXCreateGLXPbufferSGIX }, + { "glXDestroyGLXPbufferSGIX", (__GLXextFuncPtr) glXDestroyGLXPbufferSGIX }, + { "glXQueryGLXPbufferSGIX", (__GLXextFuncPtr) glXQueryGLXPbufferSGIX }, + { "glXSelectEventSGIX", (__GLXextFuncPtr) glXSelectEventSGIX }, + { "glXGetSelectedEventSGIX", (__GLXextFuncPtr) glXGetSelectedEventSGIX }, + + /*** GLX_SGI_cushion ***/ + { "glXCushionSGI", (__GLXextFuncPtr) glXCushionSGI }, + + /*** GLX_SGIX_video_resize ***/ + { "glXBindChannelToWindowSGIX", (__GLXextFuncPtr) glXBindChannelToWindowSGIX }, + { "glXChannelRectSGIX", (__GLXextFuncPtr) glXChannelRectSGIX }, + { "glXQueryChannelRectSGIX", (__GLXextFuncPtr) glXQueryChannelRectSGIX }, + { "glXQueryChannelDeltasSGIX", (__GLXextFuncPtr) glXQueryChannelDeltasSGIX }, + { "glXChannelRectSyncSGIX", (__GLXextFuncPtr) glXChannelRectSyncSGIX }, + + /*** GLX_SGIX_dmbuffer **/ +#if defined(_DM_BUFFER_H_) + { "glXAssociateDMPbufferSGIX", (__GLXextFuncPtr) glXAssociateDMPbufferSGIX }, +#endif + + /*** GLX_SGIX_swap_group ***/ + { 
"glXJoinSwapGroupSGIX", (__GLXextFuncPtr) glXJoinSwapGroupSGIX }, + + /*** GLX_SGIX_swap_barrier ***/ + { "glXBindSwapBarrierSGIX", (__GLXextFuncPtr) glXBindSwapBarrierSGIX }, + { "glXQueryMaxSwapBarriersSGIX", (__GLXextFuncPtr) glXQueryMaxSwapBarriersSGIX }, + + /*** GLX_SUN_get_transparent_index ***/ + { "glXGetTransparentIndexSUN", (__GLXextFuncPtr) glXGetTransparentIndexSUN }, + + /*** GLX_MESA_copy_sub_buffer ***/ + { "glXCopySubBufferMESA", (__GLXextFuncPtr) glXCopySubBufferMESA }, + + /*** GLX_MESA_pixmap_colormap ***/ + { "glXCreateGLXPixmapMESA", (__GLXextFuncPtr) glXCreateGLXPixmapMESA }, + + /*** GLX_MESA_release_buffers ***/ + { "glXReleaseBuffersMESA", (__GLXextFuncPtr) glXReleaseBuffersMESA }, + + /*** GLX_MESA_set_3dfx_mode ***/ + { "glXSet3DfxModeMESA", (__GLXextFuncPtr) glXSet3DfxModeMESA }, + + /*** GLX_ARB_get_proc_address ***/ + { "glXGetProcAddressARB", (__GLXextFuncPtr) glXGetProcAddressARB }, + + /*** GLX_NV_vertex_array_range ***/ + { "glXAllocateMemoryNV", (__GLXextFuncPtr) glXAllocateMemoryNV }, + { "glXFreeMemoryNV", (__GLXextFuncPtr) glXFreeMemoryNV }, + + /*** GLX_MESA_agp_offset ***/ + { "glXGetAGPOffsetMESA", (__GLXextFuncPtr) glXGetAGPOffsetMESA }, + + /*** GLX_MESA_allocate_memory ***/ + { "glXAllocateMemoryMESA", (__GLXextFuncPtr) glXAllocateMemoryMESA }, + { "glXFreeMemoryMESA", (__GLXextFuncPtr) glXFreeMemoryMESA }, + { "glXGetMemoryOffsetMESA", (__GLXextFuncPtr) glXGetMemoryOffsetMESA }, + + /*** GLX_EXT_texture_from_pixmap ***/ + { "glXBindTexImageEXT", (__GLXextFuncPtr) glXBindTexImageEXT }, + { "glXReleaseTexImageEXT", (__GLXextFuncPtr) glXReleaseTexImageEXT }, + + { NULL, NULL } /* end of list */ +}; + + + +/* + * Return address of named glX function, or NULL if not found. 
+ */ +__GLXextFuncPtr +_glxapi_get_proc_address(const char *funcName) +{ + GLuint i; + for (i = 0; GLX_functions[i].Name; i++) { + if (strcmp(GLX_functions[i].Name, funcName) == 0) + return GLX_functions[i].Address; + } + return NULL; +} + + + +/* + * This function does not get dispatched through the dispatch table + * since it's really a "meta" function. + */ +__GLXextFuncPtr +glXGetProcAddressARB(const GLubyte *procName) +{ + __GLXextFuncPtr f; + + f = _glxapi_get_proc_address((const char *) procName); + if (f) { + return f; + } + + f = (__GLXextFuncPtr) _glapi_get_proc_address((const char *) procName); + return f; +} + + +/* GLX 1.4 */ +void (*glXGetProcAddress(const GLubyte *procName))() +{ + return glXGetProcAddressARB(procName); +} diff --git a/src/gallium/winsys/xlib/glxapi.h b/src/gallium/winsys/xlib/glxapi.h new file mode 100644 index 0000000000..37de81e55a --- /dev/null +++ b/src/gallium/winsys/xlib/glxapi.h @@ -0,0 +1,228 @@ +/* + * Mesa 3-D graphics library + * Version: 6.3 + * + * Copyright (C) 1999-2004 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _glxapi_h_
+#define _glxapi_h_
+
+
+#define GLX_GLXEXT_PROTOTYPES
+#include "GL/glx.h"
+
+
+/* The GLX API dispatcher (i.e. this code) is being built into stand-alone
+ * Mesa. We don't know anything about XFree86 or real GLX so we define a
+ * minimal __GLXcontextRec here so some of the functions in this file can
+ * work properly.
+ *
+ * NOTE(review): nothing in this header reads these fields; the per-field
+ * notes below are inferred from the field names only and should be
+ * confirmed against the dispatcher source.
+ */
+typedef struct __GLXcontextRec {
+   Display *currentDpy;            /* presumably the display the context is bound to -- confirm */
+   GLboolean isDirect;             /* presumably the direct-rendering flag -- confirm */
+   GLXDrawable currentDrawable;    /* presumably the current draw drawable -- confirm */
+   GLXDrawable currentReadable;    /* presumably the current read drawable -- confirm */
+   XID xid;                        /* an X resource ID; which resource is not shown here */
+} __GLXcontext;
+
+
+/*
+ * Almost all the GLX API functions get routed through this dispatch table.
+ * The exceptions are the glXGetCurrentXXX() functions.
+ *
+ * This dispatch table allows multiple GLX client-side modules to coexist.
+ * Specifically, a real GLX library (like SGI's or the Utah GLX) and Mesa's
+ * pseudo-GLX can be present at the same time. The former being used on
+ * GLX-enabled X servers and the latter on non-GLX X servers.
+ *
+ * Red Hat has been using this since Red Hat Linux 7.0 (I think).
+ * This'll be a standard feature in XFree86 4.3. It basically allows one
+ * libGL to do both DRI-rendering and "fake GLX" rendering to X displays
+ * that lack the GLX extension.
+ */ +struct _glxapi_table { + /*** GLX_VERSION_1_0 ***/ + XVisualInfo *(*ChooseVisual)(Display *dpy, int screen, int *list); + void (*CopyContext)(Display *dpy, GLXContext src, GLXContext dst, unsigned long mask); + GLXContext (*CreateContext)(Display *dpy, XVisualInfo *visinfo, GLXContext shareList, Bool direct); + GLXPixmap (*CreateGLXPixmap)(Display *dpy, XVisualInfo *visinfo, Pixmap pixmap); + void (*DestroyContext)(Display *dpy, GLXContext ctx); + void (*DestroyGLXPixmap)(Display *dpy, GLXPixmap pixmap); + int (*GetConfig)(Display *dpy, XVisualInfo *visinfo, int attrib, int *value); + /*GLXContext (*GetCurrentContext)(void);*/ + /*GLXDrawable (*GetCurrentDrawable)(void);*/ + Bool (*IsDirect)(Display *dpy, GLXContext ctx); + Bool (*MakeCurrent)(Display *dpy, GLXDrawable drawable, GLXContext ctx); + Bool (*QueryExtension)(Display *dpy, int *errorb, int *event); + Bool (*QueryVersion)(Display *dpy, int *maj, int *min); + void (*SwapBuffers)(Display *dpy, GLXDrawable drawable); + void (*UseXFont)(Font font, int first, int count, int listBase); + void (*WaitGL)(void); + void (*WaitX)(void); + + /*** GLX_VERSION_1_1 ***/ + const char *(*GetClientString)(Display *dpy, int name); + const char *(*QueryExtensionsString)(Display *dpy, int screen); + const char *(*QueryServerString)(Display *dpy, int screen, int name); + + /*** GLX_VERSION_1_2 ***/ + /*Display *(*GetCurrentDisplay)(void);*/ + + /*** GLX_VERSION_1_3 ***/ + GLXFBConfig *(*ChooseFBConfig)(Display *dpy, int screen, const int *attribList, int *nitems); + GLXContext (*CreateNewContext)(Display *dpy, GLXFBConfig config, int renderType, GLXContext shareList, Bool direct); + GLXPbuffer (*CreatePbuffer)(Display *dpy, GLXFBConfig config, const int *attribList); + GLXPixmap (*CreatePixmap)(Display *dpy, GLXFBConfig config, Pixmap pixmap, const int *attribList); + GLXWindow (*CreateWindow)(Display *dpy, GLXFBConfig config, Window win, const int *attribList); + void (*DestroyPbuffer)(Display *dpy, GLXPbuffer pbuf); + 
void (*DestroyPixmap)(Display *dpy, GLXPixmap pixmap); + void (*DestroyWindow)(Display *dpy, GLXWindow window); + /*GLXDrawable (*GetCurrentReadDrawable)(void);*/ + int (*GetFBConfigAttrib)(Display *dpy, GLXFBConfig config, int attribute, int *value); + GLXFBConfig *(*GetFBConfigs)(Display *dpy, int screen, int *nelements); + void (*GetSelectedEvent)(Display *dpy, GLXDrawable drawable, unsigned long *mask); + XVisualInfo *(*GetVisualFromFBConfig)(Display *dpy, GLXFBConfig config); + Bool (*MakeContextCurrent)(Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx); + int (*QueryContext)(Display *dpy, GLXContext ctx, int attribute, int *value); + void (*QueryDrawable)(Display *dpy, GLXDrawable draw, int attribute, unsigned int *value); + void (*SelectEvent)(Display *dpy, GLXDrawable drawable, unsigned long mask); + + /*** GLX_SGI_swap_control ***/ + int (*SwapIntervalSGI)(int); + + /*** GLX_SGI_video_sync ***/ + int (*GetVideoSyncSGI)(unsigned int *count); + int (*WaitVideoSyncSGI)(int divisor, int remainder, unsigned int *count); + + /*** GLX_SGI_make_current_read ***/ + Bool (*MakeCurrentReadSGI)(Display *, GLXDrawable, GLXDrawable, GLXContext); + /*GLXDrawable (*GetCurrentReadDrawableSGI)(void);*/ + + /*** GLX_SGIX_video_source (needs video library) ***/ +#if defined(_VL_H_) + GLXVideoSourceSGIX (*CreateGLXVideoSourceSGIX)(Display *, int, VLServer, VLPath, int, VLNode); + void (*DestroyGLXVideoSourceSGIX)(Display *, GLXVideoSourceSGIX); +#else + void *CreateGLXVideoSourceSGIX; + void *DestroyGLXVideoSourceSGIX; +#endif + + /*** GLX_EXT_import_context ***/ + void (*FreeContextEXT)(Display *dpy, GLXContext context); + GLXContextID (*GetContextIDEXT)(const GLXContext context); + /*Display *(*GetCurrentDisplayEXT)(void);*/ + GLXContext (*ImportContextEXT)(Display *dpy, GLXContextID contextID); + int (*QueryContextInfoEXT)(Display *dpy, GLXContext context, int attribute,int *value); + + /*** GLX_SGIX_fbconfig ***/ + int (*GetFBConfigAttribSGIX)(Display *, 
GLXFBConfigSGIX, int, int *); + GLXFBConfigSGIX * (*ChooseFBConfigSGIX)(Display *, int, int *, int *); + GLXPixmap (*CreateGLXPixmapWithConfigSGIX)(Display *, GLXFBConfigSGIX, Pixmap); + GLXContext (*CreateContextWithConfigSGIX)(Display *, GLXFBConfigSGIX, int, GLXContext, Bool); + XVisualInfo * (*GetVisualFromFBConfigSGIX)(Display *, GLXFBConfigSGIX); + GLXFBConfigSGIX (*GetFBConfigFromVisualSGIX)(Display *, XVisualInfo *); + + /*** GLX_SGIX_pbuffer ***/ + GLXPbufferSGIX (*CreateGLXPbufferSGIX)(Display *, GLXFBConfigSGIX, unsigned int, unsigned int, int *); + void (*DestroyGLXPbufferSGIX)(Display *, GLXPbufferSGIX); + int (*QueryGLXPbufferSGIX)(Display *, GLXPbufferSGIX, int, unsigned int *); + void (*SelectEventSGIX)(Display *, GLXDrawable, unsigned long); + void (*GetSelectedEventSGIX)(Display *, GLXDrawable, unsigned long *); + + /*** GLX_SGI_cushion ***/ + void (*CushionSGI)(Display *, Window, float); + + /*** GLX_SGIX_video_resize ***/ + int (*BindChannelToWindowSGIX)(Display *, int, int, Window); + int (*ChannelRectSGIX)(Display *, int, int, int, int, int, int); + int (*QueryChannelRectSGIX)(Display *, int, int, int *, int *, int *, int *); + int (*QueryChannelDeltasSGIX)(Display *, int, int, int *, int *, int *, int *); + int (*ChannelRectSyncSGIX)(Display *, int, int, GLenum); + + /*** GLX_SGIX_dmbuffer (needs dmedia library) ***/ +#if defined (_DM_BUFFER_H_) + Bool (*AssociateDMPbufferSGIX)(Display *, GLXPbufferSGIX, DMparams *, DMbuffer); +#else + void *AssociciateDMPbufferSGIX; +#endif + + /*** GLX_SGIX_swap_group ***/ + void (*JoinSwapGroupSGIX)(Display *, GLXDrawable, GLXDrawable); + + /*** GLX_SGIX_swap_barrier ***/ + void (*BindSwapBarrierSGIX)(Display *, GLXDrawable, int); + Bool (*QueryMaxSwapBarriersSGIX)(Display *, int, int *); + + /*** GLX_SUN_get_transparent_index ***/ + Status (*GetTransparentIndexSUN)(Display *, Window, Window, long *); + + /*** GLX_MESA_copy_sub_buffer ***/ + void (*CopySubBufferMESA)(Display *dpy, GLXDrawable drawable, int 
x, int y, int width, int height); + + /*** GLX_MESA_release_buffers ***/ + Bool (*ReleaseBuffersMESA)(Display *dpy, Window w); + + /*** GLX_MESA_pixmap_colormap ***/ + GLXPixmap (*CreateGLXPixmapMESA)(Display *dpy, XVisualInfo *visinfo, Pixmap pixmap, Colormap cmap); + + /*** GLX_MESA_set_3dfx_mode ***/ + Bool (*Set3DfxModeMESA)(int mode); + + /*** GLX_NV_vertex_array_range ***/ + void * (*AllocateMemoryNV)( GLsizei size, + GLfloat readFrequency, + GLfloat writeFrequency, + GLfloat priority ); + void (*FreeMemoryNV)( GLvoid *pointer ); + + /*** GLX_MESA_agp_offset ***/ + GLuint (*GetAGPOffsetMESA)( const GLvoid *pointer ); + + /*** GLX_EXT_texture_from_pixmap ***/ + void (*BindTexImageEXT)(Display *dpy, GLXDrawable drawable, int buffer, + const int *attrib_list); + void (*ReleaseTexImageEXT)(Display *dpy, GLXDrawable drawable, int buffer); +}; + + + +extern const char * +_glxapi_get_version(void); + + +extern const char ** +_glxapi_get_extensions(void); + + +extern GLuint +_glxapi_get_dispatch_table_size(void); + + +extern void +_glxapi_set_no_op_table(struct _glxapi_table *t); + + +extern __GLXextFuncPtr +_glxapi_get_proc_address(const char *funcName); + + +#endif diff --git a/src/gallium/winsys/xlib/glxheader.h b/src/gallium/winsys/xlib/glxheader.h new file mode 100644 index 0000000000..a402191f13 --- /dev/null +++ b/src/gallium/winsys/xlib/glxheader.h @@ -0,0 +1,62 @@ +/* + * Mesa 3-D graphics library + * Version: 6.5.1 + * + * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#ifndef GLX_HEADER_H +#define GLX_HEADER_H + +#ifdef __VMS +#include <GL/vms_x_fix.h> +#endif + +#include "glheader.h" + +#ifdef XFree86Server + +# include "resource.h" +# include "windowstr.h" + +#else + +# include <X11/Xlib.h> +# include <X11/Xlibint.h> +# include <X11/Xutil.h> +# ifdef USE_XSHM /* was SHM */ +# include <sys/ipc.h> +# include <sys/shm.h> +# include <X11/extensions/XShm.h> +# endif +# include <GL/glx.h> +# include <sys/time.h> + +#endif + + + +/* this silences a compiler warning on several systems */ +struct timespec; +struct itimerspec; + + +#endif /*GLX_HEADER*/ diff --git a/src/gallium/winsys/xlib/realglx.c b/src/gallium/winsys/xlib/realglx.c new file mode 100644 index 0000000000..30adb7465b --- /dev/null +++ b/src/gallium/winsys/xlib/realglx.c @@ -0,0 +1,180 @@ + +/* + * Mesa 3-D graphics library + * Version: 5.1 + * + * Copyright (C) 1999-2002 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+
+#include <assert.h>
+#include <GL/glx.h>
+#include "realglx.h"
+#include "glxapi.h"
+
+/*
+ * Build and return the GLX dispatch table backed by the _real_glX*()
+ * functions.
+ *
+ * The table is a function-local static, so every call returns the same
+ * address; each call also re-runs the no-op fill and all assignments
+ * below.  Members that are not assigned here (for example
+ * GetContextIDEXT, BindTexImageEXT and ReleaseTexImageEXT) keep whatever
+ * _glxapi_set_no_op_table() stored in them.
+ */
+struct _glxapi_table *
+_real_GetGLXDispatchTable(void)
+{
+   static struct _glxapi_table glx;
+
+   /* be sure our dispatch table size <= libGL's table
+    * (size is counted in pointer-sized slots; the (void) cast keeps the
+    * variable "used" when assert() is compiled out)
+    */
+   {
+      GLuint size = sizeof(struct _glxapi_table) / sizeof(void *);
+      (void) size;
+      assert(_glxapi_get_dispatch_table_size() >= size);
+   }
+
+   /* initialize the whole table to no-ops */
+   _glxapi_set_no_op_table(&glx);
+
+   /* now initialize the table with the functions I implement */
+
+   /*** GLX_VERSION_1_0 ***/
+   glx.ChooseVisual = _real_glXChooseVisual;
+   glx.CopyContext = _real_glXCopyContext;
+   glx.CreateContext = _real_glXCreateContext;
+   glx.CreateGLXPixmap = _real_glXCreateGLXPixmap;
+   glx.DestroyContext = _real_glXDestroyContext;
+   glx.DestroyGLXPixmap = _real_glXDestroyGLXPixmap;
+   glx.GetConfig = _real_glXGetConfig;
+   /*glx.GetCurrentContext = _real_glXGetCurrentContext;*/
+   /*glx.GetCurrentDrawable = _real_glXGetCurrentDrawable;*/
+   glx.IsDirect = _real_glXIsDirect;
+   glx.MakeCurrent = _real_glXMakeCurrent;
+   glx.QueryExtension = _real_glXQueryExtension;
+   glx.QueryVersion = _real_glXQueryVersion;
+   glx.SwapBuffers = _real_glXSwapBuffers;
+   glx.UseXFont = _real_glXUseXFont;
+   glx.WaitGL = _real_glXWaitGL;
+   glx.WaitX = _real_glXWaitX;
+
+   /*** GLX_VERSION_1_1 ***/
+   glx.GetClientString = _real_glXGetClientString;
+   glx.QueryExtensionsString = _real_glXQueryExtensionsString;
+   glx.QueryServerString = _real_glXQueryServerString;
+
+   /*** GLX_VERSION_1_2 ***/
+   /*glx.GetCurrentDisplay = _real_glXGetCurrentDisplay;*/
+
+   /*** GLX_VERSION_1_3 ***/
+   glx.ChooseFBConfig = _real_glXChooseFBConfig;
+   glx.CreateNewContext = _real_glXCreateNewContext;
+   glx.CreatePbuffer = _real_glXCreatePbuffer;
+   glx.CreatePixmap = _real_glXCreatePixmap;
+   glx.CreateWindow = _real_glXCreateWindow;
+   glx.DestroyPbuffer = _real_glXDestroyPbuffer;
+   glx.DestroyPixmap = _real_glXDestroyPixmap;
+   glx.DestroyWindow = _real_glXDestroyWindow;
+   /*glx.GetCurrentReadDrawable = _real_glXGetCurrentReadDrawable;*/
+   glx.GetFBConfigAttrib = _real_glXGetFBConfigAttrib;
+   glx.GetFBConfigs = _real_glXGetFBConfigs;
+   glx.GetSelectedEvent = _real_glXGetSelectedEvent;
+   glx.GetVisualFromFBConfig = _real_glXGetVisualFromFBConfig;
+   glx.MakeContextCurrent = _real_glXMakeContextCurrent;
+   glx.QueryContext = _real_glXQueryContext;
+   glx.QueryDrawable = _real_glXQueryDrawable;
+   glx.SelectEvent = _real_glXSelectEvent;
+
+   /*** GLX_SGI_swap_control ***/
+   glx.SwapIntervalSGI = _real_glXSwapIntervalSGI;
+
+   /*** GLX_SGI_video_sync ***/
+   glx.GetVideoSyncSGI = _real_glXGetVideoSyncSGI;
+   glx.WaitVideoSyncSGI = _real_glXWaitVideoSyncSGI;
+
+   /*** GLX_SGI_make_current_read ***/
+   glx.MakeCurrentReadSGI = _real_glXMakeCurrentReadSGI;
+   /*glx.GetCurrentReadDrawableSGI = _real_glXGetCurrentReadDrawableSGI;*/
+
+#if defined(_VL_H)
+   /*** GLX_SGIX_video_source
+    * NOTE(review): this guard tests _VL_H, but glxapi.h declares these two
+    * members as typed function pointers only under _VL_H_ (otherwise they
+    * are "void *" placeholders) -- confirm which macro is intended.
+    ***/
+   glx.CreateGLXVideoSourceSGIX = _real_glXCreateGLXVideoSourceSGIX;
+   glx.DestroyGLXVideoSourceSGIX = _real_glXDestroyGLXVideoSourceSGIX;
+#endif
+
+   /*** GLX_EXT_import_context ***/
+   glx.FreeContextEXT = _real_glXFreeContextEXT;
+   /*glx.GetContextIDEXT = _real_glXGetContextIDEXT;*/
+   /*glx.GetCurrentDisplayEXT = _real_glXGetCurrentDisplayEXT;*/
+   glx.ImportContextEXT = _real_glXImportContextEXT;
+   glx.QueryContextInfoEXT = _real_glXQueryContextInfoEXT;
+
+   /*** GLX_SGIX_fbconfig ***/
+   glx.GetFBConfigAttribSGIX = _real_glXGetFBConfigAttribSGIX;
+   glx.ChooseFBConfigSGIX = _real_glXChooseFBConfigSGIX;
+   glx.CreateGLXPixmapWithConfigSGIX = _real_glXCreateGLXPixmapWithConfigSGIX;
+   glx.CreateContextWithConfigSGIX = _real_glXCreateContextWithConfigSGIX;
+   glx.GetVisualFromFBConfigSGIX = _real_glXGetVisualFromFBConfigSGIX;
+   glx.GetFBConfigFromVisualSGIX = _real_glXGetFBConfigFromVisualSGIX;
+
+   /*** GLX_SGIX_pbuffer ***/
+   glx.CreateGLXPbufferSGIX = _real_glXCreateGLXPbufferSGIX;
+   glx.DestroyGLXPbufferSGIX = _real_glXDestroyGLXPbufferSGIX;
+   glx.QueryGLXPbufferSGIX = _real_glXQueryGLXPbufferSGIX;
+   glx.SelectEventSGIX = _real_glXSelectEventSGIX;
+   glx.GetSelectedEventSGIX = _real_glXGetSelectedEventSGIX;
+
+   /*** GLX_SGI_cushion ***/
+   glx.CushionSGI = _real_glXCushionSGI;
+
+   /*** GLX_SGIX_video_resize ***/
+   glx.BindChannelToWindowSGIX = _real_glXBindChannelToWindowSGIX;
+   glx.ChannelRectSGIX = _real_glXChannelRectSGIX;
+   glx.QueryChannelRectSGIX = _real_glXQueryChannelRectSGIX;
+   glx.QueryChannelDeltasSGIX = _real_glXQueryChannelDeltasSGIX;
+   glx.ChannelRectSyncSGIX = _real_glXChannelRectSyncSGIX;
+
+#if defined(_DM_BUFFER_H_)
+   /*** GLX_SGIX_dmbuffer ***/
+   glx.AssociateDMPbufferSGIX = NULL;
+#endif
+
+   /*** GLX_SGIX_swap_group ***/
+   glx.JoinSwapGroupSGIX = _real_glXJoinSwapGroupSGIX;
+
+   /*** GLX_SGIX_swap_barrier ***/
+   glx.BindSwapBarrierSGIX = _real_glXBindSwapBarrierSGIX;
+   glx.QueryMaxSwapBarriersSGIX = _real_glXQueryMaxSwapBarriersSGIX;
+
+   /*** GLX_SUN_get_transparent_index ***/
+   glx.GetTransparentIndexSUN = _real_glXGetTransparentIndexSUN;
+
+   /*** GLX_MESA_copy_sub_buffer ***/
+   glx.CopySubBufferMESA = _real_glXCopySubBufferMESA;
+
+   /*** GLX_MESA_release_buffers ***/
+   glx.ReleaseBuffersMESA = _real_glXReleaseBuffersMESA;
+
+   /*** GLX_MESA_pixmap_colormap ***/
+   glx.CreateGLXPixmapMESA = _real_glXCreateGLXPixmapMESA;
+
+   /*** GLX_MESA_set_3dfx_mode ***/
+   glx.Set3DfxModeMESA = _real_glXSet3DfxModeMESA;
+
+   /*** GLX_NV_vertex_array_range ***/
+   glx.AllocateMemoryNV = _real_glXAllocateMemoryNV;
+   glx.FreeMemoryNV = _real_glXFreeMemoryNV;
+
+   /*** GLX_MESA_agp_offset ***/
+   glx.GetAGPOffsetMESA = _real_glXGetAGPOffsetMESA;
+
+   return &glx;
+}
diff --git a/src/gallium/winsys/xlib/realglx.h b/src/gallium/winsys/xlib/realglx.h
new file mode 100644
index 0000000000..150129db68
--- /dev/null
+++ b/src/gallium/winsys/xlib/realglx.h
@@ -0,0 +1,326 @@
+
+/*
+ * Mesa 3-D graphics library
+ * Version: 3.5
+ *
+ * Copyright (C) 1999-2001 Brian Paul All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#ifndef REALGLX_H +#define REALGLX_H + + +extern struct _glxapi_table * +_real_GetGLXDispatchTable(void); + + +/* + * Basically just need these to prevent compiler warnings. 
+ */ + + +extern XVisualInfo * +_real_glXChooseVisual( Display *dpy, int screen, int *list ); + +extern GLXContext +_real_glXCreateContext( Display *dpy, XVisualInfo *visinfo, + GLXContext share_list, Bool direct ); + +extern GLXPixmap +_real_glXCreateGLXPixmap( Display *dpy, XVisualInfo *visinfo, Pixmap pixmap ); + +extern GLXPixmap +_real_glXCreateGLXPixmapMESA( Display *dpy, XVisualInfo *visinfo, + Pixmap pixmap, Colormap cmap ); + +extern void +_real_glXDestroyGLXPixmap( Display *dpy, GLXPixmap pixmap ); + +extern void +_real_glXCopyContext( Display *dpy, GLXContext src, GLXContext dst, + unsigned long mask ); + +extern Bool +_real_glXMakeCurrent( Display *dpy, GLXDrawable drawable, GLXContext ctx ); + +extern Bool +_real_glXQueryExtension( Display *dpy, int *errorb, int *event ); + +extern void +_real_glXDestroyContext( Display *dpy, GLXContext ctx ); + +extern Bool +_real_glXIsDirect( Display *dpy, GLXContext ctx ); + +extern void +_real_glXSwapBuffers( Display *dpy, GLXDrawable drawable ); + +extern void +_real_glXUseXFont( Font font, int first, int count, int listbase ); + +extern Bool +_real_glXQueryVersion( Display *dpy, int *maj, int *min ); + +extern int +_real_glXGetConfig( Display *dpy, XVisualInfo *visinfo, + int attrib, int *value ); + +extern void +_real_glXWaitGL( void ); + + +extern void +_real_glXWaitX( void ); + +/* GLX 1.1 and later */ +extern const char * +_real_glXQueryExtensionsString( Display *dpy, int screen ); + +/* GLX 1.1 and later */ +extern const char * +_real_glXQueryServerString( Display *dpy, int screen, int name ); + +/* GLX 1.1 and later */ +extern const char * +_real_glXGetClientString( Display *dpy, int name ); + + +/* + * GLX 1.3 and later + */ + +extern GLXFBConfig * +_real_glXChooseFBConfig( Display *dpy, int screen, + const int *attribList, int *nitems ); + +extern int +_real_glXGetFBConfigAttrib( Display *dpy, GLXFBConfig config, + int attribute, int *value ); + +extern GLXFBConfig * +_real_glXGetFBConfigs( Display *dpy, 
int screen, int *nelements ); + +extern XVisualInfo * +_real_glXGetVisualFromFBConfig( Display *dpy, GLXFBConfig config ); + +extern GLXWindow +_real_glXCreateWindow( Display *dpy, GLXFBConfig config, Window win, + const int *attribList ); + +extern void +_real_glXDestroyWindow( Display *dpy, GLXWindow window ); + +extern GLXPixmap +_real_glXCreatePixmap( Display *dpy, GLXFBConfig config, Pixmap pixmap, + const int *attribList ); + +extern void +_real_glXDestroyPixmap( Display *dpy, GLXPixmap pixmap ); + +extern GLXPbuffer +_real_glXCreatePbuffer( Display *dpy, GLXFBConfig config, + const int *attribList ); + +extern void +_real_glXDestroyPbuffer( Display *dpy, GLXPbuffer pbuf ); + +extern void +_real_glXQueryDrawable( Display *dpy, GLXDrawable draw, int attribute, + unsigned int *value ); + +extern GLXContext +_real_glXCreateNewContext( Display *dpy, GLXFBConfig config, + int renderType, GLXContext shareList, Bool direct ); + + +extern Bool +_real_glXMakeContextCurrent( Display *dpy, GLXDrawable draw, + GLXDrawable read, GLXContext ctx ); + +extern int +_real_glXQueryContext( Display *dpy, GLXContext ctx, int attribute, int *value ); + +extern void +_real_glXSelectEvent( Display *dpy, GLXDrawable drawable, unsigned long mask ); + +extern void +_real_glXGetSelectedEvent( Display *dpy, GLXDrawable drawable, + unsigned long *mask ); + +#ifdef GLX_SGI_swap_control +extern int +_real_glXSwapIntervalSGI(int interval); +#endif + + +#ifdef GLX_SGI_video_sync +extern int +_real_glXGetVideoSyncSGI(unsigned int *count); + +extern int +_real_glXWaitVideoSyncSGI(int divisor, int remainder, unsigned int *count); +#endif + + +#ifdef GLX_SGI_make_current_read +extern Bool +_real_glXMakeCurrentReadSGI(Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx); + +extern GLXDrawable +_real_glXGetCurrentReadDrawableSGI(void); +#endif + +#if defined(_VL_H) && defined(GLX_SGIX_video_source) +extern GLXVideoSourceSGIX +_real_glXCreateGLXVideoSourceSGIX(Display *dpy, int screen, 
VLServer server, VLPath path, int nodeClass, VLNode drainNode); + +extern void +_real_glXDestroyGLXVideoSourceSGIX(Display *dpy, GLXVideoSourceSGIX src); +#endif + +#ifdef GLX_EXT_import_context +extern void +_real_glXFreeContextEXT(Display *dpy, GLXContext context); + +extern GLXContextID +_real_glXGetContextIDEXT(const GLXContext context); + +extern Display * +_real_glXGetCurrentDisplayEXT(void); + +extern GLXContext +_real_glXImportContextEXT(Display *dpy, GLXContextID contextID); + +extern int +_real_glXQueryContextInfoEXT(Display *dpy, GLXContext context, int attribute, int *value); +#endif + +#ifdef GLX_SGIX_fbconfig +extern int +_real_glXGetFBConfigAttribSGIX(Display *dpy, GLXFBConfigSGIX config, int attribute, int *value); + +extern GLXFBConfigSGIX * +_real_glXChooseFBConfigSGIX(Display *dpy, int screen, int *attrib_list, int *nelements); + +extern GLXPixmap +_real_glXCreateGLXPixmapWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, Pixmap pixmap); + +extern GLXContext +_real_glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, int render_type, GLXContext share_list, Bool direct); + +extern XVisualInfo * +_real_glXGetVisualFromFBConfigSGIX(Display *dpy, GLXFBConfigSGIX config); + +extern GLXFBConfigSGIX +_real_glXGetFBConfigFromVisualSGIX(Display *dpy, XVisualInfo *vis); +#endif + +#ifdef GLX_SGIX_pbuffer +extern GLXPbufferSGIX +_real_glXCreateGLXPbufferSGIX(Display *dpy, GLXFBConfigSGIX config, unsigned int width, unsigned int height, int *attrib_list); + +extern void +_real_glXDestroyGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf); + +extern int +_real_glXQueryGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf, int attribute, unsigned int *value); + +extern void +_real_glXSelectEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long mask); + +extern void +_real_glXGetSelectedEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long *mask); +#endif + +#ifdef GLX_SGI_cushion +extern void +_real_glXCushionSGI(Display *dpy, Window win, 
float cushion); +#endif + +#ifdef GLX_SGIX_video_resize +extern int +_real_glXBindChannelToWindowSGIX(Display *dpy, int screen, int channel , Window window); + +extern int +_real_glXChannelRectSGIX(Display *dpy, int screen, int channel, int x, int y, int w, int h); + +extern int +_real_glXQueryChannelRectSGIX(Display *dpy, int screen, int channel, int *x, int *y, int *w, int *h); + +extern int +_real_glXQueryChannelDeltasSGIX(Display *dpy, int screen, int channel, int *dx, int *dy, int *dw, int *dh); + +extern int +_real_glXChannelRectSyncSGIX(Display *dpy, int screen, int channel, GLenum synctype); +#endif + +#if defined(_DM_BUFFER_H_) && defined(GLX_SGIX_dmbuffer) +extern Bool +_real_glXAssociateDMPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuffer, DMparams *params, DMbuffer dmbuffer); +#endif + +#ifdef GLX_SGIX_swap_group +extern void +_real_glXJoinSwapGroupSGIX(Display *dpy, GLXDrawable drawable, GLXDrawable member); +#endif + +#ifdef GLX_SGIX_swap_barrier +extern void +_real_glXBindSwapBarrierSGIX(Display *dpy, GLXDrawable drawable, int barrier); + +extern Bool +_real_glXQueryMaxSwapBarriersSGIX(Display *dpy, int screen, int *max); +#endif + +#ifdef GLX_SUN_get_transparent_index +extern Status +_real_glXGetTransparentIndexSUN(Display *dpy, Window overlay, Window underlay, long *pTransparent); +#endif + +#ifdef GLX_MESA_release_buffers +extern Bool +_real_glXReleaseBuffersMESA( Display *dpy, GLXDrawable d ); +#endif + +#ifdef GLX_MESA_set_3dfx_mode +extern Bool +_real_glXSet3DfxModeMESA( int mode ); +#endif + +#ifdef GLX_NV_vertex_array_range +extern void * +_real_glXAllocateMemoryNV(GLsizei size, GLfloat readfreq, GLfloat writefreq, GLfloat priority); +extern void +_real_glXFreeMemoryNV(GLvoid *pointer); +#endif + +#ifdef GLX_MESA_agp_offset +extern GLuint +_real_glXGetAGPOffsetMESA(const GLvoid *pointer); +#endif + +#ifdef GLX_MESA_copy_sub_buffer +extern void +_real_glXCopySubBufferMESA( Display *dpy, GLXDrawable drawable, + int x, int y, int width, int height 
); +#endif + +#endif /* REALGLX_H */ diff --git a/src/gallium/winsys/xlib/xfonts.c b/src/gallium/winsys/xlib/xfonts.c new file mode 100644 index 0000000000..d72c600bd1 --- /dev/null +++ b/src/gallium/winsys/xlib/xfonts.c @@ -0,0 +1,377 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.5 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +/* xfonts.c -- glXUseXFont() for Mesa written by + * Copyright (C) 1995 Thorsten.Ohl @ Physik.TH-Darmstadt.de + */ + +#ifdef __VMS +#include <GL/vms_x_fix.h> +#endif + +#include "glxheader.h" +#include "context.h" +#include "imports.h" +#include "xfonts.h" + + +/* Some debugging info. 
#ifdef DEBUG
#undef _R
#undef _G
#undef _B
#include <ctype.h>

/* When non-zero, Fake_glXUseXFont() prints font and glyph dumps. */
int debug_xfonts = 0;

/**
 * Print the metrics stored in one XCharStruct.
 * \param ch      the character metrics to print
 * \param prefix  text emitted at the start of each output line
 */
static void
dump_char_struct(const XCharStruct *ch, const char *prefix)
{
   printf("%slbearing = %d, rbearing = %d, width = %d\n",
          prefix, ch->lbearing, ch->rbearing, ch->width);
   printf("%sascent = %d, descent = %d, attributes = %u\n",
          prefix, ch->ascent, ch->descent, (unsigned int) ch->attributes);
}

/**
 * Print the font-wide fields of an XFontStruct followed by its
 * min/max glyph bounds.
 */
static void
dump_font_struct(const XFontStruct *font)
{
   const unsigned int dflt = font->default_char;
   /* isprint() is only defined for values representable as unsigned char;
    * default_char is a plain unsigned and may be larger than that, so
    * range-check before classifying it.
    */
   const int printable = (dflt <= 0xff) && isprint((int) dflt);

   printf("ascent = %d, descent = %d\n", font->ascent, font->descent);
   printf("char_or_byte2 = (%u,%u)\n",
          font->min_char_or_byte2, font->max_char_or_byte2);
   printf("byte1 = (%u,%u)\n", font->min_byte1, font->max_byte1);
   printf("all_chars_exist = %s\n", font->all_chars_exist ? "True" : "False");
   printf("default_char = %c (\\%03o)\n",
          (char) (printable ? dflt : ' '), dflt);
   dump_char_struct(&font->min_bounds, "min> ");
   dump_char_struct(&font->max_bounds, "max> ");
}

/**
 * Print a bitmap as ASCII art ('*' = set bit, '.' = clear bit) followed
 * by the raw bytes of each row.  Rows are printed top-down, i.e. starting
 * with the last row stored in \p bitmap.
 * \param width   row length in bytes
 * \param height  number of rows
 * \param bitmap  width * height bytes, most significant bit leftmost
 */
static void
dump_bitmap(unsigned int width, unsigned int height, const GLubyte *bitmap)
{
   const unsigned int bits_per_row = 8 * width;
   unsigned int x, y;

   /* header: bit position within each byte */
   printf(" ");
   for (x = 0; x < bits_per_row; x++)
      printf("%o", 7 - (x % 8));
   putchar('\n');

   for (y = 0; y < height; y++) {
      const GLubyte *row = bitmap + width * (height - y - 1);

      printf("%3o:", y);
      for (x = 0; x < bits_per_row; x++)
         putchar((row[x / 8] & (1 << (7 - (x % 8)))) ? '*' : '.');
      printf(" ");
      for (x = 0; x < width; x++)
         printf("0x%02x, ", row[x]);
      putchar('\n');
   }
}
#endif /* DEBUG */
+ + Note that the generated bitmaps must be used with + + glPixelStorei (GL_UNPACK_SWAP_BYTES, GL_FALSE); + glPixelStorei (GL_UNPACK_LSB_FIRST, GL_FALSE); + glPixelStorei (GL_UNPACK_ROW_LENGTH, 0); + glPixelStorei (GL_UNPACK_SKIP_ROWS, 0); + glPixelStorei (GL_UNPACK_SKIP_PIXELS, 0); + glPixelStorei (GL_UNPACK_ALIGNMENT, 1); + + Possible optimizations: + + * use only one reusable pixmap with the maximum dimensions. + * draw the entire font into a single pixmap (careful with + proportional fonts!). +*/ + + +/* + * Generate OpenGL-compatible bitmap. + */ +static void +fill_bitmap(Display * dpy, Window win, GC gc, + unsigned int width, unsigned int height, + int x0, int y0, unsigned int c, GLubyte * bitmap) +{ + XImage *image; + unsigned int x, y; + Pixmap pixmap; + XChar2b char2b; + + pixmap = XCreatePixmap(dpy, win, 8 * width, height, 1); + XSetForeground(dpy, gc, 0); + XFillRectangle(dpy, pixmap, gc, 0, 0, 8 * width, height); + XSetForeground(dpy, gc, 1); + + char2b.byte1 = (c >> 8) & 0xff; + char2b.byte2 = (c & 0xff); + + XDrawString16(dpy, pixmap, gc, x0, y0, &char2b, 1); + + image = XGetImage(dpy, pixmap, 0, 0, 8 * width, height, 1, XYPixmap); + if (image) { + /* Fill the bitmap (X11 and OpenGL are upside down wrt each other). */ + for (y = 0; y < height; y++) + for (x = 0; x < 8 * width; x++) + if (XGetPixel(image, x, y)) + bitmap[width * (height - y - 1) + x / 8] |= + (1 << (7 - (x % 8))); + XDestroyImage(image); + } + + XFreePixmap(dpy, pixmap); +} + +/* + * determine if a given glyph is valid and return the + * corresponding XCharStruct. 
+ */ +static XCharStruct * +isvalid(XFontStruct * fs, unsigned int which) +{ + unsigned int rows, pages; + unsigned int byte1 = 0, byte2 = 0; + int i, valid = 1; + + rows = fs->max_byte1 - fs->min_byte1 + 1; + pages = fs->max_char_or_byte2 - fs->min_char_or_byte2 + 1; + + if (rows == 1) { + /* "linear" fonts */ + if ((fs->min_char_or_byte2 > which) || (fs->max_char_or_byte2 < which)) + valid = 0; + } + else { + /* "matrix" fonts */ + byte2 = which & 0xff; + byte1 = which >> 8; + if ((fs->min_char_or_byte2 > byte2) || + (fs->max_char_or_byte2 < byte2) || + (fs->min_byte1 > byte1) || (fs->max_byte1 < byte1)) + valid = 0; + } + + if (valid) { + if (fs->per_char) { + if (rows == 1) { + /* "linear" fonts */ + return (fs->per_char + (which - fs->min_char_or_byte2)); + } + else { + /* "matrix" fonts */ + i = ((byte1 - fs->min_byte1) * pages) + + (byte2 - fs->min_char_or_byte2); + return (fs->per_char + i); + } + } + else { + return (&fs->min_bounds); + } + } + return (NULL); +} + + +void +Fake_glXUseXFont(Font font, int first, int count, int listbase) +{ + Display *dpy; + Window win; + Pixmap pixmap; + GC gc; + XGCValues values; + unsigned long valuemask; + XFontStruct *fs; + GLint swapbytes, lsbfirst, rowlength; + GLint skiprows, skippixels, alignment; + unsigned int max_width, max_height, max_bm_width, max_bm_height; + GLubyte *bm; + int i; + + dpy = glXGetCurrentDisplay(); + if (!dpy) + return; /* I guess glXMakeCurrent wasn't called */ + win = RootWindow(dpy, DefaultScreen(dpy)); + + fs = XQueryFont(dpy, font); + if (!fs) { + _mesa_error(NULL, GL_INVALID_VALUE, + "Couldn't get font structure information"); + return; + } + + /* Allocate a bitmap that can fit all characters. 
*/ + max_width = fs->max_bounds.rbearing - fs->min_bounds.lbearing; + max_height = fs->max_bounds.ascent + fs->max_bounds.descent; + max_bm_width = (max_width + 7) / 8; + max_bm_height = max_height; + + bm = (GLubyte *) MALLOC((max_bm_width * max_bm_height) * sizeof(GLubyte)); + if (!bm) { + XFreeFontInfo(NULL, fs, 1); + _mesa_error(NULL, GL_OUT_OF_MEMORY, + "Couldn't allocate bitmap in glXUseXFont()"); + return; + } + +#if 0 + /* get the page info */ + pages = fs->max_char_or_byte2 - fs->min_char_or_byte2 + 1; + firstchar = (fs->min_byte1 << 8) + fs->min_char_or_byte2; + lastchar = (fs->max_byte1 << 8) + fs->max_char_or_byte2; + rows = fs->max_byte1 - fs->min_byte1 + 1; + unsigned int first_char, last_char, pages, rows; +#endif + + /* Save the current packing mode for bitmaps. */ + glGetIntegerv(GL_UNPACK_SWAP_BYTES, &swapbytes); + glGetIntegerv(GL_UNPACK_LSB_FIRST, &lsbfirst); + glGetIntegerv(GL_UNPACK_ROW_LENGTH, &rowlength); + glGetIntegerv(GL_UNPACK_SKIP_ROWS, &skiprows); + glGetIntegerv(GL_UNPACK_SKIP_PIXELS, &skippixels); + glGetIntegerv(GL_UNPACK_ALIGNMENT, &alignment); + + /* Enforce a standard packing mode which is compatible with + fill_bitmap() from above. This is actually the default mode, + except for the (non)alignment. 
*/ + glPixelStorei(GL_UNPACK_SWAP_BYTES, GL_FALSE); + glPixelStorei(GL_UNPACK_LSB_FIRST, GL_FALSE); + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + glPixelStorei(GL_UNPACK_SKIP_ROWS, 0); + glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0); + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + + pixmap = XCreatePixmap(dpy, win, 10, 10, 1); + values.foreground = BlackPixel(dpy, DefaultScreen(dpy)); + values.background = WhitePixel(dpy, DefaultScreen(dpy)); + values.font = fs->fid; + valuemask = GCForeground | GCBackground | GCFont; + gc = XCreateGC(dpy, pixmap, valuemask, &values); + XFreePixmap(dpy, pixmap); + +#ifdef DEBUG + if (debug_xfonts) + dump_font_struct(fs); +#endif + + for (i = 0; i < count; i++) { + unsigned int width, height, bm_width, bm_height; + GLfloat x0, y0, dx, dy; + XCharStruct *ch; + int x, y; + unsigned int c = first + i; + int list = listbase + i; + int valid; + + /* check on index validity and get the bounds */ + ch = isvalid(fs, c); + if (!ch) { + ch = &fs->max_bounds; + valid = 0; + } + else { + valid = 1; + } + +#ifdef DEBUG + if (debug_xfonts) { + char s[7]; + sprintf(s, isprint(c) ? "%c> " : "\\%03o> ", c); + dump_char_struct(ch, s); + } +#endif + + /* glBitmap()' parameters: + straight from the glXUseXFont(3) manpage. */ + width = ch->rbearing - ch->lbearing; + height = ch->ascent + ch->descent; + x0 = -ch->lbearing; + y0 = ch->descent - 0; /* XXX used to subtract 1 here */ + /* but that caused a conformace failure */ + dx = ch->width; + dy = 0; + + /* X11's starting point. */ + x = -ch->lbearing; + y = ch->ascent; + + /* Round the width to a multiple of eight. We will use this also + for the pixmap for capturing the X11 font. This is slightly + inefficient, but it makes the OpenGL part real easy. 
*/ + bm_width = (width + 7) / 8; + bm_height = height; + + glNewList(list, GL_COMPILE); + if (valid && (bm_width > 0) && (bm_height > 0)) { + + MEMSET(bm, '\0', bm_width * bm_height); + fill_bitmap(dpy, win, gc, bm_width, bm_height, x, y, c, bm); + + glBitmap(width, height, x0, y0, dx, dy, bm); +#ifdef DEBUG + if (debug_xfonts) { + printf("width/height = %u/%u\n", width, height); + printf("bm_width/bm_height = %u/%u\n", bm_width, bm_height); + dump_bitmap(bm_width, bm_height, bm); + } +#endif + } + else { + glBitmap(0, 0, 0.0, 0.0, dx, dy, NULL); + } + glEndList(); + } + + FREE(bm); + XFreeFontInfo(NULL, fs, 1); + XFreeGC(dpy, gc); + + /* Restore saved packing modes. */ + glPixelStorei(GL_UNPACK_SWAP_BYTES, swapbytes); + glPixelStorei(GL_UNPACK_LSB_FIRST, lsbfirst); + glPixelStorei(GL_UNPACK_ROW_LENGTH, rowlength); + glPixelStorei(GL_UNPACK_SKIP_ROWS, skiprows); + glPixelStorei(GL_UNPACK_SKIP_PIXELS, skippixels); + glPixelStorei(GL_UNPACK_ALIGNMENT, alignment); +} diff --git a/src/gallium/winsys/xlib/xfonts.h b/src/gallium/winsys/xlib/xfonts.h new file mode 100644 index 0000000000..e36f42f817 --- /dev/null +++ b/src/gallium/winsys/xlib/xfonts.h @@ -0,0 +1,41 @@ + +/* + * Mesa 3-D graphics library + * Version: 3.5 + * + * Copyright (C) 1999-2000 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#ifndef XFONTS_H +#define XFONTS_H + +#ifdef __VMS +#include <GL/vms_x_fix.h> +#endif + +#include <X11/Xlib.h> + + +extern void Fake_glXUseXFont( Font font, int first, int count, int listbase ); + + +#endif + diff --git a/src/gallium/winsys/xlib/xm_api.c b/src/gallium/winsys/xlib/xm_api.c new file mode 100644 index 0000000000..d28a6423b9 --- /dev/null +++ b/src/gallium/winsys/xlib/xm_api.c @@ -0,0 +1,1422 @@ +/* + * Mesa 3-D graphics library + * Version: 7.1 + * + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file xm_api.c + * + * All the XMesa* API functions. + * + * + * NOTES: + * + * The window coordinate system origin (0,0) is in the lower-left corner + * of the window. X11's window coordinate origin is in the upper-left + * corner of the window. Therefore, most drawing functions in this + * file have to flip Y coordinates. + * + * Define USE_XSHM in the Makefile with -DUSE_XSHM if you want to compile + * in support for the MIT Shared Memory extension. If enabled, when you + * use an Ximage for the back buffer in double buffered mode, the "swap" + * operation will be faster. You must also link with -lXext. + * + * Byte swapping: If the Mesa host and the X display use a different + * byte order then there's some trickiness to be aware of when using + * XImages. The byte ordering used for the XImage is that of the X + * display, not the Mesa host. + * The color-to-pixel encoding for True/DirectColor must be done + * according to the display's visual red_mask, green_mask, and blue_mask. + * If XPutPixel is used to put a pixel into an XImage then XPutPixel will + * do byte swapping if needed. If one wants to directly "poke" the pixel + * into the XImage's buffer then the pixel must be byte swapped first. 
+ * + */ + +#ifdef __CYGWIN__ +#undef WIN32 +#undef __WIN32__ +#endif + +#include "glxheader.h" +#include "GL/xmesa.h" +#include "xmesaP.h" +#include "main/context.h" +#include "main/framebuffer.h" + +#include "state_tracker/st_public.h" +#include "state_tracker/st_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" +#include "pipe/p_context.h" + +#include "xm_winsys_aub.h" + +/** + * Global X driver lock + */ +pipe_mutex _xmesa_lock; + + +int xmesa_mode; + + +/**********************************************************************/ +/***** X Utility Functions *****/ +/**********************************************************************/ + + +/** + * Return the host's byte order as LSBFirst or MSBFirst ala X. + */ +#ifndef XFree86Server +static int host_byte_order( void ) +{ + int i = 1; + char *cptr = (char *) &i; + return (*cptr==1) ? LSBFirst : MSBFirst; +} +#endif + + +/** + * Check if the X Shared Memory extension is available. + * Return: 0 = not available + * 1 = shared XImage support available + * 2 = shared Pixmap support available also + */ +int xmesa_check_for_xshm( XMesaDisplay *display ) +{ +#if defined(USE_XSHM) && !defined(XFree86Server) + int major, minor, ignore; + Bool pixmaps; + + if (getenv("SP_NO_RAST")) + return 0; + + if (getenv("MESA_NOSHM")) { + return 0; + } + + if (XQueryExtension( display, "MIT-SHM", &ignore, &ignore, &ignore )) { + if (XShmQueryVersion( display, &major, &minor, &pixmaps )==True) { + return (pixmaps==True) ? 2 : 1; + } + else { + return 0; + } + } + else { + return 0; + } +#else + /* No XSHM support */ + return 0; +#endif +} + + +/** + * Return the true number of bits per pixel for XImages. + * For example, if we request a 24-bit deep visual we may actually need/get + * 32bpp XImages. This function returns the appropriate bpp. 
+ * Input: dpy - the X display + * visinfo - desribes the visual to be used for XImages + * Return: true number of bits per pixel for XImages + */ +static int +bits_per_pixel( XMesaVisual xmv ) +{ +#ifdef XFree86Server + const int depth = xmv->nplanes; + int i; + assert(depth > 0); + for (i = 0; i < screenInfo.numPixmapFormats; i++) { + if (screenInfo.formats[i].depth == depth) + return screenInfo.formats[i].bitsPerPixel; + } + return depth; /* should never get here, but this should be safe */ +#else + XMesaDisplay *dpy = xmv->display; + XMesaVisualInfo visinfo = xmv->visinfo; + XMesaImage *img; + int bitsPerPixel; + /* Create a temporary XImage */ + img = XCreateImage( dpy, visinfo->visual, visinfo->depth, + ZPixmap, 0, /*format, offset*/ + (char*) MALLOC(8), /*data*/ + 1, 1, /*width, height*/ + 32, /*bitmap_pad*/ + 0 /*bytes_per_line*/ + ); + assert(img); + /* grab the bits/pixel value */ + bitsPerPixel = img->bits_per_pixel; + /* free the XImage */ + _mesa_free( img->data ); + img->data = NULL; + XMesaDestroyImage( img ); + return bitsPerPixel; +#endif +} + + + +/* + * Determine if a given X window ID is valid (window exists). + * Do this by calling XGetWindowAttributes() for the window and + * checking if we catch an X error. 
+ * Input: dpy - the display + * win - the window to check for existance + * Return: GL_TRUE - window exists + * GL_FALSE - window doesn't exist + */ +#ifndef XFree86Server +static GLboolean WindowExistsFlag; + +static int window_exists_err_handler( XMesaDisplay* dpy, XErrorEvent* xerr ) +{ + (void) dpy; + if (xerr->error_code == BadWindow) { + WindowExistsFlag = GL_FALSE; + } + return 0; +} + +static GLboolean window_exists( XMesaDisplay *dpy, Window win ) +{ + XWindowAttributes wa; + int (*old_handler)( XMesaDisplay*, XErrorEvent* ); + WindowExistsFlag = GL_TRUE; + old_handler = XSetErrorHandler(window_exists_err_handler); + XGetWindowAttributes( dpy, win, &wa ); /* dummy request */ + XSetErrorHandler(old_handler); + return WindowExistsFlag; +} + +static Status +get_drawable_size( XMesaDisplay *dpy, Drawable d, uint *width, uint *height ) +{ + Window root; + Status stat; + int xpos, ypos; + unsigned int w, h, bw, depth; + stat = XGetGeometry(dpy, d, &root, &xpos, &ypos, &w, &h, &bw, &depth); + *width = w; + *height = h; + return stat; +} +#endif + + +/** + * Return the size of the window (or pixmap) that corresponds to the + * given XMesaBuffer. + * \param width returns width in pixels + * \param height returns height in pixels + */ +static void +xmesa_get_window_size(XMesaDisplay *dpy, XMesaBuffer b, + GLuint *width, GLuint *height) +{ +#ifdef XFree86Server + *width = MIN2(b->drawable->width, MAX_WIDTH); + *height = MIN2(b->drawable->height, MAX_HEIGHT); +#else + Status stat; + + pipe_mutex_lock(_xmesa_lock); + XSync(b->xm_visual->display, 0); /* added for Chromium */ + stat = get_drawable_size(dpy, b->drawable, width, height); + pipe_mutex_unlock(_xmesa_lock); + + if (!stat) { + /* probably querying a window that's recently been destroyed */ + _mesa_warning(NULL, "XGetGeometry failed!\n"); + *width = *height = 1; + } +#endif +} + + +/** + * Choose the pixel format for the given visual. 
+ * This will tell the gallium driver how to pack pixel data into + * drawing surfaces. + */ +static GLuint +choose_pixel_format(XMesaVisual v) +{ + if ( GET_REDMASK(v) == 0x0000ff + && GET_GREENMASK(v) == 0x00ff00 + && GET_BLUEMASK(v) == 0xff0000 + && v->BitsPerPixel == 32) { + if (CHECK_BYTE_ORDER(v)) { + /* no byteswapping needed */ + return 0 /* PIXEL_FORMAT_U_A8_B8_G8_R8 */; + } + else { + return PIPE_FORMAT_R8G8B8A8_UNORM; + } + } + else if ( GET_REDMASK(v) == 0xff0000 + && GET_GREENMASK(v) == 0x00ff00 + && GET_BLUEMASK(v) == 0x0000ff + && v->BitsPerPixel == 32) { + if (CHECK_BYTE_ORDER(v)) { + /* no byteswapping needed */ + return PIPE_FORMAT_A8R8G8B8_UNORM; + } + else { + return PIPE_FORMAT_B8G8R8A8_UNORM; + } + } + else if ( GET_REDMASK(v) == 0xf800 + && GET_GREENMASK(v) == 0x07e0 + && GET_BLUEMASK(v) == 0x001f + && CHECK_BYTE_ORDER(v) + && v->BitsPerPixel == 16) { + /* 5-6-5 RGB */ + return PIPE_FORMAT_R5G6B5_UNORM; + } + + assert(0); + return 0; +} + + + +/**********************************************************************/ +/***** Linked list of XMesaBuffers *****/ +/**********************************************************************/ + +XMesaBuffer XMesaBufferList = NULL; + + +/** + * Allocate a new XMesaBuffer object which corresponds to the given drawable. + * Note that XMesaBuffer is derived from GLframebuffer. + * The new XMesaBuffer will not have any size (Width=Height=0). + * + * \param d the corresponding X drawable (window or pixmap) + * \param type either WINDOW, PIXMAP or PBUFFER, describing d + * \param vis the buffer's visual + * \param cmap the window's colormap, if known. 
+ * \return new XMesaBuffer or NULL if any problem + */ +static XMesaBuffer +create_xmesa_buffer(XMesaDrawable d, BufferType type, + XMesaVisual vis, XMesaColormap cmap) +{ + XMesaBuffer b; + GLframebuffer *fb; + enum pipe_format colorFormat, depthFormat, stencilFormat; + uint width, height; + + ASSERT(type == WINDOW || type == PIXMAP || type == PBUFFER); + + b = (XMesaBuffer) CALLOC_STRUCT(xmesa_buffer); + if (!b) + return NULL; + + b->drawable = d; + + b->xm_visual = vis; + b->type = type; + b->cmap = cmap; + + /* determine PIPE_FORMATs for buffers */ + colorFormat = choose_pixel_format(vis); + + if (vis->mesa_visual.depthBits == 0) + depthFormat = PIPE_FORMAT_NONE; +#ifdef GALLIUM_CELL /* XXX temporary for Cell! */ + else + depthFormat = PIPE_FORMAT_S8Z24_UNORM; +#else + else if (vis->mesa_visual.depthBits <= 16) + depthFormat = PIPE_FORMAT_Z16_UNORM; + else if (vis->mesa_visual.depthBits <= 24) + depthFormat = PIPE_FORMAT_S8Z24_UNORM; + else + depthFormat = PIPE_FORMAT_Z32_UNORM; +#endif + + if (vis->mesa_visual.stencilBits == 8) { + if (depthFormat == PIPE_FORMAT_S8Z24_UNORM) + stencilFormat = depthFormat; + else + stencilFormat = PIPE_FORMAT_S8_UNORM; + } + else { + /* no stencil */ + stencilFormat = PIPE_FORMAT_NONE; + if (depthFormat == PIPE_FORMAT_S8Z24_UNORM) { + /* use 24-bit Z, undefined stencil channel */ + depthFormat = PIPE_FORMAT_X8Z24_UNORM; + } + } + + + get_drawable_size(vis->display, d, &width, &height); + + /* + * Create framebuffer, but we'll plug in our own renderbuffers below. 
+ */ + b->stfb = st_create_framebuffer(&vis->mesa_visual, + colorFormat, depthFormat, stencilFormat, + width, height, + (void *) b); + fb = &b->stfb->Base; + + /* + * Create scratch XImage for xmesa_display_surface() + */ + b->tempImage = XCreateImage(vis->display, + vis->visinfo->visual, + vis->visinfo->depth, + ZPixmap, 0, /* format, offset */ + NULL, /* data */ + 0, 0, /* size */ + 32, /* bitmap_pad */ + 0); /* bytes_per_line */ + + /* GLX_EXT_texture_from_pixmap */ + b->TextureTarget = 0; + b->TextureFormat = GLX_TEXTURE_FORMAT_NONE_EXT; + b->TextureMipmap = 0; + + /* insert buffer into linked list */ + b->Next = XMesaBufferList; + XMesaBufferList = b; + + return b; +} + + +/** + * Find an XMesaBuffer by matching X display and colormap but NOT matching + * the notThis buffer. + */ +XMesaBuffer +xmesa_find_buffer(XMesaDisplay *dpy, XMesaColormap cmap, XMesaBuffer notThis) +{ + XMesaBuffer b; + for (b = XMesaBufferList; b; b = b->Next) { + if (b->xm_visual->display == dpy && + b->cmap == cmap && + b != notThis) { + return b; + } + } + return NULL; +} + + +/** + * Remove buffer from linked list, delete if no longer referenced. + */ +static void +xmesa_free_buffer(XMesaBuffer buffer) +{ + XMesaBuffer prev = NULL, b; + + for (b = XMesaBufferList; b; b = b->Next) { + if (b == buffer) { + struct gl_framebuffer *fb = &buffer->stfb->Base; + + /* unlink buffer from list */ + if (prev) + prev->Next = buffer->Next; + else + XMesaBufferList = buffer->Next; + + /* mark as delete pending */ + fb->DeletePending = GL_TRUE; + + /* Since the X window for the XMesaBuffer is going away, we don't + * want to dereference this pointer in the future. + */ + b->drawable = 0; + + buffer->tempImage->data = NULL; + XDestroyImage(buffer->tempImage); + + /* Unreference. 
If count = zero we'll really delete the buffer */ + _mesa_unreference_framebuffer(&fb); + + XFreeGC(b->xm_visual->display, b->gc); + + free(buffer); + + return; + } + /* continue search */ + prev = b; + } + /* buffer not found in XMesaBufferList */ + _mesa_problem(NULL,"xmesa_free_buffer() - buffer not found\n"); +} + + + +/**********************************************************************/ +/***** Misc Private Functions *****/ +/**********************************************************************/ + + +/** + * When a context is bound for the first time, we can finally finish + * initializing the context's visual and buffer information. + * \param v the XMesaVisual to initialize + * \param b the XMesaBuffer to initialize (may be NULL) + * \param rgb_flag TRUE = RGBA mode, FALSE = color index mode + * \param window the window/pixmap we're rendering into + * \param cmap the colormap associated with the window/pixmap + * \return GL_TRUE=success, GL_FALSE=failure + */ +static GLboolean +initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b, + GLboolean rgb_flag, XMesaDrawable window, + XMesaColormap cmap) +{ +#ifdef XFree86Server + int client = (window) ? CLIENT_ID(window->id) : 0; +#endif + + ASSERT(!b || b->xm_visual == v); + + /* Save true bits/pixel */ + v->BitsPerPixel = bits_per_pixel(v); + assert(v->BitsPerPixel > 0); + + if (rgb_flag == GL_FALSE) { + /* COLOR-INDEXED WINDOW: not supported*/ + return GL_FALSE; + } + else { + /* RGB WINDOW: + * We support RGB rendering into almost any kind of visual. + */ + const int xclass = v->mesa_visual.visualType; + if (xclass != GLX_TRUE_COLOR && xclass == !GLX_DIRECT_COLOR) { + _mesa_warning(NULL, + "XMesa: RGB mode rendering not supported in given visual.\n"); + return GL_FALSE; + } + v->mesa_visual.indexBits = 0; + + if (v->BitsPerPixel == 32) { + /* We use XImages for all front/back buffers. If an X Window or + * X Pixmap is 32bpp, there's no guarantee that the alpha channel + * will be preserved. 
For XImages we're in luck.
+          */
+         v->mesa_visual.alphaBits = 8;
+      }
+   }
+
+   /*
+    * If MESA_INFO env var is set print out some debugging info
+    * which can help Brian figure out what's going on when a user
+    * reports bugs.
+    */
+   if (_mesa_getenv("MESA_INFO")) {
+      _mesa_printf("X/Mesa visual = %p\n", (void *) v);
+      _mesa_printf("X/Mesa level = %d\n", v->mesa_visual.level);
+      _mesa_printf("X/Mesa depth = %d\n", GET_VISUAL_DEPTH(v));
+      _mesa_printf("X/Mesa bits per pixel = %d\n", v->BitsPerPixel);
+   }
+
+   if (b && window) {
+      /* these should have been set in create_xmesa_buffer */
+      ASSERT(b->drawable == window);
+
+      /* Setup for single/double buffering */
+      if (v->mesa_visual.doubleBufferMode) {
+         /* Double buffered */
+         b->shm = xmesa_check_for_xshm( v->display );
+      }
+
+      /* X11 graphics context */
+#ifdef XFree86Server
+      b->gc = CreateScratchGC(v->display, window->depth);
+#else
+      b->gc = XCreateGC( v->display, window, 0, NULL );
+#endif
+      XMesaSetFunction( v->display, b->gc, GXcopy );
+   }
+
+   return GL_TRUE;
+}
+
+
+
+#define NUM_VISUAL_TYPES 6
+
+/**
+ * Convert an X visual type to a GLX visual type.
+ *
+ * \param visualType X visual type (i.e., \c TrueColor, \c StaticGray, etc.)
+ *        to be converted.
+ * \return If \c visualType is a valid X visual type, a GLX visual type will
+ *         be returned. Otherwise \c GLX_NONE will be returned.
+ *
+ * \note
+ * This code was lifted directly from lib/GL/glx/glcontextmodes.c in the
+ * DRI CVS tree.
+ */
+static GLint
+xmesa_convert_from_x_visual_type( int visualType )
+{
+   static const int glx_visual_types[ NUM_VISUAL_TYPES ] = {
+      GLX_STATIC_GRAY, GLX_GRAY_SCALE,
+      GLX_STATIC_COLOR, GLX_PSEUDO_COLOR,
+      GLX_TRUE_COLOR, GLX_DIRECT_COLOR
+   };
+
+   /* The unsigned cast makes negative inputs fail the range check too,
+    * so the table is only ever indexed with 0..NUM_VISUAL_TYPES-1.
+    */
+   return ( (unsigned) visualType < NUM_VISUAL_TYPES )
+      ? glx_visual_types[ visualType ] : GLX_NONE;
+}
+
+
+/**********************************************************************/
+/***** Public Functions *****/
+/**********************************************************************/
+
+
+/*
+ * Create a new X/Mesa visual.
+ * Input: display - X11 display
+ *        visinfo - an XVisualInfo pointer
+ *        rgb_flag - GL_TRUE = RGB mode,
+ *                   GL_FALSE = color index mode
+ *        alpha_flag - alpha buffer requested?
+ *        db_flag - GL_TRUE = double-buffered,
+ *                  GL_FALSE = single buffered
+ *        stereo_flag - stereo visual?
+ *        ximage_flag - GL_TRUE = use an XImage for back buffer,
+ *                      GL_FALSE = use an off-screen pixmap for back buffer
+ *        depth_size - requested bits/depth values, or zero
+ *        stencil_size - requested bits/stencil values, or zero
+ *        accum_red_size - requested bits/red accum values, or zero
+ *        accum_green_size - requested bits/green accum values, or zero
+ *        accum_blue_size - requested bits/blue accum values, or zero
+ *        accum_alpha_size - requested bits/alpha accum values, or zero
+ *        num_samples - number of samples/pixel if multisampling, or zero
+ *        level - visual level, usually 0
+ *        visualCaveat - ala the GLX extension, usually GLX_NONE
+ * Return: a new XMesaVisual or 0 if error.
+ *
+ * Notes on the current implementation:
+ *  - num_samples is accepted but never referenced in the body; the last
+ *    argument handed to _mesa_initialize_visual() is a literal 0.
+ *  - the visual is released with XMesaDestroyVisual().
+ */
+PUBLIC
+XMesaVisual XMesaCreateVisual( XMesaDisplay *display,
+                               XMesaVisualInfo visinfo,
+                               GLboolean rgb_flag,
+                               GLboolean alpha_flag,
+                               GLboolean db_flag,
+                               GLboolean stereo_flag,
+                               GLboolean ximage_flag,
+                               GLint depth_size,
+                               GLint stencil_size,
+                               GLint accum_red_size,
+                               GLint accum_green_size,
+                               GLint accum_blue_size,
+                               GLint accum_alpha_size,
+                               GLint num_samples,
+                               GLint level,
+                               GLint visualCaveat )
+{
+   XMesaVisual v;
+   GLint red_bits, green_bits, blue_bits, alpha_bits;
+
+#ifndef XFree86Server
+   /* For debugging only */
+   if (_mesa_getenv("MESA_XSYNC")) {
+      /* This makes debugging X easier.
+       * In your debugger, set a breakpoint on _XError to stop when an
+       * X protocol error is generated.
+       */
+      XSynchronize( display, 1 );
+   }
+#endif
+
+   v = (XMesaVisual) CALLOC_STRUCT(xmesa_visual);
+   if (!v) {
+      return NULL;
+   }
+
+   v->display = display;
+
+   /* Save a copy of the XVisualInfo struct because the user may free
+    * (presumably with XFree(); the original text read "X_mesa_free()",
+    * which looks like a search-and-replace artifact) the struct but we
+    * may need some of the information contained in it at a later time.
+    */
+#ifndef XFree86Server
+   v->visinfo = (XVisualInfo *) MALLOC(sizeof(*visinfo));
+   if(!v->visinfo) {
+      _mesa_free(v);
+      return NULL;
+   }
+   MEMCPY(v->visinfo, visinfo, sizeof(*visinfo));
+#endif
+
+   v->ximage_flag = ximage_flag;
+
+#ifdef XFree86Server
+   /* We could calculate these values by ourselves. nplanes is either the sum
+    * of the red, green, and blue bits or the number index bits.
+    * ColormapEntries is either (1U << index_bits) or
+    * (1U << max(redBits, greenBits, blueBits)).
+    */
+   assert(visinfo->nplanes > 0);
+   v->nplanes = visinfo->nplanes;
+   v->ColormapEntries = visinfo->ColormapEntries;
+
+   v->mesa_visual.redMask = visinfo->redMask;
+   v->mesa_visual.greenMask = visinfo->greenMask;
+   v->mesa_visual.blueMask = visinfo->blueMask;
+   v->mesa_visual.visualID = visinfo->vid;
+   v->mesa_visual.screen = 0; /* FIXME: What should be done here? */
+#else
+   v->mesa_visual.redMask = visinfo->red_mask;
+   v->mesa_visual.greenMask = visinfo->green_mask;
+   v->mesa_visual.blueMask = visinfo->blue_mask;
+   v->mesa_visual.visualID = visinfo->visualid;
+   v->mesa_visual.screen = visinfo->screen;
+#endif
+
+   /* The client-side XVisualInfo member is read as c_class when this file
+    * is compiled as C++ and as class otherwise.
+    */
+#if defined(XFree86Server) || !(defined(__cplusplus) || defined(c_plusplus))
+   v->mesa_visual.visualType = xmesa_convert_from_x_visual_type(visinfo->class);
+#else
+   v->mesa_visual.visualType = xmesa_convert_from_x_visual_type(visinfo->c_class);
+#endif
+
+   v->mesa_visual.visualRating = visualCaveat;
+
+   if (alpha_flag)
+      v->mesa_visual.alphaBits = 8;
+
+   /* NOTE(review): the GL_TRUE/GL_FALSE result is deliberately discarded
+    * here, so a failed visual setup is not reported to the caller.
+    */
+   (void) initialize_visual_and_buffer( v, NULL, rgb_flag, 0, 0 );
+
+   {
+      const int xclass = v->mesa_visual.visualType;
+      if (xclass == GLX_TRUE_COLOR || xclass == GLX_DIRECT_COLOR) {
+         red_bits = _mesa_bitcount(GET_REDMASK(v));
+         green_bits = _mesa_bitcount(GET_GREENMASK(v));
+         blue_bits = _mesa_bitcount(GET_BLUEMASK(v));
+      }
+      else {
+         /* this is an approximation */
+         int depth;
+         depth = GET_VISUAL_DEPTH(v);
+         red_bits = depth / 3;
+         depth -= red_bits;
+         green_bits = depth / 2;
+         depth -= green_bits;
+         blue_bits = depth;
+         alpha_bits = 0;   /* dead store: overwritten right after this block */
+         assert( red_bits + green_bits + blue_bits == GET_VISUAL_DEPTH(v) );
+      }
+      alpha_bits = v->mesa_visual.alphaBits;
+   }
+
+   _mesa_initialize_visual( &v->mesa_visual,
+                            rgb_flag, db_flag, stereo_flag,
+                            red_bits, green_bits,
+                            blue_bits, alpha_bits,
+                            v->mesa_visual.indexBits,
+                            depth_size,
+                            stencil_size,
+                            accum_red_size, accum_green_size,
+                            accum_blue_size, accum_alpha_size,
+                            0 );
+
+   /* XXX minor hack */
+   v->mesa_visual.level = level;
+   return v;
+}
+
+
+/*
+ * Free an XMesaVisual created by XMesaCreateVisual(), including the private
+ * XVisualInfo copy made in the non-XFree86Server build.
+ * v is dereferenced unconditionally, so it must not be NULL.
+ */
+PUBLIC
+void XMesaDestroyVisual( XMesaVisual v )
+{
+#ifndef XFree86Server
+   _mesa_free(v->visinfo);
+#endif
+   _mesa_free(v);
+}
+
+
+
+/**
+ * Create a new XMesaContext.
+ * \param v the XMesaVisual
+ * \param share_list another XMesaContext with which to share display
+ *                   lists or NULL if no sharing is wanted.
+ * \return an XMesaContext or NULL if error.
+ */
+PUBLIC
+XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list )
+{
+   static GLboolean firstTime = GL_TRUE;
+   struct pipe_context *pipe;
+   XMesaContext c;
+   GLcontext *mesaCtx;
+   uint pf;
+
+   /* One-time initialisation of the global lock.
+    * NOTE(review): the firstTime flag itself is not protected, so two
+    * threads creating their first context concurrently could both get here.
+    */
+   if (firstTime) {
+      pipe_mutex_init(_xmesa_lock);
+      firstTime = GL_FALSE;
+   }
+
+   /* Note: the XMesaContext contains a Mesa GLcontext struct (inheritance) */
+   c = (XMesaContext) CALLOC_STRUCT(xmesa_context);
+   if (!c)
+      return NULL;
+
+   pf = choose_pixel_format(v);
+   assert(pf);
+
+   c->xm_visual = v;
+   c->xm_buffer = NULL; /* set later by XMesaMakeCurrent */
+
+   /* XM_AUB in the environment selects the i965simple/AUB path;
+    * otherwise a regular pipe context is created for pixel format pf.
+    */
+   if (!getenv("XM_AUB")) {
+      xmesa_mode = XMESA_SOFTPIPE;
+      pipe = xmesa_create_pipe_context( c, pf );
+   }
+   else {
+      xmesa_mode = XMESA_AUB;
+      pipe = xmesa_create_i965simple(xmesa_get_pipe_winsys_aub(v));
+   }
+
+   if (pipe == NULL)
+      goto fail;
+
+   c->st = st_create_context(pipe, &v->mesa_visual,
+                             share_list ? share_list->st : NULL);
+   if (c->st == NULL)
+      goto fail;
+
+   mesaCtx = c->st->ctx;
+   /* back-pointer from the Mesa context to this XMesaContext */
+   c->st->ctx->DriverCtx = c;
+
+#if 00
+   _mesa_enable_sw_extensions(mesaCtx);
+   _mesa_enable_1_3_extensions(mesaCtx);
+   _mesa_enable_1_4_extensions(mesaCtx);
+   _mesa_enable_1_5_extensions(mesaCtx);
+   _mesa_enable_2_0_extensions(mesaCtx);
+#endif
+
+#ifdef XFree86Server
+   /* If we're running in the X server, do bounds checking to prevent
+    * segfaults and server crashes!
+    */
+   mesaCtx->Const.CheckArrayBounds = GL_TRUE;
+#endif
+
+   return c;
+
+ fail:
+   /* Both gotos above are taken while c->st is still NULL (assuming
+    * CALLOC_STRUCT zero-fills), so in practice only the else-branch runs.
+    */
+   if (c->st)
+      st_destroy_context(c->st);
+   else if (pipe)
+      pipe->destroy(pipe);
+   FREE(c);
+   return NULL;
+}
+
+
+
+/*
+ * Destroy a context created by XMesaCreateContext().
+ * c and c->st are dereferenced unconditionally, so c must be a valid,
+ * fully created context.
+ */
+PUBLIC
+void XMesaDestroyContext( XMesaContext c )
+{
+   /* screen is only referenced by the disabled destroy call below */
+   struct pipe_screen *screen = c->st->pipe->screen;
+   st_destroy_context(c->st);
+   /* FIXME: We should destroy the screen here, but if we do so, surfaces may
+    * outlive it, causing segfaults
+   screen->destroy(screen);
+    */
+   _mesa_free(c);
+}
+
+
+
+/**
+ * Private function for creating an XMesaBuffer which corresponds to an
+ * X window or pixmap.
+ * \param v the window's XMesaVisual
+ * \param w the window we're wrapping
+ * \return new XMesaBuffer or NULL if error
+ *
+ * NULL is returned when the window attributes cannot be queried, when the
+ * window depth differs from the visual depth, or when buffer creation or
+ * initialization fails.
+ */
+PUBLIC XMesaBuffer
+XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w)
+{
+#ifndef XFree86Server
+   XWindowAttributes attr;
+#endif
+   XMesaBuffer b;
+   XMesaColormap cmap;
+   int depth;
+
+   assert(v);
+   assert(w);
+
+   /* Check that window depth matches visual depth */
+#ifdef XFree86Server
+   depth = ((XMesaDrawable)w)->depth;
+#else
+   /* XGetWindowAttributes() returns zero on failure and then leaves attr
+    * unfilled; bail out instead of reading uninitialized depth/colormap.
+    */
+   if (!XGetWindowAttributes( v->display, w, &attr )) {
+      _mesa_warning(NULL, "XMesaCreateWindowBuffer: XGetWindowAttributes failed for window %u\n",
+                    (unsigned int) w);
+      return NULL;
+   }
+   depth = attr.depth;
+#endif
+   if (GET_VISUAL_DEPTH(v) != depth) {
+      _mesa_warning(NULL, "XMesaCreateWindowBuffer: depth mismatch between visual (%d) and window (%d)!\n",
+                    GET_VISUAL_DEPTH(v), depth);
+      return NULL;
+   }
+
+   /* Find colormap */
+#ifdef XFree86Server
+   cmap = (ColormapPtr)LookupIDByType(wColormap(w), RT_COLORMAP);
+#else
+   if (attr.colormap) {
+      cmap = attr.colormap;
+   }
+   else {
+      _mesa_warning(NULL, "Window %u has no colormap!\n", (unsigned int) w);
+      /* this is weird, a window w/out a colormap!? */
+      /* OK, let's just allocate a new one and hope for the best */
+      cmap = XCreateColormap(v->display, w, attr.visual, AllocNone);
+   }
+#endif
+
+   b = create_xmesa_buffer((XMesaDrawable) w, WINDOW, v, cmap);
+   if (!b)
+      return NULL;
+
+   if (!initialize_visual_and_buffer( v, b, v->mesa_visual.rgbMode,
+                                      (XMesaDrawable) w, cmap )) {
+      xmesa_free_buffer(b);
+      return NULL;
+   }
+
+   return b;
+}
+
+
+
+/**
+ * Create a new XMesaBuffer from an X pixmap.
+ *
+ * \param v the XMesaVisual
+ * \param p the pixmap
+ * \param cmap the colormap, may be 0 if using a \c GLX_TRUE_COLOR or
+ *             \c GLX_DIRECT_COLOR visual for the pixmap
+ * \returns new XMesaBuffer or NULL if error
+ */
+PUBLIC XMesaBuffer
+XMesaCreatePixmapBuffer(XMesaVisual v, XMesaPixmap p, XMesaColormap cmap)
+{
+   XMesaBuffer b;
+
+   assert(v);
+
+   b = create_xmesa_buffer((XMesaDrawable) p, PIXMAP, v, cmap);
+   if (!b)
+      return NULL;
+
+   if (!initialize_visual_and_buffer(v, b, v->mesa_visual.rgbMode,
+                                     (XMesaDrawable) p, cmap)) {
+      xmesa_free_buffer(b);
+      return NULL;
+   }
+
+   return b;
+}
+
+
+/**
+ * For GLX_EXT_texture_from_pixmap
+ *
+ * Like XMesaCreatePixmapBuffer() but also records the texture target,
+ * format and mipmap flag on the buffer.  When \p target is 0 a target is
+ * chosen from the pixmap size and the current context's extensions; with
+ * no current context only power-of-two sizes can be matched.
+ *
+ * \return new XMesaBuffer, or NULL on failure (including a
+ *         non-power-of-two pixmap that no available target supports)
+ */
+XMesaBuffer
+XMesaCreatePixmapTextureBuffer(XMesaVisual v, XMesaPixmap p,
+                               XMesaColormap cmap,
+                               int format, int target, int mipmap)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   XMesaBuffer b;
+   GLuint width, height;
+
+   assert(v);
+
+   b = create_xmesa_buffer((XMesaDrawable) p, PIXMAP, v, cmap);
+   if (!b)
+      return NULL;
+
+   /* get pixmap size, update framebuffer/renderbuffer dims */
+   xmesa_get_window_size(v->display, b, &width, &height);
+   _mesa_resize_framebuffer(NULL, &(b->stfb->Base), width, height);
+
+   if (target == 0) {
+      /* examine dims */
+      /* ctx may be NULL when no context is current (the XMesaGetCurrent*()
+       * functions check for exactly that), so treat a missing context as
+       * "extension not available" rather than dereferencing it.
+       */
+      if (ctx && ctx->Extensions.ARB_texture_non_power_of_two) {
+         target = GLX_TEXTURE_2D_EXT;
+      }
+      else if ( _mesa_bitcount(width) == 1
+                && _mesa_bitcount(height) == 1) {
+         /* power of two size */
+         if (height == 1) {
+            target = GLX_TEXTURE_1D_EXT;
+         }
+         else {
+            target = GLX_TEXTURE_2D_EXT;
+         }
+      }
+      else if (ctx && ctx->Extensions.NV_texture_rectangle) {
+         target = GLX_TEXTURE_RECTANGLE_EXT;
+      }
+      else {
+         /* non power of two textures not supported */
+         XMesaDestroyBuffer(b);
+         return 0;
+      }
+   }
+
+   b->TextureTarget = target;
+   b->TextureFormat = format;
+   b->TextureMipmap = mipmap;
+
+   if (!initialize_visual_and_buffer(v, b, v->mesa_visual.rgbMode,
+                                     (XMesaDrawable) p, cmap)) {
+      xmesa_free_buffer(b);
+      return NULL;
+   }
+
+   return b;
+}
+
+
+
+/*
+ * Create a pbuffer-type XMesaBuffer backed by a freshly created X pixmap
+ * of the given size.  Returns NULL on failure; always returns 0 in the
+ * XFree86Server build.
+ */
+XMesaBuffer
+XMesaCreatePBuffer(XMesaVisual v, XMesaColormap cmap,
+                   unsigned int width, unsigned int height)
+{
+#ifndef XFree86Server
+   XMesaWindow root;
+   XMesaDrawable drawable; /* X Pixmap Drawable */
+   XMesaBuffer b;
+
+   /* allocate pixmap for front buffer */
+   root = RootWindow( v->display, v->visinfo->screen );
+   drawable = XCreatePixmap(v->display, root, width, height,
+                            v->visinfo->depth);
+   if (!drawable)
+      return NULL;
+
+   b = create_xmesa_buffer(drawable, PBUFFER, v, cmap);
+   if (!b) {
+      /* nothing else owns the pixmap yet, so release it here */
+      XFreePixmap(v->display, drawable);
+      return NULL;
+   }
+
+   if (!initialize_visual_and_buffer(v, b, v->mesa_visual.rgbMode,
+                                     drawable, cmap)) {
+      /* NOTE(review): whether xmesa_free_buffer() also frees the pixmap is
+       * not visible here; if it does not, the pixmap leaks on this path.
+       */
+      xmesa_free_buffer(b);
+      return NULL;
+   }
+
+   return b;
+#else
+   return 0;
+#endif
+}
+
+
+
+/*
+ * Deallocate an XMesaBuffer structure and all related info.
+ */
+PUBLIC void
+XMesaDestroyBuffer(XMesaBuffer b)
+{
+   xmesa_free_buffer(b);
+}
+
+
+/**
+ * Query the current window size and update the corresponding GLframebuffer
+ * and all attached renderbuffers.
+ * Called when:
+ * 1. the first time a buffer is bound to a context.
+ * 2. from the XMesaResizeBuffers() API function.
+ * 3. SwapBuffers. XXX probably from xm_flush_frontbuffer() too...
+ * Note: it's possible (and legal) for xmctx to be NULL. That can happen
+ * when resizing a buffer when no rendering context is bound.
+ */
+void
+xmesa_check_and_update_buffer_size(XMesaContext xmctx, XMesaBuffer drawBuffer)
+{
+   GLuint width, height;
+   xmesa_get_window_size(drawBuffer->xm_visual->display, drawBuffer, &width, &height);
+   st_resize_framebuffer(drawBuffer->stfb, width, height);
+}
+
+
+/*
+ * Bind buffer b to context c and make c the current rendering context.
+ */
+GLboolean XMesaMakeCurrent( XMesaContext c, XMesaBuffer b )
+{
+   return XMesaMakeCurrent2( c, b, b );
+}
+
+
+/*
+ * Bind draw/read buffers to context c and make c the current rendering
+ * context.  A NULL c detaches the current context.  Returns GL_FALSE only
+ * when c is given without both buffers.
+ */
+PUBLIC
+GLboolean XMesaMakeCurrent2( XMesaContext c, XMesaBuffer drawBuffer,
+                             XMesaBuffer readBuffer )
+{
+   if (c) {
+      if (!drawBuffer || !readBuffer)
+         return GL_FALSE; /* must specify buffers! */
+
+#if 0
+      /* XXX restore this optimization */
+      if (&(c->mesa) == _mesa_get_current_context()
+          && c->mesa.DrawBuffer == &drawBuffer->mesa_buffer
+          && c->mesa.ReadBuffer == &readBuffer->mesa_buffer
+          && xmesa_buffer(c->mesa.DrawBuffer)->wasCurrent) {
+         /* same context and buffer, do nothing */
+         return GL_TRUE;
+      }
+#endif
+
+      c->xm_buffer = drawBuffer;
+
+      /* Call this periodically to detect when the user has begun using
+       * GL rendering from multiple threads.
+       */
+      _glapi_check_multithread();
+
+      st_make_current(c->st, drawBuffer->stfb, readBuffer->stfb);
+
+      xmesa_check_and_update_buffer_size(c, drawBuffer);
+      if (readBuffer != drawBuffer)
+         xmesa_check_and_update_buffer_size(c, readBuffer);
+
+      /* Solution to Stephane Rehel's problem with glXReleaseBuffersMESA(): */
+      drawBuffer->wasCurrent = GL_TRUE;
+   }
+   else {
+      /* Detach */
+      st_make_current( NULL, NULL, NULL );
+   }
+   return GL_TRUE;
+}
+
+
+/*
+ * Unbind the context c from its buffer.
+ */
+GLboolean XMesaUnbindContext( XMesaContext c )
+{
+   /* A no-op for XFree86 integration purposes */
+   return GL_TRUE;
+}
+
+
+/* Return the XMesaContext wrapping the current GL context, or 0 if none. */
+XMesaContext XMesaGetCurrentContext( void )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   if (ctx) {
+      XMesaContext xmesa = xmesa_context(ctx);
+      return xmesa;
+   }
+   else {
+      return 0;
+   }
+}
+
+
+/* Return the current context's draw buffer, or 0 if no context is current. */
+XMesaBuffer XMesaGetCurrentBuffer( void )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   if (ctx) {
+      XMesaBuffer xmbuf = xmesa_buffer(ctx->DrawBuffer);
+      return xmbuf;
+   }
+   else {
+      return 0;
+   }
+}
+
+
+/* New in Mesa 3.1 */
+XMesaBuffer XMesaGetCurrentReadBuffer( void )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   if (ctx) {
+      return xmesa_buffer(ctx->ReadBuffer);
+   }
+   else {
+      return 0;
+   }
+}
+
+
+#ifdef XFree86Server
+PUBLIC
+GLboolean XMesaForceCurrent(XMesaContext c)
+{
+   if (c) {
+      _glapi_set_dispatch(c->mesa.CurrentDispatch);
+
+      if (&(c->mesa) != _mesa_get_current_context()) {
+         _mesa_make_current(&c->mesa, c->mesa.DrawBuffer, c->mesa.ReadBuffer);
+      }
+   }
+   else {
+      _mesa_make_current(NULL, NULL, NULL);
+   }
+   return GL_TRUE;
+}
+
+
+PUBLIC
+GLboolean XMesaLoseCurrent(XMesaContext c)
+{
+   (void) c;
+   _mesa_make_current(NULL, NULL, NULL);
+   return GL_TRUE;
+}
+
+
+PUBLIC
+GLboolean XMesaCopyContext( XMesaContext xm_src, XMesaContext xm_dst, GLuint mask )
+{
+   _mesa_copy_context(&xm_src->mesa, &xm_dst->mesa, mask);
+   return GL_TRUE;
+}
+#endif /* XFree86Server */
+
+
+#ifndef FX
+GLboolean XMesaSetFXmode( GLint mode )
+{
+   (void) mode;
+   return GL_FALSE;
+}
+#endif
+
+
+
+/*
+ * Copy the back buffer to the front buffer. If there's no back buffer
+ * this is a no-op.
+ */
+PUBLIC
+void XMesaSwapBuffers( XMesaBuffer b )
+{
+   struct pipe_surface *surf;
+
+   /* If we're swapping the buffer associated with the current context
+    * we have to flush any pending rendering commands first.
+    */
+   st_notify_swapbuffers(b->stfb);
+
+   surf = st_get_framebuffer_surface(b->stfb, ST_SURFACE_BACK_LEFT);
+   if (surf) {
+      if (xmesa_mode == XMESA_AUB)
+         xmesa_display_aub( surf );
+      else
+         xmesa_display_surface(b, surf);
+   }
+
+   xmesa_check_and_update_buffer_size(NULL, b);
+}
+
+
+
+/*
+ * Copy sub-region of back buffer to front buffer
+ *
+ * The copy is issued through the pipe context of the current XMesaContext.
+ * The call is a no-op when no context is current or when the buffer lacks
+ * a front or back surface.
+ */
+void XMesaCopySubBuffer( XMesaBuffer b, int x, int y, int width, int height )
+{
+   struct pipe_surface *surf_front
+      = st_get_framebuffer_surface(b->stfb, ST_SURFACE_FRONT_LEFT);
+   struct pipe_surface *surf_back
+      = st_get_framebuffer_surface(b->stfb, ST_SURFACE_BACK_LEFT);
+   /* This used to be a hard-wired NULL ("XXX fix") that was then
+    * dereferenced; borrow the current context's pipe instead.
+    * NOTE(review): assumes the current context's pipe may operate on b's
+    * surfaces even if b is not its bound draw buffer -- confirm.
+    */
+   XMesaContext xmctx = XMesaGetCurrentContext();
+   struct pipe_context *pipe = xmctx ? xmctx->st->pipe : NULL;
+
+   if (!pipe || !surf_front || !surf_back)
+      return;
+
+   pipe->surface_copy(pipe,
+                      FALSE,
+                      surf_front, x, y, /* dest */
+                      surf_back, x, y, /* src */
+                      width, height);
+}
+
+
+
+/*
+ * Return the depth buffer associated with an XMesaBuffer.
+ * Input: b - the XMesa buffer handle
+ * Output: width, height - size of buffer in pixels
+ *         bytesPerValue - bytes per depth value (2 or 4)
+ *         buffer - pointer to depth buffer values
+ * Return: GL_TRUE or GL_FALSE to indicate success or failure.
+ */
+GLboolean XMesaGetDepthBuffer( XMesaBuffer b, GLint *width, GLint *height,
+                               GLint *bytesPerValue, void **buffer )
+{
+   /* Not implemented in this winsys: every output is zeroed and the call
+    * always reports failure.  All four output pointers are written
+    * unconditionally, so none of them may be NULL.
+    */
+   *width = 0;
+   *height = 0;
+   *bytesPerValue = 0;
+   *buffer = 0;
+   return GL_FALSE;
+}
+
+
+/*
+ * Finish rendering on context c and sync with the X server.
+ * Does nothing when c is NULL, when its visual has no display, or in the
+ * XFree86Server build.
+ */
+void XMesaFlush( XMesaContext c )
+{
+   if (c && c->xm_visual->display) {
+#ifdef XFree86Server
+      /* NOT_NEEDED */
+#else
+      st_finish(c->st);
+      XSync( c->xm_visual->display, False );
+#endif
+   }
+}
+
+
+
+/*
+ * Return a static string for the given XMESA_* name:
+ * "5.0" for XMESA_VERSION, "" for XMESA_EXTENSIONS, NULL for anything else.
+ * The context argument is ignored.
+ */
+const char *XMesaGetString( XMesaContext c, int name )
+{
+   (void) c;
+   if (name==XMESA_VERSION) {
+      return "5.0";
+   }
+   else if (name==XMESA_EXTENSIONS) {
+      return "";
+   }
+   else {
+      return NULL;
+   }
+}
+
+
+
+/*
+ * Walk the global XMesaBufferList and return the first buffer whose
+ * drawable is d on display dpy, or NULL if there is none.
+ */
+XMesaBuffer XMesaFindBuffer( XMesaDisplay *dpy, XMesaDrawable d )
+{
+   XMesaBuffer b;
+   for (b=XMesaBufferList; b; b=b->Next) {
+      if (b->drawable == d && b->xm_visual->display == dpy) {
+         return b;
+      }
+   }
+   return NULL;
+}
+
+
+/**
+ * Free/destroy all XMesaBuffers associated with given display.
+ */
+void xmesa_destroy_buffers_on_display(XMesaDisplay *dpy)
+{
+   XMesaBuffer b, next;
+   for (b = XMesaBufferList; b; b = next) {
+      /* fetch the successor first: b may be freed below */
+      next = b->Next;
+      if (b->xm_visual->display == dpy) {
+         xmesa_free_buffer(b);
+      }
+   }
+}
+
+
+/*
+ * Look for XMesaBuffers whose X window has been destroyed.
+ * Deallocate any such XMesaBuffers.
+ */
+void XMesaGarbageCollect( void )
+{
+   XMesaBuffer b, next;
+   for (b=XMesaBufferList; b; b=next) {
+      /* fetch the successor first: b may be destroyed below */
+      next = b->Next;
+      /* only window-type buffers with a visual, display and drawable
+       * are candidates
+       */
+      if (b->xm_visual &&
+          b->xm_visual->display &&
+          b->drawable &&
+          b->type == WINDOW) {
+#ifdef XFree86Server
+         /* NOT_NEEDED */
+#else
+         XSync(b->xm_visual->display, False);
+         if (!window_exists( b->xm_visual->display, b->drawable )) {
+            /* found a dead window, free the ancillary info */
+            XMesaDestroyBuffer( b );
+         }
+#endif
+      }
+   }
+}
+
+
+/*
+ * Stub kept for API compatibility: all arguments are ignored and the
+ * result is always 0.
+ */
+unsigned long XMesaDitherColor( XMesaContext xmesa, GLint x, GLint y,
+                                GLfloat red, GLfloat green,
+                                GLfloat blue, GLfloat alpha )
+{
+   /* no longer supported */
+   return 0;
+}
+
+
+/*
+ * This is typically called when the window size changes and we need
+ * to reallocate the buffer's back/depth/stencil/accum buffers.
+ *
+ * Nothing happens unless an XMesaContext is current.
+ */
+PUBLIC void
+XMesaResizeBuffers( XMesaBuffer b )
+{
+   GET_CURRENT_CONTEXT(ctx);
+   /* NOTE(review): xmesa_context() is applied before ctx is checked;
+    * presumably it tolerates a NULL ctx -- confirm against its definition.
+    */
+   XMesaContext xmctx = xmesa_context(ctx);
+   if (!xmctx)
+      return;
+   xmesa_check_and_update_buffer_size(xmctx, b);
+}
+
+
+
+
+/* Empty stub: the body does nothing with its arguments. */
+PUBLIC void
+XMesaBindTexImage(XMesaDisplay *dpy, XMesaBuffer drawable, int buffer,
+                  const int *attrib_list)
+{
+}
+
+
+
+/* Empty stub: the body does nothing with its arguments. */
+PUBLIC void
+XMesaReleaseTexImage(XMesaDisplay *dpy, XMesaBuffer drawable, int buffer)
+{
+}
+
diff --git a/src/gallium/winsys/xlib/xm_image.c b/src/gallium/winsys/xlib/xm_image.c
new file mode 100644
index 0000000000..087b4e4c3a
--- /dev/null
+++ b/src/gallium/winsys/xlib/xm_image.c
@@ -0,0 +1,133 @@
+/**************************************************************************
+
+Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+All Rights Reserved.
+ +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR +ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Kevin E. 
Martin <kevin@precisioninsight.com>
+ *   Brian Paul <brian@precisioninsight.com>
+ */
+
+#include <stdlib.h>
+#include <X11/Xmd.h>
+
+#include "glxheader.h"
+#include "xmesaP.h"
+
+#ifdef XFree86Server
+
+#ifdef ROUNDUP
+#undef ROUNDUP
+#endif
+
+/* Round nbytes (a bit count at the call site below) up to a multiple of
+ * pad bits and convert the result to bytes.
+ */
+#define ROUNDUP(nbytes, pad) ((((nbytes) + ((pad)-1)) / (pad)) * ((pad)>>3))
+
+/*
+ * Allocate an XMesaImage header describing width x height pixels of
+ * bitsPerPixel bits stored at data (the pointer is stored, not copied).
+ * Rows are padded to 32 bits.  Returns NULL if the header allocation fails.
+ */
+XMesaImage *XMesaCreateImage(int bitsPerPixel, int width, int height, char *data)
+{
+   XMesaImage *image;
+
+   image = (XMesaImage *)xalloc(sizeof(XMesaImage));
+
+   if (image) {
+      image->width = width;
+      image->height = height;
+      image->data = data;
+      /* Always pad to 32 bits */
+      image->bytes_per_line = ROUNDUP((bitsPerPixel * width), 32);
+      image->bits_per_pixel = bitsPerPixel;
+   }
+
+   return image;
+}
+
+/*
+ * Free an image and its pixel storage.  The data pointer is released with
+ * free() and the header with xfree(); image must not be NULL.
+ */
+void XMesaDestroyImage(XMesaImage *image)
+{
+   if (image->data)
+      free(image->data);
+   xfree(image);
+}
+
+/*
+ * Read the pixel at (x, y).  Supports 8, 15/16, 24 and 32 bits per pixel;
+ * any other depth yields 0.  No bounds checking is done.
+ */
+unsigned long XMesaGetPixel(XMesaImage *image, int x, int y)
+{
+   CARD8 *row = (CARD8 *)(image->data + y*image->bytes_per_line);
+   CARD8 *i8;
+   CARD16 *i16;
+   CARD32 *i32;
+   switch (image->bits_per_pixel) {
+   case 8:
+      i8 = (CARD8 *)row;
+      return i8[x];
+      break;
+   case 15:
+   case 16:
+      i16 = (CARD16 *)row;
+      return i16[x];
+      break;
+   case 24: /* WARNING: architecture specific code */
+      i8 = (CARD8 *)row;
+      return (((CARD32)i8[x*3]) |
+              (((CARD32)i8[x*3+1])<<8) |
+              (((CARD32)i8[x*3+2])<<16));
+      break;
+   case 32:
+      i32 = (CARD32 *)row;
+      return i32[x];
+      break;
+   }
+   return 0;
+}
+
+#ifndef XMESA_USE_PUTPIXEL_MACRO
+/*
+ * Write pixel at (x, y).  Supports 8, 15/16, 24 and 32 bits per pixel;
+ * other depths are ignored.  No bounds checking is done.
+ *
+ * The 24-bit case stores the three low-order bytes, lowest first, matching
+ * the byte order XMesaGetPixel() reads back.  It previously referred to the
+ * macro-only names __row and p, which do not exist in this function, and
+ * lacked a break, so it would have fallen through into the 32-bit store.
+ */
+void XMesaPutPixel(XMesaImage *image, int x, int y, unsigned long pixel)
+{
+   CARD8 *row = (CARD8 *)(image->data + y*image->bytes_per_line);
+   CARD8 *i8;
+   CARD16 *i16;
+   CARD32 *i32;
+   switch (image->bits_per_pixel) {
+   case 8:
+      i8 = (CARD8 *)row;
+      i8[x] = (CARD8)pixel;
+      break;
+   case 15:
+   case 16:
+      i16 = (CARD16 *)row;
+      i16[x] = (CARD16)pixel;
+      break;
+   case 24: /* WARNING: architecture specific code */
+      i8 = (CARD8 *)row;
+      i8[x*3] = (CARD8)(pixel);
+      i8[x*3+1] = (CARD8)(pixel>>8);
+      i8[x*3+2] = (CARD8)(pixel>>16);
+      break;
+   case 32:
+      i32 = (CARD32 *)row;
+      i32[x] = (CARD32)pixel;
+      break;
+   }
+}
+#endif
+
+#endif /* XFree86Server */
diff --git a/src/gallium/winsys/xlib/xm_image.h b/src/gallium/winsys/xlib/xm_image.h
new file mode 100644
index 0000000000..2a5e0f3777
--- /dev/null
+++ b/src/gallium/winsys/xlib/xm_image.h
@@ -0,0 +1,77 @@
+/**************************************************************************
+
+Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas.
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sub license, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ *   Kevin E.
Martin <kevin@precisioninsight.com>
+ *   Brian Paul <brian@precisioninsight.com>
+ */
+
+#ifndef _XM_IMAGE_H_
+#define _XM_IMAGE_H_
+
+#define XMESA_USE_PUTPIXEL_MACRO
+
+extern XMesaImage *XMesaCreateImage(int bitsPerPixel, int width, int height,
+                                    char *data);
+extern void XMesaDestroyImage(XMesaImage *image);
+extern unsigned long XMesaGetPixel(XMesaImage *image, int x, int y);
+#ifdef XMESA_USE_PUTPIXEL_MACRO
+/*
+ * XMesaPutPixel(image, x, y, pixel): store a pixel into an XMesaImage.
+ * Handles 8, 15/16, 24 and 32 bits per pixel; other depths are ignored.
+ *
+ * Every parameter is parenthesized at each use so that expression
+ * arguments (e.g. "row + 1" or "a | b") associate as the caller intended.
+ * Arguments are still evaluated more than once, so they must be free of
+ * side effects.  The expansion is a brace block, i.e. a statement, not an
+ * expression.
+ */
+#define XMesaPutPixel(__i,__x,__y,__p) \
+{ \
+   CARD8 *__row = (CARD8 *)((__i)->data + (__y)*(__i)->bytes_per_line); \
+   CARD8 *__i8; \
+   CARD16 *__i16; \
+   CARD32 *__i32; \
+   switch ((__i)->bits_per_pixel) { \
+   case 8: \
+      __i8 = (CARD8 *)__row; \
+      __i8[(__x)] = (CARD8)(__p); \
+      break; \
+   case 15: \
+   case 16: \
+      __i16 = (CARD16 *)__row; \
+      __i16[(__x)] = (CARD16)(__p); \
+      break; \
+   case 24: /* WARNING: architecture specific code */ \
+      __i8 = (CARD8 *)__row; \
+      __i8[(__x)*3] = (CARD8)(__p); \
+      __i8[(__x)*3+1] = (CARD8)((__p)>>8); \
+      __i8[(__x)*3+2] = (CARD8)((__p)>>16); \
+      break; \
+   case 32: \
+      __i32 = (CARD32 *)__row; \
+      __i32[(__x)] = (CARD32)(__p); \
+      break; \
+   } \
+}
+#else
+extern void XMesaPutPixel(XMesaImage *image, int x, int y,
+                          unsigned long pixel);
+#endif
+
+#endif /* _XM_IMAGE_H_ */
diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c
new file mode 100644
index 0000000000..acb5ad8f71
--- /dev/null
+++ b/src/gallium/winsys/xlib/xm_winsys.c
@@ -0,0 +1,719 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/* + * Authors: + * Keith Whitwell + * Brian Paul + */ + + +#include "glxheader.h" +#include "xmesaP.h" + +#undef ASSERT +#undef Elements + +#include "pipe/p_winsys.h" +#include "pipe/p_format.h" +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "softpipe/sp_winsys.h" + +#ifdef GALLIUM_CELL +#include "cell/ppu/cell_context.h" +#include "cell/ppu/cell_screen.h" +#include "cell/ppu/cell_winsys.h" +#else +#define TILE_SIZE 32 /* avoid compilation errors */ +#endif + +#ifdef GALLIUM_TRACE +#include "trace/tr_screen.h" +#include "trace/tr_context.h" +#endif + +#include "xm_winsys_aub.h" + + +/** + * Subclass of pipe_buffer for Xlib winsys. 
+ * Low-level OS/window system memory buffer
+ */
+struct xm_buffer
+{
+   struct pipe_buffer base;
+   boolean userBuffer; /** Is this a user-space buffer? */
+   void *data;         /* backing storage */
+   void *mapped;       /* equals data while mapped, NULL otherwise */
+
+   XImage *tempImage;  /* shared-memory XImage, created on first display */
+   int shm;            /* non-zero while XSHM is in use for this buffer */
+#if defined(USE_XSHM) && !defined(XFree86Server)
+   XShmSegmentInfo shminfo;
+#endif
+};
+
+
+/**
+ * Subclass of pipe_winsys for Xlib winsys
+ */
+struct xmesa_pipe_winsys
+{
+   struct pipe_winsys base;
+   struct xmesa_visual *xm_visual;
+   int shm;
+};
+
+
+
+/** Cast wrapper */
+static INLINE struct xm_buffer *
+xm_buffer( struct pipe_buffer *buf )
+{
+   return (struct xm_buffer *)buf;
+}
+
+
+/**
+ * X Shared Memory Image extension code
+ */
+#if defined(USE_XSHM) && !defined(XFree86Server)
+
+#define XSHM_ENABLED(b) ((b)->shm)
+
+static volatile int mesaXErrorFlag = 0;
+
+/**
+ * Catches potential Xlib errors.
+ * Only records that an error happened; the event itself is discarded.
+ */
+static int
+mesaHandleXError(XMesaDisplay *dpy, XErrorEvent *event)
+{
+   (void) dpy;
+   (void) event;
+   mesaXErrorFlag = 1;
+   return 0;
+}
+
+
+/**
+ * Create and attach a SysV shared memory segment of \p size bytes and
+ * record it in buf->shminfo.
+ * \return GL_TRUE on success.  On failure GL_FALSE is returned and
+ *         shminfo.shmid is negative, so xm_buffer_destroy() will not
+ *         mistake the buffer for a shared-memory one.
+ */
+static GLboolean alloc_shm(struct xm_buffer *buf, unsigned size)
+{
+   XShmSegmentInfo *const shminfo = & buf->shminfo;
+
+   shminfo->shmid = shmget(IPC_PRIVATE, size, IPC_CREAT|0777);
+   if (shminfo->shmid < 0) {
+      return GL_FALSE;
+   }
+
+   shminfo->shmaddr = (char *) shmat(shminfo->shmid, 0, 0);
+   if (shminfo->shmaddr == (char *) -1) {
+      shmctl(shminfo->shmid, IPC_RMID, 0);
+      /* forget the id of the segment we just removed */
+      shminfo->shmid = -1;
+      return GL_FALSE;
+   }
+
+   shminfo->readOnly = False;
+   return GL_TRUE;
+}
+
+
+/**
+ * Allocate a shared memory XImage back buffer for the given XMesaBuffer.
+ *
+ * On failure b->tempImage is left NULL and b->shm is cleared.  The Xlib
+ * error handler that was installed on entry is reinstated on every path
+ * that replaced it.
+ */
+static void
+alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb,
+                 unsigned width, unsigned height)
+{
+   /*
+    * We have to do a _lot_ of error checking here to be sure we can
+    * really use the XSHM extension. It seems different servers trigger
+    * errors at different points if the extension won't work. Therefore
+    * we have to be very careful...
+    */
+#if 0
+   GC gc;
+#endif
+   int (*old_handler)(XMesaDisplay *, XErrorEvent *);
+
+   b->tempImage = XShmCreateImage(xmb->xm_visual->display,
+                                  xmb->xm_visual->visinfo->visual,
+                                  xmb->xm_visual->visinfo->depth,
+                                  ZPixmap,
+                                  NULL,
+                                  &b->shminfo,
+                                  width, height);
+   if (b->tempImage == NULL) {
+      b->shm = 0;
+      return;
+   }
+
+
+   mesaXErrorFlag = 0;
+   old_handler = XSetErrorHandler(mesaHandleXError);
+   /* This may trigger the X protocol error we're ready to catch: */
+   XShmAttach(xmb->xm_visual->display, &b->shminfo);
+   XSync(xmb->xm_visual->display, False);
+
+   if (mesaXErrorFlag) {
+      /* we are on a remote display, this error is normal, don't print it */
+      XFlush(xmb->xm_visual->display);
+      mesaXErrorFlag = 0;
+      XDestroyImage(b->tempImage);
+      b->tempImage = NULL;
+      b->shm = 0;
+      (void) XSetErrorHandler(old_handler);
+      return;
+   }
+
+
+   /* Finally, try an XShmPutImage to be really sure the extension works */
+#if 0
+   gc = XCreateGC(xmb->xm_visual->display, xmb->drawable, 0, NULL);
+   XShmPutImage(xmb->xm_visual->display, xmb->drawable, gc,
+                b->tempImage, 0, 0, 0, 0, 1, 1 /*one pixel*/, False);
+   XSync(xmb->xm_visual->display, False);
+   XFreeGC(xmb->xm_visual->display, gc);
+   (void) XSetErrorHandler(old_handler);
+   if (mesaXErrorFlag) {
+      XFlush(xmb->xm_visual->display);
+      mesaXErrorFlag = 0;
+      XDestroyImage(b->tempImage);
+      b->tempImage = NULL;
+      b->shm = 0;
+      return;
+   }
+#else
+   /* With the probe above disabled nothing restored the handler on the
+    * success path, leaving mesaHandleXError installed process-wide and
+    * silently swallowing every later X error.  Put the old one back.
+    */
+   (void) XSetErrorHandler(old_handler);
+#endif
+}
+
+#else
+
+#define XSHM_ENABLED(b) 0
+
+static void
+alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb,
+                 unsigned width, unsigned height)
+{
+   b->shm = 0;
+}
+#endif /* USE_XSHM */
+
+
+
+
+/* Most callbacks map directly onto dri_bufmgr operations:
+ */
+/* Map: the storage is ordinary process memory, so just expose data. */
+static void *
+xm_buffer_map(struct pipe_winsys *pws, struct pipe_buffer *buf,
+              unsigned flags)
+{
+   struct xm_buffer *xm_buf = xm_buffer(buf);
+   xm_buf->mapped = xm_buf->data;
+   return xm_buf->mapped;
+}
+
+static void
+xm_buffer_unmap(struct pipe_winsys *pws, struct pipe_buffer *buf)
+{
+   struct xm_buffer *xm_buf = xm_buffer(buf);
+   xm_buf->mapped = NULL;
+}
+
+/*
+ * Release a buffer and the storage it owns: a shared-memory segment is
+ * detached and removed, malloc'ed storage is freed, and user-supplied
+ * storage is left alone.
+ */
+static void
+xm_buffer_destroy(struct pipe_winsys *pws,
+                  struct pipe_buffer *buf)
+{
+   struct xm_buffer *oldBuf = xm_buffer(buf);
+
+   if (oldBuf->data) {
+#if defined(USE_XSHM) && !defined(XFree86Server)
+      /* User buffers never own a segment; their zero-initialized shminfo
+       * must not be read as "segment id 0".
+       */
+      if (!oldBuf->userBuffer && oldBuf->shminfo.shmid >= 0) {
+         shmdt(oldBuf->shminfo.shmaddr);
+         shmctl(oldBuf->shminfo.shmid, IPC_RMID, 0);
+
+         oldBuf->shminfo.shmid = -1;
+         oldBuf->shminfo.shmaddr = (char *) -1;
+      }
+      else
+#endif
+      {
+         if (!oldBuf->userBuffer) {
+            align_free(oldBuf->data);
+         }
+      }
+
+      oldBuf->data = NULL;
+   }
+
+   free(oldBuf);
+}
+
+
+/**
+ * For Cell. Basically, rearrange the pixels/quads from this layout:
+ * +--+--+--+--+
+ * |p0|p1|p2|p3|....
+ * +--+--+--+--+
+ *
+ * to this layout:
+ * +--+--+
+ * |p0|p1|....
+ * +--+--+
+ * |p2|p3|
+ * +--+--+
+ *
+ * Both arrays hold TILE_SIZE * TILE_SIZE values.
+ */
+static void
+twiddle_tile(const uint *tileIn, uint *tileOut)
+{
+   int y, x;
+
+   for (y = 0; y < TILE_SIZE; y+=2) {
+      for (x = 0; x < TILE_SIZE; x+=2) {
+         /* k: index of the first of the four inputs forming this 2x2 quad */
+         int k = 4 * (y/2 * TILE_SIZE/2 + x/2);
+         tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k];
+         tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1];
+         tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2];
+         tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3];
+      }
+   }
+}
+
+
+
+/**
+ * Display a surface that's in a tiled configuration. That is, all the
+ * pixels for a TILE_SIZExTILE_SIZE block are contiguous in memory.
+ */
+static void
+xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf)
+{
+   XImage *ximage;
+   struct xm_buffer *xm_buf = xm_buffer(surf->buffer);
+   const uint tilesPerRow = (surf->width + TILE_SIZE - 1) / TILE_SIZE;
+   uint x, y;
+
+   if (XSHM_ENABLED(xm_buf) && (xm_buf->tempImage == NULL)) {
+      alloc_shm_ximage(xm_buf, b, TILE_SIZE, TILE_SIZE);
+   }
+
+   ximage = (XSHM_ENABLED(xm_buf)) ? xm_buf->tempImage : b->tempImage;
+
+   /* check that the XImage has been previously initialized */
+   assert(ximage->format);
+   assert(ximage->bitmap_unit);
+
+   if (!XSHM_ENABLED(xm_buf)) {
+      /* update XImage's fields */
+      ximage->width = TILE_SIZE;
+      ximage->height = TILE_SIZE;
+      ximage->bytes_per_line = TILE_SIZE * 4;
+   }
+
+   for (y = 0; y < surf->height; y += TILE_SIZE) {
+      for (x = 0; x < surf->width; x += TILE_SIZE) {
+         uint tmpTile[TILE_SIZE * TILE_SIZE];
+         int tx = x / TILE_SIZE;
+         int ty = y / TILE_SIZE;
+         int offset = ty * tilesPerRow + tx;
+         int w = TILE_SIZE;
+         int h = TILE_SIZE;
+
+         /* clip the tile against the bottom/right surface edges */
+         if (y + h > surf->height)
+            h = surf->height - y;
+         if (x + w > surf->width)
+            w = surf->width - x;
+
+         /* offset in pixels */
+         offset *= TILE_SIZE * TILE_SIZE;
+
+         /* the shared-memory path is disabled by the constant 0 */
+         if (0 && XSHM_ENABLED(xm_buf)) {
+            ximage->data = (char *) xm_buf->data + 4 * offset;
+            /* make copy of tile data */
+            memcpy(tmpTile, (uint *) ximage->data, sizeof(tmpTile));
+            /* twiddle from temp to ximage in shared memory */
+            twiddle_tile(tmpTile, (uint *) ximage->data);
+            /* display image in shared memory */
+#if defined(USE_XSHM) && !defined(XFree86Server)
+            XShmPutImage(b->xm_visual->display, b->drawable, b->gc,
+                         ximage, 0, 0, x, y, w, h, False);
+#endif
+         }
+         else {
+            /* twiddle from ximage buffer to temp tile */
+            twiddle_tile((uint *) xm_buf->data + offset, tmpTile);
+            /* display temp tile data */
+            ximage->data = (char *) tmpTile;
+            XPutImage(b->xm_visual->display, b->drawable, b->gc,
+                      ximage, 0, 0, x, y, w, h);
+         }
+      }
+   }
+}
+
+
+/**
+ * Display/copy the image in the surface into the X window specified
+ * by the XMesaBuffer.
+ */ +void +xmesa_display_surface(XMesaBuffer b, const struct pipe_surface *surf) +{ + XImage *ximage; + struct xm_buffer *xm_buf = xm_buffer(surf->buffer); + static boolean no_swap = 0; + static boolean firsttime = 1; + static int tileSize = 0; + + if (firsttime) { + no_swap = getenv("SP_NO_RAST") != NULL; +#ifdef GALLIUM_CELL + if (!getenv("GALLIUM_NOCELL")) { + tileSize = 32; /** probably temporary */ + } +#endif + firsttime = 0; + } + + if (no_swap) + return; + + if (tileSize) { + xmesa_display_surface_tiled(b, surf); + return; + } + + if (XSHM_ENABLED(xm_buf) && (xm_buf->tempImage == NULL)) { + assert(surf->block.width == 1); + assert(surf->block.height == 1); + alloc_shm_ximage(xm_buf, b, surf->stride/surf->block.size, surf->height); + } + + ximage = (XSHM_ENABLED(xm_buf)) ? xm_buf->tempImage : b->tempImage; + ximage->data = xm_buf->data; + + /* display image in Window */ + if (XSHM_ENABLED(xm_buf)) { +#if defined(USE_XSHM) && !defined(XFree86Server) + XShmPutImage(b->xm_visual->display, b->drawable, b->gc, + ximage, 0, 0, 0, 0, surf->width, surf->height, False); +#endif + } else { + /* check that the XImage has been previously initialized */ + assert(ximage->format); + assert(ximage->bitmap_unit); + + /* update XImage's fields */ + ximage->width = surf->width; + ximage->height = surf->height; + ximage->bytes_per_line = surf->stride; + + XPutImage(b->xm_visual->display, b->drawable, b->gc, + ximage, 0, 0, 0, 0, surf->width, surf->height); + } +} + + +static void +xm_flush_frontbuffer(struct pipe_winsys *pws, + struct pipe_surface *surf, + void *context_private) +{ + /* + * The front color buffer is actually just another XImage buffer. + * This function copies that XImage to the actual X Window. 
+ */ + XMesaContext xmctx = (XMesaContext) context_private; + xmesa_display_surface(xmctx->xm_buffer, surf); +} + + + +static const char * +xm_get_name(struct pipe_winsys *pws) +{ + return "Xlib"; +} + + +static struct pipe_buffer * +xm_buffer_create(struct pipe_winsys *pws, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct xm_buffer *buffer = CALLOC_STRUCT(xm_buffer); +#if defined(USE_XSHM) && !defined(XFree86Server) + struct xmesa_pipe_winsys *xpws = (struct xmesa_pipe_winsys *) pws; +#endif + + buffer->base.refcount = 1; + buffer->base.alignment = alignment; + buffer->base.usage = usage; + buffer->base.size = size; + + +#if defined(USE_XSHM) && !defined(XFree86Server) + buffer->shminfo.shmid = -1; + buffer->shminfo.shmaddr = (char *) -1; + + if (xpws->shm && (usage & PIPE_BUFFER_USAGE_PIXEL) != 0) { + buffer->shm = xpws->shm; + + if (alloc_shm(buffer, size)) { + buffer->data = buffer->shminfo.shmaddr; + } + } +#endif + + if (buffer->data == NULL) { + buffer->shm = 0; + + /* align to 16-byte multiple for Cell */ + buffer->data = align_malloc(size, max(alignment, 16)); + } + + return &buffer->base; +} + + +/** + * Create buffer which wraps user-space data. + */ +static struct pipe_buffer * +xm_user_buffer_create(struct pipe_winsys *pws, void *ptr, unsigned bytes) +{ + struct xm_buffer *buffer = CALLOC_STRUCT(xm_buffer); + buffer->base.refcount = 1; + buffer->base.size = bytes; + buffer->userBuffer = TRUE; + buffer->data = ptr; + buffer->shm = 0; + + return &buffer->base; +} + + + +/** + * Round n up to next multiple. 
+ */ +static INLINE unsigned +round_up(unsigned n, unsigned multiple) +{ + /* mask trick: only correct when 'multiple' is a power of two */ + return (n + multiple - 1) & ~(multiple - 1); +} + +/** + * Fill in the surface's size/format fields and allocate its pixel buffer. + * The row stride is padded to a 64-byte multiple. + * \return 0 on success, -1 if the buffer could not be created + */ +static int +xm_surface_alloc_storage(struct pipe_winsys *winsys, + struct pipe_surface *surf, + unsigned width, unsigned height, + enum pipe_format format, + unsigned flags, + unsigned tex_usage) +{ + const unsigned alignment = 64; + + surf->width = width; + surf->height = height; + surf->format = format; + pf_get_block(format, &surf->block); + surf->nblocksx = pf_get_nblocksx(&surf->block, width); + surf->nblocksy = pf_get_nblocksy(&surf->block, height); + surf->stride = round_up(surf->nblocksx * surf->block.size, alignment); + surf->usage = flags; + + assert(!surf->buffer); + surf->buffer = winsys->buffer_create(winsys, alignment, + PIPE_BUFFER_USAGE_PIXEL, +#ifdef GALLIUM_CELL /* XXX a bit of a hack */ + surf->stride * round_up(surf->nblocksy, TILE_SIZE)); +#else + surf->stride * surf->nblocksy); +#endif + + if(!surf->buffer) + return -1; + + return 0; +} + + +/** + * Called via winsys->surface_alloc() to create new surfaces. + * \return a zeroed surface with refcount 1, or NULL if out of memory + */ +static struct pipe_surface * +xm_surface_alloc(struct pipe_winsys *ws) +{ + struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); + + assert(ws); + + /* CALLOC_STRUCT can fail; only initialize a real allocation */ + if (surface) { + surface->refcount = 1; + surface->winsys = ws; + } + + return surface; +} + + + +/** + * Drop one reference to *s and clear the caller's pointer. + * The surface and its buffer reference are released when the count hits 0. 
+ */ +static void +xm_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) +{ + struct pipe_surface *surf = *s; + assert(!surf->texture); + surf->refcount--; + if (surf->refcount == 0) { + if (surf->buffer) + winsys_buffer_reference(winsys, &surf->buffer, NULL); + free(surf); + } + *s = NULL; +} + + +/* + * Fence functions - basically nothing to do, as we don't create any actual + * fence objects. 
+ */ + +static void +xm_fence_reference(struct pipe_winsys *sws, struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ +} + + +static int +xm_fence_signalled(struct pipe_winsys *sws, struct pipe_fence_handle *fence, + unsigned flag) +{ + return 0; +} + + +static int +xm_fence_finish(struct pipe_winsys *sws, struct pipe_fence_handle *fence, + unsigned flag) +{ + return 0; +} + + +/** + * Return pointer to the AUB pipe_winsys object, creating it on first use. + * This is a singleton object. + * \return the winsys, or NULL if it could not be created + */ +struct pipe_winsys * +xmesa_get_pipe_winsys_aub(struct xmesa_visual *xm_vis) +{ + /* Keep the pointer exactly as returned by the constructor: the object + * is not a struct xmesa_pipe_winsys, so it must not be cast to one. + */ + static struct pipe_winsys *ws = NULL; + + if (!ws) { + ws = xmesa_create_pipe_winsys_aub(); + } + return ws; +} + + +/** + * Return pointer to the Xlib pipe_winsys object, creating it on first use. + * For Xlib, this is a singleton object. + * \return the winsys, or NULL if out of memory + */ +static struct pipe_winsys * +xmesa_get_pipe_winsys(struct xmesa_visual *xm_vis) +{ + static struct xmesa_pipe_winsys *ws = NULL; + + if (!ws) { + ws = CALLOC_STRUCT(xmesa_pipe_winsys); + if (!ws) + return NULL; + + ws->xm_visual = xm_vis; + ws->shm = xmesa_check_for_xshm(xm_vis->display); + + /* Fill in this struct with callbacks that pipe will need to + * communicate with the window system, buffer manager, etc. 
+ */ + ws->base.buffer_create = xm_buffer_create; + ws->base.user_buffer_create = xm_user_buffer_create; + ws->base.buffer_map = xm_buffer_map; + ws->base.buffer_unmap = xm_buffer_unmap; + ws->base.buffer_destroy = xm_buffer_destroy; + + ws->base.surface_alloc = xm_surface_alloc; + ws->base.surface_alloc_storage = xm_surface_alloc_storage; + ws->base.surface_release = xm_surface_release; + + ws->base.fence_reference = xm_fence_reference; + ws->base.fence_signalled = xm_fence_signalled; + ws->base.fence_finish = xm_fence_finish; + + ws->base.flush_frontbuffer = xm_flush_frontbuffer; + ws->base.get_name = xm_get_name; + } + + return &ws->base; +} + + +struct pipe_context * +xmesa_create_pipe_context(XMesaContext xmesa, uint pixelformat) +{ + struct pipe_winsys *pws; + struct pipe_context *pipe; + + if (getenv("XM_AUB")) { + pws = xmesa_get_pipe_winsys_aub(xmesa->xm_visual); + } + else { + pws = xmesa_get_pipe_winsys(xmesa->xm_visual); + } + +#ifdef GALLIUM_CELL + if (!getenv("GALLIUM_NOCELL")) { + struct cell_winsys *cws = cell_get_winsys(pixelformat); + struct pipe_screen *screen = cell_create_screen(pws); + + pipe = cell_create_context(screen, cws); + } + else +#endif + { + struct pipe_screen *screen = softpipe_create_screen(pws); + + pipe = softpipe_create(screen, pws, NULL); + +#ifdef GALLIUM_TRACE + screen = trace_screen_create(screen); + + pipe = trace_context_create(screen, pipe); +#endif + } + + if (pipe) + pipe->priv = xmesa; + + return pipe; +} diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xm_winsys_aub.c new file mode 100644 index 0000000000..b7c10b6bca --- /dev/null +++ b/src/gallium/winsys/xlib/xm_winsys_aub.c @@ -0,0 +1,586 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * + **************************************************************************/ + +/* + * Authors: + * Keith Whitwell + * Brian Paul + */ + + +#include "glxheader.h" +#include "xmesaP.h" + +#include "pipe/p_winsys.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "i965simple/brw_winsys.h" +#include "i965simple/brw_screen.h" +#include "brw_aub.h" +#include "xm_winsys_aub.h" + + + +struct aub_buffer { + char *data; + unsigned offset; + unsigned size; + unsigned refcount; + unsigned map_count; + boolean dump_on_unmap; +}; + + + +struct aub_pipe_winsys { + struct pipe_winsys winsys; + + struct brw_aubfile *aubfile; + + /* This is simple, isn't it: + */ + char *pool; + unsigned size; + unsigned used; +}; + + +/* Turn a pipe winsys into an aub/pipe winsys: + */ +static inline struct aub_pipe_winsys * +aub_pipe_winsys( struct pipe_winsys *winsys ) +{ + return (struct aub_pipe_winsys *)winsys; +} + + + +static INLINE struct aub_buffer * +aub_bo( struct pipe_buffer *bo ) +{ + return (struct aub_buffer *)bo; +} + +static INLINE struct pipe_buffer * +pipe_bo( struct aub_buffer *bo ) +{ + return (struct pipe_buffer *)bo; +} + + + + +static void *aub_buffer_map(struct pipe_winsys *winsys, + struct pipe_buffer *buf, + unsigned flags ) +{ + struct aub_buffer *sbo = aub_bo(buf); + + assert(sbo->data); + + if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) + sbo->dump_on_unmap = 1; + + sbo->map_count++; + return sbo->data; +} + +static void aub_buffer_unmap(struct pipe_winsys *winsys, + struct pipe_buffer *buf) +{ + struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys); + struct aub_buffer *sbo = aub_bo(buf); + + sbo->map_count--; + + if (sbo->map_count == 0 && + sbo->dump_on_unmap) { + + sbo->dump_on_unmap = 0; + + brw_aub_gtt_data( iws->aubfile, + sbo->offset, + sbo->data, + sbo->size, + 0, + 0); + } +} + + +static void +aub_buffer_destroy(struct pipe_winsys *winsys, + struct pipe_buffer *buf) +{ + free(buf); +} + + +void 
xmesa_buffer_subdata_aub(struct pipe_winsys *winsys, + struct pipe_buffer *buf, + unsigned long offset, + unsigned long size, + const void *data, + unsigned aub_type, + unsigned aub_sub_type) +{ + struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys); + struct aub_buffer *sbo = aub_bo(buf); + + /* [offset, offset+size) must lie inside the buffer. A write that ends + * exactly at the end of the buffer (e.g. a whole-buffer upload) is valid, + * and the check is written so that offset + size cannot wrap around. + */ + assert(offset <= sbo->size && size <= sbo->size - offset); + memcpy(sbo->data + offset, data, size); + + brw_aub_gtt_data( iws->aubfile, + sbo->offset + offset, + sbo->data + offset, + size, + aub_type, + aub_sub_type ); +} + +void xmesa_commands_aub(struct pipe_winsys *winsys, + unsigned *cmds, + unsigned nr_dwords) +{ + struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys); + unsigned size = nr_dwords * 4; + + /* commands that exactly fill the rest of the pool still fit */ + assert(iws->used + size <= iws->size); + + brw_aub_gtt_cmds( iws->aubfile, + AUB_BUF_START + iws->used, + cmds, + nr_dwords * sizeof(int) ); + + iws->used += align(size, 4096); +} + + +static struct aub_pipe_winsys *global_winsys = NULL; + +void xmesa_display_aub( /* struct pipe_winsys *winsys, */ + struct pipe_surface *surface ) +{ +// struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys); + brw_aub_dump_bmp( global_winsys->aubfile, + surface, + aub_bo(surface->buffer)->offset ); +} + + + +/* Pipe has no concept of pools. We choose the tex/region pool + * for all buffers. + */ +static struct pipe_buffer * +aub_buffer_create(struct pipe_winsys *winsys, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys); + struct aub_buffer *sbo = CALLOC_STRUCT(aub_buffer); + + /* CALLOC_STRUCT can fail; callers check for a NULL buffer */ + if (!sbo) + return NULL; + + sbo->refcount = 1; + + /* Could reuse buffers that are not referenced in current + * batchbuffer. 
Can't do that atm, so always reallocate: + */ + assert(iws->used + size < iws->size); + sbo->data = iws->pool + iws->used; + sbo->offset = AUB_BUF_START + iws->used; + iws->used += align(size, 4096); + + sbo->size = size; + + return pipe_bo(sbo); +} + + +static struct pipe_buffer * +aub_user_buffer_create(struct pipe_winsys *winsys, void *ptr, unsigned bytes) +{ + struct aub_buffer *sbo; + + /* Lets hope this is meant for upload, not as a result! + */ + sbo = aub_bo(aub_buffer_create( winsys, 0, 0, 0 )); + + sbo->data = ptr; + sbo->size = bytes; + + return pipe_bo(sbo); +} + + +/* The state tracker (should!) keep track of whether the fake + * frontbuffer has been touched by any rendering since the last time + * we copied its contents to the real frontbuffer. Our task is easy: + */ +static void +aub_flush_frontbuffer( struct pipe_winsys *winsys, + struct pipe_surface *surf, + void *context_private) +{ + xmesa_display_aub( surf ); +} + +static struct pipe_surface * +aub_i915_surface_alloc(struct pipe_winsys *winsys) +{ + struct pipe_surface *surf = CALLOC_STRUCT(pipe_surface); + if (surf) { + surf->refcount = 1; + surf->winsys = winsys; + } + return surf; +} + + +/** + * Round n up to next multiple. 
+ */ +static INLINE unsigned +round_up(unsigned n, unsigned multiple) +{ + return (n + multiple - 1) & ~(multiple - 1); +} + +static int +aub_i915_surface_alloc_storage(struct pipe_winsys *winsys, + struct pipe_surface *surf, + unsigned width, unsigned height, + enum pipe_format format, + unsigned flags, + unsigned tex_usage) +{ + const unsigned alignment = 64; + + surf->width = width; + surf->height = height; + surf->format = format; + pf_get_block(format, &surf->block); + surf->nblocksx = pf_get_nblocksx(&surf->block, width); + surf->nblocksy = pf_get_nblocksy(&surf->block, height); + surf->stride = round_up(surf->nblocksx * surf->block.size, alignment); + surf->usage = flags; + + assert(!surf->buffer); + surf->buffer = winsys->buffer_create(winsys, alignment, + PIPE_BUFFER_USAGE_PIXEL, + surf->stride * surf->nblocksy); + if(!surf->buffer) + return -1; + + return 0; +} + +static void +aub_i915_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) +{ + struct pipe_surface *surf = *s; + surf->refcount--; + if (surf->refcount == 0) { + if (surf->buffer) + winsys_buffer_reference(winsys, &surf->buffer, NULL); + free(surf); + } + *s = NULL; +} + + + +static const char * +aub_get_name( struct pipe_winsys *winsys ) +{ + return "Aub/xlib"; +} + +struct pipe_winsys * +xmesa_create_pipe_winsys_aub( void ) +{ + struct aub_pipe_winsys *iws = CALLOC_STRUCT( aub_pipe_winsys ); + + /* Fill in this struct with callbacks that pipe will need to + * communicate with the window system, buffer manager, etc. + * + * Pipe would be happy with a malloc based memory manager, but + * the SwapBuffers implementation in this winsys driver requires + * that rendering be done to an appropriate _DriBufferObject. 
+ */ + iws->winsys.buffer_create = aub_buffer_create; + iws->winsys.user_buffer_create = aub_user_buffer_create; + iws->winsys.buffer_map = aub_buffer_map; + iws->winsys.buffer_unmap = aub_buffer_unmap; + iws->winsys.buffer_destroy = aub_buffer_destroy; + iws->winsys.flush_frontbuffer = aub_flush_frontbuffer; + iws->winsys.get_name = aub_get_name; + + iws->winsys.surface_alloc = aub_i915_surface_alloc; + iws->winsys.surface_alloc_storage = aub_i915_surface_alloc_storage; + iws->winsys.surface_release = aub_i915_surface_release; + + iws->aubfile = brw_aubfile_create(); + iws->size = AUB_BUF_SIZE; + iws->pool = malloc(AUB_BUF_SIZE); + + /* HACK: static copy of this pointer: + */ + assert(global_winsys == NULL); + global_winsys = iws; + + return &iws->winsys; +} + + +/** + * Destroy a winsys made by xmesa_create_pipe_winsys_aub(): closes the + * aub file and frees the buffer pool and the winsys itself. + */ +void +xmesa_destroy_pipe_winsys_aub( struct pipe_winsys *winsys ) + +{ + struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys); + + /* Drop the static alias first: otherwise it dangles after the free + * below (xmesa_display_aub() reads it) and the assert in + * xmesa_create_pipe_winsys_aub() would reject any later re-creation. 
+ */ + if (global_winsys == iws) + global_winsys = NULL; + + brw_aub_destroy(iws->aubfile); + free(iws->pool); + free(iws); +} + + + + + + + +#define IWS_BATCHBUFFER_SIZE 1024 + +struct aub_brw_winsys { + struct brw_winsys winsys; /**< batch buffer funcs */ + struct aub_context *aub; + + struct pipe_winsys *pipe_winsys; + + unsigned batch_data[IWS_BATCHBUFFER_SIZE]; + unsigned batch_nr; + unsigned batch_size; + unsigned batch_alloc; +}; + + +/* Turn a i965simple winsys into an aub/i965simple winsys: + */ +static inline struct aub_brw_winsys * +aub_brw_winsys( struct brw_winsys *sws ) +{ + return (struct aub_brw_winsys *)sws; +} + + +/* Simple batchbuffer interface: + */ + +static unsigned *aub_i965_batch_start( struct brw_winsys *sws, + unsigned dwords, + unsigned relocs ) +{ + struct aub_brw_winsys *iws = aub_brw_winsys(sws); + + if (iws->batch_size < iws->batch_nr + dwords) + return NULL; + + iws->batch_alloc = iws->batch_nr + dwords; + return (void *)1; /* not a valid pointer! 
*/ +} + +static void aub_i965_batch_dword( struct brw_winsys *sws, + unsigned dword ) +{ + struct aub_brw_winsys *iws = aub_brw_winsys(sws); + + assert(iws->batch_nr < iws->batch_alloc); + iws->batch_data[iws->batch_nr++] = dword; +} + +static void aub_i965_batch_reloc( struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned access_flags, + unsigned delta ) +{ + struct aub_brw_winsys *iws = aub_brw_winsys(sws); + + assert(iws->batch_nr < iws->batch_alloc); + iws->batch_data[iws->batch_nr++] = aub_bo(buf)->offset + delta; +} + +static unsigned aub_i965_get_buffer_offset( struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned access_flags ) +{ + return aub_bo(buf)->offset; +} + +static void aub_i965_batch_end( struct brw_winsys *sws ) +{ + struct aub_brw_winsys *iws = aub_brw_winsys(sws); + + assert(iws->batch_nr <= iws->batch_alloc); + iws->batch_alloc = 0; +} + +static void aub_i965_batch_flush( struct brw_winsys *sws, + struct pipe_fence_handle **fence ) +{ + struct aub_brw_winsys *iws = aub_brw_winsys(sws); + assert(iws->batch_nr <= iws->batch_size); + + if (iws->batch_nr) { + xmesa_commands_aub( iws->pipe_winsys, + iws->batch_data, + iws->batch_nr ); + } + + iws->batch_nr = 0; +} + + + +/* Upload 'size' bytes of typed state into 'buf' at 'offset', translating + * the BRW_* data_type into the aub type/sub-type pair written to the + * aub file. 
+ */ +static void aub_i965_buffer_subdata_typed(struct brw_winsys *winsys, + struct pipe_buffer *buf, + unsigned long offset, + unsigned long size, + const void *data, + unsigned data_type) +{ + struct aub_brw_winsys *iws = aub_brw_winsys(winsys); + unsigned aub_type = DW_GENERAL_STATE; + /* Initialized so an unknown data_type can't pass an indeterminate + * value on when assert() is compiled out (NDEBUG). + */ + unsigned aub_sub_type = 0; + + switch (data_type) { + case BRW_CC_VP: + aub_sub_type = DWGS_COLOR_CALC_VIEWPORT_STATE; + break; + case BRW_CC_UNIT: + aub_sub_type = DWGS_COLOR_CALC_STATE; + break; + case BRW_WM_PROG: + aub_sub_type = DWGS_KERNEL_INSTRUCTIONS; + break; + case BRW_SAMPLER_DEFAULT_COLOR: + aub_sub_type = DWGS_SAMPLER_DEFAULT_COLOR; + break; + case BRW_SAMPLER: + aub_sub_type = DWGS_SAMPLER_STATE; + break; + case BRW_WM_UNIT: + aub_sub_type = DWGS_WINDOWER_IZ_STATE; + break; + 
case BRW_SF_PROG: + aub_sub_type = DWGS_KERNEL_INSTRUCTIONS; + break; + case BRW_SF_VP: + aub_sub_type = DWGS_STRIPS_FANS_VIEWPORT_STATE; + break; + case BRW_SF_UNIT: + aub_sub_type = DWGS_STRIPS_FANS_STATE; + break; + case BRW_VS_UNIT: + aub_sub_type = DWGS_VERTEX_SHADER_STATE; + break; + case BRW_VS_PROG: + aub_sub_type = DWGS_KERNEL_INSTRUCTIONS; + break; + case BRW_GS_UNIT: + aub_sub_type = DWGS_GEOMETRY_SHADER_STATE; + break; + case BRW_GS_PROG: + aub_sub_type = DWGS_KERNEL_INSTRUCTIONS; + break; + case BRW_CLIP_VP: + aub_sub_type = DWGS_CLIPPER_VIEWPORT_STATE; + break; + case BRW_CLIP_UNIT: + aub_sub_type = DWGS_CLIPPER_STATE; + break; + case BRW_CLIP_PROG: + aub_sub_type = DWGS_KERNEL_INSTRUCTIONS; + break; + case BRW_SS_SURFACE: + aub_type = DW_SURFACE_STATE; + aub_sub_type = DWSS_SURFACE_STATE; + break; + case BRW_SS_SURF_BIND: + aub_type = DW_SURFACE_STATE; + aub_sub_type = DWSS_BINDING_TABLE_STATE; + break; + case BRW_CONSTANT_BUFFER: + aub_type = DW_CONSTANT_URB_ENTRY; + aub_sub_type = 0; + break; + + default: + assert(0); + break; + } + + xmesa_buffer_subdata_aub( iws->pipe_winsys, + buf, + offset, + size, + data, + aub_type, + aub_sub_type ); +} + +/** + * Create i965 hardware rendering context. + */ +struct pipe_context * +xmesa_create_i965simple( struct pipe_winsys *winsys ) +{ + struct aub_brw_winsys *iws = CALLOC_STRUCT( aub_brw_winsys ); + struct pipe_screen *screen = brw_create_screen(winsys, 0/* XXX pci_id */); + + /* Fill in this struct with callbacks that i965simple will need to + * communicate with the window system, buffer manager, etc. 
+ */ + iws->winsys.batch_start = aub_i965_batch_start; + iws->winsys.batch_dword = aub_i965_batch_dword; + iws->winsys.batch_reloc = aub_i965_batch_reloc; + iws->winsys.batch_end = aub_i965_batch_end; + iws->winsys.batch_flush = aub_i965_batch_flush; + iws->winsys.buffer_subdata_typed = aub_i965_buffer_subdata_typed; + iws->winsys.get_buffer_offset = aub_i965_get_buffer_offset; + + iws->pipe_winsys = winsys; + + iws->batch_size = IWS_BATCHBUFFER_SIZE; + + /* Create the i965simple context: + */ + return brw_create( screen, + &iws->winsys, + 0 ); +} diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.h b/src/gallium/winsys/xlib/xm_winsys_aub.h new file mode 100644 index 0000000000..cc2a755277 --- /dev/null +++ b/src/gallium/winsys/xlib/xm_winsys_aub.h @@ -0,0 +1,68 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef AUB_WINSYS_H +#define AUB_WINSYS_H + +struct pipe_context; +struct pipe_winsys; +struct pipe_buffer; +struct pipe_surface; + +struct pipe_winsys * +xmesa_create_pipe_winsys_aub( void ); + +void +xmesa_destroy_pipe_winsys_aub( struct pipe_winsys *winsys ); + + + +struct pipe_context * +xmesa_create_i965simple( struct pipe_winsys *winsys ); + + + +void xmesa_buffer_subdata_aub(struct pipe_winsys *winsys, + struct pipe_buffer *buf, + unsigned long offset, + unsigned long size, + const void *data, + unsigned aub_type, + unsigned aub_sub_type); + +void xmesa_commands_aub(struct pipe_winsys *winsys, + unsigned *cmds, + unsigned nr_dwords); + + +void xmesa_display_aub( /* struct pipe_winsys *winsys, */ + struct pipe_surface *surface ); + +extern struct pipe_winsys * +xmesa_get_pipe_winsys_aub(struct xmesa_visual *xm_vis); + +#endif diff --git a/src/gallium/winsys/xlib/xmesaP.h b/src/gallium/winsys/xlib/xmesaP.h new file mode 100644 index 0000000000..fcaeee52bc --- /dev/null +++ b/src/gallium/winsys/xlib/xmesaP.h @@ -0,0 +1,180 @@ +/* + * Mesa 3-D graphics library + * Version: 7.1 + * + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#ifndef XMESAP_H +#define XMESAP_H + + +#include "GL/xmesa.h" +#include "mtypes.h" +#ifdef XFree86Server +#include "xm_image.h" +#endif + +#include "state_tracker/st_context.h" +#include "state_tracker/st_public.h" +#include "pipe/p_thread.h" + + +extern pipe_mutex _xmesa_lock; + +extern XMesaBuffer XMesaBufferList; + +/* + */ +#define XMESA_SOFTPIPE 1 +#define XMESA_AUB 2 +extern int xmesa_mode; + + +/** + * Visual information, derived from GLvisual. + * Basically corresponds to an XVisualInfo. 
+ */ +struct xmesa_visual { + GLvisual mesa_visual; /* Device independent visual parameters */ + XMesaDisplay *display; /* The X11 display */ +#ifdef XFree86Server + GLint ColormapEntries; + GLint nplanes; +#else + XMesaVisualInfo visinfo; /* X's visual info (pointer to private copy) */ + XVisualInfo *vishandle; /* Only used in fakeglx.c */ +#endif + GLint BitsPerPixel; /* True bits per pixel for XImages */ + + GLboolean ximage_flag; /* Use XImage for back buffer (not pixmap)? */ +}; + + +/** + * Context info, derived from st_context. + * Basically corresponds to a GLXContext. + */ +struct xmesa_context { + struct st_context *st; + XMesaVisual xm_visual; /** pixel format info */ + XMesaBuffer xm_buffer; /** current drawbuffer */ +}; + + +/** + * Types of X/GLX drawables we might render into. + */ +typedef enum { + WINDOW, /* An X window */ + GLXWINDOW, /* GLX window */ + PIXMAP, /* GLX pixmap */ + PBUFFER /* GLX Pbuffer */ +} BufferType; + + +/** + * Framebuffer information; wraps an st_framebuffer (the stfb member). + * Basically corresponds to a GLXDrawable. + */ +struct xmesa_buffer { + struct st_framebuffer *stfb; + + GLboolean wasCurrent; /* was ever the current buffer? 
*/ + XMesaVisual xm_visual; /* the X/Mesa visual */ + XMesaDrawable drawable; /* Usually the X window ID */ + XMesaColormap cmap; /* the X colormap */ + BufferType type; /* window, pixmap, pbuffer or glxwindow */ + + XMesaImage *tempImage; + unsigned long selectedEvents;/* for pbuffers only */ + + GLuint shm; /* X Shared Memory extension status: */ + /* 0 = not available */ + /* 1 = XImage support available */ + /* 2 = Pixmap support available too */ +#if defined(USE_XSHM) && !defined(XFree86Server) + XShmSegmentInfo shminfo; +#endif + + XMesaGC gc; /* scratch GC for span, line, tri drawing */ + + /* GLX_EXT_texture_from_pixmap */ + GLint TextureTarget; /** GLX_TEXTURE_1D_EXT, for example */ + GLint TextureFormat; /** GLX_TEXTURE_FORMAT_RGB_EXT, for example */ + GLint TextureMipmap; /** 0 or 1 */ + + struct xmesa_buffer *Next; /* Linked list pointer: */ +}; + + + +/** cast wrapper */ +static INLINE XMesaContext +xmesa_context(GLcontext *ctx) +{ + return (XMesaContext) ctx->DriverCtx; +} + + +/** cast wrapper */ +static INLINE XMesaBuffer +xmesa_buffer(GLframebuffer *fb) +{ + struct st_framebuffer *stfb = (struct st_framebuffer *) fb; + return (XMesaBuffer) st_framebuffer_private(stfb); +} + + +extern void +xmesa_delete_framebuffer(struct gl_framebuffer *fb); + +extern XMesaBuffer +xmesa_find_buffer(XMesaDisplay *dpy, XMesaColormap cmap, XMesaBuffer notThis); + +extern void +xmesa_check_and_update_buffer_size(XMesaContext xmctx, XMesaBuffer drawBuffer); + +extern void +xmesa_destroy_buffers_on_display(XMesaDisplay *dpy); + +extern struct pipe_context * +xmesa_create_pipe_context(XMesaContext xm, uint pixelformat); + +static INLINE GLuint +xmesa_buffer_width(XMesaBuffer b) +{ + return b->stfb->Base.Width; +} + +static INLINE GLuint +xmesa_buffer_height(XMesaBuffer b) +{ + return b->stfb->Base.Height; +} + +extern void +xmesa_display_surface(XMesaBuffer b, const struct pipe_surface *surf); + +extern int +xmesa_check_for_xshm(XMesaDisplay *display); + +#endif diff 
--git a/src/glu/.gitignore b/src/glu/.gitignore new file mode 100644 index 0000000000..279ea7d434 --- /dev/null +++ b/src/glu/.gitignore @@ -0,0 +1 @@ +exptmp diff --git a/src/glu/descrip.mms b/src/glu/descrip.mms deleted file mode 100644 index 6d5cd858da..0000000000 --- a/src/glu/descrip.mms +++ /dev/null @@ -1,9 +0,0 @@ -# Makefile for Mesa for VMS -# contributed by Jouk Jansen joukj@hrem.stm.tudelft.nl - -all : -# PIPE is avalailable on VMS7.0 and higher. For lower versions split the -#command in two conditional command. JJ - if f$search("SYS$SYSTEM:CXX$COMPILER.EXE") .nes. "" then pipe set default [.sgi] ; $(MMS)$(MMSQUALIFIERS) - if f$search("SYS$SYSTEM:CXX$COMPILER.EXE") .eqs. "" then pipe set default [.mesa] ; $(MMS)$(MMSQUALIFIERS) - set default [-] diff --git a/src/glu/mesa/Makefile.DJ b/src/glu/mesa/Makefile.DJ deleted file mode 100644 index 92bcdaae94..0000000000 --- a/src/glu/mesa/Makefile.DJ +++ /dev/null @@ -1,100 +0,0 @@ -# Mesa 3-D graphics library -# Version: 4.0 -# -# Copyright (C) 1999 Brian Paul All Rights Reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -# BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN -# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -# DOS/DJGPP glu makefile v1.5 for Mesa -# -# Copyright (C) 2002 - Daniel Borca -# Email : dborca@users.sourceforge.net -# Web : http://www.geocities.com/dborca - - -# -# Available options: -# -# Environment variables: -# CFLAGS -# -# Targets: -# all: build GLU -# clean: remove object files -# - - - -.PHONY: all clean - -TOP = ../../.. -LIBDIR = $(TOP)/lib -GLU_LIB = libglu.a -GLU_DXE = glu.dxe -GLU_IMP = libiglu.a - -export LD_LIBRARY_PATH := $(LD_LIBRARY_PATH);$(LIBDIR);$(GLIDE)/lib - -CC = gcc -CFLAGS += -I$(TOP)/include - -AR = ar -ARFLAGS = crus - -HAVEDXE3 = $(wildcard $(DJDIR)/bin/dxe3gen.exe) - -ifeq ($(wildcard $(addsuffix /rm.exe,$(subst ;, ,$(PATH)))),) -UNLINK = del $(subst /,\,$(1)) -else -UNLINK = $(RM) $(1) -endif - -CORE_SOURCES = \ - glu.c \ - mipmap.c \ - nurbs.c \ - nurbscrv.c \ - nurbssrf.c \ - nurbsutl.c \ - polytest.c \ - project.c \ - quadric.c \ - tess.c \ - tesselat.c - -SOURCES = $(CORE_SOURCES) - -OBJECTS = $(SOURCES:.c=.o) - -.c.o: - $(CC) -o $@ $(CFLAGS) -c $< - -all: $(LIBDIR)/$(GLU_LIB) $(LIBDIR)/$(GLU_DXE) $(LIBDIR)/$(GLU_IMP) - -$(LIBDIR)/$(GLU_LIB): $(OBJECTS) - $(AR) $(ARFLAGS) $@ $^ - -$(LIBDIR)/$(GLU_DXE) $(LIBDIR)/$(GLU_IMP): $(OBJECTS) -ifeq ($(HAVEDXE3),) - $(warning Missing DXE3 package... Skipping $(GLU_DXE)) -else - -dxe3gen -o $(LIBDIR)/$(GLU_DXE) -Y $(LIBDIR)/$(GLU_IMP) -D "MesaGLU DJGPP" -E _glu -P gl.dxe -U $^ -endif - -clean: - -$(call UNLINK,*.o) diff --git a/src/glu/mesa/descrip.mms b/src/glu/mesa/descrip.mms deleted file mode 100644 index 2b3f64d8bc..0000000000 --- a/src/glu/mesa/descrip.mms +++ /dev/null @@ -1,61 +0,0 @@ -# Makefile for GLU for VMS -# contributed by Jouk Jansen joukj@hrem.stm.tudelft.nl - -.first - define gl [-.include.gl] - -.include [-]mms-config. 
- -##### MACROS ##### - -VPATH = RCS - -INCDIR = $disk2:[-.include] -LIBDIR = [-.lib] -CFLAGS = /include=$(INCDIR)/define=(FBIND=1)/name=(as_is,short)/float=ieee/ieee=denorm - -SOURCES = glu.c mipmap.c nurbs.c nurbscrv.c nurbssrf.c nurbsutl.c \ - polytest.c project.c quadric.c tess.c tesselat.c - -OBJECTS =glu.obj,mipmap.obj,nurbs.obj,nurbscrv.obj,nurbssrf.obj,nurbsutl.obj,\ - polytest.obj,project.obj,quadric.obj,tess.obj,tesselat.obj - - -##### RULES ##### - -VERSION=MesaGlu V3.2 - -##### TARGETS ##### - -# Make the library: -$(LIBDIR)$(GLU_LIB) : $(OBJECTS) -.ifdef SHARE - @ WRITE_ SYS$OUTPUT " generating mesagl1.opt" - @ OPEN_/WRITE FILE mesagl1.opt - @ WRITE_ FILE "!" - @ WRITE_ FILE "! mesagl1.opt generated by DESCRIP.$(MMS_EXT)" - @ WRITE_ FILE "!" - @ WRITE_ FILE "IDENTIFICATION=""$(VERSION)""" - @ WRITE_ FILE "GSMATCH=LEQUAL,3,2 - @ WRITE_ FILE "$(OBJECTS)" - @ WRITE_ FILE "[-.lib]libmesagl.exe/SHARE" - @ WRITE_ FILE "SYS$SHARE:DECW$XEXTLIBSHR/SHARE" - @ WRITE_ FILE "SYS$SHARE:DECW$XLIBSHR/SHARE" - @ CLOSE_ FILE - @ WRITE_ SYS$OUTPUT " generating mesagl.map ..." - @ LINK_/NODEB/NOSHARE/NOEXE/MAP=mesagl.map/FULL mesagl1.opt/OPT - @ WRITE_ SYS$OUTPUT " analyzing mesagl.map ..." - @ @[-.vms]ANALYZE_MAP.COM mesagl.map mesagl.opt - @ WRITE_ SYS$OUTPUT " linking $(GLU_LIB) ..." - @ LINK_/noinform/NODEB/SHARE=$(GLU_LIB)/MAP=mesagl.map/FULL mesagl1.opt/opt,mesagl.opt/opt -.else - @ $(MAKELIB) $(GLU_LIB) $(OBJECTS) -.endif - @ rename $(GLU_LIB)* $(LIBDIR) - -clean : - delete *.obj;* - purge - -include mms_depend. - diff --git a/src/glu/mesa/mms_depend b/src/glu/mesa/mms_depend deleted file mode 100644 index ed59ca9de8..0000000000 --- a/src/glu/mesa/mms_depend +++ /dev/null @@ -1,15 +0,0 @@ -# DO NOT DELETE THIS LINE -- make depend depends on it. 
- -glu.obj : gluP.h [-.include.gl]gl.h [-.include.gl]glu.h -mipmap.obj : gluP.h [-.include.gl]gl.h [-.include.gl]glu.h -nurbs.obj : gluP.h [-.include.gl]gl.h [-.include.gl]glu.h nurbs.h -nurbscrv.obj : nurbs.h gluP.h [-.include.gl]gl.h [-.include.gl]glu.h -nurbssrf.obj : gluP.h [-.include.gl]gl.h [-.include.gl]glu.h nurbs.h -nurbsutl.obj : gluP.h [-.include.gl]gl.h [-.include.gl]glu.h nurbs.h -project.obj : gluP.h [-.include.gl]gl.h [-.include.gl]glu.h -quadric.obj : gluP.h [-.include.gl]gl.h [-.include.gl]glu.h -tess.obj : gluP.h [-.include.gl]gl.h [-.include.gl]glu.h tess.h -tess_fist.obj : gluP.h [-.include.gl]gl.h [-.include.gl]glu.h tess.h -tess_hash.obj : gluP.h [-.include.gl]gl.h [-.include.gl]glu.h tess.h -tess_heap.obj : gluP.h [-.include.gl]gl.h [-.include.gl]glu.h tess.h -tess_clip.obj : gluP.h [-.include.gl]gl.h [-.include.gl]glu.h tess.h diff --git a/src/glu/sgi/.gitignore b/src/glu/sgi/.gitignore new file mode 100644 index 0000000000..279ea7d434 --- /dev/null +++ b/src/glu/sgi/.gitignore @@ -0,0 +1 @@ +exptmp diff --git a/src/glu/sgi/Makefile.DJ b/src/glu/sgi/Makefile.DJ deleted file mode 100644 index b5df3e846a..0000000000 --- a/src/glu/sgi/Makefile.DJ +++ /dev/null @@ -1,188 +0,0 @@ -# Mesa 3-D graphics library -# Version: 4.0 -# -# Copyright (C) 1999 Brian Paul All Rights Reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN -# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -# DOS/DJGPP glu makefile v1.5 for Mesa -# -# Copyright (C) 2002 - Daniel Borca -# Email : dborca@users.sourceforge.net -# Web : http://www.geocities.com/dborca - - -# -# Available options: -# -# Environment variables: -# CFLAGS -# -# Targets: -# all: build GLU -# clean: remove object files -# - - - -.PHONY: all clean - -TOP = ../../.. -LIBDIR = $(TOP)/lib -GLU_LIB = libglu.a -GLU_DXE = glu.dxe -GLU_IMP = libiglu.a - -export LD_LIBRARY_PATH := $(LD_LIBRARY_PATH);$(LIBDIR);$(GLIDE)/lib - -CC = gcc -CFLAGS += -DNDEBUG -DLIBRARYBUILD -I$(TOP)/include -Iinclude -CXX = gpp -CXXFLAGS = $(CFLAGS) -Ilibnurbs/internals -Ilibnurbs/interface -Ilibnurbs/nurbtess - -AR = ar -ARFLAGS = crus - -HAVEDXE3 = $(wildcard $(DJDIR)/bin/dxe3gen.exe) - -ifeq ($(wildcard $(addsuffix /rm.exe,$(subst ;, ,$(PATH)))),) -UNLINK = del $(subst /,\,$(1)) -else -UNLINK = $(RM) $(1) -endif - -C_SOURCES = \ - libutil/error.c \ - libutil/glue.c \ - libutil/mipmap.c \ - libutil/project.c \ - libutil/quad.c \ - libutil/registry.c \ - libtess/dict.c \ - libtess/geom.c \ - libtess/memalloc.c \ - libtess/mesh.c \ - libtess/normal.c \ - libtess/priorityq.c \ - libtess/render.c \ - libtess/sweep.c \ - libtess/tess.c \ - libtess/tessmono.c - -CC_SOURCES = \ - libnurbs/interface/bezierEval.cc \ - libnurbs/interface/bezierPatch.cc \ - libnurbs/interface/bezierPatchMesh.cc \ - libnurbs/interface/glcurveval.cc \ - libnurbs/interface/glinterface.cc \ - libnurbs/interface/glrenderer.cc \ - libnurbs/interface/glsurfeval.cc \ - libnurbs/interface/incurveeval.cc \ 
- libnurbs/interface/insurfeval.cc \ - libnurbs/internals/arc.cc \ - libnurbs/internals/arcsorter.cc \ - libnurbs/internals/arctess.cc \ - libnurbs/internals/backend.cc \ - libnurbs/internals/basiccrveval.cc \ - libnurbs/internals/basicsurfeval.cc \ - libnurbs/internals/bin.cc \ - libnurbs/internals/bufpool.cc \ - libnurbs/internals/cachingeval.cc \ - libnurbs/internals/ccw.cc \ - libnurbs/internals/coveandtiler.cc \ - libnurbs/internals/curve.cc \ - libnurbs/internals/curvelist.cc \ - libnurbs/internals/curvesub.cc \ - libnurbs/internals/dataTransform.cc \ - libnurbs/internals/displaylist.cc \ - libnurbs/internals/flist.cc \ - libnurbs/internals/flistsorter.cc \ - libnurbs/internals/hull.cc \ - libnurbs/internals/intersect.cc \ - libnurbs/internals/knotvector.cc \ - libnurbs/internals/mapdesc.cc \ - libnurbs/internals/mapdescv.cc \ - libnurbs/internals/maplist.cc \ - libnurbs/internals/mesher.cc \ - libnurbs/internals/monoTriangulationBackend.cc \ - libnurbs/internals/monotonizer.cc \ - libnurbs/internals/mycode.cc \ - libnurbs/internals/nurbsinterfac.cc \ - libnurbs/internals/nurbstess.cc \ - libnurbs/internals/patch.cc \ - libnurbs/internals/patchlist.cc \ - libnurbs/internals/quilt.cc \ - libnurbs/internals/reader.cc \ - libnurbs/internals/renderhints.cc \ - libnurbs/internals/slicer.cc \ - libnurbs/internals/sorter.cc \ - libnurbs/internals/splitarcs.cc \ - libnurbs/internals/subdivider.cc \ - libnurbs/internals/tobezier.cc \ - libnurbs/internals/trimline.cc \ - libnurbs/internals/trimregion.cc \ - libnurbs/internals/trimvertpool.cc \ - libnurbs/internals/uarray.cc \ - libnurbs/internals/varray.cc \ - libnurbs/nurbtess/directedLine.cc \ - libnurbs/nurbtess/gridWrap.cc \ - libnurbs/nurbtess/monoChain.cc \ - libnurbs/nurbtess/monoPolyPart.cc \ - libnurbs/nurbtess/monoTriangulation.cc \ - libnurbs/nurbtess/partitionX.cc \ - libnurbs/nurbtess/partitionY.cc \ - libnurbs/nurbtess/polyDBG.cc \ - libnurbs/nurbtess/polyUtil.cc \ - libnurbs/nurbtess/primitiveStream.cc \ 
- libnurbs/nurbtess/quicksort.cc \ - libnurbs/nurbtess/rectBlock.cc \ - libnurbs/nurbtess/sampleComp.cc \ - libnurbs/nurbtess/sampleCompBot.cc \ - libnurbs/nurbtess/sampleCompRight.cc \ - libnurbs/nurbtess/sampleCompTop.cc \ - libnurbs/nurbtess/sampleMonoPoly.cc \ - libnurbs/nurbtess/sampledLine.cc \ - libnurbs/nurbtess/searchTree.cc - -SOURCES = $(C_SOURCES) $(CC_SOURCES) - -OBJECTS = $(addsuffix .o,$(basename $(SOURCES))) - -.c.o: - $(CC) -o $@ $(CFLAGS) -c $< -.cc.o: - $(CXX) -o $@ $(CXXFLAGS) -c $< - -all: $(LIBDIR)/$(GLU_LIB) $(LIBDIR)/$(GLU_DXE) $(LIBDIR)/$(GLU_IMP) - -$(LIBDIR)/$(GLU_LIB): $(OBJECTS) - $(AR) $(ARFLAGS) $@ $^ - -$(LIBDIR)/$(GLU_DXE) $(LIBDIR)/$(GLU_IMP): $(OBJECTS) -ifeq ($(HAVEDXE3),) - $(warning Missing DXE3 package... Skipping $(GLU_DXE)) -else - -dxe3gen -o $(LIBDIR)/$(GLU_DXE) -Y $(LIBDIR)/$(GLU_IMP) -D "MesaGLU/SGI DJGPP" -E _glu -P gl.dxe -U $^ -endif - -clean: - -$(call UNLINK,libutil/*.o) - -$(call UNLINK,libtess/*.o) - -$(call UNLINK,libnurbs/interface/*.o) - -$(call UNLINK,libnurbs/internals/*.o) - -$(call UNLINK,libnurbs/nurbtess/*.o) diff --git a/src/glu/sgi/descrip.mms b/src/glu/sgi/descrip.mms deleted file mode 100644 index 5abc8b2e04..0000000000 --- a/src/glu/sgi/descrip.mms +++ /dev/null @@ -1,451 +0,0 @@ -# Makefile for GLU for VMS -# contributed by Jouk Jansen joukj@hrem.stm.tudelft.nl - -.first - define gl [---.include.gl] - -.include [---]mms-config. 
- -##### MACROS ##### - -VPATH = RCS - -INCDIR =([-.include],[.include],[.internals],[.libnurbs.internals],\ - [.libnurbs.interface],[.libnurbs.nurbtess]) -LIBDIR = [---.lib] -CFLAGS = /include=$(INCDIR)/name=(as_is,short)/float=ieee/ieee=denorm - -LU_OBJECTS=\ - [.libutil]error.obj, \ - [.libutil]glue.obj, \ - [.libutil]mipmap.obj,\ - [.libutil]project.obj,\ - [.libutil]quad.obj, \ - [.libutil]registry.obj - -LT_OBJECTS=[.libtess]dict.obj, \ - [.libtess]geom.obj, \ - [.libtess]memalloc.obj,\ - [.libtess]mesh.obj, \ - [.libtess]normal.obj,\ - [.libtess]priorityq.obj,\ - [.libtess]render.obj,\ - [.libtess]sweep.obj, \ - [.libtess]tess.obj, \ - [.libtess]tessmono.obj - -LI_OBJECTS=[.libnurbs.interface]bezierEval.obj, \ - [.libnurbs.interface]bezierPatch.obj, \ - [.libnurbs.interface]bezierPatchMesh.obj, \ - [.libnurbs.interface]glcurveval.obj, \ - [.libnurbs.interface]glinterface.obj - -LI_OBJECTS1=[.libnurbs.interface]glrenderer.obj, \ - [.libnurbs.interface]glsurfeval.obj, \ - [.libnurbs.interface]incurveeval.obj, \ - [.libnurbs.interface]insurfeval.obj - -LI2_OBJECTS=[.libnurbs.internals]arc.obj, \ - [.libnurbs.internals]arcsorter.obj, \ - [.libnurbs.internals]arctess.obj, \ - [.libnurbs.internals]backend.obj, \ - [.libnurbs.internals]basiccrveval.obj, \ - [.libnurbs.internals]basicsurfeval.obj - -LI2_OBJECTS1=[.libnurbs.internals]bin.obj, \ - [.libnurbs.internals]bufpool.obj, \ - [.libnurbs.internals]cachingeval.obj, \ - [.libnurbs.internals]ccw.obj, \ - [.libnurbs.internals]coveandtiler.obj, \ - [.libnurbs.internals]curve.obj, \ - [.libnurbs.internals]curvelist.obj - -LI2_OBJECTS2=[.libnurbs.internals]curvesub.obj, \ - [.libnurbs.internals]dataTransform.obj, \ - [.libnurbs.internals]displaylist.obj, \ - [.libnurbs.internals]flist.obj, \ - [.libnurbs.internals]flistsorter.obj - -LI2_OBJECTS3=[.libnurbs.internals]hull.obj, \ - [.libnurbs.internals]intersect.obj, \ - [.libnurbs.internals]knotvector.obj, \ - [.libnurbs.internals]mapdesc.obj - 
-LI2_OBJECTS4=[.libnurbs.internals]mapdescv.obj, \ - [.libnurbs.internals]maplist.obj, \ - [.libnurbs.internals]mesher.obj, \ - [.libnurbs.internals]monoTriangulationBackend.obj,\ - [.libnurbs.internals]monotonizer.obj - -LI2_OBJECTS5=[.libnurbs.internals]mycode.obj, \ - [.libnurbs.internals]nurbsinterfac.obj, \ - [.libnurbs.internals]nurbstess.obj, \ - [.libnurbs.internals]patch.obj - -LI2_OBJECTS6=[.libnurbs.internals]patchlist.obj, \ - [.libnurbs.internals]quilt.obj, \ - [.libnurbs.internals]reader.obj, \ - [.libnurbs.internals]renderhints.obj, \ - [.libnurbs.internals]slicer.obj - -LI2_OBJECTS7=[.libnurbs.internals]sorter.obj, \ - [.libnurbs.internals]splitarcs.obj, \ - [.libnurbs.internals]subdivider.obj, \ - [.libnurbs.internals]tobezier.obj - -LI2_OBJECTS8=[.libnurbs.internals]trimline.obj, \ - [.libnurbs.internals]trimregion.obj, \ - [.libnurbs.internals]trimvertpool.obj, \ - [.libnurbs.internals]uarray.obj, \ - [.libnurbs.internals]varray.obj - -LN_OBJECTS=[.libnurbs.nurbtess]directedLine.obj, \ - [.libnurbs.nurbtess]gridWrap.obj, \ - [.libnurbs.nurbtess]monoChain.obj, \ - [.libnurbs.nurbtess]monoPolyPart.obj, \ - [.libnurbs.nurbtess]monoTriangulation.obj - -LN_OBJECTS1=[.libnurbs.nurbtess]partitionX.obj, \ - [.libnurbs.nurbtess]partitionY.obj, \ - [.libnurbs.nurbtess]polyDBG.obj - -LN_OBJECTS2=[.libnurbs.nurbtess]polyUtil.obj, \ - [.libnurbs.nurbtess]primitiveStream.obj, \ - [.libnurbs.nurbtess]quicksort.obj, \ - [.libnurbs.nurbtess]rectBlock.obj - -LN_OBJECTS3=[.libnurbs.nurbtess]sampleComp.obj, \ - [.libnurbs.nurbtess]sampleCompBot.obj, \ - [.libnurbs.nurbtess]sampleCompRight.obj - -LN_OBJECTS4=[.libnurbs.nurbtess]sampleCompTop.obj, \ - [.libnurbs.nurbtess]sampleMonoPoly.obj,\ - [.libnurbs.nurbtess]sampledLine.obj, \ - [.libnurbs.nurbtess]searchTree.obj - -##### RULES ##### - -VERSION=MesaGlu V3.5 - -##### TARGETS ##### - -# Make the library: -$(LIBDIR)$(GLU_LIB) : $(LU_OBJECTS) $(LT_OBJECTS) $(LI_OBJECTS) $(LI_OBJECTS1)\ - $(LI2_OBJECTS) 
$(LI2_OBJECTS1) $(LI2_OBJECTS2)\ - $(LI2_OBJECTS3) $(LI2_OBJECTS4) $(LI2_OBJECTS5)\ - $(LI2_OBJECTS6) $(LI2_OBJECTS7) $(LI2_OBJECTS8)\ - $(LN_OBJECTS) $(LN_OBJECTS1) $(LN_OBJECTS2)\ - $(LN_OBJECTS3) $(LN_OBJECTS4) - @ $(MAKELIB) $(GLU_LIB) $(LU_OBJECTS),$(LT_OBJECTS),$(LI_OBJECTS),\ - $(LI2_OBJECTS),$(LN_OBJECTS) - @ rename $(GLU_LIB)* $(LIBDIR) -.ifdef SHARE - @ WRITE_ SYS$OUTPUT " generating mesagl1.opt" - @ OPEN_/WRITE FILE mesagl1.opt - @ WRITE_ FILE "!" - @ WRITE_ FILE "! mesagl1.opt generated by DESCRIP.$(MMS_EXT)" - @ WRITE_ FILE "!" - @ WRITE_ FILE "IDENTIFICATION=""$(VERSION)""" - @ WRITE_ FILE "GSMATCH=LEQUAL,3,5 - @ WRITE_ FILE "$(LU_OBJECTS)" - @ WRITE_ FILE "$(LT_OBJECTS)" - @ WRITE_ FILE "$(LI_OBJECTS)" - @ WRITE_ FILE "$(LI_OBJECTS1)" - @ WRITE_ FILE "$(LI2_OBJECTS)" - @ WRITE_ FILE "$(LI2_OBJECTS1)" - @ WRITE_ FILE "$(LI2_OBJECTS2)" - @ WRITE_ FILE "$(LI2_OBJECTS3)" - @ WRITE_ FILE "$(LI2_OBJECTS4)" - @ WRITE_ FILE "$(LI2_OBJECTS5)" - @ WRITE_ FILE "$(LI2_OBJECTS6)" - @ WRITE_ FILE "$(LI2_OBJECTS7)" - @ WRITE_ FILE "$(LI2_OBJECTS8)" - @ WRITE_ FILE "$(LN_OBJECTS)" - @ WRITE_ FILE "$(LN_OBJECTS1)" - @ WRITE_ FILE "$(LN_OBJECTS2)" - @ WRITE_ FILE "$(LN_OBJECTS3)" - @ WRITE_ FILE "$(LN_OBJECTS4)" - @ WRITE_ FILE "[---.lib]libmesagl.exe/SHARE" - @ WRITE_ FILE "SYS$SHARE:DECW$XEXTLIBSHR/SHARE" - @ WRITE_ FILE "SYS$SHARE:DECW$XLIBSHR/SHARE" - @ CLOSE_ FILE -# @ WRITE_ SYS$OUTPUT " generating mesagl.map ..." -# @ CXXLINK_/NODEB/NOSHARE/NOEXE/MAP=mesagl.map/FULL mesagl1.opt/OPT -# @ WRITE_ SYS$OUTPUT " analyzing mesagl.map ..." -# @ @[-.vms]ANALYZE_MAP.COM mesagl.map mesagl.opt - @ WRITE_ SYS$OUTPUT " linking $(GLU_SHAR) ..." 
-# @ CXXLINK_/noinform/NODEB/SHARE=$(GLU_SHAR)/MAP=mesagl.map/FULL mesagl1.opt/opt,mesagl.opt/opt - @ CXXLINK_/noinform/NODEB/SHARE=$(GLU_SHAR)/MAP=mesagl.map/FULL mesagl1.opt/opt,mesaglu.opt/opt - @ rename $(GLU_SHAR)* $(LIBDIR) -.endif - -clean : - delete [...]*.obj;* - purge - -[.libutil]error.obj : [.libutil]error.c - $(CC) $(CFLAGS) /obj=[.libutil]error.obj [.libutil]error.c - -[.libutil]glue.obj : [.libutil]glue.c - $(CC) $(CFLAGS) /obj=[.libutil]glue.obj [.libutil]glue.c - -[.libutil]mipmap.obj : [.libutil]mipmap.c - $(CC) $(CFLAGS) /obj=[.libutil]mipmap.obj [.libutil]mipmap.c - -[.libutil]project.obj : [.libutil]project.c - $(CC) $(CFLAGS) /obj=[.libutil]project.obj [.libutil]project.c - -[.libutil]quad.obj : [.libutil]quad.c - $(CC) $(CFLAGS) /obj=[.libutil]quad.obj [.libutil]quad.c - -[.libutil]registry.obj : [.libutil]registry.c - $(CC) $(CFLAGS) /obj=[.libutil]registry.obj [.libutil]registry.c - -[.libtess]dict.obj : [.libtess]dict.c - $(CC) $(CFLAGS) /obj=[.libtess]dict.obj [.libtess]dict.c - -[.libtess]geom.obj : [.libtess]geom.c - $(CC) $(CFLAGS) /obj=[.libtess]geom.obj [.libtess]geom.c - -[.libtess]memalloc.obj : [.libtess]memalloc.c - $(CC) $(CFLAGS) /obj=[.libtess]memalloc.obj [.libtess]memalloc.c - -[.libtess]mesh.obj : [.libtess]mesh.c - $(CC) $(CFLAGS) /obj=[.libtess]mesh.obj [.libtess]mesh.c - -[.libtess]normal.obj : [.libtess]normal.c - $(CC) $(CFLAGS) /obj=[.libtess]normal.obj [.libtess]normal.c - -[.libtess]priorityq.obj : [.libtess]priorityq.c - $(CC) $(CFLAGS) /obj=[.libtess]priorityq.obj [.libtess]priorityq.c - -[.libtess]render.obj : [.libtess]render.c - $(CC) $(CFLAGS) /obj=[.libtess]render.obj [.libtess]render.c - -[.libtess]sweep.obj : [.libtess]sweep.c - $(CC) $(CFLAGS) /obj=[.libtess]sweep.obj [.libtess]sweep.c - -[.libtess]tess.obj : [.libtess]tess.c - $(CC) $(CFLAGS) /obj=[.libtess]tess.obj [.libtess]tess.c - -[.libtess]tessmono.obj : [.libtess]tessmono.c - $(CC) $(CFLAGS) /obj=[.libtess]tessmono.obj [.libtess]tessmono.c - 
-[.libnurbs.interface]bezierEval.obj : [.libnurbs.interface]bezierEval.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.interface]bezierEval.obj [.libnurbs.interface]bezierEval.cc - -[.libnurbs.interface]bezierPatch.obj : [.libnurbs.interface]bezierPatch.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.interface]bezierPatch.obj [.libnurbs.interface]bezierPatch.cc - -[.libnurbs.interface]bezierPatchMesh.obj : [.libnurbs.interface]bezierPatchMesh.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.interface]bezierPatchMesh.obj [.libnurbs.interface]bezierPatchMesh.cc - -[.libnurbs.interface]glcurveval.obj : [.libnurbs.interface]glcurveval.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.interface]glcurveval.obj [.libnurbs.interface]glcurveval.cc - -[.libnurbs.interface]glinterface.obj : [.libnurbs.interface]glinterface.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.interface]glinterface.obj [.libnurbs.interface]glinterface.cc - -[.libnurbs.interface]glrenderer.obj : [.libnurbs.interface]glrenderer.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.interface]glrenderer.obj [.libnurbs.interface]glrenderer.cc - -[.libnurbs.interface]glsurfeval.obj : [.libnurbs.interface]glsurfeval.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.interface]glsurfeval.obj [.libnurbs.interface]glsurfeval.cc - -[.libnurbs.interface]incurveeval.obj : [.libnurbs.interface]incurveeval.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.interface]incurveeval.obj [.libnurbs.interface]incurveeval.cc - -[.libnurbs.interface]insurfeval.obj : [.libnurbs.interface]insurfeval.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.interface]insurfeval.obj [.libnurbs.interface]insurfeval.cc - -[.libnurbs.internals]arc.obj : [.libnurbs.internals]arc.cc - $(CXX) $(CFLAGS)/list/show=all /obj=[.libnurbs.internals]arc.obj [.libnurbs.internals]arc.cc - -[.libnurbs.internals]arcsorter.obj : [.libnurbs.internals]arcsorter.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]arcsorter.obj [.libnurbs.internals]arcsorter.cc - -[.libnurbs.internals]arctess.obj : [.libnurbs.internals]arctess.cc - $(CXX) $(CFLAGS) 
/obj=[.libnurbs.internals]arctess.obj [.libnurbs.internals]arctess.cc - -[.libnurbs.internals]backend.obj : [.libnurbs.internals]backend.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]backend.obj [.libnurbs.internals]backend.cc - -[.libnurbs.internals]basiccrveval.obj : [.libnurbs.internals]basiccrveval.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]basiccrveval.obj [.libnurbs.internals]basiccrveval.cc - -[.libnurbs.internals]basicsurfeval.obj : [.libnurbs.internals]basicsurfeval.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]basicsurfeval.obj [.libnurbs.internals]basicsurfeval.cc - -[.libnurbs.internals]bin.obj : [.libnurbs.internals]bin.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]bin.obj [.libnurbs.internals]bin.cc - -[.libnurbs.internals]bufpool.obj : [.libnurbs.internals]bufpool.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]bufpool.obj [.libnurbs.internals]bufpool.cc - -[.libnurbs.internals]cachingeval.obj : [.libnurbs.internals]cachingeval.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]cachingeval.obj [.libnurbs.internals]cachingeval.cc - -[.libnurbs.internals]ccw.obj : [.libnurbs.internals]ccw.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]ccw.obj [.libnurbs.internals]ccw.cc - -[.libnurbs.internals]coveandtiler.obj : [.libnurbs.internals]coveandtiler.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]coveandtiler.obj [.libnurbs.internals]coveandtiler.cc - -[.libnurbs.internals]curve.obj : [.libnurbs.internals]curve.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]curve.obj [.libnurbs.internals]curve.cc - -[.libnurbs.internals]curvelist.obj : [.libnurbs.internals]curvelist.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]curvelist.obj [.libnurbs.internals]curvelist.cc - -[.libnurbs.internals]curvesub.obj : [.libnurbs.internals]curvesub.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]curvesub.obj [.libnurbs.internals]curvesub.cc - -[.libnurbs.internals]dataTransform.obj : [.libnurbs.internals]dataTransform.cc - $(CXX) $(CFLAGS) 
/obj=[.libnurbs.internals]dataTransform.obj [.libnurbs.internals]dataTransform.cc - -[.libnurbs.internals]displaylist.obj : [.libnurbs.internals]displaylist.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]displaylist.obj [.libnurbs.internals]displaylist.cc - -[.libnurbs.internals]flist.obj : [.libnurbs.internals]flist.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]flist.obj [.libnurbs.internals]flist.cc - -[.libnurbs.internals]flistsorter.obj : [.libnurbs.internals]flistsorter.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]flistsorter.obj [.libnurbs.internals]flistsorter.cc - -[.libnurbs.internals]hull.obj : [.libnurbs.internals]hull.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]hull.obj [.libnurbs.internals]hull.cc - -[.libnurbs.internals]intersect.obj : [.libnurbs.internals]intersect.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]intersect.obj [.libnurbs.internals]intersect.cc - -[.libnurbs.internals]knotvector.obj : [.libnurbs.internals]knotvector.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]knotvector.obj [.libnurbs.internals]knotvector.cc - -[.libnurbs.internals]mapdesc.obj : [.libnurbs.internals]mapdesc.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]mapdesc.obj [.libnurbs.internals]mapdesc.cc - -[.libnurbs.internals]mapdescv.obj : [.libnurbs.internals]mapdescv.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]mapdescv.obj [.libnurbs.internals]mapdescv.cc - -[.libnurbs.internals]maplist.obj : [.libnurbs.internals]maplist.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]maplist.obj [.libnurbs.internals]maplist.cc - -[.libnurbs.internals]mesher.obj : [.libnurbs.internals]mesher.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]mesher.obj [.libnurbs.internals]mesher.cc - -[.libnurbs.internals]monoTriangulationBackend.obj : [.libnurbs.internals]monoTriangulationBackend.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]monoTriangulationBackend.obj [.libnurbs.internals]monoTriangulationBackend.cc - -[.libnurbs.internals]monotonizer.obj : 
[.libnurbs.internals]monotonizer.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]monotonizer.obj [.libnurbs.internals]monotonizer.cc - -[.libnurbs.internals]mycode.obj : [.libnurbs.internals]mycode.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]mycode.obj [.libnurbs.internals]mycode.cc - -[.libnurbs.internals]nurbsinterfac.obj : [.libnurbs.internals]nurbsinterfac.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]nurbsinterfac.obj [.libnurbs.internals]nurbsinterfac.cc - -[.libnurbs.internals]nurbstess.obj : [.libnurbs.internals]nurbstess.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]nurbstess.obj [.libnurbs.internals]nurbstess.cc - -[.libnurbs.internals]patch.obj : [.libnurbs.internals]patch.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]patch.obj [.libnurbs.internals]patch.cc - -[.libnurbs.internals]patchlist.obj : [.libnurbs.internals]patchlist.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]patchlist.obj [.libnurbs.internals]patchlist.cc - -[.libnurbs.internals]quilt.obj : [.libnurbs.internals]quilt.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]quilt.obj [.libnurbs.internals]quilt.cc - -[.libnurbs.internals]reader.obj : [.libnurbs.internals]reader.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]reader.obj [.libnurbs.internals]reader.cc - -[.libnurbs.internals]renderhints.obj : [.libnurbs.internals]renderhints.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]renderhints.obj [.libnurbs.internals]renderhints.cc - -[.libnurbs.internals]slicer.obj : [.libnurbs.internals]slicer.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]slicer.obj [.libnurbs.internals]slicer.cc - -[.libnurbs.internals]sorter.obj : [.libnurbs.internals]sorter.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]sorter.obj [.libnurbs.internals]sorter.cc - -[.libnurbs.internals]splitarcs.obj : [.libnurbs.internals]splitarcs.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]splitarcs.obj [.libnurbs.internals]splitarcs.cc - -[.libnurbs.internals]subdivider.obj : [.libnurbs.internals]subdivider.cc - $(CXX) 
$(CFLAGS) /obj=[.libnurbs.internals]subdivider.obj [.libnurbs.internals]subdivider.cc - -[.libnurbs.internals]tobezier.obj : [.libnurbs.internals]tobezier.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]tobezier.obj [.libnurbs.internals]tobezier.cc - -[.libnurbs.internals]trimline.obj : [.libnurbs.internals]trimline.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]trimline.obj [.libnurbs.internals]trimline.cc - -[.libnurbs.internals]trimregion.obj : [.libnurbs.internals]trimregion.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]trimregion.obj [.libnurbs.internals]trimregion.cc - -[.libnurbs.internals]trimvertpool.obj : [.libnurbs.internals]trimvertpool.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]trimvertpool.obj [.libnurbs.internals]trimvertpool.cc - -[.libnurbs.internals]uarray.obj : [.libnurbs.internals]uarray.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]uarray.obj [.libnurbs.internals]uarray.cc - -[.libnurbs.internals]varray.obj : [.libnurbs.internals]varray.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.internals]varray.obj [.libnurbs.internals]varray.cc - -[.libnurbs.nurbtess]directedLine.obj : [.libnurbs.nurbtess]directedLine.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]directedLine.obj [.libnurbs.nurbtess]directedLine.cc - -[.libnurbs.nurbtess]gridWrap.obj : [.libnurbs.nurbtess]gridWrap.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]gridWrap.obj [.libnurbs.nurbtess]gridWrap.cc - -[.libnurbs.nurbtess]monoChain.obj : [.libnurbs.nurbtess]monoChain.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]monoChain.obj [.libnurbs.nurbtess]monoChain.cc - -[.libnurbs.nurbtess]monoPolyPart.obj : [.libnurbs.nurbtess]monoPolyPart.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]monoPolyPart.obj [.libnurbs.nurbtess]monoPolyPart.cc - -[.libnurbs.nurbtess]monoTriangulation.obj : [.libnurbs.nurbtess]monoTriangulation.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]monoTriangulation.obj [.libnurbs.nurbtess]monoTriangulation.cc - -[.libnurbs.nurbtess]partitionX.obj : 
[.libnurbs.nurbtess]partitionX.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]partitionX.obj [.libnurbs.nurbtess]partitionX.cc - -[.libnurbs.nurbtess]partitionY.obj : [.libnurbs.nurbtess]partitionY.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]partitionY.obj [.libnurbs.nurbtess]partitionY.cc - -[.libnurbs.nurbtess]polyDBG.obj : [.libnurbs.nurbtess]polyDBG.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]polyDBG.obj [.libnurbs.nurbtess]polyDBG.cc - -[.libnurbs.nurbtess]polyUtil.obj : [.libnurbs.nurbtess]polyUtil.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]polyUtil.obj [.libnurbs.nurbtess]polyUtil.cc - -[.libnurbs.nurbtess]primitiveStream.obj : [.libnurbs.nurbtess]primitiveStream.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]primitiveStream.obj [.libnurbs.nurbtess]primitiveStream.cc - -[.libnurbs.nurbtess]quicksort.obj : [.libnurbs.nurbtess]quicksort.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]quicksort.obj [.libnurbs.nurbtess]quicksort.cc - -[.libnurbs.nurbtess]rectBlock.obj : [.libnurbs.nurbtess]rectBlock.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]rectBlock.obj [.libnurbs.nurbtess]rectBlock.cc - -[.libnurbs.nurbtess]sampleComp.obj : [.libnurbs.nurbtess]sampleComp.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]sampleComp.obj [.libnurbs.nurbtess]sampleComp.cc - -[.libnurbs.nurbtess]sampleCompBot.obj : [.libnurbs.nurbtess]sampleCompBot.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]sampleCompBot.obj [.libnurbs.nurbtess]sampleCompBot.cc - -[.libnurbs.nurbtess]sampleCompRight.obj : [.libnurbs.nurbtess]sampleCompRight.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]sampleCompRight.obj [.libnurbs.nurbtess]sampleCompRight.cc - -[.libnurbs.nurbtess]sampleCompTop.obj : [.libnurbs.nurbtess]sampleCompTop.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]sampleCompTop.obj [.libnurbs.nurbtess]sampleCompTop.cc - -[.libnurbs.nurbtess]sampleMonoPoly.obj : [.libnurbs.nurbtess]sampleMonoPoly.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]sampleMonoPoly.obj 
[.libnurbs.nurbtess]sampleMonoPoly.cc - -[.libnurbs.nurbtess]sampledLine.obj : [.libnurbs.nurbtess]sampledLine.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]sampledLine.obj [.libnurbs.nurbtess]sampledLine.cc - -[.libnurbs.nurbtess]searchTree.obj : [.libnurbs.nurbtess]searchTree.cc - $(CXX) $(CFLAGS) /obj=[.libnurbs.nurbtess]searchTree.obj [.libnurbs.nurbtess]searchTree.cc diff --git a/src/glut/beos/Makefile b/src/glut/beos/Makefile index 14f4f43f84..a9f8684c9b 100644 --- a/src/glut/beos/Makefile +++ b/src/glut/beos/Makefile @@ -92,6 +92,7 @@ clean: -rm -f *.o depend: $(SOURCES) + rm -f depend touch depend $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(SOURCES) > /dev/null diff --git a/src/glut/directfb/Makefile b/src/glut/directfb/Makefile index 34c3743ef4..bf93d63e30 100644 --- a/src/glut/directfb/Makefile +++ b/src/glut/directfb/Makefile @@ -70,6 +70,7 @@ $(TOP)/$(LIB_DIR)/$(GLUT_LIB_NAME): depend $(OBJECTS) # Run 'make -f Makefile.solo dep' to update the dependencies if you change # what's included by any source file. depend: $(SOURCES) + rm -f depend touch depend $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(SOURCES) > /dev/null diff --git a/src/glut/dos/Makefile.DJ b/src/glut/dos/Makefile.DJ deleted file mode 100644 index 7e4e0b8576..0000000000 --- a/src/glut/dos/Makefile.DJ +++ /dev/null @@ -1,126 +0,0 @@ -# DOS/DJGPP Mesa Utility Toolkit -# Version: 1.0 -# -# Copyright (C) 2005 Daniel Borca All Rights Reserved. 
-# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# DANIEL BORCA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER -# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - -# -# Available options: -# -# Environment variables: -# CFLAGS -# -# GLIDE path to Glide3 SDK; used to resolve DXEs. -# default = $(TOP)/glide3 -# -# Targets: -# all: build GLUT -# clean: remove object files -# - - - -.PHONY: all clean - -TOP = ../../.. -GLIDE ?= $(TOP)/glide3 -LIBDIR = $(TOP)/lib -GLUT_LIB = libglut.a -GLUT_DXE = glut.dxe -GLUT_IMP = libiglut.a - -export LD_LIBRARY_PATH := $(LD_LIBRARY_PATH);$(LIBDIR);$(GLIDE)/lib - -CC = gcc -CFLAGS += -I$(TOP)/include -I. 
-IPC_HW -CFLAGS += -DGLUT_IMPORT_LIB - -AR = ar -ARFLAGS = crus - -HAVEDXE3 = $(wildcard $(DJDIR)/bin/dxe3gen.exe) - -ifeq ($(wildcard $(addsuffix /rm.exe,$(subst ;, ,$(PATH)))),) -UNLINK = del $(subst /,\,$(1)) -else -UNLINK = $(RM) $(1) -endif - -CORE_SOURCES = \ - loop.c \ - callback.c \ - color.c \ - extens.c \ - init.c \ - menu.c \ - mouse.c \ - overlay.c \ - state.c \ - util.c \ - window.c \ - f8x13.c \ - f9x15.c \ - hel10.c \ - hel12.c \ - hel18.c \ - tr10.c \ - tr24.c \ - mroman.c \ - roman.c \ - bitmap.c \ - stroke.c \ - teapot.c \ - shapes.c - -PC_HW_SOURCES = \ - PC_HW/pc_hw.c \ - PC_HW/pc_keyb.c \ - PC_HW/pc_mouse.c \ - PC_HW/pc_timer.c \ - PC_HW/pc_irq.S - -SOURCES = $(CORE_SOURCES) $(PC_HW_SOURCES) - -OBJECTS = $(addsuffix .o,$(basename $(SOURCES))) - -.c.o: - $(CC) -o $@ $(CFLAGS) -c $< -.S.o: - $(CC) -o $@ $(CFLAGS) -c $< -.s.o: - $(CC) -o $@ $(CFLAGS) -x assembler-with-cpp -c $< - -all: $(LIBDIR)/$(GLUT_LIB) $(LIBDIR)/$(GLUT_DXE) $(LIBDIR)/$(GLUT_IMP) - -$(LIBDIR)/$(GLUT_LIB): $(OBJECTS) - $(AR) $(ARFLAGS) $@ $^ - -$(LIBDIR)/$(GLUT_DXE) $(LIBDIR)/$(GLUT_IMP): $(OBJECTS) -ifeq ($(HAVEDXE3),) - $(warning Missing DXE3 package... Skipping $(GLUT_DXE)) -else - -dxe3gen -o $(LIBDIR)/$(GLUT_DXE) -Y $(LIBDIR)/$(GLUT_IMP) -D "MesaGLUT DJGPP" -E _glut -P gl.dxe -U $^ -endif - -clean: - -$(call UNLINK,*.o) - -$(call UNLINK,PC_HW/*.o) - --include depend diff --git a/src/glut/fbdev/Makefile b/src/glut/fbdev/Makefile index 4081ccec74..199d8c390a 100644 --- a/src/glut/fbdev/Makefile +++ b/src/glut/fbdev/Makefile @@ -77,6 +77,7 @@ install: # Run 'make -f Makefile.solo dep' to update the dependencies if you change # what's included by any source file. 
depend: $(SOURCES) + rm -f depend touch depend $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(SOURCES) > /dev/null diff --git a/src/glut/ggi/Makefile b/src/glut/ggi/Makefile index 1ae1ba0d70..c23dfcac10 100644 --- a/src/glut/ggi/Makefile +++ b/src/glut/ggi/Makefile @@ -46,6 +46,7 @@ clean: depend: $(SOURCES) + rm -f depend touch depend $(MKDEP) $(MKDEP_OPTIONS) -I$(TOP)/include $(SOURCES) diff --git a/src/glut/glx/Makefile b/src/glut/glx/Makefile index 7f886f775a..b61d5b0478 100644 --- a/src/glut/glx/Makefile +++ b/src/glut/glx/Makefile @@ -127,6 +127,7 @@ clean: depend: $(SOURCES) @ echo "running $(MKDEP)" + @ rm -f depend @ touch depend @ $(MKDEP) $(MKDEP_OPTIONS) -I$(TOP)/include $(SOURCES) \ $(X11_INCLUDES) > /dev/null diff --git a/src/glut/glx/descrip.mms b/src/glut/glx/descrip.mms deleted file mode 100644 index 2e858309b6..0000000000 --- a/src/glut/glx/descrip.mms +++ /dev/null @@ -1,208 +0,0 @@ -# Makefile for GLUT for VMS -# contributed by Jouk Jansen joukj@hrem.stm.tudelft.nl - -.first - define gl [---.include.gl] - -.include [---]mms-config. 
- -##### MACROS ##### -GLUT_MAJOR = 3 -GLUT_MINOR = 7 - -VPATH = RCS - -INCDIR = [---.include] -LIBDIR = [---.lib] -CFLAGS = /nowarn/include=$(INCDIR)/prefix=all/name=(as_is,short)/float=ieee/ieee=denorm - -SOURCES = \ -glut_8x13.c \ -glut_9x15.c \ -glut_bitmap.c \ -glut_bwidth.c \ -glut_cindex.c \ -glut_cmap.c \ -glut_cursor.c \ -glut_dials.c \ -glut_dstr.c \ -glut_event.c \ -glut_ext.c \ -glut_fullscrn.c \ -glut_gamemode.c \ -glut_get.c \ -glut_glxext.c \ -glut_hel10.c \ -glut_hel12.c \ -glut_hel18.c \ -glut_init.c \ -glut_input.c \ -glut_joy.c \ -glut_key.c \ -glut_keyctrl.c \ -glut_keyup.c \ -glut_menu.c \ -glut_menu2.c \ -glut_mesa.c \ -glut_modifier.c \ -glut_mroman.c \ -glut_overlay.c \ -glut_roman.c \ -glut_shapes.c \ -glut_space.c \ -glut_stroke.c \ -glut_swap.c \ -glut_swidth.c \ -glut_tablet.c \ -glut_teapot.c \ -glut_tr10.c \ -glut_tr24.c \ -glut_util.c \ -glut_vidresize.c \ -glut_warp.c \ -glut_win.c \ -glut_winmisc.c \ -layerutil.c - -OBJECTS0=glut_8x13.obj,\ -glut_9x15.obj,\ -glut_bitmap.obj,\ -glut_bwidth.obj,\ -glut_cindex.obj,\ -glut_cmap.obj,\ -glut_cursor.obj,\ -glut_dials.obj,\ -glut_dstr.obj,\ -glut_event.obj,\ -glut_ext.obj,\ -glut_fullscrn.obj,\ -glut_gamemode.obj - -OBJECTS1=glut_get.obj,\ -glut_glxext.obj,\ -glut_hel10.obj,\ -glut_hel12.obj,\ -glut_hel18.obj,\ -glut_init.obj,\ -glut_input.obj,\ -glut_joy.obj,\ -glut_key.obj,\ -glut_keyctrl.obj,\ -glut_keyup.obj,\ -glut_menu.obj,\ -glut_menu2.obj,\ -glut_mesa.obj,\ -glut_modifier.obj - -OBJECTS2=glut_mroman.obj,\ -glut_overlay.obj,\ -glut_roman.obj,\ -glut_shapes.obj,\ -glut_space.obj,\ -glut_stroke.obj,\ -glut_swap.obj,\ -glut_swidth.obj,\ -glut_tablet.obj,\ -glut_teapot.obj,\ -glut_tr10.obj,\ -glut_tr24.obj,\ -glut_util.obj,\ -glut_vidresize.obj - -OBJECTS3=glut_warp.obj,\ -glut_win.obj,\ -glut_winmisc.obj,\ -layerutil.obj - -##### RULES ##### - -VERSION=Glut V3.7 - -##### TARGETS ##### - -# Make the library -$(LIBDIR)$(GLUT_LIB) : $(OBJECTS0) $(OBJECTS1) $(OBJECTS2) $(OBJECTS3) - @ 
$(MAKELIB) $(GLUT_LIB) $(OBJECTS0) - @ library $(GLUT_LIB) $(OBJECTS1) - @ library $(GLUT_LIB) $(OBJECTS2) - @ library $(GLUT_LIB) $(OBJECTS3) - @ rename $(GLUT_LIB)* $(LIBDIR) -.ifdef SHARE - @ WRITE_ SYS$OUTPUT " generating mesagl1.opt" - @ OPEN_/WRITE FILE mesagl1.opt - @ WRITE_ FILE "!" - @ WRITE_ FILE "! mesagl1.opt generated by DESCRIP.$(MMS_EXT)" - @ WRITE_ FILE "!" - @ WRITE_ FILE "IDENTIFICATION=""$(VERSION)""" - @ WRITE_ FILE "GSMATCH=LEQUAL,3,7 - @ WRITE_ FILE "$(OBJECTS0)" - @ WRITE_ FILE "$(OBJECTS1)" - @ WRITE_ FILE "$(OBJECTS2)" - @ WRITE_ FILE "$(OBJECTS3)" - @ WRITE_ FILE "[---.lib]libmesaglu.exe/SHARE" - @ WRITE_ FILE "[---.lib]libmesagl.exe/SHARE" - @ write file "sys$library:decw$xmulibshr.exe/share" - @ WRITE_ FILE "SYS$SHARE:DECW$XEXTLIBSHR/SHARE" - @ WRITE_ FILE "SYS$SHARE:DECW$XLIBSHR/SHARE" - @ CLOSE_ FILE - @ WRITE_ SYS$OUTPUT " generating mesagl.map ..." - @ CXXLINK_/NODEB/NOSHARE/NOEXE/MAP=mesagl.map/FULL mesagl1.opt/OPT - @ WRITE_ SYS$OUTPUT " analyzing mesagl.map ..." - @ @[---.vms]ANALYZE_MAP.COM mesagl.map mesagl.opt - @ WRITE_ SYS$OUTPUT " linking $(GLUT_SHAR) ..." - @ CXXLINK_/NODEB/SHARE=$(GLUT_SHAR)/MAP=mesagl.map/FULL mesagl1.opt/opt,mesagl.opt/opt - @ rename $(GLUT_SHAR)* $(LIBDIR) -.endif - -clean : - delete *.obj;* - purge - -include mms_depend. 
- -glut_8x13.obj : glut_8x13.c -glut_9x15.obj : glut_9x15.c -glut_bitmap.obj : glut_bitmap.c -glut_bwidth.obj : glut_bwidth.c -glut_cindex.obj : glut_cindex.c -glut_cmap.obj : glut_cmap.c -glut_cursor.obj : glut_cursor.c -glut_dials.obj : glut_dials.c -glut_dstr.obj : glut_dstr.c -glut_event.obj : glut_event.c -glut_ext.obj : glut_ext.c -glut_fullscrn.obj : glut_fullscrn.c -glut_gamemode.obj : glut_gamemode.c -glut_get.obj : glut_get.c -glut_glxext.obj : glut_glxext.c -glut_hel10.obj : glut_hel10.c -glut_hel12.obj : glut_hel12.c -glut_hel18.obj : glut_hel18.c -glut_init.obj : glut_init.c -glut_input.obj : glut_input.c -glut_joy.obj : glut_joy.c -glut_key.obj : glut_key.c -glut_keyctrl.obj : glut_keyctrl.c -glut_keyup.obj : glut_keyup.c -glut_menu.obj : glut_menu.c -glut_menu2.obj : glut_menu2.c -glut_mesa.obj : glut_mesa.c -glut_modifier.obj : glut_modifier.c -glut_mroman.obj : glut_mroman.c -glut_overlay.obj : glut_overlay.c -glut_roman.obj : glut_roman.c -glut_shapes.obj : glut_shapes.c -glut_space.obj : glut_space.c -glut_stroke.obj : glut_stroke.c -glut_swap.obj : glut_swap.c -glut_swidth.obj : glut_swidth.c -glut_tablet.obj : glut_tablet.c -glut_teapot.obj : glut_teapot.c -glut_tr10.obj : glut_tr10.c -glut_tr24.obj : glut_tr24.c -glut_util.obj : glut_util.c -glut_vidresize.obj : glut_vidresize.c -glut_warp.obj : glut_warp.c -glut_win.obj : glut_win.c -glut_winmisc.obj : glut_winmisc.c -layerutil.obj : layerutil.c diff --git a/src/glut/glx/glut_event.c b/src/glut/glx/glut_event.c index b5df7b2311..cf34e05ca9 100644 --- a/src/glut/glx/glut_event.c +++ b/src/glut/glx/glut_event.c @@ -1321,7 +1321,12 @@ processWindowWorkList(GLUTwindow * window) is where the finish works gets queued for indirect contexts. 
*/ __glutSetWindow(window); - glFinish(); +#if !defined(_WIN32) + if (!window->isDirect) +#endif + { + glFinish(); + } } if (workMask & GLUT_DEBUG_WORK) { __glutSetWindow(window); diff --git a/src/glut/glx/mms_depend b/src/glut/glx/mms_depend deleted file mode 100644 index 98f87c29e2..0000000000 --- a/src/glut/glx/mms_depend +++ /dev/null @@ -1,72 +0,0 @@ -# DO NOT DELETE - -glut_8x13.obj : glutbitmap.h [---.include.gl]gl.h -glut_9x15.obj : glutbitmap.h [---.include.gl]gl.h -glut_bitmap.obj : [---.include.gl]glut.h [---.include.gl]gl.h [---.include.gl]glu.h -glut_bitmap.obj : glutint.h [---.include.gl]glx.h [---.include.gl]xmesa.h -glut_bitmap.obj : glutbitmap.h -glut_bwidth.obj : [---.include.gl]glut.h [---.include.gl]gl.h [---.include.gl]glu.h -glut_bwidth.obj : glutint.h [---.include.gl]glx.h [---.include.gl]xmesa.h -glut_bwidth.obj : glutbitmap.h -glut_cindex.obj : [---.include.gl]glut.h [---.include.gl]gl.h [---.include.gl]glu.h -glut_cindex.obj : glutint.h [---.include.gl]glx.h [---.include.gl]xmesa.h -glut_cindex.obj : layerutil.h -glut_cursor.obj : [---.include.gl]glut.h [---.include.gl]gl.h [---.include.gl]glu.h -glut_cursor.obj : glutint.h [---.include.gl]glx.h [---.include.gl]xmesa.h -glut_dials.obj : glutint.h [---.include.gl]glx.h [---.include.gl]gl.h -glut_dials.obj : [---.include.gl]xmesa.h [---.include.gl]glut.h [---.include.gl]glu.h -glut_dstr.obj : glutint.h [---.include.gl]glx.h [---.include.gl]gl.h -glut_dstr.obj : [---.include.gl]xmesa.h [---.include.gl]glut.h [---.include.gl]glu.h -glut_event.obj : [---.include.gl]glut.h [---.include.gl]gl.h [---.include.gl]glu.h -glut_event.obj : glutint.h [---.include.gl]glx.h [---.include.gl]xmesa.h -glut_ext.obj : [---.include.gl]glut.h [---.include.gl]gl.h [---.include.gl]glu.h -glut_ext.obj : glutint.h [---.include.gl]glx.h [---.include.gl]xmesa.h -glut_fullscrn.obj : glutint.h [---.include.gl]glx.h [---.include.gl]gl.h -glut_fullscrn.obj : [---.include.gl]xmesa.h [---.include.gl]glut.h 
-glut_fullscrn.obj : [---.include.gl]glu.h -glut_get.obj : [---.include.gl]glut.h [---.include.gl]gl.h [---.include.gl]glu.h -glut_get.obj : glutint.h [---.include.gl]glx.h [---.include.gl]xmesa.h -glut_hel10.obj : glutbitmap.h [---.include.gl]gl.h -glut_hel12.obj : glutbitmap.h [---.include.gl]gl.h -glut_hel18.obj : glutbitmap.h [---.include.gl]gl.h -glut_init.obj : [---.include.gl]glut.h [---.include.gl]gl.h [---.include.gl]glu.h -glut_init.obj : glutint.h [---.include.gl]glx.h [---.include.gl]xmesa.h -glut_menu.obj : [---.include.gl]glut.h [---.include.gl]gl.h [---.include.gl]glu.h -glut_menu.obj : glutint.h [---.include.gl]glx.h [---.include.gl]xmesa.h layerutil.h -glut_mesa.obj : glutint.h [---.include.gl]glx.h [---.include.gl]gl.h -glut_mesa.obj : [---.include.gl]xmesa.h [---.include.gl]glut.h [---.include.gl]glu.h -glut_modifier.obj : glutint.h [---.include.gl]glx.h [---.include.gl]gl.h -glut_modifier.obj : [---.include.gl]xmesa.h [---.include.gl]glut.h -glut_modifier.obj : [---.include.gl]glu.h -glut_mroman.obj : glutstroke.h -glut_overlay.obj : [---.include.gl]glut.h [---.include.gl]gl.h [---.include.gl]glu.h -glut_overlay.obj : glutint.h [---.include.gl]glx.h [---.include.gl]xmesa.h -glut_overlay.obj : layerutil.h -glut_roman.obj : glutstroke.h -glut_shapes.obj : [---.include.gl]glut.h [---.include.gl]gl.h [---.include.gl]glu.h -glut_shapes.obj : glutint.h [---.include.gl]glx.h [---.include.gl]xmesa.h -glut_space.obj : glutint.h [---.include.gl]glx.h [---.include.gl]gl.h -glut_space.obj : [---.include.gl]xmesa.h [---.include.gl]glut.h [---.include.gl]glu.h -glut_stroke.obj : [---.include.gl]glut.h [---.include.gl]gl.h [---.include.gl]glu.h -glut_stroke.obj : glutint.h [---.include.gl]glx.h [---.include.gl]xmesa.h -glut_stroke.obj : glutstroke.h -glut_swidth.obj : [---.include.gl]glut.h [---.include.gl]gl.h [---.include.gl]glu.h -glut_swidth.obj : glutint.h [---.include.gl]glx.h [---.include.gl]xmesa.h -glut_swidth.obj : glutstroke.h -glut_tablet.obj : 
glutint.h [---.include.gl]glx.h [---.include.gl]gl.h -glut_tablet.obj : [---.include.gl]xmesa.h [---.include.gl]glut.h [---.include.gl]glu.h -glut_teapot.obj : [---.include.gl]glut.h [---.include.gl]gl.h [---.include.gl]glu.h -glut_tr10.obj : glutbitmap.h [---.include.gl]gl.h -glut_tr24.obj : glutbitmap.h [---.include.gl]gl.h -glut_util.obj : [---.include.gl]glut.h [---.include.gl]gl.h [---.include.gl]glu.h -glut_util.obj : glutint.h [---.include.gl]glx.h [---.include.gl]xmesa.h -glut_vidresize.obj : [---.include.gl]glx.h [---.include.gl]gl.h -glut_vidresize.obj : [---.include.gl]xmesa.h glutint.h [---.include.gl]glut.h -glut_vidresize.obj : [---.include.gl]glu.h -glut_warp.obj : [---.include.gl]glut.h [---.include.gl]gl.h [---.include.gl]glu.h -glut_warp.obj : glutint.h [---.include.gl]glx.h [---.include.gl]xmesa.h -glut_win.obj : [---.include.gl]glut.h [---.include.gl]gl.h [---.include.gl]glu.h -glut_win.obj : glutint.h [---.include.gl]glx.h [---.include.gl]xmesa.h -glut_winmisc.obj : [---.include.gl]glut.h [---.include.gl]gl.h [---.include.gl]glu.h -glut_winmisc.obj : glutint.h [---.include.gl]glx.h [---.include.gl]xmesa.h -layerutil.obj : layerutil.h diff --git a/src/glut/mini/Makefile b/src/glut/mini/Makefile index e47d09edb7..7e7afc3533 100644 --- a/src/glut/mini/Makefile +++ b/src/glut/mini/Makefile @@ -64,6 +64,7 @@ $(TOP)/$(LIB_DIR)/$(GLUT_LIB_NAME): depend $(OBJECTS) # Run 'make -f Makefile.solo dep' to update the dependencies if you change # what's included by any source file. depend: $(SOURCES) + rm -f depend touch depend $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(SOURCES) > /dev/null diff --git a/src/glw/Makefile b/src/glw/Makefile index 753c4b74d4..c2e8e3046b 100644 --- a/src/glw/Makefile +++ b/src/glw/Makefile @@ -61,6 +61,7 @@ $(TOP)/$(LIB_DIR)/$(GLW_LIB_NAME): $(OBJECTS) # by any source file. 
# depend: $(GLW_SOURCES) + rm -f depend touch depend $(MKDEP) $(MKDEP_OPTIONS) -I$(TOP)/include $(GLW_SOURCES) \ $(X11_INCLUDES) > /dev/null diff --git a/src/glx/mini/Makefile b/src/glx/mini/Makefile index aabc1d2be3..6b5a3c76d7 100644 --- a/src/glx/mini/Makefile +++ b/src/glx/mini/Makefile @@ -65,6 +65,7 @@ drmtest: xf86drm.o drmtest.o depend: $(C_SOURCES) $(ASM_SOURCES) + rm -f depend touch depend $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(ASM_SOURCES) \ > /dev/null diff --git a/src/glx/x11/Makefile b/src/glx/x11/Makefile index 419cb2853b..86d84d4b9f 100644 --- a/src/glx/x11/Makefile +++ b/src/glx/x11/Makefile @@ -39,9 +39,9 @@ SOURCES = \ dri2_glx.c \ dri2.c -include $(TOP)/src/mesa/sources +include $(TOP)/src/mesa/sources.mak -MESA_ASM_API = $(addprefix $(TOP)/src/mesa/, $(ASM_API)) +MESA_GLAPI_ASM_SOURCES = $(addprefix $(TOP)/src/mesa/, $(GLAPI_ASM_SOURCES)) MESA_GLAPI_SOURCES = $(addprefix $(TOP)/src/mesa/, $(GLAPI_SOURCES)) MESA_GLAPI_OBJECTS = $(addprefix $(TOP)/src/mesa/, $(GLAPI_OBJECTS)) @@ -77,10 +77,11 @@ $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(OBJECTS) Makefile $(GL_LIB_DEPS) $(OBJECTS) -depend: $(SOURCES) $(MESA_GLAPI_SOURCES) $(MESA_ASM_API) Makefile +depend: $(SOURCES) $(MESA_GLAPI_SOURCES) $(MESA_GLAPI_ASM_SOURCES) Makefile + rm -f depend touch depend $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(SOURCES) \ - $(MESA_GLAPI_SOURCES) $(MESA_ASM_API) + $(MESA_GLAPI_SOURCES) $(MESA_GLAPI_ASM_SOURCES) # Emacs tags diff --git a/src/glx/x11/glxclient.h b/src/glx/x11/glxclient.h index fe2f540e0f..80f28332b7 100644 --- a/src/glx/x11/glxclient.h +++ b/src/glx/x11/glxclient.h @@ -605,6 +605,8 @@ extern void __glXSendLargeCommand(__GLXcontext *, const GLvoid *, GLint, /* Initialize the GLX extension for dpy */ extern __GLXdisplayPrivate *__glXInitialize(Display*); +extern void __glXPreferEGL(int state); + /************************************************************************/ extern int __glXDebug; diff --git a/src/glx/x11/indirect.c 
b/src/glx/x11/indirect.c index 3228e2d6fc..82e1b2edcc 100644 --- a/src/glx/x11/indirect.c +++ b/src/glx/x11/indirect.c @@ -7649,26 +7649,6 @@ __indirect_glGetProgramivARB(GLenum target, GLenum pname, GLint * params) #define X_GLrop_ProgramEnvParameter4dvARB 4185 void -__indirect_glProgramEnvParameter4dARB(GLenum target, GLuint index, GLdouble x, - GLdouble y, GLdouble z, GLdouble w) -{ - __GLXcontext *const gc = __glXGetCurrentContext(); - const GLuint cmdlen = 44; - emit_header(gc->pc, X_GLrop_ProgramEnvParameter4dvARB, cmdlen); - (void) memcpy((void *) (gc->pc + 4), (void *) (&target), 4); - (void) memcpy((void *) (gc->pc + 8), (void *) (&index), 4); - (void) memcpy((void *) (gc->pc + 12), (void *) (&x), 8); - (void) memcpy((void *) (gc->pc + 20), (void *) (&y), 8); - (void) memcpy((void *) (gc->pc + 28), (void *) (&z), 8); - (void) memcpy((void *) (gc->pc + 36), (void *) (&w), 8); - gc->pc += cmdlen; - if (__builtin_expect(gc->pc > gc->limit, 0)) { - (void) __glXFlushRenderBuffer(gc, gc->pc); - } -} - -#define X_GLrop_ProgramEnvParameter4dvARB 4185 -void __indirect_glProgramEnvParameter4dvARB(GLenum target, GLuint index, const GLdouble * params) { @@ -7686,26 +7666,6 @@ __indirect_glProgramEnvParameter4dvARB(GLenum target, GLuint index, #define X_GLrop_ProgramEnvParameter4fvARB 4184 void -__indirect_glProgramEnvParameter4fARB(GLenum target, GLuint index, GLfloat x, - GLfloat y, GLfloat z, GLfloat w) -{ - __GLXcontext *const gc = __glXGetCurrentContext(); - const GLuint cmdlen = 28; - emit_header(gc->pc, X_GLrop_ProgramEnvParameter4fvARB, cmdlen); - (void) memcpy((void *) (gc->pc + 4), (void *) (&target), 4); - (void) memcpy((void *) (gc->pc + 8), (void *) (&index), 4); - (void) memcpy((void *) (gc->pc + 12), (void *) (&x), 4); - (void) memcpy((void *) (gc->pc + 16), (void *) (&y), 4); - (void) memcpy((void *) (gc->pc + 20), (void *) (&z), 4); - (void) memcpy((void *) (gc->pc + 24), (void *) (&w), 4); - gc->pc += cmdlen; - if (__builtin_expect(gc->pc > gc->limit, 
0)) { - (void) __glXFlushRenderBuffer(gc, gc->pc); - } -} - -#define X_GLrop_ProgramEnvParameter4fvARB 4184 -void __indirect_glProgramEnvParameter4fvARB(GLenum target, GLuint index, const GLfloat * params) { diff --git a/src/glx/x11/indirect.h b/src/glx/x11/indirect.h index 0719a1b302..f8c88b36bb 100644 --- a/src/glx/x11/indirect.h +++ b/src/glx/x11/indirect.h @@ -517,9 +517,7 @@ extern HIDDEN void __indirect_glGetProgramivARB(GLenum target, GLenum pname, GLi extern HIDDEN void __indirect_glGetVertexAttribdvARB(GLuint index, GLenum pname, GLdouble * params); extern HIDDEN void __indirect_glGetVertexAttribfvARB(GLuint index, GLenum pname, GLfloat * params); extern HIDDEN void __indirect_glGetVertexAttribivARB(GLuint index, GLenum pname, GLint * params); -extern HIDDEN void __indirect_glProgramEnvParameter4dARB(GLenum target, GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w); extern HIDDEN void __indirect_glProgramEnvParameter4dvARB(GLenum target, GLuint index, const GLdouble * params); -extern HIDDEN void __indirect_glProgramEnvParameter4fARB(GLenum target, GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); extern HIDDEN void __indirect_glProgramEnvParameter4fvARB(GLenum target, GLuint index, const GLfloat * params); extern HIDDEN void __indirect_glProgramLocalParameter4dARB(GLenum target, GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w); extern HIDDEN void __indirect_glProgramLocalParameter4dvARB(GLenum target, GLuint index, const GLdouble * params); diff --git a/src/glx/x11/indirect_init.c b/src/glx/x11/indirect_init.c index 852fe712c6..479184337c 100644 --- a/src/glx/x11/indirect_init.c +++ b/src/glx/x11/indirect_init.c @@ -526,9 +526,7 @@ __GLapi * __glXNewIndirectAPI( void ) glAPI->GetVertexAttribdvARB = __indirect_glGetVertexAttribdvARB; glAPI->GetVertexAttribfvARB = __indirect_glGetVertexAttribfvARB; glAPI->GetVertexAttribivARB = __indirect_glGetVertexAttribivARB; - glAPI->ProgramEnvParameter4dARB = 
__indirect_glProgramEnvParameter4dARB; glAPI->ProgramEnvParameter4dvARB = __indirect_glProgramEnvParameter4dvARB; - glAPI->ProgramEnvParameter4fARB = __indirect_glProgramEnvParameter4fARB; glAPI->ProgramEnvParameter4fvARB = __indirect_glProgramEnvParameter4fvARB; glAPI->ProgramLocalParameter4dARB = __indirect_glProgramLocalParameter4dARB; glAPI->ProgramLocalParameter4dvARB = __indirect_glProgramLocalParameter4dvARB; diff --git a/src/mesa/Makefile b/src/mesa/Makefile index ba65ce695f..2fba2e153b 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -3,8 +3,7 @@ TOP = ../.. include $(TOP)/configs/current -include sources - +include sources.mak .SUFFIXES : .cpp @@ -24,6 +23,9 @@ include sources default: depend asm_subdirs libmesa.a libglapi.a driver_subdirs +# Default: build dependencies, then asm_subdirs, then convenience +# libs (.a) and finally the device drivers: +default: depend asm_subdirs libmesa.a $(GLAPI_LIB) driver_subdirs ###################################################################### # Helper libraries used by many drivers: @@ -36,6 +38,13 @@ libmesa.a: $(MESA_OBJECTS) libglapi.a: $(GLAPI_OBJECTS) @ $(MKLIB) -o glapi -static $(GLAPI_OBJECTS) +# Make archive of gl* API dispatcher functions only +$(GLAPI_LIB): $(GLAPI_OBJECTS) + @if [ "${WINDOW_SYSTEM}" = "dri" ] ; then \ + touch libglapi.a ; \ + else \ + $(TOP)/bin/mklib -o glapi -static $(GLAPI_OBJECTS) ; \ + fi ###################################################################### # Device drivers diff --git a/src/mesa/Makefile.DJ b/src/mesa/Makefile.DJ deleted file mode 100644 index 06a13fb1ab..0000000000 --- a/src/mesa/Makefile.DJ +++ /dev/null @@ -1,166 +0,0 @@ -# Mesa 3-D graphics library -# Version: 5.1 -# -# Copyright (C) 1999-2003 Brian Paul All Rights Reserved. 
-# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN -# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -# DOS/DJGPP core makefile v1.7 for Mesa -# -# Copyright (C) 2002 - Daniel Borca -# Email : dborca@users.sourceforge.net -# Web : http://www.geocities.com/dborca - - -# -# Available options: -# -# Environment variables: -# CFLAGS -# -# GLIDE path to Glide3 SDK; used with FX. -# default = $(TOP)/glide3 -# FX=1 build for 3dfx Glide3. Note that this disables -# compilation of most DMesa code and requires fxMesa. -# As a consequence, you'll need the DJGPP Glide3 -# library to build any application. -# default = no -# X86=1 optimize for x86 (if possible, use MMX, SSE, 3DNow). -# default = no -# -# Targets: -# all: build GL -# clean: remove object files -# - - - -.PHONY: all clean -.INTERMEDIATE: x86/gen_matypes.exe - -TOP = ../.. 
-GLIDE ?= $(TOP)/glide3 -LIBDIR = $(TOP)/lib -GL_LIB = libgl.a -GL_DXE = gl.dxe -GL_IMP = libigl.a - -export LD_LIBRARY_PATH := $(LD_LIBRARY_PATH);$(LIBDIR);$(GLIDE)/lib - -CC = gcc -CFLAGS += $(INCLUDE_DIRS) -CFLAGS += -DUSE_EXTERNAL_DXTN_LIB=1 -ifeq ($(FX),1) -CFLAGS += -D__DOS__ -CFLAGS += -I$(GLIDE)/include -DFX -LIBNAME = "Mesa/FX DJGPP" -else -LIBNAME = "Mesa DJGPP" -endif - -AR = ar -ARFLAGS = crus - -HAVEDXE3 = $(wildcard $(DJDIR)/bin/dxe3gen.exe) - -ifeq ($(wildcard $(addsuffix /rm.exe,$(subst ;, ,$(PATH)))),) -UNLINK = del $(subst /,\,$(1)) -else -UNLINK = $(RM) $(1) -endif - -include sources - -ifeq ($(X86),1) -CFLAGS += -DUSE_X86_ASM -CFLAGS += -DUSE_MMX_ASM -CFLAGS += -DUSE_SSE_ASM -CFLAGS += -DUSE_3DNOW_ASM -X86_SOURCES += $(X86_API) -else -X86_SOURCES = -endif - -DRIVER_SOURCES = \ - drivers/dos/dmesa.c -ifeq ($(FX),1) -DRIVER_SOURCES += \ - $(GLIDE_DRIVER_SOURCES) -else -DRIVER_SOURCES += \ - $(OSMESA_DRIVER_SOURCES) \ - drivers/dos/video.c \ - drivers/dos/vesa.c \ - drivers/dos/blit.S \ - drivers/dos/vga.c \ - drivers/dos/null.c \ - drivers/dos/dpmi.c -endif - -SOURCES = $(CORE_SOURCES) $(X86_SOURCES) $(COMMON_DRIVER_SOURCES) $(DRIVER_SOURCES) - -OBJECTS = $(addsuffix .o,$(basename $(SOURCES))) - -X86_OBJECTS = $(addsuffix .o,$(basename $(X86_SOURCES))) - -.c.o: - $(CC) -o $@ $(CFLAGS) -c $< -.S.o: - $(CC) -o $@ $(CFLAGS) -c $< -.s.o: - $(CC) -o $@ $(CFLAGS) -x assembler-with-cpp -c $< - -all: $(LIBDIR)/$(GL_LIB) $(LIBDIR)/$(GL_DXE) $(LIBDIR)/$(GL_IMP) - -$(LIBDIR)/$(GL_LIB): $(OBJECTS) - $(AR) $(ARFLAGS) $@ $^ - -$(LIBDIR)/$(GL_DXE) $(LIBDIR)/$(GL_IMP): $(OBJECTS) -ifeq ($(HAVEDXE3),) - $(warning Missing DXE3 package... 
Skipping $(GL_DXE)) -else -ifeq ($(FX),1) - -dxe3gen -o $(LIBDIR)/$(GL_DXE) -Y $(LIBDIR)/$(GL_IMP) -D $(LIBNAME) -E _gl -E _DMesa -P glide3x.dxe -U $^ -else - -dxe3gen -o $(LIBDIR)/$(GL_DXE) -Y $(LIBDIR)/$(GL_IMP) -D $(LIBNAME) -E _gl -E _DMesa -U $^ -endif -endif - -$(X86_OBJECTS): x86/matypes.h - -x86/matypes.h: x86/gen_matypes.exe - $< > $@ - -x86/gen_matypes.exe: x86/gen_matypes.c - $(CC) -o $@ $(CFLAGS) -s $< - -clean: - -$(call UNLINK,array_cache/*.o) - -$(call UNLINK,glapi/*.o) - -$(call UNLINK,main/*.o) - -$(call UNLINK,math/*.o) - -$(call UNLINK,shader/*.o) - -$(call UNLINK,sparc/*.o) - -$(call UNLINK,ppc/*.o) - -$(call UNLINK,swrast/*.o) - -$(call UNLINK,swrast_setup/*.o) - -$(call UNLINK,tnl/*.o) - -$(call UNLINK,x86/*.o) - -$(call UNLINK,drivers/common/*.o) - -$(call UNLINK,drivers/dos/*.o) - -$(call UNLINK,drivers/glide/*.o) diff --git a/src/mesa/Makefile.mgw b/src/mesa/Makefile.mgw index 886b344ec9..097c390a83 100644 --- a/src/mesa/Makefile.mgw +++ b/src/mesa/Makefile.mgw @@ -124,7 +124,7 @@ ifneq ($(wildcard $(addsuffix /rm,$(subst :, ,$(PATH)))),) UNLINK = $(RM) $(1) endif -include sources +include sources.mak CFLAGS += $(INCLUDE_DIRS) diff --git a/src/mesa/SConscript b/src/mesa/SConscript new file mode 100644 index 0000000000..89b98b37ab --- /dev/null +++ b/src/mesa/SConscript @@ -0,0 +1,339 @@ +####################################################################### +# SConscript for Mesa + + +Import('*') + +if env['platform'] != 'winddk': + + env = env.Clone() + + env.Append(CPPPATH = [ + '#/src/mesa', + ]) + + if gcc: + env.Append(CFLAGS = [ + '-std=c99', + ]) + + # + # Source files + # + + main_sources = [ + 'main/api_arrayelt.c', + 'main/api_exec.c', + 'main/api_loopback.c', + 'main/api_noop.c', + 'main/api_validate.c', + 'main/accum.c', + 'main/attrib.c', + 'main/arrayobj.c', + 'main/blend.c', + 'main/bufferobj.c', + 'main/buffers.c', + 'main/clear.c', + 'main/clip.c', + 'main/colortab.c', + 'main/context.c', + 'main/convolve.c', + 
'main/debug.c', + 'main/depth.c', + 'main/depthstencil.c', + 'main/dlist.c', + 'main/drawpix.c', + 'main/enable.c', + 'main/enums.c', + 'main/eval.c', + 'main/execmem.c', + 'main/extensions.c', + 'main/fbobject.c', + 'main/feedback.c', + 'main/ffvertex_prog.c', + 'main/fog.c', + 'main/framebuffer.c', + 'main/get.c', + 'main/getstring.c', + 'main/hash.c', + 'main/hint.c', + 'main/histogram.c', + 'main/image.c', + 'main/imports.c', + 'main/light.c', + 'main/lines.c', + 'main/matrix.c', + 'main/mipmap.c', + 'main/mm.c', + 'main/multisample.c', + 'main/pixel.c', + 'main/pixelstore.c', + 'main/points.c', + 'main/polygon.c', + 'main/queryobj.c', + 'main/rastpos.c', + 'main/rbadaptors.c', + 'main/readpix.c', + 'main/renderbuffer.c', + 'main/scissor.c', + 'main/shaders.c', + 'main/state.c', + 'main/stencil.c', + 'main/texcompress.c', + 'main/texcompress_s3tc.c', + 'main/texcompress_fxt1.c', + 'main/texenv.c', + 'main/texenvprogram.c', + 'main/texformat.c', + 'main/texgen.c', + 'main/teximage.c', + 'main/texobj.c', + 'main/texparam.c', + 'main/texrender.c', + 'main/texstate.c', + 'main/texstore.c', + 'main/varray.c', + 'main/vtxfmt.c', + ] + + math_sources = [ + 'math/m_debug_clip.c', + 'math/m_debug_norm.c', + 'math/m_debug_xform.c', + 'math/m_eval.c', + 'math/m_matrix.c', + 'math/m_translate.c', + 'math/m_vector.c', + 'math/m_xform.c', + ] + + vbo_sources = [ + 'vbo/vbo_context.c', + 'vbo/vbo_exec.c', + 'vbo/vbo_exec_api.c', + 'vbo/vbo_exec_array.c', + 'vbo/vbo_exec_draw.c', + 'vbo/vbo_exec_eval.c', + 'vbo/vbo_rebase.c', + 'vbo/vbo_split.c', + 'vbo/vbo_split_copy.c', + 'vbo/vbo_split_inplace.c', + 'vbo/vbo_save.c', + 'vbo/vbo_save_api.c', + 'vbo/vbo_save_draw.c', + 'vbo/vbo_save_loopback.c', + ] + + vf_sources = [ + 'vf/vf.c', + 'vf/vf_generic.c', + 'vf/vf_sse.c', + ] + + statetracker_sources = [ + 'state_tracker/st_atom.c', + 'state_tracker/st_atom_blend.c', + 'state_tracker/st_atom_clip.c', + 'state_tracker/st_atom_constbuf.c', + 'state_tracker/st_atom_depth.c', + 
'state_tracker/st_atom_framebuffer.c', + 'state_tracker/st_atom_pixeltransfer.c', + 'state_tracker/st_atom_sampler.c', + 'state_tracker/st_atom_scissor.c', + 'state_tracker/st_atom_shader.c', + 'state_tracker/st_atom_rasterizer.c', + 'state_tracker/st_atom_stipple.c', + 'state_tracker/st_atom_texture.c', + 'state_tracker/st_atom_viewport.c', + 'state_tracker/st_cb_accum.c', + 'state_tracker/st_cb_bitmap.c', + 'state_tracker/st_cb_blit.c', + 'state_tracker/st_cb_bufferobjects.c', + 'state_tracker/st_cb_clear.c', + 'state_tracker/st_cb_flush.c', + 'state_tracker/st_cb_drawpixels.c', + 'state_tracker/st_cb_fbo.c', + 'state_tracker/st_cb_get.c', + 'state_tracker/st_cb_feedback.c', + 'state_tracker/st_cb_program.c', + 'state_tracker/st_cb_queryobj.c', + 'state_tracker/st_cb_rasterpos.c', + 'state_tracker/st_cb_readpixels.c', + 'state_tracker/st_cb_strings.c', + 'state_tracker/st_cb_texture.c', + 'state_tracker/st_context.c', + 'state_tracker/st_debug.c', + 'state_tracker/st_draw.c', + 'state_tracker/st_extensions.c', + 'state_tracker/st_format.c', + 'state_tracker/st_framebuffer.c', + 'state_tracker/st_gen_mipmap.c', + 'state_tracker/st_mesa_to_tgsi.c', + 'state_tracker/st_program.c', + 'state_tracker/st_texture.c', + ] + + shader_sources = [ + 'shader/arbprogparse.c', + 'shader/arbprogram.c', + 'shader/atifragshader.c', + 'shader/grammar/grammar_mesa.c', + 'shader/nvfragparse.c', + 'shader/nvprogram.c', + 'shader/nvvertparse.c', + 'shader/program.c', + 'shader/prog_cache.c', + 'shader/prog_debug.c', + 'shader/prog_execute.c', + 'shader/prog_instruction.c', + 'shader/prog_parameter.c', + 'shader/prog_print.c', + 'shader/prog_statevars.c', + 'shader/prog_uniform.c', + 'shader/programopt.c', + 'shader/shader_api.c', + ] + + slang_sources = [ + 'shader/slang/slang_builtin.c', + 'shader/slang/slang_codegen.c', + 'shader/slang/slang_compile.c', + 'shader/slang/slang_compile_function.c', + 'shader/slang/slang_compile_operation.c', + 'shader/slang/slang_compile_struct.c', + 
'shader/slang/slang_compile_variable.c', + 'shader/slang/slang_emit.c', + 'shader/slang/slang_ir.c', + 'shader/slang/slang_label.c', + 'shader/slang/slang_library_noise.c', + 'shader/slang/slang_link.c', + 'shader/slang/slang_log.c', + 'shader/slang/slang_mem.c', + 'shader/slang/slang_preprocess.c', + 'shader/slang/slang_print.c', + 'shader/slang/slang_simplify.c', + 'shader/slang/slang_storage.c', + 'shader/slang/slang_typeinfo.c', + 'shader/slang/slang_vartable.c', + 'shader/slang/slang_utility.c', + ] + + mesa_sources = ( + main_sources + + math_sources + + vbo_sources + + vf_sources + + statetracker_sources + + shader_sources + + slang_sources + ) + + glapi_sources = [ + 'main/dispatch.c', + 'glapi/glapi.c', + 'glapi/glapi_getproc.c', + 'glapi/glthread.c', + ] + + # + # Assembly sources + # + if gcc and env['machine'] == 'x86': + env.Append(CPPDEFINES = [ + 'USE_X86_ASM', + 'USE_MMX_ASM', + 'USE_3DNOW_ASM', + 'USE_SSE_ASM', + ]) + mesa_sources += [ + 'x86/common_x86.c', + 'x86/x86.c', + 'x86/3dnow.c', + 'x86/sse.c', + 'x86/common_x86_asm.S', + 'x86/x86_xform2.S', + 'x86/x86_xform3.S', + 'x86/x86_xform4.S', + 'x86/x86_cliptest.S', + 'x86/mmx_blend.S', + 'x86/3dnow_xform1.S', + 'x86/3dnow_xform2.S', + 'x86/3dnow_xform3.S', + 'x86/3dnow_xform4.S', + 'x86/3dnow_normal.S', + 'x86/sse_xform1.S', + 'x86/sse_xform2.S', + 'x86/sse_xform3.S', + 'x86/sse_xform4.S', + 'x86/sse_normal.S', + 'x86/read_rgba_span_x86.S', + ] + glapi_sources += [ + 'x86/glapi_x86.S', + ] + elif gcc and env['machine'] == 'x86_64': + env.Append(CPPDEFINES = [ + 'USE_X86_64_ASM', + ]) + mesa_sources += [ + 'x86-64/x86-64.c', + 'x86-64/xform4.S', + ] + glapi_sources += [ + 'x86-64/glapi_x86-64.S' + ] + elif gcc and env['machine'] == 'ppc': + env.Append(CPPDEFINES = [ + 'USE_PPC_ASM', + 'USE_VMX_ASM', + ]) + mesa_sources += [ + 'ppc/common_ppc.c', + ] + glapi_sources += [ + ] + elif gcc and env['machine'] == 'sparc': + mesa_sources += [ + 'sparc/sparc.c', + 'sparc/clip.S', + 'sparc/norm.S', + 
'sparc/xform.S', + ] + glapi_sources += [ + 'sparc/glapi_sparc.S' + ] + else: + pass + + # Generate matypes.h + if gcc and env['machine'] in ('x86', 'x86_64'): + # See http://www.scons.org/wiki/UsingCodeGenerators + gen_matypes = env.Program( + target = 'gen_matypes', + source = 'x86/gen_matypes.c', + ) + matypes = env.Command( + 'matypes.h', + gen_matypes, + gen_matypes[0].abspath + ' > $TARGET', + ) + # Add the dir containing the generated header (somewhere inside the + # build dir) to the include path + env.Append(CPPPATH = [matypes[0].dir]) + + # + # Libraries + # + + mesa = env.ConvenienceLibrary( + target = 'mesa', + source = mesa_sources, + ) + Export('mesa') + + if not dri: + glapi = env.ConvenienceLibrary( + target = 'glapi', + source = glapi_sources, + ) + Export('glapi') diff --git a/src/mesa/drivers/dri/Makefile.template b/src/mesa/drivers/dri/Makefile.template index 2dc3664cc6..2fa36bab3f 100644 --- a/src/mesa/drivers/dri/Makefile.template +++ b/src/mesa/drivers/dri/Makefile.template @@ -2,13 +2,16 @@ MESA_MODULES = $(TOP)/src/mesa/libmesa.a -COMMON_SOURCES = \ +COMMON_GALLIUM_SOURCES = \ ../common/utils.c \ - ../common/texmem.c \ ../common/vblank.c \ ../common/dri_util.c \ - ../common/xmlconfig.c \ - ../common/drirenderbuffer.c + ../common/xmlconfig.c + +COMMON_SOURCES = $(COMMON_GALLIUM_SOURCES) \ + ../../common/driverfuncs.c \ + ../common/texmem.c \ + ../common/drirenderbuffer.c ifeq ($(WINDOW_SYSTEM),dri) WINOBJ= @@ -59,9 +62,9 @@ SHARED_INCLUDES = \ default: symlinks depend $(LIBNAME) $(TOP)/$(LIB_DIR)/$(LIBNAME) -$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(WINOBJ) Makefile $(TOP)/src/mesa/drivers/dri/Makefile.template +$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(PIPE_DRIVERS) $(WINOBJ) Makefile $(TOP)/src/mesa/drivers/dri/Makefile.template $(MKLIB) -o $@ -noprefix -linker '$(CC)' -ldflags '$(LDFLAGS)' \ - $(OBJECTS) $(MESA_MODULES) $(WINOBJ) $(DRI_LIB_DEPS) + $(OBJECTS) $(PIPE_DRIVERS) $(MESA_MODULES) $(WINOBJ) $(DRI_LIB_DEPS) 
$(TOP)/$(LIB_DIR)/$(LIBNAME): $(LIBNAME) @@ -69,6 +72,7 @@ $(TOP)/$(LIB_DIR)/$(LIBNAME): $(LIBNAME) depend: $(C_SOURCES) $(ASM_SOURCES) $(SYMLINKS) + rm -f depend touch depend $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) \ $(ASM_SOURCES) diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index ceedd914fb..a16cb504c7 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -315,9 +315,6 @@ static void driSwapBuffers(__DRIdrawable *dPriv) { __DRIscreen *psp = dPriv->driScreenPriv; - if (!dPriv->numClipRects) - return; - psp->DriverAPI.SwapBuffers(dPriv); driReportDamage(dPriv, dPriv->pClipRects, dPriv->numClipRects); diff --git a/src/mesa/drivers/dri/common/dri_util.h b/src/mesa/drivers/dri/common/dri_util.h index c0e1bea5e0..0feb57b3c6 100644 --- a/src/mesa/drivers/dri/common/dri_util.h +++ b/src/mesa/drivers/dri/common/dri_util.h @@ -1,25 +1,3 @@ -/* $XFree86: xc/lib/GL/dri/dri_util.h,v 1.1 2002/02/22 21:32:52 dawes Exp $ */ -/** - * \file dri_util.h - * DRI utility functions definitions. - * - * This module acts as glue between GLX and the actual hardware driver. A DRI - * driver doesn't really \e have to use any of this - it's optional. But, some - * useful stuff is done here that otherwise would have to be duplicated in most - * drivers. - * - * Basically, these utility functions take care of some of the dirty details of - * screen initialization, context creation, context binding, DRM setup, etc. - * - * These functions are compiled into each DRI driver so libGL.so knows nothing - * about them. - * - * \sa dri_util.c. - * - * \author Kevin E. Martin <kevin@precisioninsight.com> - * \author Brian Paul <brian@precisioninsight.com> - */ - /* * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. * All Rights Reserved. @@ -45,6 +23,26 @@ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/** + * \file dri_util.h + * DRI utility functions definitions. + * + * This module acts as glue between GLX and the actual hardware driver. A DRI + * driver doesn't really \e have to use any of this - it's optional. But, some + * useful stuff is done here that otherwise would have to be duplicated in most + * drivers. + * + * Basically, these utility functions take care of some of the dirty details of + * screen initialization, context creation, context binding, DRM setup, etc. + * + * These functions are compiled into each DRI driver so libGL.so knows nothing + * about them. + * + * \sa dri_util.c. + * + * \author Kevin E. Martin <kevin@precisioninsight.com> + * \author Brian Paul <brian@precisioninsight.com> + */ #ifndef _DRI_UTIL_H_ #define _DRI_UTIL_H_ diff --git a/src/mesa/drivers/dri/common/drirenderbuffer.c b/src/mesa/drivers/dri/common/drirenderbuffer.c index b99bf2033b..15af99136c 100644 --- a/src/mesa/drivers/dri/common/drirenderbuffer.c +++ b/src/mesa/drivers/dri/common/drirenderbuffer.c @@ -209,6 +209,8 @@ driUpdateFramebufferSize(GLcontext *ctx, const __DRIdrawablePrivate *dPriv) struct gl_framebuffer *fb = (struct gl_framebuffer *) dPriv->driverPrivate; if (fb && (dPriv->w != fb->Width || dPriv->h != fb->Height)) { ctx->Driver.ResizeBuffers(ctx, fb, dPriv->w, dPriv->h); + /* if the driver needs the hw lock for ResizeBuffers, the drawable + might have changed again by now */ assert(fb->Width == dPriv->w); assert(fb->Height == dPriv->h); } diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c index 30c860b96c..2a1ded3871 100644 --- a/src/mesa/drivers/dri/common/utils.c +++ b/src/mesa/drivers/dri/common/utils.c @@ -310,8 +310,10 @@ void driInitSingleExtension( GLcontext * ctx, */ offset = _glapi_add_dispatch( functions, parameter_signature ); if (offset == -1) { +#if 0 /* this causes noise with egl */ fprintf(stderr, "DISPATCH ERROR! 
_glapi_add_dispatch failed " "to add %s!\n", functions[0]); +#endif } else if (ext->functions[i].remap_index != -1) { driDispatchRemapTable[ ext->functions[i].remap_index ] = @@ -504,6 +506,9 @@ GLboolean driClipRectToFramebuffer( const GLframebuffer *buffer, * \c GLX_SWAP_UNDEFINED_OML. See the * GLX_OML_swap_method extension spec for more details. * \param num_db_modes Number of entries in \c db_modes. + * \param msaa_samples Array of msaa sample count. 0 represents a visual + * without a multisample buffer. + * \param num_msaa_modes Number of entries in \c msaa_samples. * \param visType GLX visual type. Usually either \c GLX_TRUE_COLOR or * \c GLX_DIRECT_COLOR. * @@ -523,7 +528,8 @@ __DRIconfig ** driCreateConfigs(GLenum fb_format, GLenum fb_type, const uint8_t * depth_bits, const uint8_t * stencil_bits, unsigned num_depth_stencil_bits, - const GLenum * db_modes, unsigned num_db_modes) + const GLenum * db_modes, unsigned num_db_modes, + const u_int8_t * msaa_samples, unsigned num_msaa_modes) { static const uint8_t bits_table[4][4] = { /* R G B A */ @@ -583,9 +589,7 @@ driCreateConfigs(GLenum fb_format, GLenum fb_type, int index; __DRIconfig **configs, **c; __GLcontextModes *modes; - unsigned i; - unsigned j; - unsigned k; + unsigned i, j, k, h; unsigned num_modes; unsigned num_accum_bits = 2; @@ -658,7 +662,7 @@ driCreateConfigs(GLenum fb_format, GLenum fb_type, break; } - num_modes = num_depth_stencil_bits * num_db_modes * num_accum_bits; + num_modes = num_depth_stencil_bits * num_db_modes * num_accum_bits * num_msaa_modes; configs = _mesa_calloc((num_modes + 1) * sizeof *configs); if (configs == NULL) return NULL; @@ -666,66 +670,72 @@ driCreateConfigs(GLenum fb_format, GLenum fb_type, c = configs; for ( k = 0 ; k < num_depth_stencil_bits ; k++ ) { for ( i = 0 ; i < num_db_modes ; i++ ) { - for ( j = 0 ; j < num_accum_bits ; j++ ) { - *c = _mesa_malloc (sizeof **c); - modes = &(*c)->modes; - c++; - - memset(modes, 0, sizeof *modes); - modes->redBits = bits[0]; 
- modes->greenBits = bits[1]; - modes->blueBits = bits[2]; - modes->alphaBits = bits[3]; - modes->redMask = masks[0]; - modes->greenMask = masks[1]; - modes->blueMask = masks[2]; - modes->alphaMask = masks[3]; - modes->rgbBits = modes->redBits + modes->greenBits - + modes->blueBits + modes->alphaBits; - - modes->accumRedBits = 16 * j; - modes->accumGreenBits = 16 * j; - modes->accumBlueBits = 16 * j; - modes->accumAlphaBits = (masks[3] != 0) ? 16 * j : 0; - modes->visualRating = (j == 0) ? GLX_NONE : GLX_SLOW_CONFIG; - - modes->stencilBits = stencil_bits[k]; - modes->depthBits = depth_bits[k]; - - modes->transparentPixel = GLX_NONE; - modes->transparentRed = GLX_DONT_CARE; - modes->transparentGreen = GLX_DONT_CARE; - modes->transparentBlue = GLX_DONT_CARE; - modes->transparentAlpha = GLX_DONT_CARE; - modes->transparentIndex = GLX_DONT_CARE; - modes->visualType = GLX_DONT_CARE; - modes->renderType = GLX_RGBA_BIT; - modes->drawableType = GLX_WINDOW_BIT; - modes->rgbMode = GL_TRUE; - - if ( db_modes[i] == GLX_NONE ) { - modes->doubleBufferMode = GL_FALSE; - } - else { - modes->doubleBufferMode = GL_TRUE; - modes->swapMethod = db_modes[i]; - } - - modes->haveAccumBuffer = ((modes->accumRedBits + + for ( h = 0 ; h < num_msaa_modes; h++ ) { + for ( j = 0 ; j < num_accum_bits ; j++ ) { + *c = _mesa_malloc (sizeof **c); + modes = &(*c)->modes; + c++; + + memset(modes, 0, sizeof *modes); + modes->redBits = bits[0]; + modes->greenBits = bits[1]; + modes->blueBits = bits[2]; + modes->alphaBits = bits[3]; + modes->redMask = masks[0]; + modes->greenMask = masks[1]; + modes->blueMask = masks[2]; + modes->alphaMask = masks[3]; + modes->rgbBits = modes->redBits + modes->greenBits + + modes->blueBits + modes->alphaBits; + + modes->accumRedBits = 16 * j; + modes->accumGreenBits = 16 * j; + modes->accumBlueBits = 16 * j; + modes->accumAlphaBits = (masks[3] != 0) ? 16 * j : 0; + modes->visualRating = (j == 0) ? 
GLX_NONE : GLX_SLOW_CONFIG; + + modes->stencilBits = stencil_bits[k]; + modes->depthBits = depth_bits[k]; + + modes->transparentPixel = GLX_NONE; + modes->transparentRed = GLX_DONT_CARE; + modes->transparentGreen = GLX_DONT_CARE; + modes->transparentBlue = GLX_DONT_CARE; + modes->transparentAlpha = GLX_DONT_CARE; + modes->transparentIndex = GLX_DONT_CARE; + modes->visualType = GLX_DONT_CARE; + modes->renderType = GLX_RGBA_BIT; + modes->drawableType = GLX_WINDOW_BIT; + modes->rgbMode = GL_TRUE; + + if ( db_modes[i] == GLX_NONE ) { + modes->doubleBufferMode = GL_FALSE; + } + else { + modes->doubleBufferMode = GL_TRUE; + modes->swapMethod = db_modes[i]; + } + + modes->samples = msaa_samples[h]; + modes->sampleBuffers = modes->samples ? 1 : 0; + + + modes->haveAccumBuffer = ((modes->accumRedBits + modes->accumGreenBits + modes->accumBlueBits + modes->accumAlphaBits) > 0); - modes->haveDepthBuffer = (modes->depthBits > 0); - modes->haveStencilBuffer = (modes->stencilBits > 0); - - modes->bindToTextureRgb = GL_TRUE; - modes->bindToTextureRgba = GL_TRUE; - modes->bindToMipmapTexture = GL_FALSE; - modes->bindToTextureTargets = modes->rgbMode ? - __DRI_ATTRIB_TEXTURE_1D_BIT | - __DRI_ATTRIB_TEXTURE_2D_BIT | - __DRI_ATTRIB_TEXTURE_RECTANGLE_BIT : - 0; + modes->haveDepthBuffer = (modes->depthBits > 0); + modes->haveStencilBuffer = (modes->stencilBits > 0); + + modes->bindToTextureRgb = GL_TRUE; + modes->bindToTextureRgba = GL_TRUE; + modes->bindToMipmapTexture = GL_FALSE; + modes->bindToTextureTargets = modes->rgbMode ? 
+ __DRI_ATTRIB_TEXTURE_1D_BIT | + __DRI_ATTRIB_TEXTURE_2D_BIT | + __DRI_ATTRIB_TEXTURE_RECTANGLE_BIT : + 0; + } } } } diff --git a/src/mesa/drivers/dri/common/utils.h b/src/mesa/drivers/dri/common/utils.h index 0c974dbff3..4e27bd21a1 100644 --- a/src/mesa/drivers/dri/common/utils.h +++ b/src/mesa/drivers/dri/common/utils.h @@ -131,7 +131,8 @@ extern __DRIconfig ** driCreateConfigs(GLenum fb_format, GLenum fb_type, const uint8_t * depth_bits, const uint8_t * stencil_bits, unsigned num_depth_stencil_bits, - const GLenum * db_modes, unsigned num_db_modes); + const GLenum * db_modes, unsigned num_db_modes, + const uint8_t * msaa_samples, unsigned num_msaa_modes); const __DRIconfig **driConcatConfigs(__DRIconfig **a, __DRIconfig **b); diff --git a/src/mesa/drivers/dri/glcore/Makefile b/src/mesa/drivers/dri/glcore/Makefile new file mode 100644 index 0000000000..ac7e1de928 --- /dev/null +++ b/src/mesa/drivers/dri/glcore/Makefile @@ -0,0 +1,84 @@ +# src/mesa/drivers/dri/glcore/Makefile + +TOP = ../../../../.. +include $(TOP)/configs/current + +LIBNAME = glcore_dri.so + +DRIVER_SOURCES = glcore_driver.c \ + $(TOP)/src/mesa/drivers/common/driverfuncs.c \ + ../common/dri_util.c + +C_SOURCES = \ + $(DRIVER_SOURCES) \ + $(DRI_SOURCES) + + +# Include directories +INCLUDE_DIRS = \ + -I. 
\ + -I../common \ + -I../dri_client \ + -I../dri_client/imports \ + -Iserver \ + -I$(TOP)/include \ + -I$(DRM_SOURCE_PATH)/shared-core \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main \ + -I$(TOP)/src/mesa/glapi \ + -I$(TOP)/src/mesa/math \ + -I$(TOP)/src/mesa/transform \ + -I$(TOP)/src/mesa/shader \ + -I$(TOP)/src/mesa/swrast \ + -I$(TOP)/src/mesa/swrast_setup + +# Core Mesa objects +MESA_MODULES = $(TOP)/src/mesa/libmesa.a + +# Libraries that the driver shared lib depends on +LIB_DEPS = -lm -lpthread -lc +# LIB_DEPS = -lGL -lm -lpthread -lc + + +ASM_SOURCES = + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(ASM_SOURCES:.S=.o) + + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $(DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $(DEFINES) $< -o $@ + + +##### TARGETS ##### + +default: depend $(TOP)/$(LIB_DIR)/$(LIBNAME) + + +$(TOP)/$(LIB_DIR)/$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(WINOBJ) Makefile + CC="$(CC)" CXX="$(CXX)" $(TOP)/bin/mklib -o $(LIBNAME) -noprefix -install $(TOP)/$(LIB_DIR) \ + $(OBJECTS) $(WINLIB) $(LIB_DEPS) $(WINOBJ) $(MESA_MODULES) + + +depend: $(C_SOURCES) $(ASM_SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDE_DIRS) $(C_SOURCES) $(ASM_SOURCES) \ + > /dev/null + + +# Emacs tags +tags: + etags `find . -name \*.[ch]` `find ../include` + + +clean: + -rm -f *.o server/*.o + + +include depend diff --git a/src/mesa/glapi/descrip.mms b/src/mesa/glapi/descrip.mms deleted file mode 100644 index ee96baa6fc..0000000000 --- a/src/mesa/glapi/descrip.mms +++ /dev/null @@ -1,39 +0,0 @@ -# Makefile for core library for VMS -# contributed by Jouk Jansen joukj@hrem.nano.tudelft.nl -# Last revision : 29 September 2008 - -.first - define gl [---.include.gl] - define main [-.main] - -.include [---]mms-config. 
- -##### MACROS ##### - -VPATH = RCS - -INCDIR = [---.include],[-.main] -LIBDIR = [---.lib] -CFLAGS = /include=($(INCDIR),[])/define=(PTHREADS=1)/name=(as_is,short)/float=ieee/ieee=denorm - -SOURCES = glapi.c glthread.c glapi_getproc.c - -OBJECTS = glapi.obj,glthread.obj,glapi_getproc.obj - -##### RULES ##### - -VERSION=Mesa V3.4 - -##### TARGETS ##### -# Make the library -$(LIBDIR)$(GL_LIB) : $(OBJECTS) - @ library $(LIBDIR)$(GL_LIB) $(OBJECTS) - -clean : - purge - delete *.obj;* - -glapi.obj : glapi.c - -glthread.obj : glthread.c -glapi_getproc.obj : glapi_getproc.c diff --git a/src/mesa/glapi/glapi.c b/src/mesa/glapi/glapi.c index c3ebf60719..9b5144a88b 100644 --- a/src/mesa/glapi/glapi.c +++ b/src/mesa/glapi/glapi.c @@ -98,6 +98,7 @@ _glapi_set_warning_func( _glapi_warning_func func ) static GLboolean warn(void) { +#if !defined(_WIN32_WCE) if ((WarnFlag || getenv("MESA_DEBUG") || getenv("LIBGL_DEBUG")) && warning_func) { return GL_TRUE; @@ -105,6 +106,9 @@ warn(void) else { return GL_FALSE; } +#else + return GL_FALSE; +#endif } diff --git a/src/mesa/glapi/glthread.c b/src/mesa/glapi/glthread.c index b818f4e26e..56ddf7c25a 100644 --- a/src/mesa/glapi/glthread.c +++ b/src/mesa/glapi/glthread.c @@ -25,16 +25,14 @@ /* * XXX There's probably some work to do in order to make this file - * truly reusable outside of Mesa. + * truly reusable outside of Mesa. First, the glheader.h include must go. 
*/ - #ifdef HAVE_DIX_CONFIG_H #include <dix-config.h> #endif -#include <stdlib.h> -#include <stdio.h> +#include "main/glheader.h" #include "glthread.h" diff --git a/src/mesa/glapi/glthread.h b/src/mesa/glapi/glthread.h index e2765cebb1..ad2a63584b 100644 --- a/src/mesa/glapi/glthread.h +++ b/src/mesa/glapi/glthread.h @@ -116,9 +116,49 @@ typedef pthread_mutex_t _glthread_Mutex; #define _glthread_UNLOCK_MUTEX(name) \ (void) pthread_mutex_unlock(&(name)) -#endif /* PTHREADS */ +typedef pthread_cond_t _glthread_Cond; +#define _glthread_DECLARE_STATIC_COND(name) \ + static _glthread_Cond name = PTHREAD_COND_INITIALIZER +#define _glthread_INIT_COND(cond) \ + pthread_cond_init(&(cond), NULL) + +#define _glthread_DESTROY_COND(name) \ + pthread_cond_destroy(&(name)) + +#define _glthread_COND_WAIT(cond, mutex) \ + pthread_cond_wait(&(cond), &(mutex)) + +#define _glthread_COND_SIGNAL(cond) \ + pthread_cond_signal(&(cond)) + +#define _glthread_COND_BROADCAST(cond) \ + pthread_cond_broadcast(&(cond)) + + +#else /* PTHREADS */ + +typedef unsigned int _glthread_Cond; +#define _glthread_DECLARE_STATIC_COND(name) \ +// #warning Condition variables not implemented. 
+ +#define _glthread_INIT_COND(cond) \ + ASSERT(0); + +#define _glthread_DESTROY_COND(name) \ + ASSERT(0); + +#define _glthread_COND_WAIT(cond, mutex) \ + ASSERT(0); + +#define _glthread_COND_SIGNAL(cond) \ + ASSERT(0); + +#define _glthread_COND_BROADCAST(cond) \ + ASSERT(0); + +#endif /* @@ -259,11 +299,11 @@ typedef benaphore _glthread_Mutex; * THREADS not defined */ -typedef int _glthread_TSD; +typedef GLuint _glthread_TSD; -typedef int _glthread_Thread; +typedef GLuint _glthread_Thread; -typedef int _glthread_Mutex; +typedef GLuint _glthread_Mutex; #define _glthread_DECLARE_STATIC_MUTEX(name) static _glthread_Mutex name = 0 diff --git a/src/mesa/main/api_validate.c b/src/mesa/main/api_validate.c index 98dfbb105f..bbc5933ab9 100644 --- a/src/mesa/main/api_validate.c +++ b/src/mesa/main/api_validate.c @@ -119,6 +119,12 @@ _mesa_validate_DrawElements(GLcontext *ctx, /* use indices in the buffer object */ GLuint indexBytes; + if (!ctx->Array.ElementArrayBufferObj->Size) { + _mesa_warning(ctx, + "glDrawElements called with empty array elements buffer"); + return GL_FALSE; + } + if (type == GL_UNSIGNED_INT) { indexBytes = count * sizeof(GLuint); } diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index 918e87246f..190e6ab564 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -38,6 +38,13 @@ #include "bufferobj.h" +#ifdef FEATURE_OES_mapbuffer +#define DEFAULT_ACCESS GL_WRITE_ONLY; +#else +#define DEFAULT_ACCESS GL_READ_WRITE; +#endif + + /** * Get the buffer object bound to the specified target in a GL context. 
* @@ -255,7 +262,7 @@ _mesa_initialize_buffer_object( struct gl_buffer_object *obj, obj->RefCount = 1; obj->Name = name; obj->Usage = GL_STATIC_DRAW_ARB; - obj->Access = GL_READ_WRITE_ARB; + obj->Access = DEFAULT_ACCESS; } @@ -664,8 +671,8 @@ _mesa_map_drawpix_pbo(GLcontext *ctx, * \sa _mesa_unmap_bitmap_pbo */ void -_mesa_unmap_drapix_pbo(GLcontext *ctx, - const struct gl_pixelstore_attrib *unpack) +_mesa_unmap_drawpix_pbo(GLcontext *ctx, + const struct gl_pixelstore_attrib *unpack) { if (unpack->BufferObj->Name) { ctx->Driver.UnmapBuffer(ctx, GL_PIXEL_UNPACK_BUFFER_EXT, @@ -1065,7 +1072,7 @@ _mesa_UnmapBufferARB(GLenum target) status = ctx->Driver.UnmapBuffer( ctx, target, bufObj ); } - bufObj->Access = GL_READ_WRITE_ARB; /* initial value, OK? */ + bufObj->Access = DEFAULT_ACCESS; bufObj->Pointer = NULL; return status; diff --git a/src/mesa/main/bufferobj.h b/src/mesa/main/bufferobj.h index 163fe241aa..3c08f0083c 100644 --- a/src/mesa/main/bufferobj.h +++ b/src/mesa/main/bufferobj.h @@ -103,9 +103,8 @@ _mesa_map_drawpix_pbo(GLcontext *ctx, const GLvoid *pixels); extern void -_mesa_unmap_drapix_pbo(GLcontext *ctx, - const struct gl_pixelstore_attrib *unpack); - +_mesa_unmap_drawpix_pbo(GLcontext *ctx, + const struct gl_pixelstore_attrib *unpack); extern void * _mesa_map_readpix_pbo(GLcontext *ctx, diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index b23d2f612b..5dd85de272 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -419,7 +419,7 @@ _mesa_drawbuffers(GLcontext *ctx, GLuint n, const GLenum *buffers, } } - ctx->NewState |= _NEW_COLOR; + ctx->NewState |= _NEW_BUFFERS; } @@ -494,6 +494,7 @@ _mesa_ReadBuffer(GLenum buffer) /* OK, all error checking has been completed now */ _mesa_readbuffer(ctx, buffer, srcBuffer); + ctx->NewState |= _NEW_BUFFERS; /* * Call device driver function. 
diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h index 882e2f224a..3b340c476c 100644 --- a/src/mesa/main/config.h +++ b/src/mesa/main/config.h @@ -176,13 +176,11 @@ /** For GL_ARB_fragment_program */ /*@{*/ #define MAX_FRAGMENT_PROGRAM_ADDRESS_REGS 0 -#define MAX_FRAGMENT_PROGRAM_ALU_INSTRUCTIONS 48 -#define MAX_FRAGMENT_PROGRAM_TEX_INSTRUCTIONS 24 -#define MAX_FRAGMENT_PROGRAM_TEX_INDIRECTIONS 4 /*@}*/ /** For any program target/extension */ /*@{*/ +#define MAX_PROGRAM_INSTRUCTIONS (16 * 1024) #define MAX_PROGRAM_LOCAL_PARAMS 128 /* KW: power of two */ #define MAX_PROGRAM_ENV_PARAMS 128 #define MAX_PROGRAM_MATRICES 8 diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index a41cb42952..61c0861cbd 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -145,9 +145,7 @@ #include "glapi/glthread.h" #include "glapi/glapioffsets.h" #include "glapi/glapitable.h" -#if FEATURE_NV_vertex_program || FEATURE_NV_fragment_program #include "shader/program.h" -#endif #include "shader/shader_api.h" #if FEATURE_ATI_fragment_shader #include "shader/atifragshader.h" @@ -186,9 +184,11 @@ GLfloat _mesa_ubyte_to_float_color_tab[256]; * We have to finish any pending rendering. */ void -_mesa_notifySwapBuffers(__GLcontext *gc) +_mesa_notifySwapBuffers(__GLcontext *ctx) { - FLUSH_VERTICES( gc, 0 ); + if (ctx->Driver.Flush) { + ctx->Driver.Flush(ctx); + } } @@ -611,6 +611,7 @@ delete_program_cb(GLuint id, void *data, void *userData) ctx->Driver.DeleteProgram(ctx, prog); } +#if FEATURE_ATI_fragment_shader /** * Callback for deleting an ATI fragment shader object. * Called by _mesa_HashDeleteAll(). @@ -622,6 +623,7 @@ delete_fragshader_cb(GLuint id, void *data, void *userData) GLcontext *ctx = (GLcontext *) userData; _mesa_delete_ati_fragment_shader(ctx, shader); } +#endif /** * Callback for deleting a buffer object. Called by _mesa_HashDeleteAll(). 
@@ -712,7 +714,6 @@ delete_renderbuffer_cb(GLuint id, void *data, void *userData) } - /** * Deallocate a shared state object and all children structures. * @@ -818,11 +819,33 @@ _mesa_init_current(GLcontext *ctx) /** - * Init vertex/fragment program native limits from logical limits. + * Init vertex/fragment program limits. + * Important: drivers should override these with actual limits. */ static void -init_natives(struct gl_program_constants *prog) +init_program_limits(GLenum type, struct gl_program_constants *prog) { + prog->MaxInstructions = MAX_PROGRAM_INSTRUCTIONS; + prog->MaxAluInstructions = MAX_PROGRAM_INSTRUCTIONS; + prog->MaxTexInstructions = MAX_PROGRAM_INSTRUCTIONS; + prog->MaxTexIndirections = MAX_PROGRAM_INSTRUCTIONS; + prog->MaxTemps = MAX_PROGRAM_TEMPS; + prog->MaxEnvParams = MAX_PROGRAM_ENV_PARAMS; + prog->MaxLocalParams = MAX_PROGRAM_LOCAL_PARAMS; + prog->MaxUniformComponents = 4 * MAX_UNIFORMS; + + if (type == GL_VERTEX_PROGRAM_ARB) { + prog->MaxParameters = MAX_NV_VERTEX_PROGRAM_PARAMS; + prog->MaxAttribs = MAX_NV_VERTEX_PROGRAM_INPUTS; + prog->MaxAddressRegs = MAX_VERTEX_PROGRAM_ADDRESS_REGS; + } + else { + prog->MaxParameters = MAX_NV_FRAGMENT_PROGRAM_PARAMS; + prog->MaxAttribs = MAX_NV_FRAGMENT_PROGRAM_INPUTS; + prog->MaxAddressRegs = MAX_FRAGMENT_PROGRAM_ADDRESS_REGS; + } + + /* copy the above limits to init native limits */ prog->MaxNativeInstructions = prog->MaxInstructions; prog->MaxNativeAluInstructions = prog->MaxAluInstructions; prog->MaxNativeTexInstructions = prog->MaxTexInstructions; @@ -884,33 +907,10 @@ _mesa_init_constants(GLcontext *ctx) ctx->Const.MaxViewportWidth = MAX_WIDTH; ctx->Const.MaxViewportHeight = MAX_HEIGHT; #if FEATURE_ARB_vertex_program - ctx->Const.VertexProgram.MaxInstructions = MAX_NV_VERTEX_PROGRAM_INSTRUCTIONS; - ctx->Const.VertexProgram.MaxAluInstructions = 0; - ctx->Const.VertexProgram.MaxTexInstructions = 0; - ctx->Const.VertexProgram.MaxTexIndirections = 0; - ctx->Const.VertexProgram.MaxAttribs = 
MAX_NV_VERTEX_PROGRAM_INPUTS; - ctx->Const.VertexProgram.MaxTemps = MAX_PROGRAM_TEMPS; - ctx->Const.VertexProgram.MaxParameters = MAX_NV_VERTEX_PROGRAM_PARAMS; - ctx->Const.VertexProgram.MaxLocalParams = MAX_PROGRAM_LOCAL_PARAMS; - ctx->Const.VertexProgram.MaxEnvParams = MAX_PROGRAM_ENV_PARAMS; - ctx->Const.VertexProgram.MaxAddressRegs = MAX_VERTEX_PROGRAM_ADDRESS_REGS; - ctx->Const.VertexProgram.MaxUniformComponents = 4 * MAX_UNIFORMS; - init_natives(&ctx->Const.VertexProgram); + init_program_limits(GL_VERTEX_PROGRAM_ARB, &ctx->Const.VertexProgram); #endif - #if FEATURE_ARB_fragment_program - ctx->Const.FragmentProgram.MaxInstructions = MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS; - ctx->Const.FragmentProgram.MaxAluInstructions = MAX_FRAGMENT_PROGRAM_ALU_INSTRUCTIONS; - ctx->Const.FragmentProgram.MaxTexInstructions = MAX_FRAGMENT_PROGRAM_TEX_INSTRUCTIONS; - ctx->Const.FragmentProgram.MaxTexIndirections = MAX_FRAGMENT_PROGRAM_TEX_INDIRECTIONS; - ctx->Const.FragmentProgram.MaxAttribs = MAX_NV_FRAGMENT_PROGRAM_INPUTS; - ctx->Const.FragmentProgram.MaxTemps = MAX_PROGRAM_TEMPS; - ctx->Const.FragmentProgram.MaxParameters = MAX_NV_FRAGMENT_PROGRAM_PARAMS; - ctx->Const.FragmentProgram.MaxLocalParams = MAX_PROGRAM_LOCAL_PARAMS; - ctx->Const.FragmentProgram.MaxEnvParams = MAX_PROGRAM_ENV_PARAMS; - ctx->Const.FragmentProgram.MaxAddressRegs = MAX_FRAGMENT_PROGRAM_ADDRESS_REGS; - ctx->Const.FragmentProgram.MaxUniformComponents = 4 * MAX_UNIFORMS; - init_natives(&ctx->Const.FragmentProgram); + init_program_limits(GL_FRAGMENT_PROGRAM_ARB, &ctx->Const.FragmentProgram); #endif ctx->Const.MaxProgramMatrices = MAX_PROGRAM_MATRICES; ctx->Const.MaxProgramMatrixStackDepth = MAX_PROGRAM_MATRIX_STACK_DEPTH; @@ -1063,6 +1063,7 @@ init_attrib_groups(GLcontext *ctx) /* Miscellaneous */ ctx->NewState = _NEW_ALL; ctx->ErrorValue = (GLenum) GL_NO_ERROR; + ctx->varying_vp_inputs = ~0; return GL_TRUE; } @@ -1162,7 +1163,7 @@ _mesa_initialize_context(GLcontext *ctx, const struct dd_function_table 
*driverFunctions, void *driverContext) { - ASSERT(driverContext); + /*ASSERT(driverContext);*/ assert(driverFunctions->NewTextureObject); assert(driverFunctions->FreeTexImageData); @@ -1267,7 +1268,7 @@ _mesa_create_context(const GLvisual *visual, GLcontext *ctx; ASSERT(visual); - ASSERT(driverContext); + /*ASSERT(driverContext);*/ ctx = (GLcontext *) _mesa_calloc(sizeof(GLcontext)); if (!ctx) @@ -1315,7 +1316,9 @@ _mesa_free_context_data( GLcontext *ctx ) _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._Current, NULL); _mesa_reference_fragprog(ctx, &ctx->FragmentProgram._TexEnvProgram, NULL); +#if FEATURE_attrib_stack _mesa_free_attrib_data(ctx); +#endif _mesa_free_lighting_data( ctx ); #if FEATURE_evaluators _mesa_free_eval_data( ctx ); @@ -1632,7 +1635,22 @@ _mesa_make_current( GLcontext *newCtx, GLframebuffer *drawBuffer, * or not bound to a user-created FBO. */ if (!newCtx->DrawBuffer || newCtx->DrawBuffer->Name == 0) { + /* KW: merge conflict here, revisit. + */ + /* fix up the fb fields - these will end up wrong otherwise + * if the DRIdrawable changes, and everything relies on them. 
+ * This is a bit messy (same as needed in _mesa_BindFramebufferEXT) + */ + unsigned int i; + GLenum buffers[MAX_DRAW_BUFFERS]; + _mesa_reference_framebuffer(&newCtx->DrawBuffer, drawBuffer); + + for(i = 0; i < newCtx->Const.MaxDrawBuffers; i++) { + buffers[i] = newCtx->Color.DrawBuffer[i]; + } + + _mesa_drawbuffers(newCtx, newCtx->Const.MaxDrawBuffers, buffers, NULL); } if (!newCtx->ReadBuffer || newCtx->ReadBuffer->Name == 0) { _mesa_reference_framebuffer(&newCtx->ReadBuffer, readBuffer); diff --git a/src/mesa/main/drawpix.c b/src/mesa/main/drawpix.c index ac97bc1ff9..13cfa0e756 100644 --- a/src/mesa/main/drawpix.c +++ b/src/mesa/main/drawpix.c @@ -153,7 +153,7 @@ _mesa_CopyPixels( GLint srcx, GLint srcy, GLsizei width, GLsizei height, return; } - if (!ctx->Current.RasterPosValid) { + if (!ctx->Current.RasterPosValid || width ==0 || height == 0) { return; } @@ -218,7 +218,7 @@ _mesa_Bitmap( GLsizei width, GLsizei height, if (ctx->RenderMode == GL_RENDER) { /* Truncate, to satisfy conformance tests (matches SGI's OpenGL). 
*/ - const GLfloat epsilon = (const GLfloat)0.0001; + const GLfloat epsilon = 0.0001F; GLint x = IFLOOR(ctx->Current.RasterPos[0] + epsilon - xorig); GLint y = IFLOOR(ctx->Current.RasterPos[1] + epsilon - yorig); diff --git a/src/mesa/main/enable.c b/src/mesa/main/enable.c index e0483a72e8..248df1badc 100644 --- a/src/mesa/main/enable.c +++ b/src/mesa/main/enable.c @@ -929,6 +929,7 @@ _mesa_set_enable(GLcontext *ctx, GLenum cap, GLboolean state) break; /* GL_MESA_program_debug */ +#if FEATURE_MESA_program_debug case GL_FRAGMENT_PROGRAM_CALLBACK_MESA: CHECK_EXTENSION(MESA_program_debug, cap); ctx->FragmentProgram.CallbackEnabled = state; @@ -937,6 +938,7 @@ _mesa_set_enable(GLcontext *ctx, GLenum cap, GLboolean state) CHECK_EXTENSION(MESA_program_debug, cap); ctx->VertexProgram.CallbackEnabled = state; break; +#endif #if FEATURE_ATI_fragment_shader case GL_FRAGMENT_SHADER_ATI: @@ -1378,12 +1380,15 @@ _mesa_IsEnabled( GLenum cap ) return ctx->Depth.BoundsTest; /* GL_MESA_program_debug */ +#if FEATURE_MESA_program_debug case GL_FRAGMENT_PROGRAM_CALLBACK_MESA: CHECK_EXTENSION(MESA_program_debug); return ctx->FragmentProgram.CallbackEnabled; case GL_VERTEX_PROGRAM_CALLBACK_MESA: CHECK_EXTENSION(MESA_program_debug); return ctx->VertexProgram.CallbackEnabled; +#endif + #if FEATURE_ATI_fragment_shader case GL_FRAGMENT_SHADER_ATI: CHECK_EXTENSION(ATI_fragment_shader); diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c index de75325f15..95bf1165f4 100644 --- a/src/mesa/main/extensions.c +++ b/src/mesa/main/extensions.c @@ -86,7 +86,7 @@ static const struct { { OFF, "GL_EXT_blend_logic_op", F(EXT_blend_logic_op) }, { OFF, "GL_EXT_blend_minmax", F(EXT_blend_minmax) }, { OFF, "GL_EXT_blend_subtract", F(EXT_blend_subtract) }, - { ON, "GL_EXT_clip_volume_hint", F(EXT_clip_volume_hint) }, + { OFF, "GL_EXT_clip_volume_hint", F(EXT_clip_volume_hint) }, { OFF, "GL_EXT_cull_vertex", F(EXT_cull_vertex) }, { ON, "GL_EXT_compiled_vertex_array", 
F(EXT_compiled_vertex_array) }, { OFF, "GL_EXT_convolution", F(EXT_convolution) }, diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 4c92d1fb5a..dd06327972 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -173,7 +173,7 @@ _mesa_remove_attachment(GLcontext *ctx, struct gl_renderbuffer_attachment *att) if (att->Type == GL_TEXTURE) { ASSERT(att->Texture); if (ctx->Driver.FinishRenderTexture) { - /* tell driver we're done rendering to this texobj */ + /* tell driver that we're done rendering to this texture. */ ctx->Driver.FinishRenderTexture(ctx, att); } _mesa_reference_texobj(&att->Texture, NULL); /* unbind */ diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c index 308b4ef711..da2640dd8f 100644 --- a/src/mesa/main/ffvertex_prog.c +++ b/src/mesa/main/ffvertex_prog.c @@ -47,17 +47,17 @@ struct state_key { + unsigned light_color_material_mask:12; + unsigned light_material_mask:12; unsigned light_global_enabled:1; unsigned light_local_viewer:1; unsigned light_twoside:1; unsigned light_color_material:1; - unsigned light_color_material_mask:12; - unsigned light_material_mask:12; unsigned material_shininess_is_zero:1; - unsigned need_eye_coords:1; unsigned normalize:1; unsigned rescale_normals:1; + unsigned fog_source_is_depth:1; unsigned tnl_do_vertex_fog:1; unsigned separate_specular:1; @@ -67,6 +67,8 @@ struct state_key { unsigned texture_enabled_global:1; unsigned fragprog_inputs_read:12; + unsigned varying_vp_inputs; + struct { unsigned light_enabled:1; unsigned light_eyepos3_is_zero:1; @@ -193,6 +195,7 @@ static void make_state_key( GLcontext *ctx, struct state_key *key ) key->need_eye_coords = ctx->_NeedEyeCoords; key->fragprog_inputs_read = fp->Base.InputsRead; + key->varying_vp_inputs = ctx->varying_vp_inputs; if (ctx->RenderMode == GL_FEEDBACK) { /* make sure the vertprog emits color and tex0 */ @@ -448,14 +451,46 @@ static void release_temps( struct tnl_program *p ) } +static struct ureg 
register_param5(struct tnl_program *p, + GLint s0, + GLint s1, + GLint s2, + GLint s3, + GLint s4) +{ + gl_state_index tokens[STATE_LENGTH]; + GLint idx; + tokens[0] = s0; + tokens[1] = s1; + tokens[2] = s2; + tokens[3] = s3; + tokens[4] = s4; + idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens ); + return make_ureg(PROGRAM_STATE_VAR, idx); +} + + +#define register_param1(p,s0) register_param5(p,s0,0,0,0,0) +#define register_param2(p,s0,s1) register_param5(p,s0,s1,0,0,0) +#define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0) +#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0) + + /** * \param input one of VERT_ATTRIB_x tokens. */ static struct ureg register_input( struct tnl_program *p, GLuint input ) { - p->program->Base.InputsRead |= (1<<input); - return make_ureg(PROGRAM_INPUT, input); + /* Material attribs are passed here as inputs >= 32 + */ + if (input >= 32 || (p->state->varying_vp_inputs & (1<<input))) { + p->program->Base.InputsRead |= (1<<input); + return make_ureg(PROGRAM_INPUT, input); + } + else { + return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, input ); + } } /** @@ -504,31 +539,6 @@ static struct ureg get_identity_param( struct tnl_program *p ) return p->identity; } -static struct ureg register_param5(struct tnl_program *p, - GLint s0, - GLint s1, - GLint s2, - GLint s3, - GLint s4) -{ - gl_state_index tokens[STATE_LENGTH]; - GLint idx; - tokens[0] = s0; - tokens[1] = s1; - tokens[2] = s2; - tokens[3] = s3; - tokens[4] = s4; - idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens ); - return make_ureg(PROGRAM_STATE_VAR, idx); -} - - -#define register_param1(p,s0) register_param5(p,s0,0,0,0,0) -#define register_param2(p,s0,s1) register_param5(p,s0,s1,0,0,0) -#define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0) -#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0) - - static void register_matrix_param5( struct tnl_program *p, GLint 
s0, /* modelview, projection, etc */ GLint s1, /* texture matrix number */ diff --git a/src/mesa/main/glheader.h b/src/mesa/main/glheader.h index 1d0f178dc4..d69c7bbb21 100644 --- a/src/mesa/main/glheader.h +++ b/src/mesa/main/glheader.h @@ -64,22 +64,33 @@ #include <stdarg.h> -/* Get typedefs for uintptr_t and friends */ -#if defined(__MINGW32__) || defined(__NetBSD__) -# include <stdint.h> -#elif defined(_WIN32) -# include <BaseTsd.h> -# if _MSC_VER == 1200 - typedef UINT_PTR uintptr_t; -# endif -#elif defined(__INTERIX) -/* Interix 3.x has a gcc that shadows this. */ -# ifndef _UINTPTR_T_DEFINED - typedef unsigned long uintptr_t; -# define _UINTPTR_T_DEFINED +/* Get standard integer types */ +#if defined(_MSC_VER) + + typedef __int8 int8_t; + typedef unsigned __int8 uint8_t; + typedef __int16 int16_t; + typedef unsigned __int16 uint16_t; +# ifndef __eglplatform_h_ + typedef __int32 int32_t; +# endif + typedef unsigned __int32 uint32_t; + typedef __int64 int64_t; + typedef unsigned __int64 uint64_t; + +# if defined(_WIN64) + typedef __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +# else + typedef __int32 intptr_t; + typedef unsigned __int32 uintptr_t; # endif + +# define INT64_C(__val) __val##i64 +# define UINT64_C(__val) __val##ui64 + #else -# include <inttypes.h> +# include <stdint.h> #endif diff --git a/src/mesa/main/image.c b/src/mesa/main/image.c index 1a6e864b98..bd38e0c3d8 100644 --- a/src/mesa/main/image.c +++ b/src/mesa/main/image.c @@ -5119,7 +5119,7 @@ _mesa_clip_copytexsubimage(const GLcontext *ctx, const struct gl_framebuffer *fb = ctx->ReadBuffer; const GLint srcX0 = *srcX, srcY0 = *srcY; - if (_mesa_clip_to_region(fb->_Xmin, fb->_Ymin, fb->_Xmax, fb->_Ymax, + if (_mesa_clip_to_region(0, 0, fb->Width, fb->Height, srcX, srcY, width, height)) { *destX = *destX + *srcX - srcX0; *destY = *destY + *srcY - srcY0; diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index 13cb84ca4b..6cfd7ccc72 100644 --- a/src/mesa/main/imports.c +++ 
b/src/mesa/main/imports.c @@ -557,7 +557,7 @@ _mesa_pow(double x, double y) * Find the first bit set in a word. */ int -_mesa_ffs(int i) +_mesa_ffs(int32_t i) { #if (defined(_WIN32) ) || defined(__IBMC__) || defined(__IBMCPP__) register int bit = 0; @@ -594,11 +594,7 @@ _mesa_ffs(int i) * if no bits set. */ int -#ifdef __MINGW32__ -_mesa_ffsll(long val) -#else -_mesa_ffsll(long long val) -#endif +_mesa_ffsll(int64_t val) { #ifdef ffsll return ffsll(val); @@ -607,11 +603,11 @@ _mesa_ffsll(long long val) assert(sizeof(val) == 8); - bit = _mesa_ffs(val); + bit = _mesa_ffs((int32_t)val); if (bit != 0) return bit; - bit = _mesa_ffs(val >> 32); + bit = _mesa_ffs((int32_t)(val >> 32)); if (bit != 0) return 32 + bit; @@ -938,12 +934,10 @@ _mesa_sprintf( char *str, const char *fmt, ... ) void _mesa_printf( const char *fmtString, ... ) { - char s[MAXSTRING]; va_list args; va_start( args, fmtString ); - vsnprintf(s, MAXSTRING, fmtString, args); + vfprintf(stderr, fmtString, args); va_end( args ); - fprintf(stderr,"%s", s); } /** Wrapper around vsprintf() */ diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h index 89d0662f79..0e6e301fde 100644 --- a/src/mesa/main/imports.h +++ b/src/mesa/main/imports.h @@ -698,14 +698,10 @@ extern double _mesa_pow(double x, double y); extern int -_mesa_ffs(int i); +_mesa_ffs(int32_t i); extern int -#ifdef __MINGW32__ -_mesa_ffsll(long i); -#else -_mesa_ffsll(long long i); -#endif +_mesa_ffsll(int64_t i); extern unsigned int _mesa_bitcount(unsigned int n); diff --git a/src/mesa/main/light.c b/src/mesa/main/light.c index 10ee088a2d..d4db960f1b 100644 --- a/src/mesa/main/light.c +++ b/src/mesa/main/light.c @@ -1367,6 +1367,7 @@ _mesa_init_lighting( GLcontext *ctx ) /* Miscellaneous */ ctx->Light._NeedEyeCoords = GL_FALSE; ctx->_NeedEyeCoords = GL_FALSE; + ctx->_ForceEyeCoords = GL_FALSE; ctx->_ModelViewInvScale = 1.0; } diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 62bc65cc72..7ad8cb244d 100644 --- 
a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -126,6 +126,8 @@ struct gl_program_cache; struct gl_texture_format; struct gl_texture_image; struct gl_texture_object; +struct st_context; +struct pipe_surface; typedef struct __GLcontextRec GLcontext; typedef struct __GLcontextModesRec GLvisual; typedef struct gl_framebuffer GLframebuffer; @@ -1567,7 +1569,6 @@ struct gl_texture_attrib struct gl_texture_unit Unit[MAX_TEXTURE_UNITS]; - /** Proxy texture objects */ struct gl_texture_object *ProxyTex[NUM_TEXTURE_TARGETS]; /** GL_EXT_shared_texture_palette */ @@ -1986,6 +1987,8 @@ struct gl_vertex_program_state GLboolean CallbackEnabled; GLuint CurrentPosition; #endif + + GLboolean _Overriden; }; @@ -2259,6 +2262,7 @@ struct gl_renderbuffer GLubyte IndexBits; GLubyte DepthBits; GLubyte StencilBits; + GLubyte Samples; /**< Number of samples - 0 if not multisampled */ GLvoid *Data; /**< This may not be used by some kinds of RBs */ /* Used to wrap one renderbuffer around another: */ @@ -2719,6 +2723,7 @@ struct gl_matrix_stack #define _NEW_MULTISAMPLE 0x2000000 /**< __GLcontextRec::Multisample */ #define _NEW_TRACK_MATRIX 0x4000000 /**< __GLcontextRec::VertexProgram */ #define _NEW_PROGRAM 0x8000000 /**< __GLcontextRec::VertexProgram */ +#define _NEW_CURRENT_ATTRIB 0x10000000 /**< __GLcontextRec::Current */ #define _NEW_ALL ~0 /*@}*/ @@ -3046,6 +3051,8 @@ struct __GLcontextRec GLenum RenderMode; /**< either GL_RENDER, GL_SELECT, GL_FEEDBACK */ GLbitfield NewState; /**< bitwise-or of _NEW_* flags */ + GLbitfield varying_vp_inputs; /**< mask of VERT_BIT_* flags */ + /** \name Derived state */ /*@{*/ /** Bitwise-or of DD_* flags. 
Note that this bitfield may be used before @@ -3091,7 +3098,7 @@ struct __GLcontextRec void *swsetup_context; void *swtnl_context; void *swtnl_im; - void *acache_context; + struct st_context *st; void *aelt_context; /*@}*/ }; diff --git a/src/mesa/main/renderbuffer.c b/src/mesa/main/renderbuffer.c index 6f1d7c3960..3c37d05b40 100644 --- a/src/mesa/main/renderbuffer.c +++ b/src/mesa/main/renderbuffer.c @@ -1199,7 +1199,8 @@ _mesa_soft_renderbuffer_storage(GLcontext *ctx, struct gl_renderbuffer *rb, if (width > 0 && height > 0) { /* allocate new buffer storage */ - rb->Data = _mesa_malloc(width * height * pixelSize); + rb->Data = malloc(width * height * pixelSize); + if (rb->Data == NULL) { rb->Width = 0; rb->Height = 0; diff --git a/src/mesa/main/sources b/src/mesa/main/sources index eb8cd900ee..468121bd1d 100644 --- a/src/mesa/main/sources +++ b/src/mesa/main/sources @@ -78,7 +78,6 @@ vsnprintf.c MESA_MAIN_HEADERS = \ accum.h \ api_arrayelt.h \ -api_eval.h \ api_exec.h \ api_loopback.h \ api_noop.h \ diff --git a/src/mesa/main/state.c b/src/mesa/main/state.c index 48656bd35e..5e073a1863 100644 --- a/src/mesa/main/state.c +++ b/src/mesa/main/state.c @@ -1,6 +1,6 @@ /* * Mesa 3-D graphics library - * Version: 7.1 + * Version: 7.3 * * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. 
* @@ -441,6 +441,9 @@ _mesa_update_state_locked( GLcontext *ctx ) GLbitfield new_state = ctx->NewState; GLbitfield prog_flags = _NEW_PROGRAM; + if (new_state == _NEW_CURRENT_ATTRIB) + goto out; + if (MESA_VERBOSE & VERBOSE_STATE) _mesa_print_state("_mesa_update_state", new_state); @@ -450,7 +453,7 @@ _mesa_update_state_locked( GLcontext *ctx ) if (new_state & (_NEW_PROGRAM|_NEW_TEXTURE|_NEW_TEXTURE_MATRIX)) _mesa_update_texture( ctx, new_state ); - if (new_state & (_NEW_BUFFERS | _NEW_COLOR | _NEW_PIXEL)) + if (new_state & _NEW_BUFFERS) _mesa_update_framebuffer(ctx); if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT)) @@ -504,10 +507,13 @@ _mesa_update_state_locked( GLcontext *ctx ) _mesa_update_tnl_spaces( ctx, new_state ); if (ctx->FragmentProgram._MaintainTexEnvProgram) { - prog_flags |= (_NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR); + prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE_MATRIX | _NEW_LIGHT | + _NEW_RENDERMODE | + _NEW_TEXTURE | _NEW_FOG | _DD_NEW_SEPARATE_SPECULAR); } if (ctx->VertexProgram._MaintainTnlProgram) { prog_flags |= (_NEW_ARRAY | _NEW_TEXTURE | _NEW_TEXTURE_MATRIX | + _NEW_RENDERMODE | _NEW_TRANSFORM | _NEW_POINT | _NEW_FOG | _NEW_LIGHT | _MESA_NEW_NEED_EYE_COORDS); @@ -526,6 +532,7 @@ _mesa_update_state_locked( GLcontext *ctx ) * Set ctx->NewState to zero to avoid recursion if * Driver.UpdateState() has to call FLUSH_VERTICES(). (fixed?) */ + out: new_state = ctx->NewState; ctx->NewState = 0; ctx->Driver.UpdateState(ctx, new_state); @@ -542,3 +549,59 @@ _mesa_update_state( GLcontext *ctx ) _mesa_update_state_locked(ctx); _mesa_unlock_context_textures(ctx); } + + + + +/** + * Want to figure out which fragment program inputs are actually + * constant/current values from ctx->Current. 
These should be + * referenced as a tracked state variable rather than a fragment + * program input, to save the overhead of putting a constant value in + * every submitted vertex, transferring it to hardware, interpolating + * it across the triangle, etc... + * + * When there is a VP bound, just use vp->outputs. But when we're + * generating vp from fixed function state, basically want to + * calculate: + * + * vp_out_2_fp_in( vp_in_2_vp_out( varying_inputs ) | + * potential_vp_outputs ) + * + * Where potential_vp_outputs is calculated by looking at enabled + * texgen, etc. + * + * The generated fragment program should then only declare inputs that + * may vary or otherwise differ from the ctx->Current values. + * Otherwise, the fp should track them as state values instead. + */ +void +_mesa_set_varying_vp_inputs( GLcontext *ctx, + GLbitfield varying_inputs ) +{ + if (ctx->varying_vp_inputs != varying_inputs) { + ctx->varying_vp_inputs = varying_inputs; + ctx->NewState |= _NEW_ARRAY; + /*_mesa_printf("%s %x\n", __FUNCTION__, varying_inputs);*/ + } +} + + +/** + * Used by drivers to tell core Mesa that the driver is going to + * install/ use its own vertex program. In particular, this will + * prevent generated fragment programs from using state vars instead + * of ordinary varyings/inputs. + */ +void +_mesa_set_vp_override(GLcontext *ctx, GLboolean flag) +{ + if (ctx->VertexProgram._Overriden != flag) { + ctx->VertexProgram._Overriden = flag; + + /* Set one of the bits which will trigger fragment program + * regeneration: + */ + ctx->NewState |= _NEW_ARRAY; + } +} diff --git a/src/mesa/main/state.h b/src/mesa/main/state.h index bb7cb8f32a..29db08a0b9 100644 --- a/src/mesa/main/state.h +++ b/src/mesa/main/state.h @@ -1,6 +1,6 @@ /* * Mesa 3-D graphics library - * Version: 7.1 + * Version: 7.3 * * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. 
* @@ -29,13 +29,21 @@ #include "mtypes.h" extern void -_mesa_update_state( GLcontext *ctx ); +_mesa_update_state(GLcontext *ctx); /* As above but can only be called between _mesa_lock_context_textures() and * _mesa_unlock_context_textures(). */ extern void -_mesa_update_state_locked( GLcontext *ctx ); +_mesa_update_state_locked(GLcontext *ctx); + + +extern void +_mesa_set_varying_vp_inputs(GLcontext *ctx, GLbitfield varying_inputs); + + +extern void +_mesa_set_vp_override(GLcontext *ctx, GLboolean flag); #endif diff --git a/src/mesa/main/texcompress.c b/src/mesa/main/texcompress.c index c44d594d68..5ad936419b 100644 --- a/src/mesa/main/texcompress.c +++ b/src/mesa/main/texcompress.c @@ -137,8 +137,10 @@ _mesa_compressed_texture_size( GLcontext *ctx, ASSERT(depth == 1); (void) depth; + (void) size; switch (mesaFormat) { +#if FEATURE_texture_fxt1 case MESA_FORMAT_RGB_FXT1: case MESA_FORMAT_RGBA_FXT1: /* round up width to next multiple of 8, height to next multiple of 4 */ @@ -152,6 +154,8 @@ _mesa_compressed_texture_size( GLcontext *ctx, if (size < 16) size = 16; return size; +#endif +#if FEATURE_texture_s3tc case MESA_FORMAT_RGB_DXT1: case MESA_FORMAT_RGBA_DXT1: /* round up width, height to next multiple of 4 */ @@ -178,6 +182,7 @@ _mesa_compressed_texture_size( GLcontext *ctx, if (size < 16) size = 16; return size; +#endif default: _mesa_problem(ctx, "bad mesaFormat in _mesa_compressed_texture_size"); return 0; @@ -202,12 +207,15 @@ _mesa_compressed_texture_size_glenum(GLcontext *ctx, GLuint mesaFormat; switch (glformat) { +#if FEATURE_texture_fxt1 case GL_COMPRESSED_RGB_FXT1_3DFX: mesaFormat = MESA_FORMAT_RGB_FXT1; break; case GL_COMPRESSED_RGBA_FXT1_3DFX: mesaFormat = MESA_FORMAT_RGBA_FXT1; break; +#endif +#if FEATURE_texture_s3tc case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: case GL_RGB_S3TC: mesaFormat = MESA_FORMAT_RGB_DXT1; @@ -224,6 +232,7 @@ _mesa_compressed_texture_size_glenum(GLcontext *ctx, case GL_RGBA4_S3TC: mesaFormat = MESA_FORMAT_RGBA_DXT5; break; +#endif 
default: return 0; } @@ -245,10 +254,13 @@ _mesa_compressed_row_stride(GLuint mesaFormat, GLsizei width) GLint stride; switch (mesaFormat) { +#if FEATURE_texture_fxt1 case MESA_FORMAT_RGB_FXT1: case MESA_FORMAT_RGBA_FXT1: stride = ((width + 7) / 8) * 16; /* 16 bytes per 8x4 tile */ break; +#endif +#if FEATURE_texture_s3tc case MESA_FORMAT_RGB_DXT1: case MESA_FORMAT_RGBA_DXT1: stride = ((width + 3) / 4) * 8; /* 8 bytes per 4x4 tile */ @@ -257,6 +269,7 @@ _mesa_compressed_row_stride(GLuint mesaFormat, GLsizei width) case MESA_FORMAT_RGBA_DXT5: stride = ((width + 3) / 4) * 16; /* 16 bytes per 4x4 tile */ break; +#endif default: _mesa_problem(NULL, "bad mesaFormat in _mesa_compressed_row_stride"); return 0; @@ -293,10 +306,13 @@ _mesa_compressed_image_address(GLint col, GLint row, GLint img, */ switch (mesaFormat) { +#if FEATURE_texture_fxt1 case MESA_FORMAT_RGB_FXT1: case MESA_FORMAT_RGBA_FXT1: addr = (GLubyte *) image + 16 * (((width + 7) / 8) * (row / 4) + col / 8); break; +#endif +#if FEATURE_texture_s3tc case MESA_FORMAT_RGB_DXT1: case MESA_FORMAT_RGBA_DXT1: addr = (GLubyte *) image + 8 * (((width + 3) / 4) * (row / 4) + col / 4); @@ -305,6 +321,7 @@ _mesa_compressed_image_address(GLint col, GLint row, GLint img, case MESA_FORMAT_RGBA_DXT5: addr = (GLubyte *) image + 16 * (((width + 3) / 4) * (row / 4) + col / 4); break; +#endif default: _mesa_problem(NULL, "bad mesaFormat in _mesa_compressed_image_address"); addr = NULL; diff --git a/src/mesa/main/texcompress_fxt1.c b/src/mesa/main/texcompress_fxt1.c index 45f344b0c5..fc151605c9 100644 --- a/src/mesa/main/texcompress_fxt1.c +++ b/src/mesa/main/texcompress_fxt1.c @@ -298,17 +298,17 @@ const struct gl_texture_format _mesa_texformat_rgba_fxt1 = { /* * Define a 64-bit unsigned integer type and macros */ -#ifdef GL_EXT_timer_query /* this extensions defines the GLuint64EXT type */ +#if 1 #define FX64_NATIVE 1 -typedef GLuint64EXT Fx64; +typedef uint64_t Fx64; #define FX64_MOV32(a, b) a = b #define FX64_OR32(a, b) a |= 
b #define FX64_SHL(a, c) a <<= c -#else /* !GL_EXT_timer_query */ +#else #define FX64_NATIVE 0 @@ -330,7 +330,7 @@ typedef struct { } \ } while (0) -#endif /* !GL_EXT_timer_query */ +#endif #define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */ diff --git a/src/mesa/main/texenvprogram.c b/src/mesa/main/texenvprogram.c index dcd7f90567..2c7df7b8d9 100644 --- a/src/mesa/main/texenvprogram.c +++ b/src/mesa/main/texenvprogram.c @@ -48,24 +48,24 @@ struct texenvprog_cache_item /** - * This MAX is probably a bit generous, but that's OK. There can be - * up to four instructions per texture unit (TEX + 3 for combine), - * then there's fog and specular add. + * Up to nine instructions per tex unit, plus fog, specular color. */ -#define MAX_INSTRUCTIONS ((MAX_TEXTURE_UNITS * 4) + 12) +#define MAX_INSTRUCTIONS ((MAX_TEXTURE_UNITS * 9) + 12) #define DISASSEM (MESA_VERBOSE & VERBOSE_DISASSEM) struct mode_opt { - GLuint Source:4; - GLuint Operand:3; + GLubyte Source:4; + GLubyte Operand:3; }; struct state_key { - GLbitfield enabled_units; + GLuint nr_enabled_units:8; + GLuint enabled_units:8; GLuint separate_specular:1; GLuint fog_enabled:1; GLuint fog_mode:2; + GLuint inputs_available:12; struct { GLuint enabled:1; @@ -76,10 +76,10 @@ struct state_key { GLuint NumArgsRGB:2; GLuint ModeRGB:4; - struct mode_opt OptRGB[3]; - GLuint NumArgsA:2; GLuint ModeA:4; + + struct mode_opt OptRGB[3]; struct mode_opt OptA[3]; } unit[8]; }; @@ -201,6 +201,76 @@ static GLuint translate_tex_src_bit( GLbitfield bit ) } } +#define VERT_BIT_TEX_ANY (0xff << VERT_ATTRIB_TEX0) +#define VERT_RESULT_TEX_ANY (0xff << VERT_RESULT_TEX0) + +/** + * Identify all possible varying inputs. The fragment program will + * never reference non-varying inputs, but will track them via state + * constants instead. + * + * This function figures out all the inputs that the fragment program + * has access to. The bitmask is later reduced to just those which + * are actually referenced. 
+ */ +static GLbitfield get_fp_input_mask( GLcontext *ctx ) +{ + GLbitfield fp_inputs = 0x0; + + if (ctx->VertexProgram._Overriden) { + /* Somebody's messing with the vertex program and we don't have + * a clue what's happening. Assume that it could be producing + * all possible outputs. + */ + fp_inputs = ~0; + } + else if (ctx->RenderMode == GL_FEEDBACK) { + fp_inputs = (FRAG_BIT_COL0 | FRAG_BIT_TEX0); + } + else if (!ctx->VertexProgram._Enabled || + !ctx->VertexProgram._Current) { + + /* Fixed function logic */ + GLbitfield varying_inputs = ctx->varying_vp_inputs; + + /* First look at what values may be computed by the generated + * vertex program: + */ + if (ctx->Light.Enabled) { + fp_inputs |= FRAG_BIT_COL0; + + if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) + fp_inputs |= FRAG_BIT_COL1; + } + + fp_inputs |= (ctx->Texture._TexGenEnabled | + ctx->Texture._TexMatEnabled) << FRAG_ATTRIB_TEX0; + + /* Then look at what might be varying as a result of enabled + * arrays, etc: + */ + if (varying_inputs & VERT_BIT_COLOR0) fp_inputs |= FRAG_BIT_COL0; + if (varying_inputs & VERT_BIT_COLOR1) fp_inputs |= FRAG_BIT_COL1; + + fp_inputs |= (((varying_inputs & VERT_BIT_TEX_ANY) >> VERT_ATTRIB_TEX0) + << FRAG_ATTRIB_TEX0); + + } + else { + /* calculate from vp->outputs */ + GLbitfield vp_outputs = ctx->VertexProgram._Current->Base.OutputsWritten; + + if (vp_outputs & (1 << VERT_RESULT_COL0)) fp_inputs |= FRAG_BIT_COL0; + if (vp_outputs & (1 << VERT_RESULT_COL1)) fp_inputs |= FRAG_BIT_COL1; + + fp_inputs |= (((vp_outputs & VERT_RESULT_TEX_ANY) >> VERT_RESULT_TEX0) + << FRAG_ATTRIB_TEX0); + } + + return fp_inputs; +} + + /** * Examine current texture environment state and generate a unique * key to identify it. 
@@ -208,7 +278,9 @@ static GLuint translate_tex_src_bit( GLbitfield bit ) static void make_state_key( GLcontext *ctx, struct state_key *key ) { GLuint i, j; - + GLbitfield inputs_referenced = FRAG_BIT_COL0; + GLbitfield inputs_available = get_fp_input_mask( ctx ); + memset(key, 0, sizeof(*key)); for (i=0;i<MAX_TEXTURE_UNITS;i++) { @@ -219,6 +291,8 @@ static void make_state_key( GLcontext *ctx, struct state_key *key ) key->unit[i].enabled = 1; key->enabled_units |= (1<<i); + key->nr_enabled_units = i+1; + inputs_referenced |= FRAG_BIT_TEX(i); key->unit[i].source_index = translate_tex_src_bit(texUnit->_ReallyEnabled); @@ -247,16 +321,22 @@ static void make_state_key( GLcontext *ctx, struct state_key *key ) } } - if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) + if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) { key->separate_specular = 1; + inputs_referenced |= FRAG_BIT_COL1; + } if (ctx->Fog.Enabled) { key->fog_enabled = 1; key->fog_mode = translate_fog_mode(ctx->Fog.Mode); + inputs_referenced |= FRAG_BIT_FOGC; /* maybe */ } + + key->inputs_available = (inputs_available & inputs_referenced); } -/* Use uregs to represent registers internally, translate to Mesa's +/** + * Use uregs to represent registers internally, translate to Mesa's * expected formats on emit. * * NOTE: These are passed by value extensively in this file rather @@ -289,16 +369,16 @@ static const struct ureg undef = { }; -/* State used to build the fragment program: +/** State used to build the fragment program: */ struct texenv_fragment_program { struct gl_fragment_program *program; GLcontext *ctx; struct state_key *state; - GLbitfield alu_temps; /* Track texture indirections, see spec. */ - GLbitfield temps_output; /* Track texture indirections, see spec. */ - GLbitfield temp_in_use; /* Tracks temporary regs which are in use. */ + GLbitfield alu_temps; /**< Track texture indirections, see spec. */ + GLbitfield temps_output; /**< Track texture indirections, see spec. 
*/ + GLbitfield temp_in_use; /**< Tracks temporary regs which are in use. */ GLboolean error; struct ureg src_texture[MAX_TEXTURE_UNITS]; @@ -306,11 +386,11 @@ struct texenv_fragment_program { * else undef. */ - struct ureg src_previous; /* Reg containing color from previous + struct ureg src_previous; /**< Reg containing color from previous * stage. May need to be decl'd. */ - GLuint last_tex_stage; /* Number of last enabled texture unit */ + GLuint last_tex_stage; /**< Number of last enabled texture unit */ struct ureg half; struct ureg one; @@ -388,7 +468,7 @@ static struct ureg get_tex_temp( struct texenv_fragment_program *p ) { int bit; - /* First try to find availble temp not previously used (to avoid + /* First try to find available temp not previously used (to avoid * starting a new texture indirection). According to the spec, the * ~p->temps_output isn't necessary, but will keep it there for * now: @@ -413,6 +493,14 @@ static struct ureg get_tex_temp( struct texenv_fragment_program *p ) } +/** Mark a temp reg as being no longer allocatable. 
*/ +static void reserve_temp( struct texenv_fragment_program *p, struct ureg r ) +{ + if (r.file == PROGRAM_TEMPORARY) + p->temps_output |= (1 << r.idx); +} + + static void release_temps(GLcontext *ctx, struct texenv_fragment_program *p ) { GLuint max_temp = ctx->Const.FragmentProgram.MaxTemps; @@ -451,11 +539,29 @@ static struct ureg register_param5( struct texenv_fragment_program *p, #define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0) #define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0) +static GLuint frag_to_vert_attrib( GLuint attrib ) +{ + switch (attrib) { + case FRAG_ATTRIB_COL0: return VERT_ATTRIB_COLOR0; + case FRAG_ATTRIB_COL1: return VERT_ATTRIB_COLOR1; + default: + assert(attrib >= FRAG_ATTRIB_TEX0); + assert(attrib <= FRAG_ATTRIB_TEX7); + return attrib - FRAG_ATTRIB_TEX0 + VERT_ATTRIB_TEX0; + } +} + static struct ureg register_input( struct texenv_fragment_program *p, GLuint input ) { - p->program->Base.InputsRead |= (1 << input); - return make_ureg(PROGRAM_INPUT, input); + if (p->state->inputs_available & (1<<input)) { + p->program->Base.InputsRead |= (1 << input); + return make_ureg(PROGRAM_INPUT, input); + } + else { + GLuint idx = frag_to_vert_attrib( input ); + return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, idx ); + } } @@ -506,10 +612,12 @@ emit_op(struct texenv_fragment_program *p, emit_dst( &inst->DstReg, dest, mask ); +#if 0 /* Accounting for indirection tracking: */ if (dest.file == PROGRAM_TEMPORARY) p->temps_output |= 1 << dest.idx; +#endif return inst; } @@ -564,6 +672,10 @@ static struct ureg emit_texld( struct texenv_fragment_program *p, p->program->Base.NumTexInstructions++; + /* Accounting for indirection tracking: + */ + reserve_temp(p, dest); + /* Is this a texture indirection? 
*/ if ((coord.file == PROGRAM_TEMPORARY && @@ -588,14 +700,16 @@ static struct ureg register_const4f( struct texenv_fragment_program *p, { GLfloat values[4]; GLuint idx, swizzle; + struct ureg r; values[0] = s0; values[1] = s1; values[2] = s2; values[3] = s3; idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, &swizzle ); - ASSERT(swizzle == SWIZZLE_NOOP); - return make_ureg(PROGRAM_CONSTANT, idx); + r = make_ureg(PROGRAM_CONSTANT, idx); + r.swz = swizzle; + return r; } #define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) @@ -953,6 +1067,7 @@ static void load_texture( struct texenv_fragment_program *p, GLuint unit ) p->src_texture[unit] = emit_texld( p, OPCODE_TXP, tmp, WRITEMASK_XYZW, unit, dim, texcoord ); + if (p->state->unit[unit].shadow) p->program->Base.ShadowSamplers |= 1 << unit; @@ -1078,6 +1193,7 @@ create_new_program(GLcontext *ctx, struct state_key *key, for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) if (key->enabled_units & (1<<unit)) { p.src_previous = emit_texenv( &p, unit ); + reserve_temp(&p, p.src_previous); /* don't re-use this temp reg */ release_temps(ctx, &p); /* release all temps */ } } @@ -1189,6 +1305,9 @@ _mesa_get_fixed_func_fragment_program(GLcontext *ctx) * If _MaintainTexEnvProgram is set we'll generate a fragment program that * implements the current texture env/combine mode. * This function generates that program and puts it into effect. + * + * XXX: remove this function. currently only called by some drivers, + * not by mesa core. We now handle this properly from inside mesa. */ void _mesa_UpdateTexEnvProgram( GLcontext *ctx ) diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c index 4442ce39a4..ce2772c299 100644 --- a/src/mesa/main/texformat.c +++ b/src/mesa/main/texformat.c @@ -1673,6 +1673,11 @@ _mesa_format_to_type_and_comps(const struct gl_texture_format *format, *comps = 1; /* XXX OK? 
*/ return; + case MESA_FORMAT_S8_Z24: + *datatype = GL_UNSIGNED_INT; + *comps = 1; /* XXX OK? */ + return; + case MESA_FORMAT_Z16: *datatype = GL_UNSIGNED_SHORT; *comps = 1; @@ -1683,6 +1688,7 @@ _mesa_format_to_type_and_comps(const struct gl_texture_format *format, *comps = 1; return; +#if FEATURE_EXT_texture_sRGB case MESA_FORMAT_SRGB8: *datatype = GL_UNSIGNED_BYTE; *comps = 3; @@ -1699,9 +1705,13 @@ _mesa_format_to_type_and_comps(const struct gl_texture_format *format, *datatype = GL_UNSIGNED_BYTE; *comps = 2; return; +#endif +#if FEATURE_texture_fxt1 case MESA_FORMAT_RGB_FXT1: case MESA_FORMAT_RGBA_FXT1: +#endif +#if FEATURE_texture_s3tc case MESA_FORMAT_RGB_DXT1: case MESA_FORMAT_RGBA_DXT1: case MESA_FORMAT_RGBA_DXT3: @@ -1710,6 +1720,7 @@ _mesa_format_to_type_and_comps(const struct gl_texture_format *format, *datatype = GL_UNSIGNED_BYTE; *comps = 0; return; +#endif case MESA_FORMAT_RGBA: *datatype = CHAN_TYPE; diff --git a/src/mesa/main/texformat.h b/src/mesa/main/texformat.h index 18900fe08b..f34b3b8223 100644 --- a/src/mesa/main/texformat.h +++ b/src/mesa/main/texformat.h @@ -106,12 +106,16 @@ enum _format { * \name Compressed texture formats. 
*/ /*@{*/ +#if FEATURE_texture_fxt1 MESA_FORMAT_RGB_FXT1, MESA_FORMAT_RGBA_FXT1, +#endif +#if FEATURE_texture_s3tc MESA_FORMAT_RGB_DXT1, MESA_FORMAT_RGBA_DXT1, MESA_FORMAT_RGBA_DXT3, MESA_FORMAT_RGBA_DXT5, +#endif /*@}*/ /** @@ -225,12 +229,16 @@ extern const struct gl_texture_format _mesa_texformat_ycbcr_rev; /** \name Compressed formats */ /*@{*/ +#if FEATURE_texture_fxt1 extern const struct gl_texture_format _mesa_texformat_rgb_fxt1; extern const struct gl_texture_format _mesa_texformat_rgba_fxt1; +#endif +#if FEATURE_texture_s3tc extern const struct gl_texture_format _mesa_texformat_rgb_dxt1; extern const struct gl_texture_format _mesa_texformat_rgba_dxt1; extern const struct gl_texture_format _mesa_texformat_rgba_dxt3; extern const struct gl_texture_format _mesa_texformat_rgba_dxt5; +#endif /*@}*/ /** \name The null format */ diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c index f9360b474e..cddd9df016 100644 --- a/src/mesa/main/teximage.c +++ b/src/mesa/main/teximage.c @@ -1217,19 +1217,30 @@ _mesa_init_teximage_fields(GLcontext *ctx, GLenum target, img->Width = width; img->Height = height; img->Depth = depth; + img->Width2 = width - 2 * border; /* == 1 << img->WidthLog2; */ - img->Height2 = height - 2 * border; /* == 1 << img->HeightLog2; */ - img->Depth2 = depth - 2 * border; /* == 1 << img->DepthLog2; */ img->WidthLog2 = logbase2(img->Width2); - if (height == 1) /* 1-D texture */ + + if (height == 1) { /* 1-D texture */ + img->Height2 = 1; img->HeightLog2 = 0; - else + } + else { + img->Height2 = height - 2 * border; /* == 1 << img->HeightLog2; */ img->HeightLog2 = logbase2(img->Height2); - if (depth == 1) /* 2-D texture */ + } + + if (depth == 1) { /* 2-D texture */ + img->Depth2 = 1; img->DepthLog2 = 0; - else + } + else { + img->Depth2 = depth - 2 * border; /* == 1 << img->DepthLog2; */ img->DepthLog2 = logbase2(img->Depth2); + } + img->MaxLog2 = MAX2(img->WidthLog2, img->HeightLog2); + img->IsCompressed = GL_FALSE; img->CompressedSize = 
0; diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index abeed3baa1..c1fafbae30 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -885,8 +885,8 @@ _mesa_swizzle_ubyte_image(GLcontext *ctx, /* _mesa_printf("map %d %d %d %d\n", map[0], map[1], map[2], map[3]); */ - if (srcRowStride == dstRowStride && - srcComponents == dstComponents && + if (srcComponents == dstComponents && + srcRowStride == dstRowStride && srcRowStride == srcWidth * srcComponents && dimensions < 3) { /* 1 and 2D images only */ diff --git a/src/mesa/main/varray.h b/src/mesa/main/varray.h index 6385689fcc..97d5c8219d 100644 --- a/src/mesa/main/varray.h +++ b/src/mesa/main/varray.h @@ -156,6 +156,20 @@ _mesa_LockArraysEXT(GLint first, GLsizei count); extern void GLAPIENTRY _mesa_UnlockArraysEXT( void ); + +extern void GLAPIENTRY +_mesa_DrawArrays(GLenum mode, GLint first, GLsizei count); + +extern void GLAPIENTRY +_mesa_DrawElements(GLenum mode, GLsizei count, GLenum type, + const GLvoid *indices); + +extern void GLAPIENTRY +_mesa_DrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count, + GLenum type, const GLvoid *indices); + + + extern void _mesa_init_varray( GLcontext * ctx ); diff --git a/src/mesa/math/m_debug_xform.c b/src/mesa/math/m_debug_xform.c index ec22c7052d..df8cc066b6 100644 --- a/src/mesa/math/m_debug_xform.c +++ b/src/mesa/math/m_debug_xform.c @@ -206,7 +206,8 @@ static int test_transform_function( transform_func func, int psize, case VAR: break; default: - abort(); + ASSERT(0); + return 0; } } } diff --git a/src/mesa/ppc/common_ppc.c b/src/mesa/ppc/common_ppc.c index 786e1817ac..c7cec96e5d 100644 --- a/src/mesa/ppc/common_ppc.c +++ b/src/mesa/ppc/common_ppc.c @@ -85,7 +85,7 @@ void _mesa_init_all_ppc_transform_asm( void ) } # ifndef USE_VMX_ASM - _mesa_ppc_cpu_features &= ~PPC_FEATURES_HAS_ALTIVEC; + _mesa_ppc_cpu_features &= ~PPC_FEATURE_HAS_ALTIVEC; # endif #endif } diff --git a/src/mesa/shader/arbprogparse.c 
b/src/mesa/shader/arbprogparse.c index 39988b5fca..536404bf97 100644 --- a/src/mesa/shader/arbprogparse.c +++ b/src/mesa/shader/arbprogparse.c @@ -30,6 +30,27 @@ * \author Karl Rasche */ +/** +Notes on program parameters, etc. + +The instructions we emit will use six kinds of source registers: + + PROGRAM_INPUT - input registers + PROGRAM_TEMPORARY - temp registers + PROGRAM_ADDRESS - address/indirect register + PROGRAM_SAMPLER - texture sampler + PROGRAM_CONSTANT - indexes into program->Parameters, a known constant/literal + PROGRAM_STATE_VAR - indexes into program->Parameters, and may actually be: + + a state variable, like "state.fog.color", or + + a pointer to a "program.local[k]" parameter, or + + a pointer to a "program.env[k]" parameter + +Basically, all the program.local[] and program.env[] values will get mapped +into the unified gl_program->Parameters array. This solves the problem of +having three separate program parameter arrays. +*/ + + #include "main/glheader.h" #include "main/imports.h" #include "main/context.h" @@ -38,16 +59,11 @@ #include "shader/grammar/grammar_mesa.h" #include "arbprogparse.h" #include "program.h" +#include "programopt.h" #include "prog_parameter.h" #include "prog_statevars.h" #include "prog_instruction.h" - -/* For ARB programs, use the NV instruction limits */ -#define MAX_INSTRUCTIONS MAX2(MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS, \ - MAX_NV_VERTEX_PROGRAM_INSTRUCTIONS) - - /** * This is basically a union of the vertex_program and fragment_program * structs that we can use to parse the program into @@ -1870,7 +1886,11 @@ parse_param_elements (GLcontext * ctx, const GLubyte ** inst, const_values, 4); if (param_var->param_binding_begin == ~0U) param_var->param_binding_begin = idx; - param_var->param_binding_type = PROGRAM_CONSTANT; + param_var->param_binding_type = PROGRAM_STATE_VAR; + /* Note: when we reference this parameter in an instruction later, + * we'll check if it's really a constant/immediate and set the + * instruction 
register type appropriately. + */ param_var->param_binding_length++; Program->Base.NumParameters++; break; @@ -2577,6 +2597,18 @@ parse_src_reg (GLcontext * ctx, const GLubyte ** inst, return 1; } + if (*File == PROGRAM_STATE_VAR) { + enum register_file file; + + /* If we're referencing the Program->Parameters[] array, check if the + * parameter is really a constant/literal. If so, set File to CONSTANT. + */ + assert(*Index < (GLint) Program->Base.Parameters->NumParameters); + file = Program->Base.Parameters->Parameters[*Index].Type; + if (file == PROGRAM_CONSTANT) + *File = PROGRAM_CONSTANT; + } + /* Add attributes to InputsRead only if they are used the program. * This avoids the handling of unused ATTRIB declarations in the drivers. */ if (*File == PROGRAM_INPUT) @@ -3353,11 +3385,11 @@ debug_variables (GLcontext * ctx, struct var_cache *vc_head, fprintf (stderr, "%s\n", Program->Base.Parameters->Parameters[a + b].Name); if (Program->Base.Parameters->Parameters[a + b].Type == PROGRAM_STATE_VAR) { - const char *s; + char *s; s = _mesa_program_state_string(Program->Base.Parameters->Parameters [a + b].StateIndexes); fprintf(stderr, "%s\n", s); - _mesa_free((char *) s); + _mesa_free(s); } else fprintf (stderr, "%f %f %f %f\n", @@ -3405,7 +3437,7 @@ parse_instructions(GLcontext * ctx, const GLubyte * inst, : ctx->Const.VertexProgram.MaxInstructions; GLint err = 0; - ASSERT(MAX_INSTRUCTIONS >= maxInst); + ASSERT(MAX_PROGRAM_INSTRUCTIONS >= maxInst); Program->MajorVersion = (GLuint) * inst++; Program->MinorVersion = (GLuint) * inst++; @@ -3760,7 +3792,7 @@ _mesa_parse_arb_program(GLcontext *ctx, GLenum target, /* Initialize the arb_program struct */ program->Base.String = strz; - program->Base.Instructions = _mesa_alloc_instructions(MAX_INSTRUCTIONS); + program->Base.Instructions = _mesa_alloc_instructions(MAX_PROGRAM_INSTRUCTIONS); program->Base.NumInstructions = program->Base.NumTemporaries = program->Base.NumParameters = @@ -3805,12 +3837,12 @@ 
_mesa_parse_arb_program(GLcontext *ctx, GLenum target, _mesa_free (parsed); - /* Reallocate the instruction array from size [MAX_INSTRUCTIONS] + /* Reallocate the instruction array from size [MAX_PROGRAM_INSTRUCTIONS] * to size [ap.Base.NumInstructions]. */ program->Base.Instructions = _mesa_realloc_instructions(program->Base.Instructions, - MAX_INSTRUCTIONS, + MAX_PROGRAM_INSTRUCTIONS, program->Base.NumInstructions); return !err; @@ -3874,6 +3906,16 @@ _mesa_parse_arb_fragment_program(GLcontext* ctx, GLenum target, _mesa_free_parameter_list(program->Base.Parameters); program->Base.Parameters = ap.Base.Parameters; + /* Append fog instructions now if the program has "OPTION ARB_fog_exp" + * or similar. We used to leave this up to drivers, but it appears + * there's no hardware that wants to do fog in a discrete stage separate + * from the fragment shader. + */ + if (program->FogOption != GL_NONE) { + _mesa_append_fog_code(ctx, program); + program->FogOption = GL_NONE; + } + #if DEBUG_FP _mesa_printf("____________Fragment program %u ________\n", program->Base.Id); _mesa_print_program(&program->Base); diff --git a/src/mesa/shader/atifragshader.h b/src/mesa/shader/atifragshader.h index 32fb3a8019..e1dc20e606 100644 --- a/src/mesa/shader/atifragshader.h +++ b/src/mesa/shader/atifragshader.h @@ -8,6 +8,8 @@ #ifndef ATIFRAGSHADER_H #define ATIFRAGSHADER_H +#include "main/mtypes.h" + #define MAX_NUM_INSTRUCTIONS_PER_PASS_ATI 8 #define MAX_NUM_PASSES_ATI 2 #define MAX_NUM_FRAGMENT_REGISTERS_ATI 6 diff --git a/src/mesa/shader/grammar/descrip.mms b/src/mesa/shader/grammar/descrip.mms deleted file mode 100644 index 6976b70d6a..0000000000 --- a/src/mesa/shader/grammar/descrip.mms +++ /dev/null @@ -1,42 +0,0 @@ -# Makefile for core library for VMS -# contributed by Jouk Jansen joukj@hrem.nano.tudelft.nl -# Last revision : 29 September 2008 - -.first - define gl [----.include.gl] - define math [--.math] - define swrast [--.swrast] - define array_cache [--.array_cache] - define 
main [--.main] - -.include [----]mms-config. - -##### MACROS ##### - -VPATH = RCS - -INCDIR = [----.include],[],[--.main],[--.glapi],[-.slang] -LIBDIR = [----.lib] -CFLAGS = /include=($(INCDIR),[])/define=(PTHREADS=1)/name=(as_is,short)/float=ieee/ieee=denorm - -SOURCES = grammar_mesa.c - -OBJECTS = grammar_mesa.obj - -##### RULES ##### - -VERSION=Mesa V3.4 - -##### TARGETS ##### -all : - $(MMS)$(MMSQUALIFIERS) $(LIBDIR)$(GL_LIB) - -# Make the library -$(LIBDIR)$(GL_LIB) : $(OBJECTS) - @ library $(LIBDIR)$(GL_LIB) $(OBJECTS) - -clean : - purge - delete *.obj;* - -grammar_mesa.obj : grammar_mesa.c grammar.c diff --git a/src/mesa/shader/prog_cache.c b/src/mesa/shader/prog_cache.c index 36a25377c5..9437e59613 100644 --- a/src/mesa/shader/prog_cache.c +++ b/src/mesa/shader/prog_cache.c @@ -44,6 +44,7 @@ struct cache_item struct gl_program_cache { struct cache_item **items; + struct cache_item *last; GLuint size, n_items; }; @@ -83,6 +84,8 @@ rehash(struct gl_program_cache *cache) struct cache_item *c, *next; GLuint size, i; + cache->last = NULL; + size = cache->size * 3; items = (struct cache_item**) _mesa_malloc(size * sizeof(*items)); _mesa_memset(items, 0, size * sizeof(*items)); @@ -105,6 +108,8 @@ clear_cache(GLcontext *ctx, struct gl_program_cache *cache) { struct cache_item *c, *next; GLuint i; + + cache->last = NULL; for (i = 0; i < cache->size; i++) { for (c = cache->items[i]; c; c = next) { @@ -149,18 +154,26 @@ _mesa_delete_program_cache(GLcontext *ctx, struct gl_program_cache *cache) struct gl_program * -_mesa_search_program_cache(const struct gl_program_cache *cache, +_mesa_search_program_cache(struct gl_program_cache *cache, const void *key, GLuint keysize) { - const GLuint hash = hash_key(key, keysize); - struct cache_item *c; - - for (c = cache->items[hash % cache->size]; c; c = c->next) { - if (c->hash == hash && memcmp(c->key, key, keysize) == 0) - return c->program; + if (cache->last && + memcmp(cache->last->key, key, keysize) == 0) { + return 
cache->last->program; } + else { + const GLuint hash = hash_key(key, keysize); + struct cache_item *c; + + for (c = cache->items[hash % cache->size]; c; c = c->next) { + if (c->hash == hash && memcmp(c->key, key, keysize) == 0) { + cache->last = c; + return c->program; + } + } - return NULL; + return NULL; + } } diff --git a/src/mesa/shader/prog_cache.h b/src/mesa/shader/prog_cache.h index a8c91fba01..4e1ccac03f 100644 --- a/src/mesa/shader/prog_cache.h +++ b/src/mesa/shader/prog_cache.h @@ -42,7 +42,7 @@ _mesa_delete_program_cache(GLcontext *ctx, struct gl_program_cache *pc); extern struct gl_program * -_mesa_search_program_cache(const struct gl_program_cache *cache, +_mesa_search_program_cache(struct gl_program_cache *cache, const void *key, GLuint keysize); extern void diff --git a/src/mesa/shader/prog_execute.c b/src/mesa/shader/prog_execute.c index 32b6ff4fd4..23648f3f4c 100644 --- a/src/mesa/shader/prog_execute.c +++ b/src/mesa/shader/prog_execute.c @@ -81,11 +81,12 @@ get_register_pointer(const struct prog_src_register *source, { if (source->RelAddr) { const GLint reg = source->Index + machine->AddressReg[0][0]; - if (source->File == PROGRAM_ENV_PARAM) + if (source->File == PROGRAM_ENV_PARAM) { if (reg < 0 || reg >= MAX_PROGRAM_ENV_PARAMS) return ZeroVec; else return machine->EnvParams[reg]; + } else { const struct gl_program_parameter_list *params; ASSERT(source->File == PROGRAM_LOCAL_PARAM || diff --git a/src/mesa/shader/prog_instruction.h b/src/mesa/shader/prog_instruction.h index aca768376a..711166f9dd 100644 --- a/src/mesa/shader/prog_instruction.h +++ b/src/mesa/shader/prog_instruction.h @@ -413,11 +413,13 @@ struct prog_instruction */ GLint BranchTarget; +#if 0 /** * For TEX instructions in shaders, the sampler to use for the * texture lookup. 
*/ GLint Sampler; +#endif const char *Comment; }; diff --git a/src/mesa/shader/prog_parameter.c b/src/mesa/shader/prog_parameter.c index 2dfd923a0f..bfe27d2f63 100644 --- a/src/mesa/shader/prog_parameter.c +++ b/src/mesa/shader/prog_parameter.c @@ -420,7 +420,7 @@ _mesa_add_state_reference(struct gl_program_parameter_list *paramList, const gl_state_index stateTokens[STATE_LENGTH]) { const GLuint size = 4; /* XXX fix */ - const char *name; + char *name; GLint index; /* Check if the state reference is already in the list */ @@ -447,7 +447,7 @@ _mesa_add_state_reference(struct gl_program_parameter_list *paramList, paramList->StateFlags |= _mesa_program_state_flags(stateTokens); /* free name string here since we duplicated it in add_parameter() */ - _mesa_free((void *) name); + _mesa_free(name); return index; } diff --git a/src/mesa/shader/prog_print.c b/src/mesa/shader/prog_print.c index 32708ed706..ec260f18a9 100644 --- a/src/mesa/shader/prog_print.c +++ b/src/mesa/shader/prog_print.c @@ -250,7 +250,9 @@ reg_string(enum register_file f, GLint index, gl_prog_print_mode mode, { struct gl_program_parameter *param = prog->Parameters->Parameters + index; - sprintf(str, _mesa_program_state_string(param->StateIndexes)); + char *state = _mesa_program_state_string(param->StateIndexes); + sprintf(str, state); + _mesa_free(state); } break; case PROGRAM_ADDRESS: @@ -356,6 +358,19 @@ _mesa_swizzle_string(GLuint swizzle, GLuint negateBase, GLboolean extended) } +void +_mesa_print_swizzle(GLuint swizzle) +{ + if (swizzle == SWIZZLE_XYZW) { + _mesa_printf(".xyzw\n"); + } + else { + const char *s = _mesa_swizzle_string(swizzle, 0, 0); + _mesa_printf("%s\n", s); + } +} + + static const char * writemask_string(GLuint writeMask) { diff --git a/src/mesa/shader/prog_print.h b/src/mesa/shader/prog_print.h index 36c47e0dff..3cdb1b195e 100644 --- a/src/mesa/shader/prog_print.h +++ b/src/mesa/shader/prog_print.h @@ -44,6 +44,9 @@ extern const char * _mesa_swizzle_string(GLuint swizzle, GLuint 
negateBase, GLboolean extended); extern void +_mesa_print_swizzle(GLuint swizzle); + +extern void _mesa_print_alu_instruction(const struct prog_instruction *inst, const char *opcode_string, GLuint numRegs); diff --git a/src/mesa/shader/prog_statevars.c b/src/mesa/shader/prog_statevars.c index 971eb25a49..34c4741350 100644 --- a/src/mesa/shader/prog_statevars.c +++ b/src/mesa/shader/prog_statevars.c @@ -395,6 +395,12 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], case STATE_INTERNAL: switch (state[1]) { + case STATE_CURRENT_ATTRIB: { + const GLuint idx = (GLuint) state[2]; + COPY_4V(value, ctx->Current.Attrib[idx]); + return; + } + case STATE_NORMAL_SCALE: ASSIGN_4V(value, ctx->_ModelViewInvScale, @@ -501,6 +507,9 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], } return; + /* XXX: make sure new tokens added here are also handled in the + * _mesa_program_state_flags() switch, below. + */ default: /* unknown state indexes are silently ignored * should be handled by the driver. @@ -574,11 +583,29 @@ _mesa_program_state_flags(const gl_state_index state[STATE_LENGTH]) case STATE_INTERNAL: switch (state[1]) { + case STATE_CURRENT_ATTRIB: + return _NEW_CURRENT_ATTRIB; + + case STATE_NORMAL_SCALE: + return _NEW_MODELVIEW; + case STATE_TEXRECT_SCALE: case STATE_SHADOW_AMBIENT: return _NEW_TEXTURE; case STATE_FOG_PARAMS_OPTIMIZED: return _NEW_FOG; + case STATE_LIGHT_SPOT_DIR_NORMALIZED: + case STATE_LIGHT_POSITION: + case STATE_LIGHT_POSITION_NORMALIZED: + case STATE_LIGHT_HALF_VECTOR: + return _NEW_LIGHT; + + case STATE_PT_SCALE: + case STATE_PT_BIAS: + case STATE_PCM_SCALE: + case STATE_PCM_BIAS: + return _NEW_PIXEL; + default: /* unknown state indexes are silently ignored and * no flag set, since it is handled by the driver. @@ -784,7 +811,7 @@ append_index(char *dst, GLint index) * For example, return "state.matrix.texture[2].inverse". * Use _mesa_free() to deallocate the string. 
*/ -const char * +char * _mesa_program_state_string(const gl_state_index state[STATE_LENGTH]) { char str[1000] = ""; diff --git a/src/mesa/shader/prog_statevars.h b/src/mesa/shader/prog_statevars.h index d3091147f8..72e51f4031 100644 --- a/src/mesa/shader/prog_statevars.h +++ b/src/mesa/shader/prog_statevars.h @@ -104,6 +104,7 @@ typedef enum gl_state_index_ { STATE_LOCAL, STATE_INTERNAL, /* Mesa additions */ + STATE_CURRENT_ATTRIB, /* ctx->Current vertex attrib value */ STATE_NORMAL_SCALE, STATE_TEXRECT_SCALE, STATE_FOG_PARAMS_OPTIMIZED, /* for faster fog calc */ @@ -130,7 +131,7 @@ extern GLbitfield _mesa_program_state_flags(const gl_state_index state[STATE_LENGTH]); -extern const char * +extern char * _mesa_program_state_string(const gl_state_index state[STATE_LENGTH]); diff --git a/src/mesa/shader/prog_uniform.c b/src/mesa/shader/prog_uniform.c index f57df3d86d..25d7c0997b 100644 --- a/src/mesa/shader/prog_uniform.c +++ b/src/mesa/shader/prog_uniform.c @@ -134,8 +134,8 @@ _mesa_longest_uniform_name(const struct gl_uniform_list *list) GLint max = 0; GLuint i; for (i = 0; list && i < list->NumUniforms; i++) { - GLuint len = _mesa_strlen(list->Uniforms[i].Name); - if (len > (GLuint)max) + GLint len = (GLint)_mesa_strlen(list->Uniforms[i].Name); + if (len > max) max = len; } return max; diff --git a/src/mesa/shader/program.c b/src/mesa/shader/program.c index 738891a029..37962f0e9b 100644 --- a/src/mesa/shader/program.c +++ b/src/mesa/shader/program.c @@ -554,7 +554,6 @@ _mesa_insert_instructions(struct gl_program *prog, GLuint start, GLuint count) return GL_TRUE; } - /** * Delete 'count' instructions at 'start' in the given program. * Adjust branch targets accordingly. 
@@ -691,17 +690,47 @@ _mesa_combine_programs(GLcontext *ctx, if (newProg->Target == GL_FRAGMENT_PROGRAM_ARB) { struct gl_fragment_program *fprogA, *fprogB, *newFprog; + GLbitfield progB_inputsRead = progB->InputsRead; + GLint progB_colorFile, progB_colorIndex; + fprogA = (struct gl_fragment_program *) progA; fprogB = (struct gl_fragment_program *) progB; newFprog = (struct gl_fragment_program *) newProg; newFprog->UsesKill = fprogA->UsesKill || fprogB->UsesKill; + /* We'll do a search and replace for instances + * of progB_colorFile/progB_colorIndex below... + */ + progB_colorFile = PROGRAM_INPUT; + progB_colorIndex = FRAG_ATTRIB_COL0; + + /* + * The fragment program may get color from a state var rather than + * a fragment input (vertex output) if it's constant. + * See the texenvprogram.c code. + * So, search the program's parameter list now to see if the program + * gets color from a state var instead of a conventional fragment + * input register. + */ + for (i = 0; i < progB->Parameters->NumParameters; i++) { + struct gl_program_parameter *p = &progB->Parameters->Parameters[i]; + if (p->Type == PROGRAM_STATE_VAR && + p->StateIndexes[0] == STATE_INTERNAL && + p->StateIndexes[1] == STATE_CURRENT_ATTRIB && + p->StateIndexes[2] == VERT_ATTRIB_COLOR0) { + progB_inputsRead |= FRAG_BIT_COL0; + progB_colorFile = PROGRAM_STATE_VAR; + progB_colorIndex = i; + break; + } + } + /* Connect color outputs of fprogA to color inputs of fprogB, via a * new temporary register. 
*/ if ((progA->OutputsWritten & (1 << FRAG_RESULT_COLR)) && - (progB->InputsRead & (1 << FRAG_ATTRIB_COL0))) { + (progB_inputsRead & FRAG_BIT_COL0)) { GLint tempReg = _mesa_find_free_register(newProg, PROGRAM_TEMPORARY); if (tempReg < 0) { _mesa_problem(ctx, "No free temp regs found in " @@ -712,13 +741,14 @@ _mesa_combine_programs(GLcontext *ctx, replace_registers(newInst, lenA, PROGRAM_OUTPUT, FRAG_RESULT_COLR, PROGRAM_TEMPORARY, tempReg); - /* replace reads from input.color[0] with tempReg */ + /* replace reads from the input color with tempReg */ replace_registers(newInst + lenA, lenB, - PROGRAM_INPUT, FRAG_ATTRIB_COL0, - PROGRAM_TEMPORARY, tempReg); + progB_colorFile, progB_colorIndex, /* search for */ + PROGRAM_TEMPORARY, tempReg /* replace with */ ); } - inputsB = progB->InputsRead; + /* compute combined program's InputsRead */ + inputsB = progB_inputsRead; if (progA->OutputsWritten & (1 << FRAG_RESULT_COLR)) { inputsB &= ~(1 << FRAG_ATTRIB_COL0); } diff --git a/src/mesa/shader/shader_api.c b/src/mesa/shader/shader_api.c index 504d769323..266ecc4ef2 100644 --- a/src/mesa/shader/shader_api.c +++ b/src/mesa/shader/shader_api.c @@ -381,7 +381,7 @@ _mesa_init_shader_state(GLcontext * ctx) * are generated by the GLSL compiler. */ ctx->Shader.EmitHighLevelInstructions = GL_TRUE; - ctx->Shader.EmitCondCodes = GL_TRUE; /* XXX probably want GL_FALSE... */ + ctx->Shader.EmitCondCodes = GL_FALSE;/*GL_TRUE;*/ /* XXX probably want GL_FALSE... */ ctx->Shader.EmitComments = GL_FALSE; } @@ -1117,7 +1117,8 @@ get_matrix_dims(GLenum type, GLint *rows, GLint *cols) /** * Determine the number of rows and columns occupied by a uniform - * according to its datatype. + * according to its datatype. For non-matrix types (such as GL_FLOAT_VEC4), + * the number of rows = 1 and cols = number of elements in the vector. 
*/ static void get_uniform_rows_cols(const struct gl_program_parameter *p, @@ -1126,11 +1127,17 @@ get_uniform_rows_cols(const struct gl_program_parameter *p, get_matrix_dims(p->DataType, rows, cols); if (*rows == 0 && *cols == 0) { /* not a matrix type, probably a float or vector */ - *rows = p->Size / 4 + 1; - if (p->Size % 4 == 0) - *cols = 4; - else - *cols = p->Size % 4; + if (p->Size <= 4) { + *rows = 1; + *cols = p->Size; + } + else { + *rows = p->Size / 4 + 1; + if (p->Size % 4 == 0) + *cols = 4; + else + *cols = p->Size % 4; + } } } @@ -1513,7 +1520,7 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program, return; } - if (index + offset > program->Parameters->Size) { + if (index + offset > (GLint) program->Parameters->Size) { /* out of bounds! */ return; } @@ -1577,7 +1584,7 @@ set_program_uniform(GLcontext *ctx, struct gl_program *program, /* if the uniform is bool-valued, convert to 1.0 or 0.0 */ if (is_boolean_type(program->Parameters->Parameters[index].DataType)) { for (i = 0; i < elems; i++) { - uniformVal[i] = uniformVal[i] ? 1.0 : 0.0; + uniformVal[i] = uniformVal[i] ? 
1.0f : 0.0f; } } } diff --git a/src/mesa/shader/slang/library/slang_version_syn.h b/src/mesa/shader/slang/library/slang_version_syn.h deleted file mode 100644 index 3b94d85927..0000000000 --- a/src/mesa/shader/slang/library/slang_version_syn.h +++ /dev/null @@ -1,64 +0,0 @@ -".syntax version_directive;\n" -"version_directive\n" -" version_directive_1 .and .loop version_directive_2;\n" -"version_directive_1\n" -" prior_optional_spaces .and optional_version_directive .and .true .emit $;\n" -"version_directive_2\n" -" prior_optional_spaces .and version_directive_body .and .true .emit $;\n" -"optional_version_directive\n" -" version_directive_body .or .true .emit 10 .emit 1;\n" -"version_directive_body\n" -" '#' .and optional_space .and \"version\" .and space .and version_number .and optional_space .and\n" -" new_line;\n" -"version_number\n" -" version_number_110;\n" -"version_number_110\n" -" leading_zeroes .and \"110\" .emit 10 .emit 1;\n" -"leading_zeroes\n" -" .loop zero;\n" -"zero\n" -" '0';\n" -"space\n" -" single_space .and .loop single_space;\n" -"optional_space\n" -" .loop single_space;\n" -"single_space\n" -" ' ' .or '\\t';\n" -"prior_optional_spaces\n" -" .loop prior_space;\n" -"prior_space\n" -" c_style_comment_block .or cpp_style_comment_block .or space .or new_line;\n" -"c_style_comment_block\n" -" '/' .and '*' .and c_style_comment_rest;\n" -"c_style_comment_rest\n" -" .loop c_style_comment_char_no_star .and c_style_comment_rest_1;\n" -"c_style_comment_rest_1\n" -" c_style_comment_end .or c_style_comment_rest_2;\n" -"c_style_comment_rest_2\n" -" '*' .and c_style_comment_rest;\n" -"c_style_comment_char_no_star\n" -" '\\x2B'-'\\xFF' .or '\\x01'-'\\x29';\n" -"c_style_comment_end\n" -" '*' .and '/';\n" -"cpp_style_comment_block\n" -" '/' .and '/' .and cpp_style_comment_block_1;\n" -"cpp_style_comment_block_1\n" -" cpp_style_comment_block_2 .or cpp_style_comment_block_3;\n" -"cpp_style_comment_block_2\n" -" .loop cpp_style_comment_char .and new_line;\n" 
-"cpp_style_comment_block_3\n" -" .loop cpp_style_comment_char;\n" -"cpp_style_comment_char\n" -" '\\x0E'-'\\xFF' .or '\\x01'-'\\x09' .or '\\x0B'-'\\x0C';\n" -"new_line\n" -" cr_lf .or lf_cr .or '\\n' .or '\\r';\n" -"cr_lf\n" -" '\\r' .and '\\n';\n" -"lf_cr\n" -" '\\n' .and '\\r';\n" -".string __string_filter;\n" -"__string_filter\n" -" .loop __identifier_char;\n" -"__identifier_char\n" -" 'a'-'z' .or 'A'-'Z' .or '_' .or '0'-'9';\n" -"" diff --git a/src/mesa/shader/slang/library/slang_vertex_builtin.gc b/src/mesa/shader/slang/library/slang_vertex_builtin.gc index 20c924a30d..17e86d9a0e 100644 --- a/src/mesa/shader/slang/library/slang_vertex_builtin.gc +++ b/src/mesa/shader/slang/library/slang_vertex_builtin.gc @@ -57,7 +57,10 @@ varying float gl_FogFragCoord; vec4 ftransform() { - __retVal = gl_Vertex * gl_ModelViewProjectionMatrixTranspose; + __retVal = gl_ModelViewProjectionMatrix[0] * gl_Vertex.xxxx + + gl_ModelViewProjectionMatrix[1] * gl_Vertex.yyyy + + gl_ModelViewProjectionMatrix[2] * gl_Vertex.zzzz + + gl_ModelViewProjectionMatrix[3] * gl_Vertex.wwww; } diff --git a/src/mesa/shader/slang/library/slang_vertex_builtin_gc.h b/src/mesa/shader/slang/library/slang_vertex_builtin_gc.h index 1a08a54d7e..63d33af78e 100644 --- a/src/mesa/shader/slang/library/slang_vertex_builtin_gc.h +++ b/src/mesa/shader/slang/library/slang_vertex_builtin_gc.h @@ -20,75 +20,81 @@ 0,2,2,3,0,12,1,103,108,95,84,101,120,67,111,111,114,100,0,3,18,103,108,95,77,97,120,84,101,120,116, 117,114,101,67,111,111,114,100,115,0,0,0,2,2,3,0,9,1,103,108,95,70,111,103,70,114,97,103,67,111, 111,114,100,0,0,0,1,0,0,12,0,102,116,114,97,110,115,102,111,114,109,0,0,1,9,18,95,95,114,101,116, -86,97,108,0,18,103,108,95,86,101,114,116,101,120,0,18,103,108,95,77,111,100,101,108,86,105,101,119, -80,114,111,106,101,99,116,105,111,110,77,97,116,114,105,120,84,114,97,110,115,112,111,115,101,0,48, -20,0,0,1,0,0,12,0,116,101,120,116,117,114,101,49,68,76,111,100,0,1,1,0,16,115,97,109,112,108,101, 
-114,0,0,1,1,0,9,99,111,111,114,100,0,0,1,1,0,9,108,111,100,0,0,0,1,3,2,0,0,12,1,99,111,111,114,100, -52,0,0,0,9,18,99,111,111,114,100,52,0,59,120,0,18,99,111,111,114,100,0,20,0,9,18,99,111,111,114, -100,52,0,59,119,0,18,108,111,100,0,20,0,4,118,101,99,52,95,116,101,120,98,49,100,0,18,95,95,114, -101,116,86,97,108,0,0,18,115,97,109,112,108,101,114,0,0,18,99,111,111,114,100,52,0,0,0,0,1,0,0,12, -0,116,101,120,116,117,114,101,49,68,80,114,111,106,76,111,100,0,1,1,0,16,115,97,109,112,108,101, -114,0,0,1,1,0,10,99,111,111,114,100,0,0,1,1,0,9,108,111,100,0,0,0,1,3,2,0,0,12,1,112,99,111,111, -114,100,0,0,0,9,18,112,99,111,111,114,100,0,59,120,0,18,99,111,111,114,100,0,59,120,0,18,99,111, -111,114,100,0,59,121,0,49,20,0,9,18,112,99,111,111,114,100,0,59,119,0,18,108,111,100,0,20,0,4,118, -101,99,52,95,116,101,120,98,49,100,0,18,95,95,114,101,116,86,97,108,0,0,18,115,97,109,112,108,101, -114,0,0,18,112,99,111,111,114,100,0,0,0,0,1,0,0,12,0,116,101,120,116,117,114,101,49,68,80,114,111, -106,76,111,100,0,1,1,0,16,115,97,109,112,108,101,114,0,0,1,1,0,12,99,111,111,114,100,0,0,1,1,0,9, -108,111,100,0,0,0,1,3,2,0,0,12,1,112,99,111,111,114,100,0,0,0,9,18,112,99,111,111,114,100,0,59,120, -0,18,99,111,111,114,100,0,59,120,0,18,99,111,111,114,100,0,59,122,0,49,20,0,9,18,112,99,111,111, -114,100,0,59,119,0,18,108,111,100,0,20,0,4,118,101,99,52,95,116,101,120,98,49,100,0,18,95,95,114, -101,116,86,97,108,0,0,18,115,97,109,112,108,101,114,0,0,18,112,99,111,111,114,100,0,0,0,0,1,0,0,12, -0,116,101,120,116,117,114,101,50,68,76,111,100,0,1,1,0,17,115,97,109,112,108,101,114,0,0,1,1,0,10, -99,111,111,114,100,0,0,1,1,0,9,108,111,100,0,0,0,1,3,2,0,0,12,1,99,111,111,114,100,52,0,0,0,9,18, -99,111,111,114,100,52,0,59,120,121,0,18,99,111,111,114,100,0,59,120,121,0,20,0,9,18,99,111,111,114, -100,52,0,59,119,0,18,108,111,100,0,20,0,4,118,101,99,52,95,116,101,120,98,50,100,0,18,95,95,114, -101,116,86,97,108,0,0,18,115,97,109,112,108,101,114,0,0,18,99,111,111,114,100,52,0,0,0,0,1,0,0,12, 
-0,116,101,120,116,117,114,101,50,68,80,114,111,106,76,111,100,0,1,1,0,17,115,97,109,112,108,101, -114,0,0,1,1,0,11,99,111,111,114,100,0,0,1,1,0,9,108,111,100,0,0,0,1,3,2,0,0,12,1,112,99,111,111, -114,100,0,0,0,9,18,112,99,111,111,114,100,0,59,120,121,0,18,99,111,111,114,100,0,59,120,121,0,18, -99,111,111,114,100,0,59,122,0,49,20,0,9,18,112,99,111,111,114,100,0,59,119,0,18,108,111,100,0,20,0, -4,118,101,99,52,95,116,101,120,98,50,100,0,18,95,95,114,101,116,86,97,108,0,0,18,115,97,109,112, -108,101,114,0,0,18,112,99,111,111,114,100,0,0,0,0,1,0,0,12,0,116,101,120,116,117,114,101,50,68,80, -114,111,106,76,111,100,0,1,1,0,17,115,97,109,112,108,101,114,0,0,1,1,0,12,99,111,111,114,100,0,0,1, -1,0,9,108,111,100,0,0,0,1,3,2,0,0,12,1,112,99,111,111,114,100,0,0,0,9,18,112,99,111,111,114,100,0, -59,120,121,0,18,99,111,111,114,100,0,59,120,121,0,18,99,111,111,114,100,0,59,122,0,49,20,0,9,18, -112,99,111,111,114,100,0,59,119,0,18,108,111,100,0,20,0,4,118,101,99,52,95,116,101,120,98,50,100,0, -18,95,95,114,101,116,86,97,108,0,0,18,115,97,109,112,108,101,114,0,0,18,112,99,111,111,114,100,0,0, -0,0,1,0,0,12,0,116,101,120,116,117,114,101,51,68,76,111,100,0,1,1,0,18,115,97,109,112,108,101,114, -0,0,1,1,0,11,99,111,111,114,100,0,0,1,1,0,9,108,111,100,0,0,0,1,3,2,0,0,12,1,99,111,111,114,100,52, -0,0,0,9,18,99,111,111,114,100,52,0,59,120,121,122,0,18,99,111,111,114,100,0,59,120,121,122,0,20,0, -9,18,99,111,111,114,100,52,0,59,119,0,18,108,111,100,0,20,0,4,118,101,99,52,95,116,101,120,98,51, -100,0,18,95,95,114,101,116,86,97,108,0,0,18,115,97,109,112,108,101,114,0,0,18,99,111,111,114,100, -52,0,0,0,0,1,0,0,12,0,116,101,120,116,117,114,101,51,68,80,114,111,106,76,111,100,0,1,1,0,18,115, -97,109,112,108,101,114,0,0,1,1,0,12,99,111,111,114,100,0,0,1,1,0,9,108,111,100,0,0,0,1,3,2,0,0,12, -1,112,99,111,111,114,100,0,0,0,9,18,112,99,111,111,114,100,0,59,120,121,122,0,18,99,111,111,114, -100,0,59,120,121,122,0,18,99,111,111,114,100,0,59,119,0,49,20,0,9,18,112,99,111,111,114,100,0,59, 
-119,0,18,108,111,100,0,20,0,4,118,101,99,52,95,116,101,120,98,51,100,0,18,95,95,114,101,116,86,97, -108,0,0,18,115,97,109,112,108,101,114,0,0,18,112,99,111,111,114,100,0,0,0,0,1,0,0,12,0,116,101,120, -116,117,114,101,67,117,98,101,76,111,100,0,1,1,0,19,115,97,109,112,108,101,114,0,0,1,1,0,11,99,111, -111,114,100,0,0,1,1,0,9,108,111,100,0,0,0,1,3,2,0,0,12,1,99,111,111,114,100,52,0,0,0,9,18,99,111, -111,114,100,52,0,59,120,121,122,0,18,99,111,111,114,100,0,20,0,9,18,99,111,111,114,100,52,0,59,119, -0,18,108,111,100,0,20,0,4,118,101,99,52,95,116,101,120,99,117,98,101,0,18,95,95,114,101,116,86,97, -108,0,0,18,115,97,109,112,108,101,114,0,0,18,99,111,111,114,100,52,0,0,0,0,1,0,0,12,0,115,104,97, -100,111,119,49,68,76,111,100,0,1,1,0,20,115,97,109,112,108,101,114,0,0,1,1,0,11,99,111,111,114,100, -0,0,1,1,0,9,108,111,100,0,0,0,1,3,2,0,0,12,1,99,111,111,114,100,52,0,0,0,9,18,99,111,111,114,100, -52,0,59,120,121,122,0,18,99,111,111,114,100,0,20,0,9,18,99,111,111,114,100,52,0,59,119,0,18,108, +86,97,108,0,18,103,108,95,77,111,100,101,108,86,105,101,119,80,114,111,106,101,99,116,105,111,110, +77,97,116,114,105,120,0,16,8,48,0,57,18,103,108,95,86,101,114,116,101,120,0,59,120,120,120,120,0, +48,18,103,108,95,77,111,100,101,108,86,105,101,119,80,114,111,106,101,99,116,105,111,110,77,97,116, +114,105,120,0,16,10,49,0,57,18,103,108,95,86,101,114,116,101,120,0,59,121,121,121,121,0,48,46,18, +103,108,95,77,111,100,101,108,86,105,101,119,80,114,111,106,101,99,116,105,111,110,77,97,116,114, +105,120,0,16,10,50,0,57,18,103,108,95,86,101,114,116,101,120,0,59,122,122,122,122,0,48,46,18,103, +108,95,77,111,100,101,108,86,105,101,119,80,114,111,106,101,99,116,105,111,110,77,97,116,114,105, +120,0,16,10,51,0,57,18,103,108,95,86,101,114,116,101,120,0,59,119,119,119,119,0,48,46,20,0,0,1,0,0, +12,0,116,101,120,116,117,114,101,49,68,76,111,100,0,1,1,0,16,115,97,109,112,108,101,114,0,0,1,1,0, +9,99,111,111,114,100,0,0,1,1,0,9,108,111,100,0,0,0,1,3,2,0,0,12,1,99,111,111,114,100,52,0,0,0,9,18, 
+99,111,111,114,100,52,0,59,120,0,18,99,111,111,114,100,0,20,0,9,18,99,111,111,114,100,52,0,59,119, +0,18,108,111,100,0,20,0,4,118,101,99,52,95,116,101,120,98,49,100,0,18,95,95,114,101,116,86,97,108, +0,0,18,115,97,109,112,108,101,114,0,0,18,99,111,111,114,100,52,0,0,0,0,1,0,0,12,0,116,101,120,116, +117,114,101,49,68,80,114,111,106,76,111,100,0,1,1,0,16,115,97,109,112,108,101,114,0,0,1,1,0,10,99, +111,111,114,100,0,0,1,1,0,9,108,111,100,0,0,0,1,3,2,0,0,12,1,112,99,111,111,114,100,0,0,0,9,18,112, +99,111,111,114,100,0,59,120,0,18,99,111,111,114,100,0,59,120,0,18,99,111,111,114,100,0,59,121,0,49, +20,0,9,18,112,99,111,111,114,100,0,59,119,0,18,108,111,100,0,20,0,4,118,101,99,52,95,116,101,120, +98,49,100,0,18,95,95,114,101,116,86,97,108,0,0,18,115,97,109,112,108,101,114,0,0,18,112,99,111,111, +114,100,0,0,0,0,1,0,0,12,0,116,101,120,116,117,114,101,49,68,80,114,111,106,76,111,100,0,1,1,0,16, +115,97,109,112,108,101,114,0,0,1,1,0,12,99,111,111,114,100,0,0,1,1,0,9,108,111,100,0,0,0,1,3,2,0,0, +12,1,112,99,111,111,114,100,0,0,0,9,18,112,99,111,111,114,100,0,59,120,0,18,99,111,111,114,100,0, +59,120,0,18,99,111,111,114,100,0,59,122,0,49,20,0,9,18,112,99,111,111,114,100,0,59,119,0,18,108, 111,100,0,20,0,4,118,101,99,52,95,116,101,120,98,49,100,0,18,95,95,114,101,116,86,97,108,0,0,18, -115,97,109,112,108,101,114,0,0,18,99,111,111,114,100,52,0,0,0,0,1,0,0,12,0,115,104,97,100,111,119, -49,68,80,114,111,106,76,111,100,0,1,1,0,20,115,97,109,112,108,101,114,0,0,1,1,0,12,99,111,111,114, -100,0,0,1,1,0,9,108,111,100,0,0,0,1,3,2,0,0,12,1,112,99,111,111,114,100,0,0,0,9,18,112,99,111,111, -114,100,0,59,120,0,18,99,111,111,114,100,0,59,120,0,18,99,111,111,114,100,0,59,119,0,49,20,0,9,18, -112,99,111,111,114,100,0,59,122,0,18,99,111,111,114,100,0,59,122,0,20,0,9,18,112,99,111,111,114, -100,0,59,119,0,18,108,111,100,0,20,0,4,118,101,99,52,95,116,101,120,98,49,100,0,18,95,95,114,101, -116,86,97,108,0,0,18,115,97,109,112,108,101,114,0,0,18,112,99,111,111,114,100,0,0,0,0,1,0,0,12,0, 
-115,104,97,100,111,119,50,68,76,111,100,0,1,1,0,21,115,97,109,112,108,101,114,0,0,1,1,0,11,99,111, -111,114,100,0,0,1,1,0,9,108,111,100,0,0,0,1,3,2,0,0,12,1,99,111,111,114,100,52,0,0,0,9,18,99,111, -111,114,100,52,0,59,120,121,122,0,18,99,111,111,114,100,0,20,0,9,18,99,111,111,114,100,52,0,59,119, -0,18,108,111,100,0,20,0,4,118,101,99,52,95,116,101,120,98,50,100,0,18,95,95,114,101,116,86,97,108, -0,0,18,115,97,109,112,108,101,114,0,0,18,99,111,111,114,100,52,0,0,0,0,1,0,0,12,0,115,104,97,100, -111,119,50,68,80,114,111,106,76,111,100,0,1,1,0,21,115,97,109,112,108,101,114,0,0,1,1,0,12,99,111, +115,97,109,112,108,101,114,0,0,18,112,99,111,111,114,100,0,0,0,0,1,0,0,12,0,116,101,120,116,117, +114,101,50,68,76,111,100,0,1,1,0,17,115,97,109,112,108,101,114,0,0,1,1,0,10,99,111,111,114,100,0,0, +1,1,0,9,108,111,100,0,0,0,1,3,2,0,0,12,1,99,111,111,114,100,52,0,0,0,9,18,99,111,111,114,100,52,0, +59,120,121,0,18,99,111,111,114,100,0,59,120,121,0,20,0,9,18,99,111,111,114,100,52,0,59,119,0,18, +108,111,100,0,20,0,4,118,101,99,52,95,116,101,120,98,50,100,0,18,95,95,114,101,116,86,97,108,0,0, +18,115,97,109,112,108,101,114,0,0,18,99,111,111,114,100,52,0,0,0,0,1,0,0,12,0,116,101,120,116,117, +114,101,50,68,80,114,111,106,76,111,100,0,1,1,0,17,115,97,109,112,108,101,114,0,0,1,1,0,11,99,111, 111,114,100,0,0,1,1,0,9,108,111,100,0,0,0,1,3,2,0,0,12,1,112,99,111,111,114,100,0,0,0,9,18,112,99, -111,111,114,100,0,59,120,121,0,18,99,111,111,114,100,0,59,120,121,0,18,99,111,111,114,100,0,59,119, -0,49,20,0,9,18,112,99,111,111,114,100,0,59,122,0,18,99,111,111,114,100,0,59,122,0,20,0,9,18,112,99, -111,111,114,100,0,59,119,0,18,108,111,100,0,20,0,4,118,101,99,52,95,116,101,120,98,50,100,0,18,95, -95,114,101,116,86,97,108,0,0,18,115,97,109,112,108,101,114,0,0,18,112,99,111,111,114,100,0,0,0,0,0 +111,111,114,100,0,59,120,121,0,18,99,111,111,114,100,0,59,120,121,0,18,99,111,111,114,100,0,59,122, +0,49,20,0,9,18,112,99,111,111,114,100,0,59,119,0,18,108,111,100,0,20,0,4,118,101,99,52,95,116,101, 
+120,98,50,100,0,18,95,95,114,101,116,86,97,108,0,0,18,115,97,109,112,108,101,114,0,0,18,112,99,111, +111,114,100,0,0,0,0,1,0,0,12,0,116,101,120,116,117,114,101,50,68,80,114,111,106,76,111,100,0,1,1,0, +17,115,97,109,112,108,101,114,0,0,1,1,0,12,99,111,111,114,100,0,0,1,1,0,9,108,111,100,0,0,0,1,3,2, +0,0,12,1,112,99,111,111,114,100,0,0,0,9,18,112,99,111,111,114,100,0,59,120,121,0,18,99,111,111,114, +100,0,59,120,121,0,18,99,111,111,114,100,0,59,122,0,49,20,0,9,18,112,99,111,111,114,100,0,59,119,0, +18,108,111,100,0,20,0,4,118,101,99,52,95,116,101,120,98,50,100,0,18,95,95,114,101,116,86,97,108,0, +0,18,115,97,109,112,108,101,114,0,0,18,112,99,111,111,114,100,0,0,0,0,1,0,0,12,0,116,101,120,116, +117,114,101,51,68,76,111,100,0,1,1,0,18,115,97,109,112,108,101,114,0,0,1,1,0,11,99,111,111,114,100, +0,0,1,1,0,9,108,111,100,0,0,0,1,3,2,0,0,12,1,99,111,111,114,100,52,0,0,0,9,18,99,111,111,114,100, +52,0,59,120,121,122,0,18,99,111,111,114,100,0,59,120,121,122,0,20,0,9,18,99,111,111,114,100,52,0, +59,119,0,18,108,111,100,0,20,0,4,118,101,99,52,95,116,101,120,98,51,100,0,18,95,95,114,101,116,86, +97,108,0,0,18,115,97,109,112,108,101,114,0,0,18,99,111,111,114,100,52,0,0,0,0,1,0,0,12,0,116,101, +120,116,117,114,101,51,68,80,114,111,106,76,111,100,0,1,1,0,18,115,97,109,112,108,101,114,0,0,1,1, +0,12,99,111,111,114,100,0,0,1,1,0,9,108,111,100,0,0,0,1,3,2,0,0,12,1,112,99,111,111,114,100,0,0,0, +9,18,112,99,111,111,114,100,0,59,120,121,122,0,18,99,111,111,114,100,0,59,120,121,122,0,18,99,111, +111,114,100,0,59,119,0,49,20,0,9,18,112,99,111,111,114,100,0,59,119,0,18,108,111,100,0,20,0,4,118, +101,99,52,95,116,101,120,98,51,100,0,18,95,95,114,101,116,86,97,108,0,0,18,115,97,109,112,108,101, +114,0,0,18,112,99,111,111,114,100,0,0,0,0,1,0,0,12,0,116,101,120,116,117,114,101,67,117,98,101,76, +111,100,0,1,1,0,19,115,97,109,112,108,101,114,0,0,1,1,0,11,99,111,111,114,100,0,0,1,1,0,9,108,111, 
+100,0,0,0,1,3,2,0,0,12,1,99,111,111,114,100,52,0,0,0,9,18,99,111,111,114,100,52,0,59,120,121,122,0, +18,99,111,111,114,100,0,20,0,9,18,99,111,111,114,100,52,0,59,119,0,18,108,111,100,0,20,0,4,118,101, +99,52,95,116,101,120,99,117,98,101,0,18,95,95,114,101,116,86,97,108,0,0,18,115,97,109,112,108,101, +114,0,0,18,99,111,111,114,100,52,0,0,0,0,1,0,0,12,0,115,104,97,100,111,119,49,68,76,111,100,0,1,1, +0,20,115,97,109,112,108,101,114,0,0,1,1,0,11,99,111,111,114,100,0,0,1,1,0,9,108,111,100,0,0,0,1,3, +2,0,0,12,1,99,111,111,114,100,52,0,0,0,9,18,99,111,111,114,100,52,0,59,120,121,122,0,18,99,111,111, +114,100,0,20,0,9,18,99,111,111,114,100,52,0,59,119,0,18,108,111,100,0,20,0,4,118,101,99,52,95,116, +101,120,98,49,100,0,18,95,95,114,101,116,86,97,108,0,0,18,115,97,109,112,108,101,114,0,0,18,99,111, +111,114,100,52,0,0,0,0,1,0,0,12,0,115,104,97,100,111,119,49,68,80,114,111,106,76,111,100,0,1,1,0, +20,115,97,109,112,108,101,114,0,0,1,1,0,12,99,111,111,114,100,0,0,1,1,0,9,108,111,100,0,0,0,1,3,2, +0,0,12,1,112,99,111,111,114,100,0,0,0,9,18,112,99,111,111,114,100,0,59,120,0,18,99,111,111,114,100, +0,59,120,0,18,99,111,111,114,100,0,59,119,0,49,20,0,9,18,112,99,111,111,114,100,0,59,122,0,18,99, +111,111,114,100,0,59,122,0,20,0,9,18,112,99,111,111,114,100,0,59,119,0,18,108,111,100,0,20,0,4,118, +101,99,52,95,116,101,120,98,49,100,0,18,95,95,114,101,116,86,97,108,0,0,18,115,97,109,112,108,101, +114,0,0,18,112,99,111,111,114,100,0,0,0,0,1,0,0,12,0,115,104,97,100,111,119,50,68,76,111,100,0,1,1, +0,21,115,97,109,112,108,101,114,0,0,1,1,0,11,99,111,111,114,100,0,0,1,1,0,9,108,111,100,0,0,0,1,3, +2,0,0,12,1,99,111,111,114,100,52,0,0,0,9,18,99,111,111,114,100,52,0,59,120,121,122,0,18,99,111,111, +114,100,0,20,0,9,18,99,111,111,114,100,52,0,59,119,0,18,108,111,100,0,20,0,4,118,101,99,52,95,116, +101,120,98,50,100,0,18,95,95,114,101,116,86,97,108,0,0,18,115,97,109,112,108,101,114,0,0,18,99,111, 
+111,114,100,52,0,0,0,0,1,0,0,12,0,115,104,97,100,111,119,50,68,80,114,111,106,76,111,100,0,1,1,0, +21,115,97,109,112,108,101,114,0,0,1,1,0,12,99,111,111,114,100,0,0,1,1,0,9,108,111,100,0,0,0,1,3,2, +0,0,12,1,112,99,111,111,114,100,0,0,0,9,18,112,99,111,111,114,100,0,59,120,121,0,18,99,111,111,114, +100,0,59,120,121,0,18,99,111,111,114,100,0,59,119,0,49,20,0,9,18,112,99,111,111,114,100,0,59,122,0, +18,99,111,111,114,100,0,59,122,0,20,0,9,18,112,99,111,111,114,100,0,59,119,0,18,108,111,100,0,20,0, +4,118,101,99,52,95,116,101,120,98,50,100,0,18,95,95,114,101,116,86,97,108,0,0,18,115,97,109,112, +108,101,114,0,0,18,112,99,111,111,114,100,0,0,0,0,0 diff --git a/src/mesa/shader/slang/slang_emit.c b/src/mesa/shader/slang/slang_emit.c index 9e8daa1051..f3c3fa6c5b 100644 --- a/src/mesa/shader/slang/slang_emit.c +++ b/src/mesa/shader/slang/slang_emit.c @@ -1579,13 +1579,17 @@ emit_array_element(slang_emit_info *emitInfo, slang_ir_node *n) else { /* Variable array index */ struct prog_instruction *inst; + slang_ir_storage dstStore = *n->Store; /* do codegen for array index expression */ emit(emitInfo, n->Children[1]); inst = new_instruction(emitInfo, OPCODE_ARL); - storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask); + if (dstStore.Size > 4) + dstStore.Size = 4; /* only emit one instruction */ + + storage_to_dst_reg(&inst->DstReg, &dstStore, n->Writemask); storage_to_src_reg(&inst->SrcReg[0], n->Children[1]->Store); inst->DstReg.File = PROGRAM_ADDRESS; diff --git a/src/mesa/shader/slang/slang_link.c b/src/mesa/shader/slang/slang_link.c index dd7d5be6d8..00e8953768 100644 --- a/src/mesa/shader/slang/slang_link.c +++ b/src/mesa/shader/slang/slang_link.c @@ -408,7 +408,7 @@ _slang_update_inputs_outputs(struct gl_program *prog) } } else if (inst->SrcReg[j].File == PROGRAM_ADDRESS) { - maxAddrReg = MAX2(maxAddrReg, inst->SrcReg[j].Index + 1); + maxAddrReg = MAX2(maxAddrReg, (GLuint) (inst->SrcReg[j].Index + 1)); } } if (inst->DstReg.File == PROGRAM_OUTPUT) { @@ -418,7 
+418,6 @@ _slang_update_inputs_outputs(struct gl_program *prog) maxAddrReg = MAX2(maxAddrReg, inst->DstReg.Index + 1); } } - prog->NumAddressRegs = maxAddrReg; } diff --git a/src/mesa/sources b/src/mesa/sources index 9b9eb991fd..9a34d1a989 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -273,20 +273,19 @@ MESA_SOURCES = \ ALL_SOURCES = \ $(MESA_SOURCES) \ $(GLAPI_SOURCES) \ - $(ASM_SOURCES) \ + $(MESA_ASM_SOURCES) \ $(COMMON_DRIVER_SOURCES) - ### Object files MESA_OBJECTS = \ $(MESA_SOURCES:.c=.o) \ - $(ASM_SOURCES:.S=.o) + $(MESA_ASM_SOURCES:.S=.o) GLAPI_OBJECTS = \ $(GLAPI_SOURCES:.c=.o) \ - $(ASM_API:.S=.o) + $(GLAPI_ASM_SOURCES:.S=.o) COMMON_DRIVER_OBJECTS = $(COMMON_DRIVER_SOURCES:.c=.o) @@ -300,5 +299,4 @@ COMMON_DRIVER_OBJECTS = $(COMMON_DRIVER_SOURCES:.c=.o) INCLUDE_DIRS = \ -I$(TOP)/include \ - -I$(TOP)/src/mesa \ - -I$(TOP)/src/mesa/main + -I$(TOP)/src/mesa diff --git a/src/mesa/sources.mak b/src/mesa/sources.mak new file mode 100644 index 0000000000..14fd8597a8 --- /dev/null +++ b/src/mesa/sources.mak @@ -0,0 +1,348 @@ +### Lists of source files, included by Makefiles + +MAIN_SOURCES = \ + main/api_arrayelt.c \ + main/api_exec.c \ + main/api_loopback.c \ + main/api_noop.c \ + main/api_validate.c \ + main/accum.c \ + main/attrib.c \ + main/arrayobj.c \ + main/blend.c \ + main/bufferobj.c \ + main/buffers.c \ + main/clear.c \ + main/clip.c \ + main/colortab.c \ + main/context.c \ + main/convolve.c \ + main/debug.c \ + main/depth.c \ + main/depthstencil.c \ + main/dlist.c \ + main/drawpix.c \ + main/enable.c \ + main/enums.c \ + main/eval.c \ + main/execmem.c \ + main/extensions.c \ + main/fbobject.c \ + main/feedback.c \ + main/ffvertex_prog.c \ + main/fog.c \ + main/framebuffer.c \ + main/get.c \ + main/getstring.c \ + main/hash.c \ + main/hint.c \ + main/histogram.c \ + main/image.c \ + main/imports.c \ + main/light.c \ + main/lines.c \ + main/matrix.c \ + main/mipmap.c \ + main/mm.c \ + main/multisample.c \ + main/pixel.c \ + main/pixelstore.c \ 
+ main/points.c \ + main/polygon.c \ + main/queryobj.c \ + main/rastpos.c \ + main/rbadaptors.c \ + main/readpix.c \ + main/renderbuffer.c \ + main/scissor.c \ + main/shaders.c \ + main/state.c \ + main/stencil.c \ + main/texcompress.c \ + main/texcompress_s3tc.c \ + main/texcompress_fxt1.c \ + main/texenv.c \ + main/texenvprogram.c \ + main/texformat.c \ + main/texgen.c \ + main/teximage.c \ + main/texobj.c \ + main/texparam.c \ + main/texrender.c \ + main/texstate.c \ + main/texstore.c \ + main/varray.c \ + main/vtxfmt.c + +GLAPI_SOURCES = \ + main/dispatch.c \ + glapi/glapi.c \ + glapi/glapi_getproc.c \ + glapi/glthread.c + +MATH_SOURCES = \ + math/m_debug_clip.c \ + math/m_debug_norm.c \ + math/m_debug_xform.c \ + math/m_eval.c \ + math/m_matrix.c \ + math/m_translate.c \ + math/m_vector.c \ + math/m_xform.c + +__SWRAST_SOURCES = \ + swrast/s_aaline.c \ + swrast/s_aatriangle.c \ + swrast/s_accum.c \ + swrast/s_alpha.c \ + swrast/s_atifragshader.c \ + swrast/s_bitmap.c \ + swrast/s_blend.c \ + swrast/s_blit.c \ + swrast/s_buffers.c \ + swrast/s_copypix.c \ + swrast/s_context.c \ + swrast/s_depth.c \ + swrast/s_drawpix.c \ + swrast/s_feedback.c \ + swrast/s_fog.c \ + swrast/s_fragprog.c \ + swrast/s_imaging.c \ + swrast/s_lines.c \ + swrast/s_logic.c \ + swrast/s_masking.c \ + swrast/s_points.c \ + swrast/s_readpix.c \ + swrast/s_span.c \ + swrast/s_stencil.c \ + swrast/s_texcombine.c \ + swrast/s_texfilter.c \ + swrast/s_texstore.c \ + swrast/s_triangle.c \ + swrast/s_zoom.c + +__SWRAST_SETUP_SOURCES = \ + swrast_setup/ss_context.c \ + swrast_setup/ss_triangle.c + +__TNL_SOURCES = \ + tnl/t_context.c \ + tnl/t_pipeline.c \ + tnl/t_draw.c \ + tnl/t_rasterpos.c \ + tnl/t_vb_program.c \ + tnl/t_vb_render.c \ + tnl/t_vb_texgen.c \ + tnl/t_vb_texmat.c \ + tnl/t_vb_vertex.c \ + tnl/t_vb_cull.c \ + tnl/t_vb_fog.c \ + tnl/t_vb_light.c \ + tnl/t_vb_normals.c \ + tnl/t_vb_points.c \ + tnl/t_vp_build.c \ + tnl/t_vertex.c \ + tnl/t_vertex_sse.c \ + tnl/t_vertex_generic.c + 
+VBO_SOURCES = \ + vbo/vbo_context.c \ + vbo/vbo_exec.c \ + vbo/vbo_exec_api.c \ + vbo/vbo_exec_array.c \ + vbo/vbo_exec_draw.c \ + vbo/vbo_exec_eval.c \ + vbo/vbo_rebase.c \ + vbo/vbo_split.c \ + vbo/vbo_split_copy.c \ + vbo/vbo_split_inplace.c \ + vbo/vbo_save.c \ + vbo/vbo_save_api.c \ + vbo/vbo_save_draw.c \ + vbo/vbo_save_loopback.c + +__VF_SOURCES = \ + vf/vf.c \ + vf/vf_generic.c \ + vf/vf_sse.c + +STATETRACKER_SOURCES = \ + state_tracker/st_atom.c \ + state_tracker/st_atom_blend.c \ + state_tracker/st_atom_clip.c \ + state_tracker/st_atom_constbuf.c \ + state_tracker/st_atom_depth.c \ + state_tracker/st_atom_framebuffer.c \ + state_tracker/st_atom_pixeltransfer.c \ + state_tracker/st_atom_sampler.c \ + state_tracker/st_atom_scissor.c \ + state_tracker/st_atom_shader.c \ + state_tracker/st_atom_rasterizer.c \ + state_tracker/st_atom_stipple.c \ + state_tracker/st_atom_texture.c \ + state_tracker/st_atom_viewport.c \ + state_tracker/st_cb_accum.c \ + state_tracker/st_cb_bitmap.c \ + state_tracker/st_cb_blit.c \ + state_tracker/st_cb_bufferobjects.c \ + state_tracker/st_cb_clear.c \ + state_tracker/st_cb_flush.c \ + state_tracker/st_cb_get.c \ + state_tracker/st_cb_drawpixels.c \ + state_tracker/st_cb_fbo.c \ + state_tracker/st_cb_feedback.c \ + state_tracker/st_cb_program.c \ + state_tracker/st_cb_queryobj.c \ + state_tracker/st_cb_rasterpos.c \ + state_tracker/st_cb_readpixels.c \ + state_tracker/st_cb_strings.c \ + state_tracker/st_cb_texture.c \ + state_tracker/st_api.c \ + state_tracker/st_context.c \ + state_tracker/st_debug.c \ + state_tracker/st_draw.c \ + state_tracker/st_extensions.c \ + state_tracker/st_format.c \ + state_tracker/st_framebuffer.c \ + state_tracker/st_gen_mipmap.c \ + state_tracker/st_mesa_to_tgsi.c \ + state_tracker/st_program.c \ + state_tracker/st_texture.c + +SHADER_SOURCES = \ + shader/arbprogparse.c \ + shader/arbprogram.c \ + shader/atifragshader.c \ + shader/grammar/grammar_mesa.c \ + shader/nvfragparse.c \ + 
shader/nvprogram.c \ + shader/nvvertparse.c \ + shader/program.c \ + shader/prog_cache.c \ + shader/prog_debug.c \ + shader/prog_execute.c \ + shader/prog_instruction.c \ + shader/prog_parameter.c \ + shader/prog_print.c \ + shader/prog_statevars.c \ + shader/prog_uniform.c \ + shader/programopt.c \ + shader/shader_api.c \ + +SLANG_SOURCES = \ + shader/slang/slang_builtin.c \ + shader/slang/slang_codegen.c \ + shader/slang/slang_compile.c \ + shader/slang/slang_compile_function.c \ + shader/slang/slang_compile_operation.c \ + shader/slang/slang_compile_struct.c \ + shader/slang/slang_compile_variable.c \ + shader/slang/slang_emit.c \ + shader/slang/slang_ir.c \ + shader/slang/slang_label.c \ + shader/slang/slang_library_noise.c \ + shader/slang/slang_link.c \ + shader/slang/slang_log.c \ + shader/slang/slang_mem.c \ + shader/slang/slang_preprocess.c \ + shader/slang/slang_print.c \ + shader/slang/slang_simplify.c \ + shader/slang/slang_storage.c \ + shader/slang/slang_typeinfo.c \ + shader/slang/slang_vartable.c \ + shader/slang/slang_utility.c + +ASM_C_SOURCES = \ + x86/common_x86.c \ + x86/x86.c \ + x86/3dnow.c \ + x86/sse.c \ + sparc/sparc.c \ + ppc/common_ppc.c \ + x86-64/x86-64.c + +X86_SOURCES = \ + x86/common_x86_asm.S \ + x86/x86_xform2.S \ + x86/x86_xform3.S \ + x86/x86_xform4.S \ + x86/x86_cliptest.S \ + x86/mmx_blend.S \ + x86/3dnow_xform1.S \ + x86/3dnow_xform2.S \ + x86/3dnow_xform3.S \ + x86/3dnow_xform4.S \ + x86/3dnow_normal.S \ + x86/sse_xform1.S \ + x86/sse_xform2.S \ + x86/sse_xform3.S \ + x86/sse_xform4.S \ + x86/sse_normal.S \ + x86/read_rgba_span_x86.S + +X86_API = \ + x86/glapi_x86.S + +X86-64_SOURCES = \ + x86-64/xform4.S + +X86-64_API = \ + x86-64/glapi_x86-64.S + +SPARC_SOURCES = \ + sparc/clip.S \ + sparc/norm.S \ + sparc/xform.S + +SPARC_API = \ + sparc/glapi_sparc.S + +__COMMON_DRIVER_SOURCES = \ + drivers/common/driverfuncs.c + + + +### All the core C sources + +MESA_SOURCES = \ + $(MAIN_SOURCES) \ + $(MATH_SOURCES) \ + $(VBO_SOURCES) 
\ + $(STATETRACKER_SOURCES) \ + $(TNL_SOURCES) \ + $(SHADER_SOURCES) \ + $(SWRAST_SOURCES) \ + $(SWRAST_SETUP_SOURCES) \ + $(ASM_C_SOURCES) \ + $(SLANG_SOURCES) + +ALL_SOURCES = \ + $(MESA_SOURCES) \ + $(GLAPI_SOURCES) \ + $(MESA_ASM_SOURCES) \ + $(COMMON_DRIVER_SOURCES) + + +### Object files + +MESA_OBJECTS = \ + $(MESA_SOURCES:.c=.o) \ + $(MESA_ASM_SOURCES:.S=.o) + +GLAPI_OBJECTS = \ + $(GLAPI_SOURCES:.c=.o) \ + $(GLAPI_ASM_SOURCES:.S=.o) + + +COMMON_DRIVER_OBJECTS = $(COMMON_DRIVER_SOURCES:.c=.o) + + + +### Include directories + +INCLUDE_DIRS = \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/auxiliary diff --git a/src/mesa/state_tracker/Makefile b/src/mesa/state_tracker/Makefile new file mode 100644 index 0000000000..0ab1dc6e6b --- /dev/null +++ b/src/mesa/state_tracker/Makefile @@ -0,0 +1,2 @@ +default: + cd ../.. ; make
\ No newline at end of file diff --git a/src/mesa/state_tracker/st_api.c b/src/mesa/state_tracker/st_api.c new file mode 100644 index 0000000000..fc0e9a2316 --- /dev/null +++ b/src/mesa/state_tracker/st_api.c @@ -0,0 +1,33 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Just a global symbol for EGL to look for to identify the supported + * graphics API. 
+ */ +int st_api_OpenGL = 1; diff --git a/src/mesa/state_tracker/st_atom.c b/src/mesa/state_tracker/st_atom.c new file mode 100644 index 0000000000..fc8587f459 --- /dev/null +++ b/src/mesa/state_tracker/st_atom.c @@ -0,0 +1,218 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "main/glheader.h" +#include "main/context.h" + +#include "pipe/p_defines.h" +#include "st_context.h" +#include "st_atom.h" +#include "st_cb_bitmap.h" +#include "st_program.h" + + + +/* This is used to initialize st->atoms[]. 
We could use this list + * directly except for a single atom, st_update_constants, which has a + * .dirty value which changes according to the parameters of the + * current fragment and vertex programs, and so cannot be a static + * value. + */ +static const struct st_tracked_state *atoms[] = +{ + &st_update_depth_stencil_alpha, + &st_update_clip, + + &st_finalize_textures, + &st_update_shader, + + &st_update_rasterizer, + &st_update_polygon_stipple, + &st_update_viewport, + &st_update_scissor, + &st_update_blend, + &st_update_sampler, + &st_update_texture, + &st_update_framebuffer, + &st_update_vs_constants, + &st_update_fs_constants, + &st_update_pixel_transfer +}; + + +void st_init_atoms( struct st_context *st ) +{ + GLuint i; + + st->atoms = malloc(sizeof(atoms)); + st->nr_atoms = sizeof(atoms)/sizeof(*atoms); + memcpy(st->atoms, atoms, sizeof(atoms)); + + /* Patch in a pointer to the dynamic state atom: + */ + for (i = 0; i < st->nr_atoms; i++) { + if (st->atoms[i] == &st_update_vs_constants) { + st->atoms[i] = &st->constants.tracked_state[PIPE_SHADER_VERTEX]; + st->atoms[i][0] = st_update_vs_constants; + } + + if (st->atoms[i] == &st_update_fs_constants) { + st->atoms[i] = &st->constants.tracked_state[PIPE_SHADER_FRAGMENT]; + st->atoms[i][0] = st_update_fs_constants; + } + } +} + + +void st_destroy_atoms( struct st_context *st ) +{ + if (st->atoms) { + free(st->atoms); + st->atoms = NULL; + } +} + + +/*********************************************************************** + */ + +static GLboolean check_state( const struct st_state_flags *a, + const struct st_state_flags *b ) +{ + return ((a->mesa & b->mesa) || + (a->st & b->st)); +} + +static void accumulate_state( struct st_state_flags *a, + const struct st_state_flags *b ) +{ + a->mesa |= b->mesa; + a->st |= b->st; +} + + +static void xor_states( struct st_state_flags *result, + const struct st_state_flags *a, + const struct st_state_flags *b ) +{ + result->mesa = a->mesa ^ b->mesa; + result->st = a->st ^ 
b->st; +} + + +/* Too complex to figure out, just check every time: + */ +static void check_program_state( struct st_context *st ) +{ + GLcontext *ctx = st->ctx; + + if (ctx->VertexProgram._Current != &st->vp->Base) + st->dirty.st |= ST_NEW_VERTEX_PROGRAM; + + if (ctx->FragmentProgram._Current != &st->fp->Base) + st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM; + +} + + +/*********************************************************************** + * Update all derived state: + */ + +void st_validate_state( struct st_context *st ) +{ + struct st_state_flags *state = &st->dirty; + GLuint i; + + /* The bitmap cache is immune to pixel unpack changes. + * Note that GLUT makes several calls to glPixelStore for each + * bitmap char it draws so this is an important check. + */ + if (state->mesa & ~_NEW_PACKUNPACK) + st_flush_bitmap_cache(st); + + check_program_state( st ); + + if (state->st == 0) + return; + +// _mesa_printf("%s %x/%x\n", __FUNCTION__, state->mesa, state->st); + + if (1) { + /* Debug version which enforces various sanity checks on the + * state flags which are generated and checked to help ensure + * state atoms are ordered correctly in the list. 
+ */ + struct st_state_flags examined, prev; + memset(&examined, 0, sizeof(examined)); + prev = *state; + + for (i = 0; i < st->nr_atoms; i++) { + const struct st_tracked_state *atom = st->atoms[i]; + struct st_state_flags generated; + +// _mesa_printf("atom %s %x/%x\n", atom->name, atom->dirty.mesa, atom->dirty.st); + + if (!(atom->dirty.mesa || atom->dirty.st) || + !atom->update) { + _mesa_printf("malformed atom %s\n", atom->name); + assert(0); + } + + if (check_state(state, &atom->dirty)) { + st->atoms[i]->update( st ); +// _mesa_printf("after: %x\n", atom->dirty.mesa); + } + + accumulate_state(&examined, &atom->dirty); + + /* generated = (prev ^ state) + * if (examined & generated) + * fail; + */ + xor_states(&generated, &prev, state); + assert(!check_state(&examined, &generated)); + prev = *state; + } +// _mesa_printf("\n"); + + } + else { + const GLuint nr = st->nr_atoms; + + for (i = 0; i < nr; i++) { + if (check_state(state, &st->atoms[i]->dirty)) + st->atoms[i]->update( st ); + } + } + + memset(state, 0, sizeof(*state)); +} + + + diff --git a/src/mesa/state_tracker/st_atom.h b/src/mesa/state_tracker/st_atom.h new file mode 100644 index 0000000000..c7cffd85c8 --- /dev/null +++ b/src/mesa/state_tracker/st_atom.h @@ -0,0 +1,66 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef ST_ATOM_H +#define ST_ATOM_H + +struct st_context; +struct st_tracked_state; + +void st_init_atoms( struct st_context *st ); +void st_destroy_atoms( struct st_context *st ); + + +void st_validate_state( struct st_context *st ); + + +extern const struct st_tracked_state st_update_framebuffer; +extern const struct st_tracked_state st_update_clip; +extern const struct st_tracked_state st_update_depth_stencil_alpha; +extern const struct st_tracked_state st_update_shader; +extern const struct st_tracked_state st_update_rasterizer; +extern const struct st_tracked_state st_update_polygon_stipple; +extern const struct st_tracked_state st_update_viewport; +extern const struct st_tracked_state st_update_scissor; +extern const struct st_tracked_state st_update_blend; +extern const struct st_tracked_state st_update_sampler; +extern const struct st_tracked_state st_update_texture; +extern const struct st_tracked_state st_finalize_textures; +extern const struct st_tracked_state st_update_fs_constants; +extern const struct st_tracked_state st_update_vs_constants; +extern const struct st_tracked_state st_update_pixel_transfer; + + +uint st_compare_func_to_pipe(GLenum func); + +#endif diff --git a/src/mesa/state_tracker/st_atom_blend.c b/src/mesa/state_tracker/st_atom_blend.c new file mode 100644 index 0000000000..35c09c3e08 --- /dev/null +++ b/src/mesa/state_tracker/st_atom_blend.c @@ -0,0 +1,232 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + + +#include "st_context.h" +#include "st_atom.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "cso_cache/cso_context.h" + +#include "main/macros.h" + +/** + * Convert GLenum blend tokens to pipe tokens. + * Both blend factors and blend funcs are accepted. 
+ */ +static GLuint +translate_blend(GLenum blend) +{ + switch (blend) { + /* blend functions */ + case GL_FUNC_ADD: + return PIPE_BLEND_ADD; + case GL_FUNC_SUBTRACT: + return PIPE_BLEND_SUBTRACT; + case GL_FUNC_REVERSE_SUBTRACT: + return PIPE_BLEND_REVERSE_SUBTRACT; + case GL_MIN: + return PIPE_BLEND_MIN; + case GL_MAX: + return PIPE_BLEND_MAX; + + /* blend factors */ + case GL_ONE: + return PIPE_BLENDFACTOR_ONE; + case GL_SRC_COLOR: + return PIPE_BLENDFACTOR_SRC_COLOR; + case GL_SRC_ALPHA: + return PIPE_BLENDFACTOR_SRC_ALPHA; + case GL_DST_ALPHA: + return PIPE_BLENDFACTOR_DST_ALPHA; + case GL_DST_COLOR: + return PIPE_BLENDFACTOR_DST_COLOR; + case GL_SRC_ALPHA_SATURATE: + return PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE; + case GL_CONSTANT_COLOR: + return PIPE_BLENDFACTOR_CONST_COLOR; + case GL_CONSTANT_ALPHA: + return PIPE_BLENDFACTOR_CONST_ALPHA; + /* + return PIPE_BLENDFACTOR_SRC1_COLOR; + return PIPE_BLENDFACTOR_SRC1_ALPHA; + */ + case GL_ZERO: + return PIPE_BLENDFACTOR_ZERO; + case GL_ONE_MINUS_SRC_COLOR: + return PIPE_BLENDFACTOR_INV_SRC_COLOR; + case GL_ONE_MINUS_SRC_ALPHA: + return PIPE_BLENDFACTOR_INV_SRC_ALPHA; + case GL_ONE_MINUS_DST_COLOR: + return PIPE_BLENDFACTOR_INV_DST_COLOR; + case GL_ONE_MINUS_DST_ALPHA: + return PIPE_BLENDFACTOR_INV_DST_ALPHA; + case GL_ONE_MINUS_CONSTANT_COLOR: + return PIPE_BLENDFACTOR_INV_CONST_COLOR; + case GL_ONE_MINUS_CONSTANT_ALPHA: + return PIPE_BLENDFACTOR_INV_CONST_ALPHA; + /* + return PIPE_BLENDFACTOR_INV_SRC1_COLOR; + return PIPE_BLENDFACTOR_INV_SRC1_ALPHA; + */ + default: + assert("invalid GL token in translate_blend()" == NULL); + return 0; + } +} + + +/** + * Convert GLenum logicop tokens to pipe tokens. 
+ */ +static GLuint +translate_logicop(GLenum logicop) +{ + switch (logicop) { + case GL_CLEAR: + return PIPE_LOGICOP_CLEAR; + case GL_NOR: + return PIPE_LOGICOP_NOR; + case GL_AND_INVERTED: + return PIPE_LOGICOP_AND_INVERTED; + case GL_COPY_INVERTED: + return PIPE_LOGICOP_COPY_INVERTED; + case GL_AND_REVERSE: + return PIPE_LOGICOP_AND_REVERSE; + case GL_INVERT: + return PIPE_LOGICOP_INVERT; + case GL_XOR: + return PIPE_LOGICOP_XOR; + case GL_NAND: + return PIPE_LOGICOP_NAND; + case GL_AND: + return PIPE_LOGICOP_AND; + case GL_EQUIV: + return PIPE_LOGICOP_EQUIV; + case GL_NOOP: + return PIPE_LOGICOP_NOOP; + case GL_OR_INVERTED: + return PIPE_LOGICOP_OR_INVERTED; + case GL_COPY: + return PIPE_LOGICOP_COPY; + case GL_OR_REVERSE: + return PIPE_LOGICOP_OR_REVERSE; + case GL_OR: + return PIPE_LOGICOP_OR; + case GL_SET: + return PIPE_LOGICOP_SET; + default: + assert("invalid GL token in translate_logicop()" == NULL); + return 0; + } +} + + +static void +update_blend( struct st_context *st ) +{ + struct pipe_blend_state *blend = &st->state.blend; + + memset(blend, 0, sizeof(*blend)); + + if (st->ctx->Color.ColorLogicOpEnabled || + (st->ctx->Color.BlendEnabled && + st->ctx->Color.BlendEquationRGB == GL_LOGIC_OP)) { + /* logicop enabled */ + blend->logicop_enable = 1; + blend->logicop_func = translate_logicop(st->ctx->Color.LogicOp); + } + else if (st->ctx->Color.BlendEnabled) { + /* blending enabled */ + blend->blend_enable = 1; + + blend->rgb_func = translate_blend(st->ctx->Color.BlendEquationRGB); + if (st->ctx->Color.BlendEquationRGB == GL_MIN || + st->ctx->Color.BlendEquationRGB == GL_MAX) { + /* Min/max are special */ + blend->rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend->rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + } + else { + blend->rgb_src_factor = translate_blend(st->ctx->Color.BlendSrcRGB); + blend->rgb_dst_factor = translate_blend(st->ctx->Color.BlendDstRGB); + } + + blend->alpha_func = translate_blend(st->ctx->Color.BlendEquationA); + if 
(st->ctx->Color.BlendEquationA == GL_MIN || + st->ctx->Color.BlendEquationA == GL_MAX) { + /* Min/max are special */ + blend->alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend->alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + } + else { + blend->alpha_src_factor = translate_blend(st->ctx->Color.BlendSrcA); + blend->alpha_dst_factor = translate_blend(st->ctx->Color.BlendDstA); + } + } + else { + /* no blending / logicop */ + } + + /* Colormask - maybe reverse these bits? */ + if (st->ctx->Color.ColorMask[0]) + blend->colormask |= PIPE_MASK_R; + if (st->ctx->Color.ColorMask[1]) + blend->colormask |= PIPE_MASK_G; + if (st->ctx->Color.ColorMask[2]) + blend->colormask |= PIPE_MASK_B; + if (st->ctx->Color.ColorMask[3]) + blend->colormask |= PIPE_MASK_A; + + if (st->ctx->Color.DitherFlag) + blend->dither = 1; + + cso_set_blend(st->cso_context, blend); + + { + struct pipe_blend_color bc; + COPY_4FV(bc.color, st->ctx->Color.BlendColor); + cso_set_blend_color(st->cso_context, &bc); + } +} + + +const struct st_tracked_state st_update_blend = { + "st_update_blend", /* name */ + { /* dirty */ + (_NEW_COLOR), /* XXX _NEW_BLEND someday? */ /* mesa */ + 0, /* st */ + }, + update_blend, /* update */ +}; diff --git a/src/mesa/state_tracker/st_atom_clip.c b/src/mesa/state_tracker/st_atom_clip.c new file mode 100644 index 0000000000..23d709b814 --- /dev/null +++ b/src/mesa/state_tracker/st_atom_clip.c @@ -0,0 +1,71 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "st_context.h" +#include "pipe/p_context.h" +#include "st_atom.h" + + +/* Second state atom for user clip planes: + */ +static void update_clip( struct st_context *st ) +{ + struct pipe_clip_state clip; + GLuint i; + + memset(&clip, 0, sizeof(clip)); + + for (i = 0; i < PIPE_MAX_CLIP_PLANES; i++) { + if (st->ctx->Transform.ClipPlanesEnabled & (1 << i)) { + memcpy(clip.ucp[clip.nr], + st->ctx->Transform._ClipUserPlane[i], + sizeof(clip.ucp[0])); + clip.nr++; + } + } + + if (memcmp(&clip, &st->state.clip, sizeof(clip)) != 0) { + st->state.clip = clip; + st->pipe->set_clip_state(st->pipe, &clip); + } +} + + +const struct st_tracked_state st_update_clip = { + "st_update_clip", /* name */ + { /* dirty */ + (_NEW_TRANSFORM), /* mesa */ + 0, /* st */ + }, + update_clip /* update */ +}; diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c new file mode 100644 index 0000000000..d02e51cb9a --- /dev/null +++ b/src/mesa/state_tracker/st_atom_constbuf.c @@ -0,0 +1,142 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + +#include "main/imports.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "st_context.h" +#include "st_atom.h" +#include "st_atom_constbuf.h" +#include "st_program.h" + + +/** + * Pass the given program parameters to the graphics pipe as a + * constant buffer. + * \param id either PIPE_SHADER_VERTEX or PIPE_SHADER_FRAGMENT + */ +void st_upload_constants( struct st_context *st, + struct gl_program_parameter_list *params, + unsigned id) +{ + struct pipe_context *pipe = st->pipe; + struct pipe_constant_buffer *cbuf = &st->state.constants[id]; + + assert(id == PIPE_SHADER_VERTEX || id == PIPE_SHADER_FRAGMENT); + + /* update constants */ + if (params && params->NumParameters) { + const uint paramBytes = params->NumParameters * sizeof(GLfloat) * 4; + + /* Update our own dependency flags. This works because this + * function will also be called whenever the program changes. + */ + st->constants.tracked_state[id].dirty.mesa = + (params->StateFlags | _NEW_PROGRAM); + + _mesa_load_state_parameters(st->ctx, params); + + /* We always need to get a new buffer, to keep the drivers simple and + * avoid gratuitous rendering synchronization. 
+ */ + pipe_buffer_reference(pipe->screen, &cbuf->buffer, NULL ); + cbuf->buffer = pipe_buffer_create(pipe->screen, 16, PIPE_BUFFER_USAGE_CONSTANT, + paramBytes ); + + if (0) + { + printf("%s(shader=%d, numParams=%d, stateFlags=0x%x)\n", + __FUNCTION__, id, params->NumParameters, params->StateFlags); + _mesa_print_parameter_list(params); + } + + /* load Mesa constants into the constant buffer */ + if (cbuf->buffer) { + void *map = pipe_buffer_map(pipe->screen, cbuf->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE); + memcpy(map, params->ParameterValues, paramBytes); + pipe_buffer_unmap(pipe->screen, cbuf->buffer); + } + + cbuf->size = paramBytes; + + st->pipe->set_constant_buffer(st->pipe, id, 0, cbuf); + } + else { + st->constants.tracked_state[id].dirty.mesa = 0; + // st->pipe->set_constant_buffer(st->pipe, id, 0, NULL); + } +} + +/* Vertex shader: + */ +static void update_vs_constants(struct st_context *st ) +{ + struct st_vertex_program *vp = st->vp; + struct gl_program_parameter_list *params = vp->Base.Base.Parameters; + + st_upload_constants( st, params, PIPE_SHADER_VERTEX ); +} + +const struct st_tracked_state st_update_vs_constants = { + "st_update_vs_constants", /* name */ + { /* dirty */ + 0, /* set dynamically above */ /* mesa */ + ST_NEW_VERTEX_PROGRAM, /* st */ + }, + update_vs_constants /* update */ +}; + +/* Fragment shader: + */ +static void update_fs_constants(struct st_context *st ) +{ + struct st_fragment_program *fp = st->fp; + struct gl_program_parameter_list *params = fp->Base.Base.Parameters; + + st_upload_constants( st, params, PIPE_SHADER_FRAGMENT ); +} + +const struct st_tracked_state st_update_fs_constants = { + "st_update_fs_constants", /* name */ + { /* dirty */ + 0, /* set dynamically above */ /* mesa */ + ST_NEW_FRAGMENT_PROGRAM, /* st */ + }, + update_fs_constants /* update */ +}; + diff --git a/src/mesa/state_tracker/st_atom_constbuf.h b/src/mesa/state_tracker/st_atom_constbuf.h new file mode 100644 index 0000000000..f707534e2c --- /dev/null 
+++ b/src/mesa/state_tracker/st_atom_constbuf.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef ST_ATOM_CONSTBUF_H +#define ST_ATOM_CONSTBUF_H + + +void st_upload_constants( struct st_context *st, + struct gl_program_parameter_list *params, + unsigned id); + + +#endif /* ST_ATOM_CONSTBUF_H */ diff --git a/src/mesa/state_tracker/st_atom_depth.c b/src/mesa/state_tracker/st_atom_depth.c new file mode 100644 index 0000000000..0e791ceb20 --- /dev/null +++ b/src/mesa/state_tracker/st_atom_depth.c @@ -0,0 +1,151 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + * Zack Rusin + */ + + +#include "st_context.h" +#include "st_atom.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "cso_cache/cso_context.h" + + +/** + * Convert an OpenGL compare mode to a pipe tokens. + */ +GLuint +st_compare_func_to_pipe(GLenum func) +{ + /* Same values, just biased */ + assert(PIPE_FUNC_NEVER == GL_NEVER - GL_NEVER); + assert(PIPE_FUNC_LESS == GL_LESS - GL_NEVER); + assert(PIPE_FUNC_EQUAL == GL_EQUAL - GL_NEVER); + assert(PIPE_FUNC_LEQUAL == GL_LEQUAL - GL_NEVER); + assert(PIPE_FUNC_GREATER == GL_GREATER - GL_NEVER); + assert(PIPE_FUNC_NOTEQUAL == GL_NOTEQUAL - GL_NEVER); + assert(PIPE_FUNC_GEQUAL == GL_GEQUAL - GL_NEVER); + assert(PIPE_FUNC_ALWAYS == GL_ALWAYS - GL_NEVER); + assert(func >= GL_NEVER); + assert(func <= GL_ALWAYS); + return func - GL_NEVER; +} + + +/** + * Convert GLenum stencil op tokens to pipe tokens. 
+ */ +static GLuint +gl_stencil_op_to_pipe(GLenum func) +{ + switch (func) { + case GL_KEEP: + return PIPE_STENCIL_OP_KEEP; + case GL_ZERO: + return PIPE_STENCIL_OP_ZERO; + case GL_REPLACE: + return PIPE_STENCIL_OP_REPLACE; + case GL_INCR: + return PIPE_STENCIL_OP_INCR; + case GL_DECR: + return PIPE_STENCIL_OP_DECR; + case GL_INCR_WRAP: + return PIPE_STENCIL_OP_INCR_WRAP; + case GL_DECR_WRAP: + return PIPE_STENCIL_OP_DECR_WRAP; + case GL_INVERT: + return PIPE_STENCIL_OP_INVERT; + default: + assert("invalid GL token in gl_stencil_op_to_pipe()" == NULL); + return 0; + } +} + +static void +update_depth_stencil_alpha(struct st_context *st) +{ + struct pipe_depth_stencil_alpha_state *dsa = &st->state.depth_stencil; + + memset(dsa, 0, sizeof(*dsa)); + + dsa->depth.enabled = st->ctx->Depth.Test; + dsa->depth.writemask = st->ctx->Depth.Mask; + dsa->depth.func = st_compare_func_to_pipe(st->ctx->Depth.Func); + + if (st->ctx->Query.CurrentOcclusionObject && + st->ctx->Query.CurrentOcclusionObject->Active) + dsa->depth.occlusion_count = 1; + + if (st->ctx->Stencil.Enabled && st->ctx->Visual.stencilBits > 0) { + dsa->stencil[0].enabled = 1; + dsa->stencil[0].func = st_compare_func_to_pipe(st->ctx->Stencil.Function[0]); + dsa->stencil[0].fail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.FailFunc[0]); + dsa->stencil[0].zfail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZFailFunc[0]); + dsa->stencil[0].zpass_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZPassFunc[0]); + dsa->stencil[0].ref_value = st->ctx->Stencil.Ref[0] & 0xff; + dsa->stencil[0].value_mask = st->ctx->Stencil.ValueMask[0] & 0xff; + dsa->stencil[0].write_mask = st->ctx->Stencil.WriteMask[0] & 0xff; + + if (st->ctx->Stencil._TestTwoSide) { + dsa->stencil[1].enabled = 1; + dsa->stencil[1].func = st_compare_func_to_pipe(st->ctx->Stencil.Function[1]); + dsa->stencil[1].fail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.FailFunc[1]); + dsa->stencil[1].zfail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZFailFunc[1]); + 
dsa->stencil[1].zpass_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZPassFunc[1]); + dsa->stencil[1].ref_value = st->ctx->Stencil.Ref[1] & 0xff; + dsa->stencil[1].value_mask = st->ctx->Stencil.ValueMask[1] & 0xff; + dsa->stencil[1].write_mask = st->ctx->Stencil.WriteMask[1] & 0xff; + } + else { + dsa->stencil[1] = dsa->stencil[0]; + dsa->stencil[1].enabled = 0; + } + } + + if (st->ctx->Color.AlphaEnabled) { + dsa->alpha.enabled = 1; + dsa->alpha.func = st_compare_func_to_pipe(st->ctx->Color.AlphaFunc); + dsa->alpha.ref = st->ctx->Color.AlphaRef; + } + + cso_set_depth_stencil_alpha(st->cso_context, dsa); +} + + +const struct st_tracked_state st_update_depth_stencil_alpha = { + "st_update_depth_stencil", /* name */ + { /* dirty */ + (_NEW_DEPTH|_NEW_STENCIL|_NEW_COLOR), /* mesa */ + 0, /* st */ + }, + update_depth_stencil_alpha /* update */ +}; diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c b/src/mesa/state_tracker/st_atom_framebuffer.c new file mode 100644 index 0000000000..b4f42c6f93 --- /dev/null +++ b/src/mesa/state_tracker/st_atom_framebuffer.c @@ -0,0 +1,165 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + +#include "st_context.h" +#include "st_atom.h" +#include "st_cb_fbo.h" +#include "st_texture.h" +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" +#include "cso_cache/cso_context.h" + + + +/** + * When doing GL render to texture, we have to be sure that finalize_texture() + * didn't yank out the pipe_texture that we earlier created a surface for. + * Check for that here and create a new surface if needed. 
+ */ +static void +update_renderbuffer_surface(struct st_context *st, + struct st_renderbuffer *strb) +{ + struct pipe_screen *screen = st->pipe->screen; + struct pipe_texture *texture = strb->rtt->pt; + int rtt_width = strb->Base.Width; + int rtt_height = strb->Base.Height; + + if (!strb->surface || + strb->surface->texture != texture || + strb->surface->width != rtt_width || + strb->surface->height != rtt_height) { + GLuint level; + /* find matching mipmap level size */ + for (level = 0; level <= texture->last_level; level++) { + if (texture->width[level] == rtt_width && + texture->height[level] == rtt_height) { + + pipe_surface_reference(&strb->surface, NULL); + + strb->surface = screen->get_tex_surface(screen, + texture, + strb->rtt_face, + level, + strb->rtt_slice, + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE); +#if 0 + printf("-- alloc new surface %d x %d into tex %p\n", + strb->surface->width, strb->surface->height, + texture); +#endif + break; + } + } + } +} + + +/** + * Update framebuffer state (color, depth, stencil, etc. 
buffers) + */ +static void +update_framebuffer_state( struct st_context *st ) +{ + struct pipe_framebuffer_state *framebuffer = &st->state.framebuffer; + struct gl_framebuffer *fb = st->ctx->DrawBuffer; + struct st_renderbuffer *strb; + GLuint i; + + memset(framebuffer, 0, sizeof(*framebuffer)); + + framebuffer->width = fb->Width; + framebuffer->height = fb->Height; + + /*printf("------ fb size %d x %d\n", fb->Width, fb->Height);*/ + + /* Examine Mesa's ctx->DrawBuffer->_ColorDrawBuffers state + * to determine which surfaces to draw to + */ + framebuffer->num_cbufs = 0; + for (i = 0; i < fb->_NumColorDrawBuffers; i++) { + strb = st_renderbuffer(fb->_ColorDrawBuffers[i]); + + /*printf("--------- framebuffer surface rtt %p\n", strb->rtt);*/ + if (strb->rtt) { + /* rendering to a GL texture, may have to update surface */ + update_renderbuffer_surface(st, strb); + } + + assert(strb->surface); + framebuffer->cbufs[framebuffer->num_cbufs] = strb->surface; + framebuffer->num_cbufs++; + } + + strb = st_renderbuffer(fb->Attachment[BUFFER_DEPTH].Renderbuffer); + if (strb) { + strb = st_renderbuffer(strb->Base.Wrapped); + if (strb->rtt) { + /* rendering to a GL texture, may have to update surface */ + update_renderbuffer_surface(st, strb); + } + + assert(strb->surface); + framebuffer->zsbuf = strb->surface; + } + else { + strb = st_renderbuffer(fb->Attachment[BUFFER_STENCIL].Renderbuffer); + if (strb) { + strb = st_renderbuffer(strb->Base.Wrapped); + assert(strb->surface); + framebuffer->zsbuf = strb->surface; + } + } + + cso_set_framebuffer(st->cso_context, framebuffer); + + if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { + if (st->frontbuffer_status == FRONT_STATUS_COPY_OF_BACK) { + /* XXX copy back buf to front? 
*/ + } + /* we're assuming we'll really draw to the front buffer */ + st->frontbuffer_status = FRONT_STATUS_DIRTY; + } +} + + +const struct st_tracked_state st_update_framebuffer = { + "st_update_framebuffer", /* name */ + { /* dirty */ + _NEW_BUFFERS, /* mesa */ + ST_NEW_FRAMEBUFFER, /* st */ + }, + update_framebuffer_state /* update */ +}; + diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c new file mode 100644 index 0000000000..a357b71677 --- /dev/null +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -0,0 +1,474 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/* + * Generate fragment programs to implement pixel transfer ops, such as + * scale/bias, colormatrix, colortable, convolution... + * + * Authors: + * Brian Paul + */ + +#include "main/imports.h" +#include "main/image.h" +#include "main/macros.h" +#include "shader/program.h" +#include "shader/prog_instruction.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + +#include "st_context.h" +#include "st_format.h" +#include "st_program.h" +#include "st_texture.h" + +#include "pipe/p_screen.h" +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" +#include "util/u_pack_color.h" + + +struct state_key +{ + GLuint scaleAndBias:1; + GLuint colorMatrix:1; + GLuint colorMatrixPostScaleBias:1; + GLuint pixelMaps:1; + +#if 0 + GLfloat Maps[3][256][4]; + int NumMaps; + GLint NumStages; + pipeline_stage Stages[STAGE_MAX]; + GLboolean StagesUsed[STAGE_MAX]; + GLfloat Scale1[4], Bias1[4]; + GLfloat Scale2[4], Bias2[4]; +#endif +}; + + +static GLboolean +is_identity(const GLfloat m[16]) +{ + GLuint i; + for (i = 0; i < 16; i++) { + const int row = i % 4, col = i / 4; + const float val = (GLfloat)(row == col); + if (m[i] != val) + return GL_FALSE; + } + return GL_TRUE; +} + + +static void +make_state_key(GLcontext *ctx, struct state_key *key) +{ + static const GLfloat zero[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; + static const GLfloat one[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; + + memset(key, 0, sizeof(*key)); + + if (ctx->Pixel.RedBias != 0.0 || ctx->Pixel.RedScale != 1.0 || + ctx->Pixel.GreenBias != 0.0 || ctx->Pixel.GreenScale != 1.0 || + ctx->Pixel.BlueBias != 0.0 || ctx->Pixel.BlueScale != 1.0 || + ctx->Pixel.AlphaBias != 0.0 || ctx->Pixel.AlphaScale != 1.0) { + key->scaleAndBias = 1; + } + + if (!is_identity(ctx->ColorMatrixStack.Top->m)) { + key->colorMatrix = 1; + } + + if (!TEST_EQ_4V(ctx->Pixel.PostColorMatrixScale, one) || + !TEST_EQ_4V(ctx->Pixel.PostColorMatrixBias, zero)) { + 
key->colorMatrixPostScaleBias = 1; + } + + key->pixelMaps = ctx->Pixel.MapColorFlag; +} + + +static struct pipe_texture * +create_color_map_texture(GLcontext *ctx) +{ + struct pipe_context *pipe = ctx->st->pipe; + struct pipe_texture *pt; + enum pipe_format format; + const uint texSize = 256; /* simple, and usually perfect */ + + /* find an RGBA texture format */ + format = st_choose_format(pipe, GL_RGBA, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_SAMPLER); + + /* create texture for color map/table */ + pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, format, 0, + texSize, texSize, 1, 0, + PIPE_TEXTURE_USAGE_SAMPLER); + return pt; +} + + +/** + * Update the pixelmap texture with the contents of the R/G/B/A pixel maps. + */ +static void +load_color_map_texture(GLcontext *ctx, struct pipe_texture *pt) +{ + struct pipe_context *pipe = ctx->st->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_surface *surface; + const GLuint rSize = ctx->PixelMaps.RtoR.Size; + const GLuint gSize = ctx->PixelMaps.GtoG.Size; + const GLuint bSize = ctx->PixelMaps.BtoB.Size; + const GLuint aSize = ctx->PixelMaps.AtoA.Size; + const uint texSize = pt->width[0]; + uint *dest; + uint i, j; + + surface = screen->get_tex_surface(screen, pt, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); + dest = (uint *) screen->surface_map(screen, surface, + PIPE_BUFFER_USAGE_CPU_WRITE); + + /* Pack four 1D maps into a 2D texture: + * R map is placed horizontally, indexed by S, in channel 0 + * G map is placed vertically, indexed by T, in channel 1 + * B map is placed horizontally, indexed by S, in channel 2 + * A map is placed vertically, indexed by T, in channel 3 + */ + for (i = 0; i < texSize; i++) { + for (j = 0; j < texSize; j++) { + int k = (i * texSize + j); + ubyte r = ctx->PixelMaps.RtoR.Map8[j * rSize / texSize]; + ubyte g = ctx->PixelMaps.GtoG.Map8[i * gSize / texSize]; + ubyte b = ctx->PixelMaps.BtoB.Map8[j * bSize / texSize]; + ubyte a = ctx->PixelMaps.AtoA.Map8[i * aSize / texSize]; + 
util_pack_color_ub(r, g, b, a, pt->format, dest + k); + } + } + + screen->surface_unmap(screen, surface); + pipe_surface_reference(&surface, NULL); +} + + + +#define MAX_INST 100 + +/** + * Returns a fragment program which implements the current pixel transfer ops. + */ +static struct gl_fragment_program * +get_pixel_transfer_program(GLcontext *ctx, const struct state_key *key) +{ + struct st_context *st = ctx->st; + struct prog_instruction inst[MAX_INST]; + struct gl_program_parameter_list *params; + struct gl_fragment_program *fp; + GLuint ic = 0; + const GLuint colorTemp = 0; + + fp = (struct gl_fragment_program *) + ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + if (!fp) + return NULL; + + params = _mesa_new_parameter_list(); + + /* + * Get initial pixel color from the texture. + * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; + */ + _mesa_init_instructions(inst + ic, 1); + inst[ic].Opcode = OPCODE_TEX; + inst[ic].DstReg.File = PROGRAM_TEMPORARY; + inst[ic].DstReg.Index = colorTemp; + inst[ic].SrcReg[0].File = PROGRAM_INPUT; + inst[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0; + inst[ic].TexSrcUnit = 0; + inst[ic].TexSrcTarget = TEXTURE_2D_INDEX; + ic++; + fp->Base.InputsRead = (1 << FRAG_ATTRIB_TEX0); + fp->Base.OutputsWritten = (1 << FRAG_RESULT_COLR); + fp->Base.SamplersUsed = 0x1; /* sampler 0 (bit 0) is used */ + + if (key->scaleAndBias) { + static const gl_state_index scale_state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_PT_SCALE, 0, 0, 0 }; + static const gl_state_index bias_state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_PT_BIAS, 0, 0, 0 }; + GLfloat scale[4], bias[4]; + GLint scale_p, bias_p; + + scale[0] = ctx->Pixel.RedScale; + scale[1] = ctx->Pixel.GreenScale; + scale[2] = ctx->Pixel.BlueScale; + scale[3] = ctx->Pixel.AlphaScale; + bias[0] = ctx->Pixel.RedBias; + bias[1] = ctx->Pixel.GreenBias; + bias[2] = ctx->Pixel.BlueBias; + bias[3] = ctx->Pixel.AlphaBias; + + scale_p = _mesa_add_state_reference(params, scale_state); + bias_p = 
_mesa_add_state_reference(params, bias_state); + + /* MAD colorTemp, colorTemp, scale, bias; */ + _mesa_init_instructions(inst + ic, 1); + inst[ic].Opcode = OPCODE_MAD; + inst[ic].DstReg.File = PROGRAM_TEMPORARY; + inst[ic].DstReg.Index = colorTemp; + inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY; + inst[ic].SrcReg[0].Index = colorTemp; + inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR; + inst[ic].SrcReg[1].Index = scale_p; + inst[ic].SrcReg[2].File = PROGRAM_STATE_VAR; + inst[ic].SrcReg[2].Index = bias_p; + ic++; + } + + if (key->pixelMaps) { + const GLuint temp = 1; + + /* create the colormap/texture now if not already done */ + if (!st->pixel_xfer.pixelmap_texture) { + st->pixel_xfer.pixelmap_texture = create_color_map_texture(ctx); + } + + /* with a little effort, we can do four pixel map look-ups with + * two TEX instructions: + */ + + /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ + _mesa_init_instructions(inst + ic, 1); + inst[ic].Opcode = OPCODE_TEX; + inst[ic].DstReg.File = PROGRAM_TEMPORARY; + inst[ic].DstReg.Index = temp; + inst[ic].DstReg.WriteMask = WRITEMASK_XY; /* write R,G */ + inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY; + inst[ic].SrcReg[0].Index = colorTemp; + inst[ic].TexSrcUnit = 1; + inst[ic].TexSrcTarget = TEXTURE_2D_INDEX; + ic++; + + /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ + _mesa_init_instructions(inst + ic, 1); + inst[ic].Opcode = OPCODE_TEX; + inst[ic].DstReg.File = PROGRAM_TEMPORARY; + inst[ic].DstReg.Index = temp; + inst[ic].DstReg.WriteMask = WRITEMASK_ZW; /* write B,A */ + inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY; + inst[ic].SrcReg[0].Index = colorTemp; + inst[ic].SrcReg[0].Swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, + SWIZZLE_Z, SWIZZLE_W); + inst[ic].TexSrcUnit = 1; + inst[ic].TexSrcTarget = TEXTURE_2D_INDEX; + ic++; + + /* MOV colorTemp, temp; */ + _mesa_init_instructions(inst + ic, 1); + inst[ic].Opcode = OPCODE_MOV; + inst[ic].DstReg.File = PROGRAM_TEMPORARY; + inst[ic].DstReg.Index = colorTemp; + 
inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY; + inst[ic].SrcReg[0].Index = temp; + ic++; + + fp->Base.SamplersUsed |= (1 << 1); /* sampler 1 is used */ + } + + if (key->colorMatrix) { + static const gl_state_index row0_state[STATE_LENGTH] = + { STATE_COLOR_MATRIX, 0, 0, 0, 0 }; + static const gl_state_index row1_state[STATE_LENGTH] = + { STATE_COLOR_MATRIX, 0, 1, 1, 0 }; + static const gl_state_index row2_state[STATE_LENGTH] = + { STATE_COLOR_MATRIX, 0, 2, 2, 0 }; + static const gl_state_index row3_state[STATE_LENGTH] = + { STATE_COLOR_MATRIX, 0, 3, 3, 0 }; + + GLint row0_p = _mesa_add_state_reference(params, row0_state); + GLint row1_p = _mesa_add_state_reference(params, row1_state); + GLint row2_p = _mesa_add_state_reference(params, row2_state); + GLint row3_p = _mesa_add_state_reference(params, row3_state); + const GLuint temp = 1; + + /* DP4 temp.x, colorTemp, matrow0; */ + _mesa_init_instructions(inst + ic, 1); + inst[ic].Opcode = OPCODE_DP4; + inst[ic].DstReg.File = PROGRAM_TEMPORARY; + inst[ic].DstReg.Index = temp; + inst[ic].DstReg.WriteMask = WRITEMASK_X; + inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY; + inst[ic].SrcReg[0].Index = colorTemp; + inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR; + inst[ic].SrcReg[1].Index = row0_p; + ic++; + + /* DP4 temp.y, colorTemp, matrow1; */ + _mesa_init_instructions(inst + ic, 1); + inst[ic].Opcode = OPCODE_DP4; + inst[ic].DstReg.File = PROGRAM_TEMPORARY; + inst[ic].DstReg.Index = temp; + inst[ic].DstReg.WriteMask = WRITEMASK_Y; + inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY; + inst[ic].SrcReg[0].Index = colorTemp; + inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR; + inst[ic].SrcReg[1].Index = row1_p; + ic++; + + /* DP4 temp.z, colorTemp, matrow2; */ + _mesa_init_instructions(inst + ic, 1); + inst[ic].Opcode = OPCODE_DP4; + inst[ic].DstReg.File = PROGRAM_TEMPORARY; + inst[ic].DstReg.Index = temp; + inst[ic].DstReg.WriteMask = WRITEMASK_Z; + inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY; + inst[ic].SrcReg[0].Index = colorTemp; + 
inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR; + inst[ic].SrcReg[1].Index = row2_p; + ic++; + + /* DP4 temp.w, colorTemp, matrow3; */ + _mesa_init_instructions(inst + ic, 1); + inst[ic].Opcode = OPCODE_DP4; + inst[ic].DstReg.File = PROGRAM_TEMPORARY; + inst[ic].DstReg.Index = temp; + inst[ic].DstReg.WriteMask = WRITEMASK_W; + inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY; + inst[ic].SrcReg[0].Index = colorTemp; + inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR; + inst[ic].SrcReg[1].Index = row3_p; + ic++; + + /* MOV colorTemp, temp; */ + _mesa_init_instructions(inst + ic, 1); + inst[ic].Opcode = OPCODE_MOV; + inst[ic].DstReg.File = PROGRAM_TEMPORARY; + inst[ic].DstReg.Index = colorTemp; + inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY; + inst[ic].SrcReg[0].Index = temp; + ic++; + } + + if (key->colorMatrixPostScaleBias) { + static const gl_state_index scale_state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_PT_SCALE, 0, 0, 0 }; + static const gl_state_index bias_state[STATE_LENGTH] = + { STATE_INTERNAL, STATE_PT_BIAS, 0, 0, 0 }; + GLint scale_param, bias_param; + + scale_param = _mesa_add_state_reference(params, scale_state); + bias_param = _mesa_add_state_reference(params, bias_state); + + _mesa_init_instructions(inst + ic, 1); + inst[ic].Opcode = OPCODE_MAD; + inst[ic].DstReg.File = PROGRAM_TEMPORARY; + inst[ic].DstReg.Index = colorTemp; + inst[ic].SrcReg[0].File = PROGRAM_TEMPORARY; + inst[ic].SrcReg[0].Index = colorTemp; + inst[ic].SrcReg[1].File = PROGRAM_STATE_VAR; + inst[ic].SrcReg[1].Index = scale_param; + inst[ic].SrcReg[2].File = PROGRAM_STATE_VAR; + inst[ic].SrcReg[2].Index = bias_param; + ic++; + } + + /* Modify last instruction's dst reg to write to result.color */ + { + struct prog_instruction *last = &inst[ic - 1]; + last->DstReg.File = PROGRAM_OUTPUT; + last->DstReg.Index = FRAG_RESULT_COLR; + } + + /* END; */ + _mesa_init_instructions(inst + ic, 1); + inst[ic].Opcode = OPCODE_END; + ic++; + + assert(ic <= MAX_INST); + + + fp->Base.Instructions = 
_mesa_alloc_instructions(ic); + if (!fp->Base.Instructions) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, + "generating pixel transfer program"); + return NULL; + } + + _mesa_copy_instructions(fp->Base.Instructions, inst, ic); + fp->Base.NumInstructions = ic; + fp->Base.Parameters = params; + +#if 0 + printf("========= pixel transfer prog\n"); + _mesa_print_program(&fp->Base); + _mesa_print_parameter_list(fp->Base.Parameters); +#endif + + return fp; +} + + + +/** + * Update st->pixel_xfer.program in response to new pixel-transfer state. + */ +static void +update_pixel_transfer(struct st_context *st) +{ + GLcontext *ctx = st->ctx; + struct state_key key; + struct gl_fragment_program *fp; + + make_state_key(st->ctx, &key); + + fp = (struct gl_fragment_program *) + _mesa_search_program_cache(st->pixel_xfer.cache, &key, sizeof(key)); + if (!fp) { + fp = get_pixel_transfer_program(st->ctx, &key); + _mesa_program_cache_insert(st->ctx, st->pixel_xfer.cache, + &key, sizeof(key), &fp->Base); + } + + if (ctx->Pixel.MapColorFlag) { + load_color_map_texture(ctx, st->pixel_xfer.pixelmap_texture); + } + st->pixel_xfer.pixelmap_enabled = ctx->Pixel.MapColorFlag; + + st->pixel_xfer.program = (struct st_fragment_program *) fp; +} + + + +const struct st_tracked_state st_update_pixel_transfer = { + "st_update_pixel_transfer", /* name */ + { /* dirty */ + _NEW_PIXEL | _NEW_COLOR_MATRIX, /* mesa */ + 0, /* st */ + }, + update_pixel_transfer /* update */ +}; diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c new file mode 100644 index 0000000000..5eef4ebe92 --- /dev/null +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -0,0 +1,286 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ **************************************************************************/
+
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+#include "main/macros.h"
+#include "st_context.h"
+#include "st_atom.h"
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "cso_cache/cso_context.h"
+
+
+/**
+ * Map a GL polygon mode (GL_POINT, GL_LINE, GL_FILL) to the matching
+ * PIPE_POLYGON_MODE_x value.  Any other value asserts and yields 0.
+ */
+static GLuint translate_fill( GLenum mode )
+{
+   switch (mode) {
+   case GL_POINT:
+      return PIPE_POLYGON_MODE_POINT;
+   case GL_LINE:
+      return PIPE_POLYGON_MODE_LINE;
+   case GL_FILL:
+      return PIPE_POLYGON_MODE_FILL;
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+/**
+ * Return the polygon-offset enable flag of \p p (OffsetPoint, OffsetLine
+ * or OffsetFill) that corresponds to the given PIPE_POLYGON_MODE_x fill
+ * mode.  Any other fill mode asserts and yields 0.
+ */
+static GLboolean get_offset_flag( GLuint fill_mode,
+                                  const struct gl_polygon_attrib *p )
+{
+   switch (fill_mode) {
+   case PIPE_POLYGON_MODE_POINT:
+      return p->OffsetPoint;
+   case PIPE_POLYGON_MODE_LINE:
+      return p->OffsetLine;
+   case PIPE_POLYGON_MODE_FILL:
+      return p->OffsetFill;
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+
+/**
+ * Rebuild st->state.rasterizer from the current GL context state and hand
+ * it to the CSO context.
+ *
+ * The structure is cleared first, so every field not explicitly set below
+ * is zero.  The order of the sections matters: front_winding is computed
+ * before cull_mode and the fill modes, and the fill modes are computed
+ * before the polygon-offset flags that are looked up from them.
+ */
+static void update_raster_state( struct st_context *st )
+{
+   GLcontext *ctx = st->ctx;
+   struct pipe_rasterizer_state *raster = &st->state.rasterizer;
+   const struct gl_vertex_program *vertProg = ctx->VertexProgram._Current;
+   uint i;
+
+   memset(raster, 0, sizeof(*raster));
+
+   raster->origin_lower_left = 1; /* Always true for OpenGL */
+
+   /* _NEW_POLYGON, _NEW_BUFFERS
+    */
+   {
+      if (ctx->Polygon.FrontFace == GL_CCW)
+         raster->front_winding = PIPE_WINDING_CCW;
+      else
+         raster->front_winding = PIPE_WINDING_CW;
+
+      /* XXX
+       * I think the intention here is that user-created framebuffer objects
+       * use Y=0=TOP layout instead of OpenGL's normal Y=0=bottom layout.
+       * Flipping Y changes CW to CCW and vice-versa.
+       * But this is an implementation/driver-specific artifact - remove...
+       */
+      if (ctx->DrawBuffer && ctx->DrawBuffer->Name != 0)
+         raster->front_winding ^= PIPE_WINDING_BOTH;
+   }
+
+   /* _NEW_LIGHT
+    */
+   if (ctx->Light.ShadeModel == GL_FLAT)
+      raster->flatshade = 1;
+
+   /* _NEW_LIGHT | _NEW_PROGRAM
+    *
+    * Back-face colors can come from traditional lighting (when
+    * GL_LIGHT_MODEL_TWO_SIDE is set) or from vertex programs (when
+    * GL_VERTEX_PROGRAM_TWO_SIDE is set).  Note the logic here.
+    */
+   if (ctx->VertexProgram._Current) {
+      if (ctx->VertexProgram._Enabled) {
+         /* user-defined program */
+         raster->light_twoside = ctx->VertexProgram.TwoSideEnabled;
+      }
+      else {
+         /* TNL-generated program */
+         raster->light_twoside = ctx->Light.Enabled && ctx->Light.Model.TwoSide;
+      }
+   }
+   else if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) {
+      raster->light_twoside = 1;
+   }
+
+   /* _NEW_POLYGON
+    *
+    * Culling is expressed in terms of winding: GL_FRONT culls the front
+    * winding, anything else (other than GL_FRONT_AND_BACK) culls the
+    * opposite winding.
+    */
+   if (ctx->Polygon.CullFlag) {
+      if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) {
+         raster->cull_mode = PIPE_WINDING_BOTH;
+      }
+      else if (ctx->Polygon.CullFaceMode == GL_FRONT) {
+         raster->cull_mode = raster->front_winding;
+      }
+      else {
+         raster->cull_mode = raster->front_winding ^ PIPE_WINDING_BOTH;
+      }
+   }
+
+   /* _NEW_POLYGON
+    *
+    * Translate front/back polygon modes into per-winding fill modes.
+    */
+   {
+      GLuint fill_front = translate_fill( ctx->Polygon.FrontMode );
+      GLuint fill_back = translate_fill( ctx->Polygon.BackMode );
+
+      if (raster->front_winding == PIPE_WINDING_CW) {
+         raster->fill_cw = fill_front;
+         raster->fill_ccw = fill_back;
+      }
+      else {
+         raster->fill_cw = fill_back;
+         raster->fill_ccw = fill_front;
+      }
+
+      /* Simplify when culling is active:
+       * a culled winding just inherits the other winding's fill mode.
+       */
+      if (raster->cull_mode & PIPE_WINDING_CW) {
+         raster->fill_cw = raster->fill_ccw;
+      }
+
+      if (raster->cull_mode & PIPE_WINDING_CCW) {
+         raster->fill_ccw = raster->fill_cw;
+      }
+   }
+
+   /* _NEW_POLYGON
+    */
+   if (ctx->Polygon.OffsetUnits != 0.0 ||
+       ctx->Polygon.OffsetFactor != 0.0) {
+      raster->offset_cw = get_offset_flag( raster->fill_cw, &ctx->Polygon );
+      raster->offset_ccw = get_offset_flag( raster->fill_ccw, &ctx->Polygon );
+      raster->offset_units = ctx->Polygon.OffsetUnits;
+      raster->offset_scale = ctx->Polygon.OffsetFactor;
+   }
+
+   if (ctx->Polygon.SmoothFlag)
+      raster->poly_smooth = 1;
+
+   if (ctx->Polygon.StippleFlag)
+      raster->poly_stipple_enable = 1;
+
+
+   /* _NEW_BUFFERS, _NEW_POLYGON
+    *
+    * NOTE(review): when either winding is not drawn filled, this block
+    * overwrites the offset_units/offset_scale values assigned above, and it
+    * pairs OffsetFactor with offset_units and OffsetUnits with offset_scale,
+    * i.e. the opposite pairing of the block above.  This looks suspicious;
+    * confirm the intent before relying on it.
+    */
+   if (raster->fill_cw != PIPE_POLYGON_MODE_FILL ||
+       raster->fill_ccw != PIPE_POLYGON_MODE_FILL)
+   {
+      GLfloat mrd = (ctx->DrawBuffer ?
+                     ctx->DrawBuffer->_MRD :
+                     1.0f);
+
+      raster->offset_units = ctx->Polygon.OffsetFactor * mrd;
+      raster->offset_scale = (ctx->Polygon.OffsetUnits * mrd *
+                            st->polygon_offset_scale);
+   }
+
+   /* _NEW_POINT
+    */
+   raster->point_size = ctx->Point.Size;
+
+   raster->point_size_min = 0;    /* temporary, will go away */
+   raster->point_size_max = 1000; /* temporary, will go away */
+
+   raster->point_smooth = ctx->Point.SmoothFlag;
+   raster->point_sprite = ctx->Point.PointSprite;
+   for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
+      if (ctx->Point.CoordReplace[i]) {
+         if (ctx->Point.SpriteOrigin == GL_UPPER_LEFT)
+            raster->sprite_coord_mode[i] = PIPE_SPRITE_COORD_UPPER_LEFT;
+         else
+            raster->sprite_coord_mode[i] = PIPE_SPRITE_COORD_LOWER_LEFT;
+      }
+      else {
+         raster->sprite_coord_mode[i] = PIPE_SPRITE_COORD_NONE;
+      }
+   }
+
+   /* ST_NEW_VERTEX_PROGRAM
+    */
+   if (vertProg) {
+      if (vertProg->Base.Id == 0) {
+         if (vertProg->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
+            /* generated program which emits point size */
+            raster->point_size_per_vertex = TRUE;
+         }
+      }
+      else if (ctx->VertexProgram.PointSizeEnabled) {
+         /* user-defined program and GL_VERTEX_PROGRAM_POINT_SIZE set */
+         raster->point_size_per_vertex = ctx->VertexProgram.PointSizeEnabled;
+      }
+   }
+   if (!raster->point_size_per_vertex) {
+      /* clamp size now */
+      raster->point_size = CLAMP(ctx->Point.Size,
+                                 ctx->Point.MinSize,
+                                 ctx->Point.MaxSize);
+   }
+
+   /* _NEW_LINE
+    *
+    * Smooth (AA) and aliased lines are clamped to different width limits.
+    */
+   raster->line_smooth = ctx->Line.SmoothFlag;
+   if (ctx->Line.SmoothFlag) {
+      raster->line_width = CLAMP(ctx->Line.Width,
+                                ctx->Const.MinLineWidthAA,
+                                ctx->Const.MaxLineWidthAA);
+   }
+   else {
+      raster->line_width = CLAMP(ctx->Line.Width,
+                                ctx->Const.MinLineWidth,
+                                ctx->Const.MaxLineWidth);
+   }
+
+   raster->line_stipple_enable = ctx->Line.StippleFlag;
+   raster->line_stipple_pattern = ctx->Line.StipplePattern;
+   /* GL stipple factor is in [1,256], remap to [0, 255] here */
+   raster->line_stipple_factor = ctx->Line.StippleFactor - 1;
+
+   /* _NEW_MULTISAMPLE */
+   if (ctx->Multisample._Enabled || st->force_msaa)
+      raster->multisample = 1;
+
+   /* _NEW_SCISSOR */
+   if (ctx->Scissor.Enabled)
+      raster->scissor = 1;
+
+   raster->gl_rasterization_rules = 1;
+
+   cso_set_rasterizer(st->cso_context, raster);
+}
+
+/* Tracked-state atom: lists the Mesa and state-tracker dirty flags that
+ * cause update_raster_state() to be re-run.
+ */
+const struct st_tracked_state st_update_rasterizer = {
+   "st_update_rasterizer",    /* name */
+   {
+      (_NEW_BUFFERS |
+       _NEW_LIGHT |
+       _NEW_LINE |
+       _NEW_MULTISAMPLE |
+       _NEW_POINT |
+       _NEW_POLYGON |
+       _NEW_PROGRAM |
+       _NEW_SCISSOR),      /* mesa state dependencies*/
+      ST_NEW_VERTEX_PROGRAM,  /* state tracker dependencies */
+   },
+   update_raster_state     /* update function */
+};
diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c
new file mode 100644
index 0000000000..cef61fb55c
--- /dev/null
+++ b/src/mesa/state_tracker/st_atom_sampler.c
@@ -0,0 +1,211 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + + +#include "main/macros.h" + +#include "st_context.h" +#include "st_cb_texture.h" +#include "st_atom.h" +#include "st_program.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" + +#include "cso_cache/cso_context.h" + + +/** + * Convert GLenum texcoord wrap tokens to pipe tokens. 
+ */
+static GLuint
+gl_wrap_to_sp(GLenum wrap)
+{
+   switch (wrap) {
+   case GL_REPEAT:
+      return PIPE_TEX_WRAP_REPEAT;
+   case GL_CLAMP:
+      return PIPE_TEX_WRAP_CLAMP;
+   case GL_CLAMP_TO_EDGE:
+      return PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+   case GL_CLAMP_TO_BORDER:
+      return PIPE_TEX_WRAP_CLAMP_TO_BORDER;
+   case GL_MIRRORED_REPEAT:
+      return PIPE_TEX_WRAP_MIRROR_REPEAT;
+   case GL_MIRROR_CLAMP_EXT:
+      return PIPE_TEX_WRAP_MIRROR_CLAMP;
+   case GL_MIRROR_CLAMP_TO_EDGE_EXT:
+      return PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
+   case GL_MIRROR_CLAMP_TO_BORDER_EXT:
+      return PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER;
+   default:
+      assert(0);
+      return 0;
+   }
+}
+
+
+/**
+ * Extract the between-mipmap-levels part of a GL texture filter token.
+ * Non-mipmapped filters map to PIPE_TEX_MIPFILTER_NONE; unknown tokens
+ * assert and also yield PIPE_TEX_MIPFILTER_NONE.
+ */
+static GLuint
+gl_filter_to_mip_filter(GLenum filter)
+{
+   switch (filter) {
+   case GL_NEAREST:
+   case GL_LINEAR:
+      return PIPE_TEX_MIPFILTER_NONE;
+
+   case GL_NEAREST_MIPMAP_NEAREST:
+   case GL_LINEAR_MIPMAP_NEAREST:
+      return PIPE_TEX_MIPFILTER_NEAREST;
+
+   case GL_NEAREST_MIPMAP_LINEAR:
+   case GL_LINEAR_MIPMAP_LINEAR:
+      return PIPE_TEX_MIPFILTER_LINEAR;
+
+   default:
+      assert(0);
+      return PIPE_TEX_MIPFILTER_NONE;
+   }
+}
+
+
+/**
+ * Extract the within-one-image part of a GL texture filter token.
+ * Unknown tokens assert and yield PIPE_TEX_FILTER_NEAREST.
+ */
+static GLuint
+gl_filter_to_img_filter(GLenum filter)
+{
+   switch (filter) {
+   case GL_NEAREST:
+   case GL_NEAREST_MIPMAP_NEAREST:
+   case GL_NEAREST_MIPMAP_LINEAR:
+      return PIPE_TEX_FILTER_NEAREST;
+
+   case GL_LINEAR:
+   case GL_LINEAR_MIPMAP_NEAREST:
+   case GL_LINEAR_MIPMAP_LINEAR:
+      return PIPE_TEX_FILTER_LINEAR;
+
+   default:
+      assert(0);
+      return PIPE_TEX_FILTER_NEAREST;
+   }
+}
+
+
+/**
+ * Rebuild the pipe sampler state for every sampler unit and pass it to the
+ * CSO context.
+ *
+ * For each sampler unit used by the current fragment program (st->fp) the
+ * state is derived from the texture object bound to the texture unit that
+ * the sampler refers to (SamplerUnits[su]); the default texture is used if
+ * nothing is bound.  Unused sampler units are set to NULL.
+ * st->state.num_samplers ends up as the index of the highest used sampler
+ * plus one.
+ */
+static void
+update_samplers(struct st_context *st)
+{
+   const struct st_fragment_program *fs = st->fp;
+   GLuint su;
+
+   st->state.num_samplers = 0;
+
+   /*printf("%s samplers used = 0x%x\n", __FUNCTION__, fs->Base.Base.SamplersUsed);*/
+
+   /* loop over sampler units (aka tex image units) */
+   for (su = 0; su < st->ctx->Const.MaxTextureImageUnits; su++) {
+      struct pipe_sampler_state *sampler = st->state.samplers + su;
+
+      memset(sampler, 0, sizeof(*sampler));
+
+      if (fs->Base.Base.SamplersUsed & (1 << su)) {
+         GLuint texUnit = fs->Base.Base.SamplerUnits[su];
+         const struct gl_texture_object *texobj
+            = st->ctx->Texture.Unit[texUnit]._Current;
+
+         if (!texobj) {
+            texobj = st_get_default_texture(st);
+         }
+
+         sampler->wrap_s = gl_wrap_to_sp(texobj->WrapS);
+         sampler->wrap_t = gl_wrap_to_sp(texobj->WrapT);
+         sampler->wrap_r = gl_wrap_to_sp(texobj->WrapR);
+
+         sampler->min_img_filter = gl_filter_to_img_filter(texobj->MinFilter);
+         sampler->min_mip_filter = gl_filter_to_mip_filter(texobj->MinFilter);
+         sampler->mag_img_filter = gl_filter_to_img_filter(texobj->MagFilter);
+
+         if (texobj->Target != GL_TEXTURE_RECTANGLE_ARB)
+            sampler->normalized_coords = 1;
+
+         /* The LOD bias is texture-unit state, so it must be read from the
+          * same unit the texture object came from (texUnit), not from the
+          * sampler index.
+          */
+         sampler->lod_bias = st->ctx->Texture.Unit[texUnit].LodBias;
+         sampler->min_lod = MAX2(0.0f, texobj->MinLod);
+         sampler->max_lod = MIN2(texobj->MaxLevel - texobj->BaseLevel,
+                                 texobj->MaxLod);
+         if (sampler->max_lod < sampler->min_lod) {
+            /* The GL spec doesn't seem to specify what to do in this case.
+             * Swap the values.
+             */
+            float tmp = sampler->max_lod;
+            sampler->max_lod = sampler->min_lod;
+            sampler->min_lod = tmp;
+            assert(sampler->min_lod <= sampler->max_lod);
+         }
+
+         sampler->border_color[0] = texobj->BorderColor[RCOMP];
+         sampler->border_color[1] = texobj->BorderColor[GCOMP];
+         sampler->border_color[2] = texobj->BorderColor[BCOMP];
+         sampler->border_color[3] = texobj->BorderColor[ACOMP];
+
+         /* Anisotropy above 1.0 overrides both image filters. */
+         sampler->max_anisotropy = texobj->MaxAnisotropy;
+         if (sampler->max_anisotropy > 1.0) {
+            sampler->min_img_filter = PIPE_TEX_FILTER_ANISO;
+            sampler->mag_img_filter = PIPE_TEX_FILTER_ANISO;
+         }
+
+         /* only care about ARB_shadow, not SGI shadow */
+         if (texobj->CompareMode == GL_COMPARE_R_TO_TEXTURE) {
+            sampler->compare_mode = PIPE_TEX_COMPARE_R_TO_TEXTURE;
+            sampler->compare_func
+               = st_compare_func_to_pipe(texobj->CompareFunc);
+         }
+
+         st->state.num_samplers = su + 1;
+
+         /*printf("%s su=%u non-null\n", __FUNCTION__, su);*/
+         cso_single_sampler(st->cso_context, su, sampler);
+      }
+      else {
+         /*printf("%s su=%u null\n", __FUNCTION__, su);*/
+         cso_single_sampler(st->cso_context, su, NULL);
+      }
+   }
+
+   cso_single_sampler_done(st->cso_context);
+}
+
+
+const struct st_tracked_state st_update_sampler = {
+   "st_update_sampler",					/* name */
+   {							/* dirty */
+      _NEW_TEXTURE,					/* mesa */
+      0,						/* st */
+   },
+   update_samplers					/* update */
+};
diff --git a/src/mesa/state_tracker/st_atom_scissor.c b/src/mesa/state_tracker/st_atom_scissor.c
new file mode 100644
index 0000000000..3fd59e1945
--- /dev/null
+++ b/src/mesa/state_tracker/st_atom_scissor.c
@@ -0,0 +1,92 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+  * Authors:
+  *   Keith Whitwell <keith@tungstengraphics.com>
+  */
+
+
+#include "st_context.h"
+#include "pipe/p_context.h"
+#include "st_atom.h"
+
+
+/**
+ * Scissor depends on the scissor box, and the framebuffer dimensions.
+ *
+ * The scissor rectangle is the whole draw buffer, intersected with the GL
+ * scissor box when scissoring is enabled.  The intersection is computed in
+ * signed arithmetic: a GL scissor origin may be negative, and comparing it
+ * as an unsigned value would turn a partially visible box into an empty
+ * one.  An empty intersection is reported as an all-zero box (before the
+ * Y flip).  The pipe driver is only notified when the result differs from
+ * the cached st->state.scissor.
+ */
+static void
+update_scissor( struct st_context *st )
+{
+   struct pipe_scissor_state scissor;
+   const struct gl_framebuffer *fb = st->ctx->DrawBuffer;
+   GLint minx = 0, miny = 0;
+   GLint maxx = (GLint) fb->Width;
+   GLint maxy = (GLint) fb->Height;
+
+   if (st->ctx->Scissor.Enabled) {
+      const GLint right = st->ctx->Scissor.X + st->ctx->Scissor.Width;
+      const GLint top = st->ctx->Scissor.Y + st->ctx->Scissor.Height;
+
+      if (st->ctx->Scissor.X > minx)
+         minx = st->ctx->Scissor.X;
+      if (st->ctx->Scissor.Y > miny)
+         miny = st->ctx->Scissor.Y;
+
+      if (right < maxx)
+         maxx = right;
+      if (top < maxy)
+         maxy = top;
+
+      /* check for null space */
+      if (minx >= maxx || miny >= maxy)
+         minx = miny = maxx = maxy = 0;
+   }
+
+   /* Clear the whole struct so the memcmp() below never looks at
+    * uninitialized bytes.
+    */
+   memset(&scissor, 0, sizeof(scissor));
+
+   scissor.minx = minx;
+   scissor.maxx = maxx;
+
+   /* Now invert Y.  Pipe drivers use the convention Y=0=top for surfaces
+    */
+   scissor.miny = fb->Height - maxy;
+   scissor.maxy = fb->Height - miny;
+
+   if (memcmp(&scissor, &st->state.scissor, sizeof(scissor)) != 0) {
+      /* state has changed */
+      st->state.scissor = scissor;  /* struct copy */
+      st->pipe->set_scissor_state(st->pipe, &scissor); /* activate */
+   }
+}
+
+
+const struct st_tracked_state st_update_scissor = {
+   "st_update_scissor",					/* name */
+   {							/* dirty */
+      (_NEW_SCISSOR | _NEW_BUFFERS),			/* mesa */
+      0,						/* st */
+   },
+   update_scissor					/* update */
+};
diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c
new file mode 100644
index 0000000000..cbd414e2d3
--- /dev/null
+++ b/src/mesa/state_tracker/st_atom_shader.c
@@ -0,0 +1,369 @@
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * State validation for vertex/fragment shaders.
+ * Note that we have to delay most vertex/fragment shader translation
+ * until rendering time since the linkage between the vertex outputs and
+ * fragment inputs can vary depending on the pairing of shaders.
+ *
+ * Authors:
+ *   Brian Paul
+ */
+
+
+
+#include "main/imports.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "shader/program.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_shader_tokens.h"
+
+#include "util/u_simple_shaders.h"
+
+#include "cso_cache/cso_context.h"
+
+#include "st_context.h"
+#include "st_atom.h"
+#include "st_program.h"
+#include "st_atom_shader.h"
+#include "st_mesa_to_tgsi.h"
+
+
+/**
+ * This represents a vertex program, especially translated to match
+ * the inputs of a particular fragment shader.
+ */
+struct translated_vertex_program
+{
+   struct st_vertex_program *master;
+
+   /** The fragment shader "signature" this vertex shader is meant for: */
+   GLbitfield frag_inputs;
+
+   /** Compared against master vertex program's serialNo: */
+   GLuint serialNo;
+
+   /** Maps VERT_RESULT_x to slot */
+   GLuint output_to_slot[VERT_RESULT_MAX];
+   ubyte output_to_semantic_name[VERT_RESULT_MAX];
+   ubyte output_to_semantic_index[VERT_RESULT_MAX];
+
+   /** Pointer to the translated vertex program */
+   struct st_vertex_program *vp;
+
+   struct translated_vertex_program *next;  /**< next in linked list */
+};
+
+
+
+/**
+ * Given a vertex program output attribute, return the corresponding
+ * fragment program input attribute.
+ * \return -1 for vertex outputs that have no corresponding fragment input
+ */
+static GLint
+vp_out_to_fp_in(GLuint vertResult)
+{
+   if (vertResult >= VERT_RESULT_TEX0 &&
+       vertResult < VERT_RESULT_TEX0 + MAX_TEXTURE_COORD_UNITS)
+      return FRAG_ATTRIB_TEX0 + (vertResult - VERT_RESULT_TEX0);
+
+   if (vertResult >= VERT_RESULT_VAR0 &&
+       vertResult < VERT_RESULT_VAR0 + MAX_VARYING)
+      return FRAG_ATTRIB_VAR0 + (vertResult - VERT_RESULT_VAR0);
+
+   switch (vertResult) {
+   case VERT_RESULT_HPOS:
+      return FRAG_ATTRIB_WPOS;
+   case VERT_RESULT_COL0:
+      return FRAG_ATTRIB_COL0;
+   case VERT_RESULT_COL1:
+      return FRAG_ATTRIB_COL1;
+   case VERT_RESULT_FOGC:
+      return FRAG_ATTRIB_FOGC;
+   default:
+      /* Back-face colors, edge flags, etc */
+      return -1;
+   }
+}
+
+
+/**
+ * Find a translated vertex program that corresponds to stvp and
+ * has outputs matched to stfp's inputs.
+ * This performs vertex and fragment translation (to TGSI) when needed.
+ *
+ * Translated programs are kept in a linked list hanging off the fragment
+ * program (stfp->vertex_programs) and are re-translated whenever the
+ * master vertex program's serialNo changes.
+ *
+ * \return the translated program, or NULL if a new list entry could not
+ *         be allocated.
+ */
+static struct translated_vertex_program *
+find_translated_vp(struct st_context *st,
+                   struct st_vertex_program *stvp,
+                   struct st_fragment_program *stfp)
+{
+   static const GLuint UNUSED = ~0;
+   struct translated_vertex_program *xvp;
+   const GLbitfield fragInputsRead = stfp->Base.Base.InputsRead;
+
+   /*
+    * Translate fragment program if needed.
+    */
+   if (!stfp->state.tokens) {
+      GLuint inAttr, numIn = 0;
+
+      for (inAttr = 0; inAttr < FRAG_ATTRIB_MAX; inAttr++) {
+         if (fragInputsRead & (1 << inAttr)) {
+            stfp->input_to_slot[inAttr] = numIn;
+            numIn++;
+         }
+         else {
+            stfp->input_to_slot[inAttr] = UNUSED;
+         }
+      }
+
+      stfp->num_input_slots = numIn;
+
+      assert(stfp->Base.Base.NumInstructions > 1);
+
+      st_translate_fragment_program(st, stfp, stfp->input_to_slot);
+   }
+
+
+   /* See if we've got a translated vertex program whose outputs match
+    * the fragment program's inputs.
+    * XXX This could be a hash lookup, using InputsRead as the key.
+    */
+   for (xvp = stfp->vertex_programs; xvp; xvp = xvp->next) {
+      if (xvp->master == stvp && xvp->frag_inputs == fragInputsRead) {
+         break;
+      }
+   }
+
+   /* No?  Allocate translated vp object now */
+   if (!xvp) {
+      xvp = CALLOC_STRUCT(translated_vertex_program);
+      if (!xvp) {
+         /* out of memory - let the caller decide what to do */
+         return NULL;
+      }
+      xvp->frag_inputs = fragInputsRead;
+      xvp->master = stvp;
+
+      xvp->next = stfp->vertex_programs;
+      stfp->vertex_programs = xvp;
+   }
+
+   /* See if we need to translate vertex program to TGSI form */
+   if (xvp->serialNo != stvp->serialNo) {
+      GLuint outAttr, dummySlot;
+      const GLbitfield outputsWritten = stvp->Base.Base.OutputsWritten;
+      GLuint numVpOuts = 0;
+      GLboolean emitPntSize = GL_FALSE, emitBFC0 = GL_FALSE, emitBFC1 = GL_FALSE;
+      GLint maxGeneric;
+
+      /* Compute mapping of vertex program outputs to slots, which depends
+       * on the fragment program's input->slot mapping.
+       */
+      for (outAttr = 0; outAttr < VERT_RESULT_MAX; outAttr++) {
+         /* set defaults: */
+         xvp->output_to_slot[outAttr] = UNUSED;
+         xvp->output_to_semantic_name[outAttr] = TGSI_SEMANTIC_COUNT;
+         xvp->output_to_semantic_index[outAttr] = 99;
+
+         if (outAttr == VERT_RESULT_HPOS) {
+            /* always put xformed position into slot zero */
+            xvp->output_to_slot[VERT_RESULT_HPOS] = 0;
+            xvp->output_to_semantic_name[outAttr] = TGSI_SEMANTIC_POSITION;
+            xvp->output_to_semantic_index[outAttr] = 0;
+            numVpOuts++;
+         }
+         else if (outputsWritten & (1 << outAttr)) {
+            /* see if the frag prog wants this vert output */
+            GLint fpInAttrib = vp_out_to_fp_in(outAttr);
+            if (fpInAttrib >= 0) {
+               GLuint fpInSlot = stfp->input_to_slot[fpInAttrib];
+               if (fpInSlot != UNUSED) {
+                  /* match this vp output to the fp input */
+                  GLuint vpOutSlot = stfp->input_map[fpInSlot];
+                  xvp->output_to_slot[outAttr] = vpOutSlot;
+                  xvp->output_to_semantic_name[outAttr] = stfp->input_semantic_name[fpInSlot];
+                  xvp->output_to_semantic_index[outAttr] = stfp->input_semantic_index[fpInSlot];
+                  numVpOuts++;
+               }
+            }
+            else if (outAttr == VERT_RESULT_PSIZ)
+               emitPntSize = GL_TRUE;
+            else if (outAttr == VERT_RESULT_BFC0)
+               emitBFC0 = GL_TRUE;
+            else if (outAttr == VERT_RESULT_BFC1)
+               emitBFC1 = GL_TRUE;
+         }
+#if 0 /*debug*/
+         printf("assign vp output_to_slot[%d] = %d\n", outAttr,
+                xvp->output_to_slot[outAttr]);
+#endif
+      }
+
+      /* must do these last */
+      if (emitPntSize) {
+         xvp->output_to_slot[VERT_RESULT_PSIZ] = numVpOuts++;
+         xvp->output_to_semantic_name[VERT_RESULT_PSIZ] = TGSI_SEMANTIC_PSIZE;
+         xvp->output_to_semantic_index[VERT_RESULT_PSIZ] = 0;
+      }
+      if (emitBFC0) {
+         xvp->output_to_slot[VERT_RESULT_BFC0] = numVpOuts++;
+         xvp->output_to_semantic_name[VERT_RESULT_BFC0] = TGSI_SEMANTIC_COLOR;
+         xvp->output_to_semantic_index[VERT_RESULT_BFC0] = 0;
+      }
+      if (emitBFC1) {
+         /* The semantic info must be stored in the BFC1 entries; writing
+          * it to the BFC0 entries would leave BFC1 with the default
+          * (invalid) semantic and clobber BFC0's index.
+          */
+         xvp->output_to_slot[VERT_RESULT_BFC1] = numVpOuts++;
+         xvp->output_to_semantic_name[VERT_RESULT_BFC1] = TGSI_SEMANTIC_COLOR;
+         xvp->output_to_semantic_index[VERT_RESULT_BFC1] = 1;
+      }
+
+      /* Unneeded vertex program outputs will go to this slot.
+       * We could use this info to do dead code elimination in the
+       * vertex program.
+       */
+      dummySlot = numVpOuts;
+
+      /* find max GENERIC slot index */
+      maxGeneric = -1;
+      for (outAttr = 0; outAttr < VERT_RESULT_MAX; outAttr++) {
+         if (xvp->output_to_semantic_name[outAttr] == TGSI_SEMANTIC_GENERIC) {
+            maxGeneric = MAX2(maxGeneric,
+                              xvp->output_to_semantic_index[outAttr]);
+         }
+      }
+
+      /* Map vert program outputs that aren't used to the dummy slot
+       * (and an unused generic attribute slot).
+       */
+      for (outAttr = 0; outAttr < VERT_RESULT_MAX; outAttr++) {
+         if (outputsWritten & (1 << outAttr)) {
+            if (xvp->output_to_slot[outAttr] == UNUSED) {
+               xvp->output_to_slot[outAttr] = dummySlot;
+               xvp->output_to_semantic_name[outAttr] = TGSI_SEMANTIC_GENERIC;
+               xvp->output_to_semantic_index[outAttr] = maxGeneric + 1;
+            }
+         }
+
+#if 0 /*debug*/
+         printf("vp output_to_slot[%d] = %d\n", outAttr,
+                xvp->output_to_slot[outAttr]);
+#endif
+      }
+
+      assert(stvp->Base.Base.NumInstructions > 1);
+
+      st_translate_vertex_program(st, stvp, xvp->output_to_slot,
+                                  xvp->output_to_semantic_name,
+                                  xvp->output_to_semantic_index);
+
+      xvp->vp = stvp;
+
+      /* translated VP is up to date now */
+      xvp->serialNo = stvp->serialNo;
+   }
+
+   return xvp;
+}
+
+
+/**
+ * Free every entry of a translated-vertex-program list, starting at xvp.
+ * Only the list nodes themselves are freed.
+ */
+void
+st_free_translated_vertex_programs(struct st_context *st,
+                                   struct translated_vertex_program *xvp)
+{
+   struct translated_vertex_program *next;
+
+   while (xvp) {
+      next = xvp->next;
+      free(xvp);
+      xvp = next;
+   }
+}
+
+
+/**
+ * Return the pass-through fragment shader, creating and caching it in
+ * st->passthrough_fs on first use.
+ */
+static void *
+get_passthrough_fs(struct st_context *st)
+{
+   struct pipe_shader_state shader;
+
+   if (!st->passthrough_fs) {
+      st->passthrough_fs =
+         util_make_fragment_passthrough_shader(st->pipe, &shader);
+#if 0      /* We actually need to keep the tokens around at this time */
+      free((void *) shader.tokens);
+#endif
+   }
+
+   return st->passthrough_fs;
+}
+
+
+/**
+ * Bind the current vertex/fragment program pair: make sure a vertex program
+ * translation matching the fragment program exists, reference both programs
+ * in the st context and set the driver shader handles.  When
+ * st->missing_textures is set, the pass-through fragment shader is bound
+ * instead of the real one.
+ */
+static void
+update_linkage( struct st_context *st )
+{
+   struct st_vertex_program *stvp;
+   struct st_fragment_program *stfp;
+   struct translated_vertex_program *xvp;
+
+   /* find active shader and params -- Should be covered by
+    * ST_NEW_VERTEX_PROGRAM
+    */
+   assert(st->ctx->VertexProgram._Current);
+   stvp = st_vertex_program(st->ctx->VertexProgram._Current);
+   assert(stvp->Base.Base.Target == GL_VERTEX_PROGRAM_ARB);
+
+   assert(st->ctx->FragmentProgram._Current);
+   stfp = st_fragment_program(st->ctx->FragmentProgram._Current);
+   assert(stfp->Base.Base.Target == GL_FRAGMENT_PROGRAM_ARB);
+
+   xvp = find_translated_vp(st, stvp, stfp);
+   if (!xvp) {
+      /* Out of memory: keep the previously bound shaders rather than
+       * dereferencing a NULL translation below.
+       */
+      return;
+   }
+
+   st_reference_vertprog(st, &st->vp, stvp);
+   st_reference_fragprog(st, &st->fp, stfp);
+
+   cso_set_vertex_shader_handle(st->cso_context, stvp->driver_shader);
+
+   if (st->missing_textures) {
+      /* use a pass-through frag shader that uses no textures */
+      void *fs = get_passthrough_fs(st);
+      cso_set_fragment_shader_handle(st->cso_context, fs);
+   }
+   else {
+      cso_set_fragment_shader_handle(st->cso_context, stfp->driver_shader);
+   }
+
+   st->vertex_result_to_slot = xvp->output_to_slot;
+}
+
+
+const struct st_tracked_state st_update_shader = {
+   "st_update_shader",					/* name */
+   {							/* dirty */
+      0,						/* mesa */
+      ST_NEW_VERTEX_PROGRAM | ST_NEW_FRAGMENT_PROGRAM	/* st */
+   },
+   update_linkage					/* update */
+};
diff --git a/src/mesa/state_tracker/st_atom_shader.h b/src/mesa/state_tracker/st_atom_shader.h
new file mode 100644
index 0000000000..8403bc66c9
--- /dev/null
+++ b/src/mesa/state_tracker/st_atom_shader.h
@@ -0,0 +1,38 @@
+/**************************************************************************
+ *
+ * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef ST_ATOM_SHADER_H +#define ST_ATOM_SHADER_H + + +extern void +st_free_translated_vertex_programs(struct st_context *st, + struct translated_vertex_program *xvp); + + +#endif /* ST_ATOM_SHADER_H */ diff --git a/src/mesa/state_tracker/st_atom_stipple.c b/src/mesa/state_tracker/st_atom_stipple.c new file mode 100644 index 0000000000..f395930ab4 --- /dev/null +++ b/src/mesa/state_tracker/st_atom_stipple.c @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+ /*
+  * \brief polygon stipple state
+  *
+  * Authors:
+  *   Brian Paul
+  */
+
+
+#include "st_context.h"
+#include "st_atom.h"
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+
+
+/**
+ * Push the GL polygon stipple pattern to the pipe driver, but only when it
+ * differs from the copy cached in st->state.poly_stipple.
+ */
+static void
+update_stipple( struct st_context *st )
+{
+   const GLuint nbytes = sizeof(st->state.poly_stipple.stipple);
+
+   /* the cached copy and the GL pattern must have the same size */
+   assert(nbytes == sizeof(st->ctx->PolygonStipple));
+
+   if (memcmp(&st->state.poly_stipple.stipple,
+              st->ctx->PolygonStipple, nbytes) == 0) {
+      /* nothing changed - the driver already has this pattern */
+      return;
+   }
+
+   /* remember the new pattern, then hand it to the driver */
+   memcpy(st->state.poly_stipple.stipple, st->ctx->PolygonStipple, nbytes);
+   st->pipe->set_polygon_stipple(st->pipe, &st->state.poly_stipple);
+}
+
+
+const struct st_tracked_state st_update_polygon_stipple = {
+   "st_update_polygon_stipple",				/* name */
+   {							/* dirty */
+      (_NEW_POLYGONSTIPPLE),				/* mesa */
+      0,						/* st */
+   },
+   update_stipple					/* update */
+};
diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c
new file mode 100644
index 0000000000..fb03766ff5
--- /dev/null
+++ b/src/mesa/state_tracker/st_atom_texture.c
@@ -0,0 +1,154 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + + +#include "st_context.h" +#include "st_atom.h" +#include "st_texture.h" +#include "st_cb_texture.h" +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" +#include "cso_cache/cso_context.h" + + +static void +update_textures(struct st_context *st) +{ + struct gl_fragment_program *fprog = st->ctx->FragmentProgram._Current; + GLuint su; + + st->state.num_textures = 0; + + /*printf("%s samplers used = 0x%x\n", __FUNCTION__, fprog->Base.SamplersUsed);*/ + + for (su = 0; su < st->ctx->Const.MaxTextureCoordUnits; su++) { + struct pipe_texture *pt = NULL; + + if (fprog->Base.SamplersUsed & (1 << su)) { + const GLuint texUnit = fprog->Base.SamplerUnits[su]; + struct gl_texture_object *texObj + = st->ctx->Texture.Unit[texUnit]._Current; + struct st_texture_object *stObj; + GLboolean flush, retval; + + if (!texObj) { + texObj = st_get_default_texture(st); + } + stObj = st_texture_object(texObj); + + retval = st_finalize_texture(st->ctx, st->pipe, texObj, &flush); + if (!retval) { + /* out of mem */ + continue; + } + + st->state.num_textures = su + 1; + + pt = st_get_stobj_texture(stObj); + } + + /* + if (pt) { + printf("%s su=%u non-null\n", __FUNCTION__, su); + } + else { + printf("%s su=%u null\n", __FUNCTION__, su); + } + */ + + pipe_texture_reference(&st->state.sampler_texture[su], pt); + } + + cso_set_sampler_textures(st->cso_context, + st->state.num_textures, + st->state.sampler_texture); +} + + +const struct st_tracked_state st_update_texture = { + "st_update_texture", /* name */ + { /* dirty */ + _NEW_TEXTURE, /* mesa */ + ST_NEW_FRAGMENT_PROGRAM, /* st */ + }, + update_textures /* update */ +}; + + + + +static void +finalize_textures(struct st_context *st) +{ + struct gl_fragment_program *fprog = st->ctx->FragmentProgram._Current; + const GLboolean prev_missing_textures = 
st->missing_textures; + GLuint su; + + st->missing_textures = GL_FALSE; + + for (su = 0; su < st->ctx->Const.MaxTextureCoordUnits; su++) { + if (fprog->Base.SamplersUsed & (1 << su)) { + const GLuint texUnit = fprog->Base.SamplerUnits[su]; + struct gl_texture_object *texObj + = st->ctx->Texture.Unit[texUnit]._Current; + struct st_texture_object *stObj = st_texture_object(texObj); + + if (texObj) { + GLboolean flush, retval; + + retval = st_finalize_texture(st->ctx, st->pipe, texObj, &flush); + if (!retval) { + /* out of mem */ + st->missing_textures = GL_TRUE; + continue; + } + + stObj->teximage_realloc = TRUE; + } + } + } + + if (prev_missing_textures != st->missing_textures) + st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM; +} + + + +const struct st_tracked_state st_finalize_textures = { + "st_finalize_textures", /* name */ + { /* dirty */ + _NEW_TEXTURE, /* mesa */ + 0, /* st */ + }, + finalize_textures /* update */ +}; diff --git a/src/mesa/state_tracker/st_atom_viewport.c b/src/mesa/state_tracker/st_atom_viewport.c new file mode 100644 index 0000000000..27ec2eb033 --- /dev/null +++ b/src/mesa/state_tracker/st_atom_viewport.c @@ -0,0 +1,91 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "main/context.h" +#include "main/colormac.h" +#include "st_context.h" +#include "st_atom.h" +#include "pipe/p_context.h" +#include "cso_cache/cso_context.h" + +/** + * Update the viewport transformation matrix. Depends on: + * - viewport pos/size + * - depthrange + * - window pos/size or FBO size + */ +static void +update_viewport( struct st_context *st ) +{ + GLcontext *ctx = st->ctx; + GLfloat yScale, yBias; + + /* _NEW_BUFFERS + */ + if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { + yScale = -1; + yBias = (GLfloat)ctx->DrawBuffer->Height; + } + else { + yScale = 1.0; + yBias = 0.0; + } + + /* _NEW_VIEWPORT + */ + { + GLfloat x = (GLfloat)ctx->Viewport.X; + GLfloat y = (GLfloat)ctx->Viewport.Y; + GLfloat z = ctx->Viewport.Near; + GLfloat half_width = (GLfloat)ctx->Viewport.Width / 2.0f; + GLfloat half_height = (GLfloat)ctx->Viewport.Height / 2.0f; + GLfloat half_depth = (GLfloat)(ctx->Viewport.Far - ctx->Viewport.Near) / 2.0f; + + st->state.viewport.scale[0] = half_width; + st->state.viewport.scale[1] = half_height * yScale; + st->state.viewport.scale[2] = half_depth; + st->state.viewport.scale[3] = 1.0; + + st->state.viewport.translate[0] = half_width + x; + st->state.viewport.translate[1] = (half_height + y) * yScale + yBias; + st->state.viewport.translate[2] = half_depth + z; + st->state.viewport.translate[3] = 0.0; + + cso_set_viewport(st->cso_context, 
&st->state.viewport); + } +} + + +const struct st_tracked_state st_update_viewport = { + "st_update_viewport", /* name */ + { /* dirty */ + _NEW_BUFFERS | _NEW_VIEWPORT, /* mesa */ + 0, /* st */ + }, + update_viewport /* update */ +}; diff --git a/src/mesa/state_tracker/st_cache.h b/src/mesa/state_tracker/st_cache.h new file mode 100644 index 0000000000..b81de316ec --- /dev/null +++ b/src/mesa/state_tracker/st_cache.h @@ -0,0 +1,68 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin <zack@tungstengraphics.com> + */ + +#ifndef ST_CACHE_H +#define ST_CACHE_H + +#include "cso_cache/cso_cache.h" + +struct pipe_blend_state; +struct pipe_sampler_state; +struct st_context; + + +const struct cso_blend * +st_cached_blend_state(struct st_context *st, + const struct pipe_blend_state *blend); + +const struct cso_sampler * +st_cached_sampler_state(struct st_context *st, + const struct pipe_sampler_state *sampler); + +const struct cso_depth_stencil_alpha * +st_cached_depth_stencil_alpha_state(struct st_context *st, + const struct pipe_depth_stencil_alpha_state *depth_stencil); + +const struct cso_rasterizer * +st_cached_rasterizer_state(struct st_context *st, + const struct pipe_rasterizer_state *raster); + +const struct cso_fragment_shader * +st_cached_fs_state(struct st_context *st, + const struct pipe_shader_state *templ); + + +const struct cso_vertex_shader * +st_cached_vs_state(struct st_context *st, + const struct pipe_shader_state *templ); + +#endif diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c new file mode 100644 index 0000000000..cf3a99e7e9 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -0,0 +1,361 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Brian Paul + */ + +#include "main/imports.h" +#include "main/image.h" +#include "main/macros.h" + +#include "st_context.h" +#include "st_cb_accum.h" +#include "st_cb_fbo.h" +#include "st_draw.h" +#include "st_format.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" + + +#define UNCLAMPED_FLOAT_TO_SHORT(us, f) \ + us = ( (short) ( CLAMP((f), -1.0, 1.0) * 32767.0F) ) + + +/** + * For hardware that supports deep color buffers, we could accelerate + * most/all the accum operations with blending/texturing. + * For now, just use the get/put_tile() functions and do things in software. + */ + + +/** + * Wrapper for pipe_get_tile_rgba(). 
Do format/cpp override to make the + * tile util function think the surface is 16bit/channel, even if it's not. + * See also: st_renderbuffer_alloc_storage() + */ +static void +acc_get_tile_rgba(struct pipe_context *pipe, struct pipe_surface *acc_ps, + uint x, uint y, uint w, uint h, float *p) +{ + const enum pipe_format f = acc_ps->format; + const struct pipe_format_block b = acc_ps->block; + + acc_ps->format = DEFAULT_ACCUM_PIPE_FORMAT; + acc_ps->block.size = 8; + acc_ps->block.width = 1; + acc_ps->block.height = 1; + + pipe_get_tile_rgba(acc_ps, x, y, w, h, p); + + acc_ps->format = f; + acc_ps->block = b; +} + + +/** + * Wrapper for pipe_put_tile_rgba(). Do format/cpp override to make the + * tile util function think the surface is 16bit/channel, even if it's not. + * See also: st_renderbuffer_alloc_storage() + */ +static void +acc_put_tile_rgba(struct pipe_context *pipe, struct pipe_surface *acc_ps, + uint x, uint y, uint w, uint h, const float *p) +{ + enum pipe_format f = acc_ps->format; + const struct pipe_format_block b = acc_ps->block; + + acc_ps->format = DEFAULT_ACCUM_PIPE_FORMAT; + acc_ps->block.size = 8; + acc_ps->block.width = 1; + acc_ps->block.height = 1; + + pipe_put_tile_rgba(acc_ps, x, y, w, h, p); + + acc_ps->format = f; + acc_ps->block = b; +} + + + +void +st_clear_accum_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) +{ + struct st_renderbuffer *acc_strb = st_renderbuffer(rb); + struct pipe_surface *acc_ps; + struct pipe_screen *screen = ctx->st->pipe->screen; + const GLint xpos = ctx->DrawBuffer->_Xmin; + const GLint ypos = ctx->DrawBuffer->_Ymin; + const GLint width = ctx->DrawBuffer->_Xmax - xpos; + const GLint height = ctx->DrawBuffer->_Ymax - ypos; + GLubyte *map; + + acc_ps = screen->get_tex_surface(screen, acc_strb->texture, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); + map = screen->surface_map(screen, acc_ps, + PIPE_BUFFER_USAGE_CPU_WRITE); + + /* note acc_strb->format might not equal acc_ps->format */ + switch (acc_strb->format) { + 
case PIPE_FORMAT_R16G16B16A16_SNORM: + { + GLshort r = FLOAT_TO_SHORT(ctx->Accum.ClearColor[0]); + GLshort g = FLOAT_TO_SHORT(ctx->Accum.ClearColor[1]); + GLshort b = FLOAT_TO_SHORT(ctx->Accum.ClearColor[2]); + GLshort a = FLOAT_TO_SHORT(ctx->Accum.ClearColor[3]); + int i, j; + for (i = 0; i < height; i++) { + GLshort *dst = (GLshort *) (map + (ypos + i) * acc_ps->stride + xpos * 8); + for (j = 0; j < width; j++) { + dst[0] = r; + dst[1] = g; + dst[2] = b; + dst[3] = a; + dst += 4; + } + } + } + break; + default: + _mesa_problem(ctx, "unexpected format in st_clear_accum_buffer()"); + } + + screen->surface_unmap(screen, acc_ps); + pipe_surface_reference(&acc_ps, NULL); +} + + +/** + * For ADD/MULT: apply acc = acc * scale + bias to every channel of the + * given rectangle of the accumulation buffer. + * The surface is mapped for both CPU read and write because each value + * is read back before it is overwritten. + */ +static void +accum_mad(GLcontext *ctx, GLfloat scale, GLfloat bias, + GLint xpos, GLint ypos, GLint width, GLint height, + struct st_renderbuffer *acc_strb) +{ + struct pipe_screen *screen = ctx->st->pipe->screen; + struct pipe_surface *acc_ps = acc_strb->surface; + GLubyte *map; + + map = screen->surface_map(screen, acc_ps, + (PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE)); + + /* note acc_strb->format might not equal acc_ps->format */ + switch (acc_strb->format) { + case PIPE_FORMAT_R16G16B16A16_SNORM: + { + int i, j; + for (i = 0; i < height; i++) { + GLshort *acc = (GLshort *) (map + (ypos + i) * acc_ps->stride + xpos * 8); + for (j = 0; j < width * 4; j++) { + float val = SHORT_TO_FLOAT(acc[j]) * scale + bias; + acc[j] = FLOAT_TO_SHORT(val); + } + } + } + break; + default: + _mesa_problem(ctx, "unexpected format in accum_mad()"); + } + + screen->surface_unmap(screen, acc_ps); +} + + +static void +accum_accum(struct pipe_context *pipe, GLfloat value, + GLint xpos, GLint ypos, GLint width, GLint height, + struct st_renderbuffer *acc_strb, + struct st_renderbuffer *color_strb) +{ + struct pipe_screen *screen = pipe->screen; + struct pipe_surface *acc_surf, *color_surf; + GLfloat *colorBuf, *accBuf; + GLint i; + + acc_surf = screen->get_tex_surface(screen, acc_strb->texture, 0, 
0, 0, + (PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_CPU_READ)); + + color_surf = screen->get_tex_surface(screen, color_strb->texture, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_READ); + + colorBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + accBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + + pipe_get_tile_rgba(color_surf, xpos, ypos, width, height, colorBuf); + acc_get_tile_rgba(pipe, acc_surf, xpos, ypos, width, height, accBuf); + + for (i = 0; i < 4 * width * height; i++) { + accBuf[i] = accBuf[i] + colorBuf[i] * value; + } + + acc_put_tile_rgba(pipe, acc_surf, xpos, ypos, width, height, accBuf); + + free(colorBuf); + free(accBuf); + pipe_surface_reference(&acc_surf, NULL); + pipe_surface_reference(&color_surf, NULL); +} + + +static void +accum_load(struct pipe_context *pipe, GLfloat value, + GLint xpos, GLint ypos, GLint width, GLint height, + struct st_renderbuffer *acc_strb, + struct st_renderbuffer *color_strb) +{ + struct pipe_screen *screen = pipe->screen; + struct pipe_surface *acc_surf, *color_surf; + GLfloat *buf; + GLint i; + + acc_surf = screen->get_tex_surface(screen, acc_strb->texture, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); + + color_surf = screen->get_tex_surface(screen, color_strb->texture, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_READ); + + buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + + pipe_get_tile_rgba(color_surf, xpos, ypos, width, height, buf); + + for (i = 0; i < 4 * width * height; i++) { + buf[i] = buf[i] * value; + } + + acc_put_tile_rgba(pipe, acc_surf, xpos, ypos, width, height, buf); + + free(buf); + pipe_surface_reference(&acc_surf, NULL); + pipe_surface_reference(&color_surf, NULL); +} + + +static void +accum_return(GLcontext *ctx, GLfloat value, + GLint xpos, GLint ypos, GLint width, GLint height, + struct st_renderbuffer *acc_strb, + struct st_renderbuffer *color_strb) +{ + struct pipe_context *pipe = ctx->st->pipe; + struct pipe_screen *screen = pipe->screen; + const GLubyte 
*colormask = ctx->Color.ColorMask; + struct pipe_surface *acc_surf, *color_surf; + GLfloat *abuf, *cbuf = NULL; + GLint i, ch; + + abuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + + acc_surf = screen->get_tex_surface(screen, acc_strb->texture, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_READ); + + color_surf = screen->get_tex_surface(screen, color_strb->texture, 0, 0, 0, + (PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE)); + + acc_get_tile_rgba(pipe, acc_surf, xpos, ypos, width, height, abuf); + + if (!colormask[0] || !colormask[1] || !colormask[2] || !colormask[3]) { + cbuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + pipe_get_tile_rgba(color_surf, xpos, ypos, width, height, cbuf); + } + + for (i = 0; i < width * height; i++) { + for (ch = 0; ch < 4; ch++) { + if (colormask[ch]) { + GLfloat val = abuf[i * 4 + ch] * value; + abuf[i * 4 + ch] = CLAMP(val, 0.0f, 1.0f); + } + else { + abuf[i * 4 + ch] = cbuf[i * 4 + ch]; + } + } + } + + pipe_put_tile_rgba(color_surf, xpos, ypos, width, height, abuf); + + free(abuf); + if (cbuf) + free(cbuf); + pipe_surface_reference(&acc_surf, NULL); + pipe_surface_reference(&color_surf, NULL); +} + + +static void +st_Accum(GLcontext *ctx, GLenum op, GLfloat value) +{ + struct st_context *st = ctx->st; + struct pipe_context *pipe = st->pipe; + struct st_renderbuffer *acc_strb + = st_renderbuffer(ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer); + struct st_renderbuffer *color_strb + = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); + + const GLint xpos = ctx->DrawBuffer->_Xmin; + const GLint ypos = ctx->DrawBuffer->_Ymin; + const GLint width = ctx->DrawBuffer->_Xmax - xpos; + const GLint height = ctx->DrawBuffer->_Ymax - ypos; + + /* make sure color bufs aren't cached */ + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + + switch (op) { + case GL_ADD: + if (value != 0.0F) { + accum_mad(ctx, 1.0, value, xpos, ypos, width, height, acc_strb); + } + break; + case GL_MULT: + if (value != 
1.0F) { + accum_mad(ctx, value, 0.0, xpos, ypos, width, height, acc_strb); + } + break; + case GL_ACCUM: + if (value != 0.0F) { + accum_accum(pipe, value, xpos, ypos, width, height, acc_strb, color_strb); + } + break; + case GL_LOAD: + accum_load(pipe, value, xpos, ypos, width, height, acc_strb, color_strb); + break; + case GL_RETURN: + accum_return(ctx, value, xpos, ypos, width, height, acc_strb, color_strb); + break; + default: + assert(0); + } +} + + + +void st_init_accum_functions(struct dd_function_table *functions) +{ + functions->Accum = st_Accum; +} diff --git a/src/mesa/state_tracker/st_cb_accum.h b/src/mesa/state_tracker/st_cb_accum.h new file mode 100644 index 0000000000..ed9b7dab94 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_accum.h @@ -0,0 +1,39 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef ST_CB_ACCUM_H +#define ST_CB_ACCUM_H + + +extern void +st_clear_accum_buffer(GLcontext *ctx, struct gl_renderbuffer *rb); + +extern void st_init_accum_functions(struct dd_function_table *functions); + + +#endif /* ST_CB_ACCUM_H */ diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c new file mode 100644 index 0000000000..694104f9cf --- /dev/null +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -0,0 +1,791 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Brian Paul + */ + +#include "main/imports.h" +#include "main/image.h" +#include "main/bufferobj.h" +#include "main/macros.h" +#include "main/texformat.h" +#include "shader/program.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + +#include "st_context.h" +#include "st_atom.h" +#include "st_atom_constbuf.h" +#include "st_program.h" +#include "st_cb_bitmap.h" +#include "st_cb_program.h" +#include "st_mesa_to_tgsi.h" +#include "st_texture.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" +#include "util/u_draw_quad.h" +#include "util/u_simple_shaders.h" +#include "shader/prog_instruction.h" +#include "cso_cache/cso_context.h" + + + +/** + * glBitmaps are drawn as textured quads. The user's bitmap pattern + * is stored in a texture image. An alpha8 texture format is used. + * The fragment shader samples a bit (texel) from the texture, then + * discards the fragment if the bit is off. + * + * Note that we actually store the inverse image of the bitmap to + * simplify the fragment program. An "on" bit gets stored as texel=0x0 + * and an "off" bit is stored as texel=0xff. Then we kill the + * fragment if the negated texel value is less than zero. + */ + + +/** + * The bitmap cache attempts to accumulate multiple glBitmap calls in a + * buffer which is then rendered en masse upon a flush, state change, etc. + * A wide, short buffer is used to target the common case of a series + * of glBitmap calls being used to draw text. 
+ */ +static GLboolean UseBitmapCache = GL_TRUE; + + +#define BITMAP_CACHE_WIDTH 512 +#define BITMAP_CACHE_HEIGHT 32 + +struct bitmap_cache +{ + /** Window pos to render the cached image */ + GLint xpos, ypos; + /** Bounds of region used in window coords */ + GLint xmin, ymin, xmax, ymax; + + GLfloat color[4]; + + struct pipe_texture *texture; + struct pipe_surface *surf; + + GLboolean empty; + + /** An I8 texture image: */ + ubyte *buffer; +}; + + + + +/** + * Make fragment program for glBitmap: + * Sample the texture and kill the fragment if the bit is 0. + * This program will be combined with the user's fragment program. + * \param samplerIndex texture unit the TEX instruction samples from + * \return the new program, or NULL if the program or its instruction + * array could not be allocated + */ +static struct st_fragment_program * +make_bitmap_fragment_program(GLcontext *ctx, GLuint samplerIndex) +{ + struct st_fragment_program *stfp; + struct gl_program *p; + GLuint ic = 0; + + p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + if (!p) + return NULL; + + p->NumInstructions = 3; + + p->Instructions = _mesa_alloc_instructions(p->NumInstructions); + if (!p->Instructions) { + ctx->Driver.DeleteProgram(ctx, p); + return NULL; + } + _mesa_init_instructions(p->Instructions, p->NumInstructions); + + /* TEX tmp0, fragment.texcoord[0], texture[samplerIndex], 2D; */ + p->Instructions[ic].Opcode = OPCODE_TEX; + p->Instructions[ic].DstReg.File = PROGRAM_TEMPORARY; + p->Instructions[ic].DstReg.Index = 0; + p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; + p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0; + p->Instructions[ic].TexSrcUnit = samplerIndex; + p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX; + ic++; + + /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0xff -> discard */ + p->Instructions[ic].Opcode = OPCODE_KIL; + p->Instructions[ic].SrcReg[0].File = PROGRAM_TEMPORARY; + p->Instructions[ic].SrcReg[0].Index = 0; + p->Instructions[ic].SrcReg[0].NegateBase = NEGATE_XYZW; + ic++; + + /* END; */ + p->Instructions[ic++].Opcode = OPCODE_END; + + assert(ic == p->NumInstructions); + + p->InputsRead = FRAG_BIT_TEX0; + 
p->OutputsWritten = 0x0; + p->SamplersUsed = (1 << samplerIndex); + + stfp = (struct st_fragment_program *) p; + stfp->Base.UsesKill = GL_TRUE; + st_translate_fragment_program(ctx->st, stfp, NULL); + + return stfp; +} + + +static int +find_free_bit(uint bitfield) +{ + int i; + for (i = 0; i < 32; i++) { + if ((bitfield & (1 << i)) == 0) { + return i; + } + } + return -1; +} + + +/** + * Combine basic bitmap fragment program with the user-defined program. + */ +static struct st_fragment_program * +combined_bitmap_fragment_program(GLcontext *ctx) +{ + struct st_context *st = ctx->st; + struct st_fragment_program *stfp = st->fp; + + if (!stfp->bitmap_program) { + /* + * Generate new program which is the user-defined program prefixed + * with the bitmap sampler/kill instructions. + */ + struct st_fragment_program *bitmap_prog; + uint sampler; + + sampler = find_free_bit(st->fp->Base.Base.SamplersUsed); + bitmap_prog = make_bitmap_fragment_program(ctx, sampler); + + stfp->bitmap_program = (struct st_fragment_program *) + _mesa_combine_programs(ctx, + &bitmap_prog->Base.Base, &stfp->Base.Base); + stfp->bitmap_program->bitmap_sampler = sampler; + + /* done with this after combining */ + st_reference_fragprog(st, &bitmap_prog, NULL); + +#if 0 + { + struct gl_program *p = &stfp->bitmap_program->Base.Base; + printf("Combined bitmap program:\n"); + _mesa_print_program(p); + printf("InputsRead: 0x%x\n", p->InputsRead); + printf("OutputsWritten: 0x%x\n", p->OutputsWritten); + _mesa_print_parameter_list(p->Parameters); + } +#endif + + /* translate to TGSI tokens */ + st_translate_fragment_program(st, stfp->bitmap_program, NULL); + } + + /* Ideally we'd have updated the pipe constants during the normal + * st/atom mechanism. But we can't since this is specific to glBitmap. 
+ */ + st_upload_constants(st, stfp->Base.Base.Parameters, PIPE_SHADER_FRAGMENT); + + return stfp->bitmap_program; +} + + +/** + * Copy user-provided bitmap bits into texture buffer, expanding + * bits into texels. + * "On" bits will set texels to 0x0 (the inverse image is stored). + * "Off" bits will not modify texels, so the destination is expected + * to have been pre-filled (with 0xff) by the caller. + * Note that the image is actually going to be upside down in + * the texture. We deal with that with texcoords. + * + * \param px, py offset of the bitmap within destBuffer, in texels + * \param destStride distance between rows of destBuffer, in bytes + */ +static void +unpack_bitmap(struct st_context *st, + GLint px, GLint py, GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap, + ubyte *destBuffer, uint destStride) +{ + GLint row, col; + +#define SET_PIXEL(COL, ROW) \ + destBuffer[(py + (ROW)) * destStride + px + (COL)] = 0x0; + + for (row = 0; row < height; row++) { + const GLubyte *src = (const GLubyte *) _mesa_image_address2d(unpack, + bitmap, width, height, GL_COLOR_INDEX, GL_BITMAP, row, 0); + + if (unpack->LsbFirst) { + /* Lsb first */ + GLubyte mask = 1U << (unpack->SkipPixels & 0x7); + for (col = 0; col < width; col++) { + + if (*src & mask) { + SET_PIXEL(col, row); + } + + if (mask == 128U) { + src++; + mask = 1U; + } + else { + mask = mask << 1; + } + } + + /* get ready for next row */ + if (mask != 1) + src++; + } + else { + /* Msb first */ + GLubyte mask = 128U >> (unpack->SkipPixels & 0x7); + for (col = 0; col < width; col++) { + + if (*src & mask) { + SET_PIXEL(col, row); + } + + if (mask == 1U) { + src++; + mask = 128U; + } + else { + mask = mask >> 1; + } + } + + /* get ready for next row */ + if (mask != 128) + src++; + } + + } /* row */ + +#undef SET_PIXEL +} + + +/** + * Create a texture which represents a bitmap image. 
+ */ +static struct pipe_texture * +make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap) +{ + struct pipe_context *pipe = ctx->st->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_surface *surface; + ubyte *dest; + struct pipe_texture *pt; + + /* PBO source... */ + bitmap = _mesa_map_bitmap_pbo(ctx, unpack, bitmap); + if (!bitmap) { + return NULL; + } + + /** + * Create texture to hold bitmap pattern. + */ + pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, ctx->st->bitmap.tex_format, + 0, width, height, 1, 0, + PIPE_TEXTURE_USAGE_SAMPLER); + if (!pt) { + _mesa_unmap_bitmap_pbo(ctx, unpack); + return NULL; + } + + surface = screen->get_tex_surface(screen, pt, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); + + /* map texture surface */ + dest = screen->surface_map(screen, surface, PIPE_BUFFER_USAGE_CPU_WRITE); + + /* Put image into texture surface */ + memset(dest, 0xff, height * surface->stride); + unpack_bitmap(ctx->st, 0, 0, width, height, unpack, bitmap, + dest, surface->stride); + + _mesa_unmap_bitmap_pbo(ctx, unpack); + + /* Release surface */ + screen->surface_unmap(screen, surface); + pipe_surface_reference(&surface, NULL); + + return pt; +} + + +static void +setup_bitmap_vertex_data(struct st_context *st, + int x, int y, int width, int height, + float z, const float color[4]) +{ + struct pipe_context *pipe = st->pipe; + const struct gl_framebuffer *fb = st->ctx->DrawBuffer; + const GLfloat fb_width = (GLfloat)fb->Width; + const GLfloat fb_height = (GLfloat)fb->Height; + const GLfloat x0 = (GLfloat)x; + const GLfloat x1 = (GLfloat)(x + width); + const GLfloat y0 = (GLfloat)y; + const GLfloat y1 = (GLfloat)(y + height); + const GLfloat sLeft = (GLfloat)0.0, sRight = (GLfloat)1.0; + const GLfloat tTop = (GLfloat)0.0, tBot = (GLfloat)1.0 - tTop; + const GLfloat clip_x0 = (GLfloat)(x0 / fb_width * 2.0 - 1.0); + const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 
1.0); + const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0); + const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0); + GLuint i; + void *buf; + + if (!st->bitmap.vbuf) { + st->bitmap.vbuf = pipe_buffer_create(pipe->screen, 32, PIPE_BUFFER_USAGE_VERTEX, + sizeof(st->bitmap.vertices)); + } + + /* Positions are in clip coords since we need to do clipping in case + * the bitmap quad goes beyond the window bounds. + */ + st->bitmap.vertices[0][0][0] = clip_x0; + st->bitmap.vertices[0][0][1] = clip_y0; + st->bitmap.vertices[0][2][0] = sLeft; + st->bitmap.vertices[0][2][1] = tTop; + + st->bitmap.vertices[1][0][0] = clip_x1; + st->bitmap.vertices[1][0][1] = clip_y0; + st->bitmap.vertices[1][2][0] = sRight; + st->bitmap.vertices[1][2][1] = tTop; + + st->bitmap.vertices[2][0][0] = clip_x1; + st->bitmap.vertices[2][0][1] = clip_y1; + st->bitmap.vertices[2][2][0] = sRight; + st->bitmap.vertices[2][2][1] = tBot; + + st->bitmap.vertices[3][0][0] = clip_x0; + st->bitmap.vertices[3][0][1] = clip_y1; + st->bitmap.vertices[3][2][0] = sLeft; + st->bitmap.vertices[3][2][1] = tBot; + + /* same for all verts: */ + for (i = 0; i < 4; i++) { + st->bitmap.vertices[i][0][2] = z; + st->bitmap.vertices[i][0][3] = 1.0; + st->bitmap.vertices[i][1][0] = color[0]; + st->bitmap.vertices[i][1][1] = color[1]; + st->bitmap.vertices[i][1][2] = color[2]; + st->bitmap.vertices[i][1][3] = color[3]; + st->bitmap.vertices[i][2][2] = 0.0; /*R*/ + st->bitmap.vertices[i][2][3] = 1.0; /*Q*/ + } + + /* put vertex data into vbuf */ + buf = pipe_buffer_map(pipe->screen, st->bitmap.vbuf, PIPE_BUFFER_USAGE_CPU_WRITE); + memcpy(buf, st->bitmap.vertices, sizeof(st->bitmap.vertices)); + pipe_buffer_unmap(pipe->screen, st->bitmap.vbuf); +} + + + +/** + * Render a glBitmap by drawing a textured quad + */ +static void +draw_bitmap_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, + GLsizei width, GLsizei height, + struct pipe_texture *pt, + const GLfloat *color) +{ + struct st_context *st = ctx->st; + 
struct pipe_context *pipe = ctx->st->pipe; + struct cso_context *cso = ctx->st->cso_context; + struct st_fragment_program *stfp; + GLuint maxSize; + + stfp = combined_bitmap_fragment_program(ctx); + + /* limit checks */ + /* XXX if the bitmap is larger than the max texture size, break + * it up into chunks. + */ + maxSize = 1 << (pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1); + assert(width <= (GLsizei)maxSize); + assert(height <= (GLsizei)maxSize); + + cso_save_rasterizer(cso); + cso_save_samplers(cso); + cso_save_sampler_textures(cso); + cso_save_viewport(cso); + cso_save_fragment_shader(cso); + cso_save_vertex_shader(cso); + + /* rasterizer state: just scissor */ + st->bitmap.rasterizer.scissor = ctx->Scissor.Enabled; + cso_set_rasterizer(cso, &st->bitmap.rasterizer); + + /* fragment shader state: TEX lookup program */ + cso_set_fragment_shader_handle(cso, stfp->driver_shader); + + /* vertex shader state: position + texcoord pass-through */ + cso_set_vertex_shader_handle(cso, st->bitmap.vs); + + /* user samplers, plus our bitmap sampler */ + { + struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS]; + uint num = MAX2(stfp->bitmap_sampler + 1, st->state.num_samplers); + uint i; + for (i = 0; i < st->state.num_samplers; i++) { + samplers[i] = &st->state.samplers[i]; + } + samplers[stfp->bitmap_sampler] = &st->bitmap.sampler; + cso_set_samplers(cso, num, (const struct pipe_sampler_state **) samplers); } + + /* user textures, plus the bitmap texture */ + { + struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; + uint num = MAX2(stfp->bitmap_sampler + 1, st->state.num_textures); + memcpy(textures, st->state.sampler_texture, sizeof(textures)); + textures[stfp->bitmap_sampler] = pt; + cso_set_sampler_textures(cso, num, textures); + } + + /* viewport state: viewport matching window dims */ + { + const struct gl_framebuffer *fb = st->ctx->DrawBuffer; + const GLboolean invert = (st_fb_orientation(fb) == Y_0_TOP); + const GLfloat width = 
(GLfloat)fb->Width; + const GLfloat height = (GLfloat)fb->Height; + struct pipe_viewport_state vp; + vp.scale[0] = 0.5f * width; + vp.scale[1] = height * (invert ? -0.5f : 0.5f); + vp.scale[2] = 1.0f; + vp.scale[3] = 1.0f; + vp.translate[0] = 0.5f * width; + vp.translate[1] = 0.5f * height; + vp.translate[2] = 0.0f; + vp.translate[3] = 0.0f; + cso_set_viewport(cso, &vp); + } + + /* draw textured quad */ + setup_bitmap_vertex_data(st, x, y, width, height, + ctx->Current.RasterPos[2], + color); + + util_draw_vertex_buffer(pipe, st->bitmap.vbuf, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 3); /* attribs/vert */ + + + /* restore state */ + cso_restore_rasterizer(cso); + cso_restore_samplers(cso); + cso_restore_sampler_textures(cso); + cso_restore_viewport(cso); + cso_restore_fragment_shader(cso); + cso_restore_vertex_shader(cso); +} + + +static void +reset_cache(struct st_context *st) +{ + struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; + struct bitmap_cache *cache = st->bitmap.cache; + + //memset(cache->buffer, 0xff, sizeof(cache->buffer)); + cache->empty = GL_TRUE; + + cache->xmin = 1000000; + cache->xmax = -1000000; + cache->ymin = 1000000; + cache->ymax = -1000000; + + if (cache->surf) + screen->tex_surface_release(screen, &cache->surf); + + assert(!cache->texture); + + /* allocate a new texture */ + cache->texture = st_texture_create(st, PIPE_TEXTURE_2D, + st->bitmap.tex_format, 0, + BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT, + 1, 0, + PIPE_TEXTURE_USAGE_SAMPLER); + + /* Map the texture surface. + * Subsequent glBitmap calls will write into the texture image. 
+ */ + cache->surf = screen->get_tex_surface(screen, cache->texture, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); + cache->buffer = screen->surface_map(screen, cache->surf, + PIPE_BUFFER_USAGE_CPU_WRITE); + + /* init image to all 0xff */ + memset(cache->buffer, 0xff, BITMAP_CACHE_WIDTH * BITMAP_CACHE_HEIGHT); +} + + +/** + * If there's anything in the bitmap cache, draw/flush it now. + */ +void +st_flush_bitmap_cache(struct st_context *st) +{ + if (!st->bitmap.cache->empty) { + struct bitmap_cache *cache = st->bitmap.cache; + + if (st->ctx->DrawBuffer) { + struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; + + assert(cache->xmin <= cache->xmax); + /* + printf("flush size %d x %d at %d, %d\n", + cache->xmax - cache->xmin, + cache->ymax - cache->ymin, + cache->xpos, cache->ypos); + */ + + /* The texture surface has been mapped until now. + * So unmap and release the texture surface before drawing. + */ + screen->surface_unmap(screen, cache->surf); + cache->buffer = NULL; + + screen->tex_surface_release(screen, &cache->surf); + + draw_bitmap_quad(st->ctx, + cache->xpos, + cache->ypos, + st->ctx->Current.RasterPos[2], + BITMAP_CACHE_WIDTH, BITMAP_CACHE_HEIGHT, + cache->texture, + cache->color); + } + + /* release/free the texture */ + pipe_texture_reference(&cache->texture, NULL); + + reset_cache(st); + } +} + + +/** + * Try to accumulate this glBitmap call in the bitmap cache. + * \return GL_TRUE for success, GL_FALSE if bitmap is too large, etc. 
+ */ +static GLboolean +accum_bitmap(struct st_context *st, + GLint x, GLint y, GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, + const GLubyte *bitmap ) +{ + struct bitmap_cache *cache = st->bitmap.cache; + int px = -999, py; + + if (width > BITMAP_CACHE_WIDTH || + height > BITMAP_CACHE_HEIGHT) + return GL_FALSE; /* too big to cache */ + + if (!cache->empty) { + px = x - cache->xpos; /* pos in buffer */ + py = y - cache->ypos; + if (px < 0 || px + width > BITMAP_CACHE_WIDTH || + py < 0 || py + height > BITMAP_CACHE_HEIGHT || + !TEST_EQ_4V(st->ctx->Current.RasterColor, cache->color)) { + /* This bitmap would extend beyond cache bounds, or the bitmap + * color is changing + * so flush and continue. + */ + st_flush_bitmap_cache(st); + } + } + + if (cache->empty) { + /* Initialize. Center bitmap vertically in the buffer. */ + px = 0; + py = (BITMAP_CACHE_HEIGHT - height) / 2; + cache->xpos = x; + cache->ypos = y - py; + cache->empty = GL_FALSE; + COPY_4FV(cache->color, st->ctx->Current.RasterColor); + } + + assert(px != -999); + + if (x < cache->xmin) + cache->xmin = x; + if (y < cache->ymin) + cache->ymin = y; + if (x + width > cache->xmax) + cache->xmax = x + width; + if (y + height > cache->ymax) + cache->ymax = y + height; + + unpack_bitmap(st, px, py, width, height, unpack, bitmap, + cache->buffer, BITMAP_CACHE_WIDTH); + + return GL_TRUE; /* accumulated */ +} + + + +/** + * Called via ctx->Driver.Bitmap() + */ +static void +st_Bitmap(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, + const struct gl_pixelstore_attrib *unpack, const GLubyte *bitmap ) +{ + struct st_context *st = ctx->st; + struct pipe_texture *pt; + + if (width == 0 || height == 0) + return; + + st_validate_state(st); + + if (!st->bitmap.vs) { + /* create pass-through vertex shader now */ + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_COLOR, + TGSI_SEMANTIC_GENERIC }; + const uint semantic_indexes[] = { 0, 0, 0 }; + 
st->bitmap.vs = util_make_vertex_passthrough_shader(st->pipe, 3, + semantic_names, + semantic_indexes, + &st->bitmap.vert_shader); + } + + if (UseBitmapCache && accum_bitmap(st, x, y, width, height, unpack, bitmap)) + return; + + pt = make_bitmap_texture(ctx, width, height, unpack, bitmap); + if (pt) { + assert(pt->target == PIPE_TEXTURE_2D); + draw_bitmap_quad(ctx, x, y, ctx->Current.RasterPos[2], + width, height, pt, + st->ctx->Current.RasterColor); + /* release/free the texture */ + pipe_texture_reference(&pt, NULL); + } +} + + +/** Per-context init */ +void +st_init_bitmap_functions(struct dd_function_table *functions) +{ + functions->Bitmap = st_Bitmap; +} + + +/** Per-context init */ +void +st_init_bitmap(struct st_context *st) +{ + struct pipe_sampler_state *sampler = &st->bitmap.sampler; + struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; + + /* init sampler state once */ + memset(sampler, 0, sizeof(*sampler)); + sampler->wrap_s = PIPE_TEX_WRAP_CLAMP; + sampler->wrap_t = PIPE_TEX_WRAP_CLAMP; + sampler->wrap_r = PIPE_TEX_WRAP_CLAMP; + sampler->min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler->min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler->mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler->normalized_coords = 1; + + /* init baseline rasterizer state once */ + memset(&st->bitmap.rasterizer, 0, sizeof(st->bitmap.rasterizer)); + st->bitmap.rasterizer.gl_rasterization_rules = 1; + st->bitmap.rasterizer.bypass_vs = 1; + + /* find a usable texture format */ + if (screen->is_format_supported(screen, PIPE_FORMAT_I8_UNORM, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { + st->bitmap.tex_format = PIPE_FORMAT_I8_UNORM; + } + else { + /* XXX support more formats */ + assert(0); + } + + /* alloc bitmap cache object */ + st->bitmap.cache = CALLOC_STRUCT(bitmap_cache); + + reset_cache(st); +} + + +/** Per-context tear-down */ +void +st_destroy_bitmap(struct st_context *st) +{ + struct pipe_context *pipe = st->pipe; + struct 
pipe_screen *screen = pipe->screen; + struct bitmap_cache *cache = st->bitmap.cache; + + screen->surface_unmap(screen, cache->surf); + screen->tex_surface_release(screen, &cache->surf); + + if (st->bitmap.vs) { + cso_delete_vertex_shader(st->cso_context, st->bitmap.vs); + st->bitmap.vs = NULL; + } + + if (st->bitmap.vbuf) { + pipe_buffer_destroy(pipe->screen, st->bitmap.vbuf); + st->bitmap.vbuf = NULL; + } + + if (st->bitmap.cache) { + pipe_texture_release(&st->bitmap.cache->texture); + FREE(st->bitmap.cache); + st->bitmap.cache = NULL; + } +} diff --git a/src/mesa/state_tracker/st_cb_bitmap.h b/src/mesa/state_tracker/st_cb_bitmap.h new file mode 100644 index 0000000000..aae11d34c9 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_bitmap.h @@ -0,0 +1,46 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef ST_CB_BITMAP_H +#define ST_CB_BITMAP_H + + +extern void +st_init_bitmap_functions(struct dd_function_table *functions); + +extern void +st_init_bitmap(struct st_context *st); + +extern void +st_destroy_bitmap(struct st_context *st); + +extern void +st_flush_bitmap_cache(struct st_context *st); + + +#endif /* ST_CB_BITMAP_H */ diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c new file mode 100644 index 0000000000..2852623472 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_blit.c @@ -0,0 +1,111 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Brian Paul + */ + +#include "main/imports.h" +#include "main/image.h" +#include "main/macros.h" +#include "main/texformat.h" +#include "shader/program.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + +#include "st_context.h" +#include "st_program.h" +#include "st_cb_blit.h" +#include "st_cb_fbo.h" + +#include "util/u_blit.h" + +#include "cso_cache/cso_context.h" + + +void +st_init_blit(struct st_context *st) +{ + st->blit = util_create_blit(st->pipe, st->cso_context); +} + + +void +st_destroy_blit(struct st_context *st) +{ + util_destroy_blit(st->blit); + st->blit = NULL; +} + + +static void +st_BlitFramebuffer(GLcontext *ctx, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter) +{ + struct st_context *st = ctx->st; + + const uint pFilter = ((filter == GL_NEAREST) + ? 
PIPE_TEX_MIPFILTER_NEAREST + : PIPE_TEX_MIPFILTER_LINEAR); + + if (mask & GL_COLOR_BUFFER_BIT) { + struct st_renderbuffer *srcRb = + st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); + struct st_renderbuffer *dstRb = + st_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[0]); + struct pipe_surface *srcSurf = srcRb->surface; + struct pipe_surface *dstSurf = dstRb->surface; + + if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { + /* invert Y */ + srcY0 = srcRb->Base.Height - srcY0; + srcY1 = srcRb->Base.Height - srcY1; + + dstY0 = dstRb->Base.Height - dstY0; + dstY1 = dstRb->Base.Height - dstY1; + } + + util_blit_pixels(st->blit, + srcSurf, srcX0, srcY0, srcX1, srcY1, + dstSurf, dstX0, dstY0, dstX1, dstY1, + 0.0, pFilter); + + } +} + + + +void +st_init_blit_functions(struct dd_function_table *functions) +{ +#if FEATURE_EXT_framebuffer_blit + functions->BlitFramebuffer = st_BlitFramebuffer; +#endif +} diff --git a/src/mesa/state_tracker/st_cb_blit.h b/src/mesa/state_tracker/st_cb_blit.h new file mode 100644 index 0000000000..ed22986b53 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_blit.h @@ -0,0 +1,46 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef ST_CB_BLIT_H +#define ST_CB_BLIT_H + + +#include "st_context.h" + + + +extern void +st_init_blit(struct st_context *st); + +extern void +st_destroy_blit(struct st_context *st); + +extern void +st_init_blit_functions(struct dd_function_table *functions); + + +#endif /* ST_CB_BLIT_H */ diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c new file mode 100644 index 0000000000..07fa2afce0 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -0,0 +1,240 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "main/imports.h" +#include "main/mtypes.h" +#include "main/bufferobj.h" + +#include "st_context.h" +#include "st_cb_bufferobjects.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + + + +/* Pixel buffers and Vertex/index buffers are handled through these + * mesa callbacks. Framebuffer/Renderbuffer objects are + * created/managed elsewhere. + */ + + + +/** + * There is some duplication between mesa's bufferobjects and our + * bufmgr buffers. Both have an integer handle and a hashtable to + * lookup an opaque structure. It would be nice if the handles and + * internal structure where somehow shared. + */ +static struct gl_buffer_object * +st_bufferobj_alloc(GLcontext *ctx, GLuint name, GLenum target) +{ + struct st_buffer_object *st_obj = CALLOC_STRUCT(st_buffer_object); + + if (!st_obj) + return NULL; + + _mesa_initialize_buffer_object(&st_obj->Base, name, target); + + return &st_obj->Base; +} + + + +/** + * Deallocate/free a vertex/pixel buffer object. + * Called via glDeleteBuffersARB(). 
+ */ +static void +st_bufferobj_free(GLcontext *ctx, struct gl_buffer_object *obj) +{ + struct pipe_context *pipe = st_context(ctx)->pipe; + struct st_buffer_object *st_obj = st_buffer_object(obj); + + if (st_obj->buffer) + pipe_buffer_reference(pipe->screen, &st_obj->buffer, NULL); + + free(st_obj); +} + + + +/** + * Replace data in a subrange of buffer object. If the data range + * specified by size + offset extends beyond the end of the buffer or + * if data is NULL, no copy is performed. + * Called via glBufferSubDataARB(). + */ +static void +st_bufferobj_subdata(GLcontext *ctx, + GLenum target, + GLintptrARB offset, + GLsizeiptrARB size, + const GLvoid * data, struct gl_buffer_object *obj) +{ + struct pipe_context *pipe = st_context(ctx)->pipe; + struct st_buffer_object *st_obj = st_buffer_object(obj); + char *map; + + if (offset >= st_obj->size || size > (st_obj->size - offset)) + return; + + map = pipe_buffer_map(pipe->screen, st_obj->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + memcpy(map + offset, data, size); + pipe_buffer_unmap(pipe->screen, st_obj->buffer); +} + + +/** + * Called via glGetBufferSubDataARB(). + */ +static void +st_bufferobj_get_subdata(GLcontext *ctx, + GLenum target, + GLintptrARB offset, + GLsizeiptrARB size, + GLvoid * data, struct gl_buffer_object *obj) +{ + struct pipe_context *pipe = st_context(ctx)->pipe; + struct st_buffer_object *st_obj = st_buffer_object(obj); + char *map; + + if (offset >= st_obj->size || size > (st_obj->size - offset)) + return; + + map = pipe_buffer_map(pipe->screen, st_obj->buffer, PIPE_BUFFER_USAGE_CPU_READ); + memcpy(data, map + offset, size); + pipe_buffer_unmap(pipe->screen, st_obj->buffer); +} + + +/** + * Allocate space for and store data in a buffer object. Any data that was + * previously stored in the buffer object is lost. If data is NULL, + * memory will be allocated, but no copy will occur. + * Called via glBufferDataARB(). 
+ */ +static void +st_bufferobj_data(GLcontext *ctx, + GLenum target, + GLsizeiptrARB size, + const GLvoid * data, + GLenum usage, + struct gl_buffer_object *obj) +{ + struct st_context *st = st_context(ctx); + struct pipe_context *pipe = st->pipe; + struct st_buffer_object *st_obj = st_buffer_object(obj); + unsigned buffer_usage; + + st_obj->Base.Size = size; + st_obj->Base.Usage = usage; + + switch(target) { + case GL_PIXEL_PACK_BUFFER_ARB: + case GL_PIXEL_UNPACK_BUFFER_ARB: + buffer_usage = PIPE_BUFFER_USAGE_PIXEL; + break; + case GL_ARRAY_BUFFER_ARB: + buffer_usage = PIPE_BUFFER_USAGE_VERTEX; + break; + case GL_ELEMENT_ARRAY_BUFFER_ARB: + buffer_usage = PIPE_BUFFER_USAGE_INDEX; + break; + default: + buffer_usage = 0; + } + + pipe_buffer_reference( pipe->screen, &st_obj->buffer, NULL ); + + st_obj->buffer = pipe_buffer_create( pipe->screen, 32, buffer_usage, size ); + + st_obj->size = size; + + if (data) + st_bufferobj_subdata(ctx, target, 0, size, data, obj); +} + + +/** + * Called via glMapBufferARB(). + */ +static void * +st_bufferobj_map(GLcontext *ctx, GLenum target, GLenum access, + struct gl_buffer_object *obj) +{ + struct pipe_context *pipe = st_context(ctx)->pipe; + struct st_buffer_object *st_obj = st_buffer_object(obj); + GLuint flags; + + switch (access) { + case GL_WRITE_ONLY: + flags = PIPE_BUFFER_USAGE_CPU_WRITE; + break; + case GL_READ_ONLY: + flags = PIPE_BUFFER_USAGE_CPU_READ; + break; + case GL_READ_WRITE: + /* fall-through */ + default: + flags = PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE; + break; + } + + obj->Pointer = pipe_buffer_map(pipe->screen, st_obj->buffer, flags); + return obj->Pointer; +} + + +/** + * Called via glMapBufferARB(). 
+ */ +static GLboolean +st_bufferobj_unmap(GLcontext *ctx, GLenum target, struct gl_buffer_object *obj) +{ + struct pipe_context *pipe = st_context(ctx)->pipe; + struct st_buffer_object *st_obj = st_buffer_object(obj); + + pipe_buffer_unmap(pipe->screen, st_obj->buffer); + obj->Pointer = NULL; + return GL_TRUE; +} + + +void +st_init_bufferobject_functions(struct dd_function_table *functions) +{ + functions->NewBufferObject = st_bufferobj_alloc; + functions->DeleteBuffer = st_bufferobj_free; + functions->BufferData = st_bufferobj_data; + functions->BufferSubData = st_bufferobj_subdata; + functions->GetBufferSubData = st_bufferobj_get_subdata; + functions->MapBuffer = st_bufferobj_map; + functions->UnmapBuffer = st_bufferobj_unmap; +} diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.h b/src/mesa/state_tracker/st_cb_bufferobjects.h new file mode 100644 index 0000000000..dcbb5a5233 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_bufferobjects.h @@ -0,0 +1,66 @@ +/************************************************************************** + * + * Copyright 2005 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef ST_CB_BUFFEROBJECTS_H +#define ST_CB_BUFFEROBJECTS_H + +struct st_context; +struct gl_buffer_object; +struct pipe_buffer; + +/** + * State_tracker vertex/pixel buffer object, derived from Mesa's + * gl_buffer_object. + */ +struct st_buffer_object +{ + struct gl_buffer_object Base; + struct pipe_buffer *buffer; + GLsizeiptrARB size; +}; + + +/* Are the obj->Name tests necessary? Unfortunately yes, mesa + * allocates a couple of gl_buffer_object structs statically, and the + * Name == 0 test is the only way to identify them and avoid casting + * them erroneously to our structs. + */ +static INLINE struct st_buffer_object * +st_buffer_object(struct gl_buffer_object *obj) +{ + if (obj->Name) + return (struct st_buffer_object *) obj; + else + return NULL; +} + + +extern void +st_init_bufferobject_functions(struct dd_function_table *functions); + + +#endif diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c new file mode 100644 index 0000000000..47ad3c2bc1 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -0,0 +1,565 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + +#include "main/glheader.h" +#include "main/macros.h" +#include "shader/prog_instruction.h" +#include "st_context.h" +#include "st_atom.h" +#include "st_cb_accum.h" +#include "st_cb_clear.h" +#include "st_cb_fbo.h" +#include "st_draw.h" +#include "st_program.h" +#include "st_public.h" +#include "st_mesa_to_tgsi.h" + +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "util/u_pack_color.h" +#include "util/u_simple_shaders.h" +#include "util/u_draw_quad.h" + +#include "cso_cache/cso_context.h" + + +void +st_init_clear(struct st_context *st) +{ + struct pipe_context *pipe = st->pipe; + + /* rasterizer state: bypass clipping */ + memset(&st->clear.raster, 0, sizeof(st->clear.raster)); + st->clear.raster.gl_rasterization_rules = 1; + st->clear.raster.bypass_clipping = 1; + + /* viewport state: identity since we're drawing in window coords */ + st->clear.viewport.scale[0] = 1.0; + st->clear.viewport.scale[1] = 1.0; + st->clear.viewport.scale[2] = 1.0; + st->clear.viewport.scale[3] = 1.0; + st->clear.viewport.translate[0] = 0.0; + st->clear.viewport.translate[1] = 0.0; + st->clear.viewport.translate[2] = 0.0; + st->clear.viewport.translate[3] = 0.0; + + /* fragment shader state: color pass-through program */ + st->clear.fs = + util_make_fragment_passthrough_shader(pipe, &st->clear.frag_shader); + + /* vertex shader state: color/position pass-through */ + { + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_COLOR }; + const uint semantic_indexes[] = { 0, 0 }; + st->clear.vs = util_make_vertex_passthrough_shader(pipe, 2, + semantic_names, + semantic_indexes, + &st->clear.vert_shader); + } +} + + +void +st_destroy_clear(struct st_context *st) +{ + struct pipe_context *pipe = st->pipe; + + if 
(st->clear.vert_shader.tokens) { + FREE((void *) st->clear.vert_shader.tokens); + st->clear.vert_shader.tokens = NULL; + } + + if (st->clear.frag_shader.tokens) { + FREE((void *) st->clear.frag_shader.tokens); + st->clear.frag_shader.tokens = NULL; + } + + if (st->clear.fs) { + cso_delete_fragment_shader(st->cso_context, st->clear.fs); + st->clear.fs = NULL; + } + if (st->clear.vs) { + cso_delete_vertex_shader(st->cso_context, st->clear.vs); + st->clear.vs = NULL; + } + if (st->clear.vbuf) { + pipe_buffer_destroy(pipe->screen, st->clear.vbuf); + st->clear.vbuf = NULL; + } +} + + +static GLboolean +is_depth_stencil_format(enum pipe_format pipeFormat) +{ + switch (pipeFormat) { + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return GL_TRUE; + default: + return GL_FALSE; + } +} + + + +/** + * Draw a screen-aligned quadrilateral. + * Coords are window coords with y=0=bottom. These coords will be transformed + * by the vertex shader and viewport transform (which will flip Y if needed). 
+ */ +static void +draw_quad(GLcontext *ctx, + float x0, float y0, float x1, float y1, GLfloat z, + const GLfloat color[4]) +{ + struct st_context *st = ctx->st; + struct pipe_context *pipe = st->pipe; + GLuint i; + void *buf; + + if (!st->clear.vbuf) { + st->clear.vbuf = pipe_buffer_create(pipe->screen, 32, PIPE_BUFFER_USAGE_VERTEX, + sizeof(st->clear.vertices)); + } + + /* positions */ + st->clear.vertices[0][0][0] = x0; + st->clear.vertices[0][0][1] = y0; + + st->clear.vertices[1][0][0] = x1; + st->clear.vertices[1][0][1] = y0; + + st->clear.vertices[2][0][0] = x1; + st->clear.vertices[2][0][1] = y1; + + st->clear.vertices[3][0][0] = x0; + st->clear.vertices[3][0][1] = y1; + + /* same for all verts: */ + for (i = 0; i < 4; i++) { + st->clear.vertices[i][0][2] = z; + st->clear.vertices[i][0][3] = 1.0; + st->clear.vertices[i][1][0] = color[0]; + st->clear.vertices[i][1][1] = color[1]; + st->clear.vertices[i][1][2] = color[2]; + st->clear.vertices[i][1][3] = color[3]; + } + + /* put vertex data into vbuf */ + buf = pipe_buffer_map(pipe->screen, st->clear.vbuf, PIPE_BUFFER_USAGE_CPU_WRITE); + memcpy(buf, st->clear.vertices, sizeof(st->clear.vertices)); + pipe_buffer_unmap(pipe->screen, st->clear.vbuf); + + /* draw */ + util_draw_vertex_buffer(pipe, st->clear.vbuf, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ +} + + + +/** + * Do glClear by drawing a quadrilateral. + * The vertices of the quad will be computed from the + * ctx->DrawBuffer->_X/Ymin/max fields. 
+ */ +static void +clear_with_quad(GLcontext *ctx, + GLboolean color, GLboolean depth, GLboolean stencil) +{ + struct st_context *st = ctx->st; + const GLfloat x0 = (GLfloat) ctx->DrawBuffer->_Xmin; + const GLfloat x1 = (GLfloat) ctx->DrawBuffer->_Xmax; + GLfloat y0, y1; + + if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { + y0 = (GLfloat) (ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax); + y1 = (GLfloat) (ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin); + } + else { + y0 = (GLfloat) ctx->DrawBuffer->_Ymin; + y1 = (GLfloat) ctx->DrawBuffer->_Ymax; + } + + /* + printf("%s %s%s%s %f,%f %f,%f\n", __FUNCTION__, + color ? "color, " : "", + depth ? "depth, " : "", + stencil ? "stencil" : "", + x0, y0, + x1, y1); + */ + + cso_save_blend(st->cso_context); + cso_save_depth_stencil_alpha(st->cso_context); + cso_save_rasterizer(st->cso_context); + cso_save_viewport(st->cso_context); + cso_save_fragment_shader(st->cso_context); + cso_save_vertex_shader(st->cso_context); + + /* blend state: RGBA masking */ + { + struct pipe_blend_state blend; + memset(&blend, 0, sizeof(blend)); + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + if (color) { + if (ctx->Color.ColorMask[0]) + blend.colormask |= PIPE_MASK_R; + if (ctx->Color.ColorMask[1]) + blend.colormask |= PIPE_MASK_G; + if (ctx->Color.ColorMask[2]) + blend.colormask |= PIPE_MASK_B; + if (ctx->Color.ColorMask[3]) + blend.colormask |= PIPE_MASK_A; + if (st->ctx->Color.DitherFlag) + blend.dither = 1; + } + cso_set_blend(st->cso_context, &blend); + } + + /* depth_stencil state: always pass/set to ref value */ + { + struct pipe_depth_stencil_alpha_state depth_stencil; + memset(&depth_stencil, 0, sizeof(depth_stencil)); + if (depth) { + depth_stencil.depth.enabled = 1; + depth_stencil.depth.writemask = 1; + depth_stencil.depth.func = PIPE_FUNC_ALWAYS; + } + + if (stencil) { + 
depth_stencil.stencil[0].enabled = 1; + depth_stencil.stencil[0].func = PIPE_FUNC_ALWAYS; + depth_stencil.stencil[0].fail_op = PIPE_STENCIL_OP_REPLACE; + depth_stencil.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE; + depth_stencil.stencil[0].zfail_op = PIPE_STENCIL_OP_REPLACE; + depth_stencil.stencil[0].ref_value = ctx->Stencil.Clear; + depth_stencil.stencil[0].value_mask = 0xff; + depth_stencil.stencil[0].write_mask = ctx->Stencil.WriteMask[0] & 0xff; + } + + cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil); + } + + cso_set_rasterizer(st->cso_context, &st->clear.raster); + cso_set_viewport(st->cso_context, &st->clear.viewport); + + cso_set_fragment_shader_handle(st->cso_context, st->clear.fs); + cso_set_vertex_shader_handle(st->cso_context, st->clear.vs); + + /* draw quad matching scissor rect (XXX verify coord round-off) */ + draw_quad(ctx, x0, y0, x1, y1, (GLfloat) ctx->Depth.Clear, ctx->Color.ClearColor); + + /* Restore pipe state */ + cso_restore_blend(st->cso_context); + cso_restore_depth_stencil_alpha(st->cso_context); + cso_restore_rasterizer(st->cso_context); + cso_restore_viewport(st->cso_context); + cso_restore_fragment_shader(st->cso_context); + cso_restore_vertex_shader(st->cso_context); +} + + +/** + * Determine if we need to clear the depth buffer by drawing a quad. 
+ */ +static INLINE GLboolean +check_clear_color_with_quad(GLcontext *ctx, struct gl_renderbuffer *rb) +{ + const struct st_renderbuffer *strb = st_renderbuffer(rb); + + if (strb->surface->status == PIPE_SURFACE_STATUS_UNDEFINED) + return FALSE; + + if (ctx->Scissor.Enabled) + return TRUE; + + if (!ctx->Color.ColorMask[0] || + !ctx->Color.ColorMask[1] || + !ctx->Color.ColorMask[2] || + !ctx->Color.ColorMask[3]) + return TRUE; + + return FALSE; +} + + +static INLINE GLboolean +check_clear_depth_stencil_with_quad(GLcontext *ctx, struct gl_renderbuffer *rb) +{ + const struct st_renderbuffer *strb = st_renderbuffer(rb); + const GLuint stencilMax = (1 << rb->StencilBits) - 1; + GLboolean maskStencil + = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax; + + if (strb->surface->status == PIPE_SURFACE_STATUS_UNDEFINED) + return FALSE; + + if (ctx->Scissor.Enabled) + return TRUE; + + if (maskStencil) + return TRUE; + + return FALSE; +} + + +/** + * Determine if we need to clear the depth buffer by drawing a quad. + */ +static INLINE GLboolean +check_clear_depth_with_quad(GLcontext *ctx, struct gl_renderbuffer *rb) +{ + const struct st_renderbuffer *strb = st_renderbuffer(rb); + const GLboolean isDS = is_depth_stencil_format(strb->surface->format); + + if (strb->surface->status == PIPE_SURFACE_STATUS_UNDEFINED) + return FALSE; + + if (ctx->Scissor.Enabled) + return TRUE; + + if (isDS && + strb->surface->status == PIPE_SURFACE_STATUS_DEFINED && + ctx->DrawBuffer->Visual.stencilBits > 0) + return TRUE; + + return FALSE; +} + + +/** + * Determine if we need to clear the stencil buffer by drawing a quad. 
+ */ +static INLINE GLboolean +check_clear_stencil_with_quad(GLcontext *ctx, struct gl_renderbuffer *rb) +{ + const struct st_renderbuffer *strb = st_renderbuffer(rb); + const GLboolean isDS = is_depth_stencil_format(strb->surface->format); + const GLuint stencilMax = (1 << rb->StencilBits) - 1; + const GLboolean maskStencil + = (ctx->Stencil.WriteMask[0] & stencilMax) != stencilMax; + + if (strb->surface->status == PIPE_SURFACE_STATUS_UNDEFINED) + return FALSE; + + if (maskStencil) + return TRUE; + + if (ctx->Scissor.Enabled) + return TRUE; + + /* This is correct, but it is necessary to look at the depth clear + * value held in the surface when it comes time to issue the clear, + * rather than taking depth and stencil clear values from the + * current state. + */ + if (isDS && + strb->surface->status == PIPE_SURFACE_STATUS_DEFINED && + ctx->DrawBuffer->Visual.depthBits > 0) + return TRUE; + + return FALSE; +} + + + +static void +clear_color_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) +{ + if (check_clear_color_with_quad( ctx, rb )) { + /* masking or scissoring */ + clear_with_quad(ctx, GL_TRUE, GL_FALSE, GL_FALSE); + } + else { + /* clear whole buffer w/out masking */ + struct st_renderbuffer *strb = st_renderbuffer(rb); + uint clearValue; + /* NOTE: we always pass the clear color as PIPE_FORMAT_A8R8G8B8_UNORM + * at this time! 
+ */ + util_pack_color(ctx->Color.ClearColor, PIPE_FORMAT_A8R8G8B8_UNORM, &clearValue); + ctx->st->pipe->clear(ctx->st->pipe, strb->surface, clearValue); + } +} + + +static void +clear_depth_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) +{ + if (check_clear_depth_with_quad(ctx, rb)) { + /* scissoring or we have a combined depth/stencil buffer */ + clear_with_quad(ctx, GL_FALSE, GL_TRUE, GL_FALSE); + } + else { + struct st_renderbuffer *strb = st_renderbuffer(rb); + + /* simple clear of whole buffer */ + uint clearValue = util_pack_z(strb->surface->format, ctx->Depth.Clear); + ctx->st->pipe->clear(ctx->st->pipe, strb->surface, clearValue); + } +} + + +static void +clear_stencil_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) +{ + if (check_clear_stencil_with_quad(ctx, rb)) { + /* masking or scissoring or combined depth/stencil buffer */ + clear_with_quad(ctx, GL_FALSE, GL_FALSE, GL_TRUE); + } + else { + struct st_renderbuffer *strb = st_renderbuffer(rb); + + /* simple clear of whole buffer */ + GLuint clearValue = ctx->Stencil.Clear; + + switch (strb->surface->format) { + case PIPE_FORMAT_S8Z24_UNORM: + clearValue <<= 24; + break; + default: + ; /* no-op, stencil value is in least significant bits */ + } + + ctx->st->pipe->clear(ctx->st->pipe, strb->surface, clearValue); + } +} + + +static void +clear_depth_stencil_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) +{ + + if (check_clear_depth_stencil_with_quad(ctx, rb)) { + /* masking or scissoring */ + clear_with_quad(ctx, GL_FALSE, GL_TRUE, GL_TRUE); + } + else { + struct st_renderbuffer *strb = st_renderbuffer(rb); + + /* clear whole buffer w/out masking */ + GLuint clearValue = util_pack_z(strb->surface->format, ctx->Depth.Clear); + + switch (strb->surface->format) { + case PIPE_FORMAT_S8Z24_UNORM: + clearValue |= ctx->Stencil.Clear << 24; + break; + case PIPE_FORMAT_Z24S8_UNORM: + clearValue |= ctx->Stencil.Clear; + break; + default: + assert(0); + } + + ctx->st->pipe->clear(ctx->st->pipe, 
strb->surface, clearValue); + } +} + + + +/** + * Called via ctx->Driver.Clear() + * XXX: doesn't pick up the differences between front/back/left/right + * clears. Need to sort that out... + */ +static void st_clear(GLcontext *ctx, GLbitfield mask) +{ + static const GLbitfield BUFFER_BITS_DS + = (BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); + struct st_context *st = ctx->st; + struct gl_renderbuffer *depthRb + = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer; + struct gl_renderbuffer *stencilRb + = ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer; + GLbitfield cmask = mask & BUFFER_BITS_COLOR; + + /* This makes sure the softpipe has the latest scissor, etc values */ + st_validate_state( st ); + + /* + * XXX TO-DO: + * If we're going to use clear_with_quad() for any reason, use it to + * clear as many other buffers as possible. + * As it is now, we sometimes call clear_with_quad() three times to clear + * color/depth/stencil individually... + */ + + if (cmask) { + GLuint b; + for (b = 0; cmask; b++) { + if (cmask & (1 << b)) { + struct gl_renderbuffer *rb + = ctx->DrawBuffer->Attachment[b].Renderbuffer; + assert(rb); + clear_color_buffer(ctx, rb); + cmask &= ~(1 << b); /* turn off bit */ + } + assert(b < BUFFER_COUNT); + } + } + + if (mask & BUFFER_BIT_ACCUM) { + st_clear_accum_buffer(ctx, + ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer); + } + + if ((mask & BUFFER_BITS_DS) == BUFFER_BITS_DS && depthRb == stencilRb) { + /* clearing combined depth + stencil */ + clear_depth_stencil_buffer(ctx, depthRb); + } + else { + /* separate depth/stencil clears */ + if (mask & BUFFER_BIT_DEPTH) { + clear_depth_buffer(ctx, depthRb); + } + if (mask & BUFFER_BIT_STENCIL) { + clear_stencil_buffer(ctx, stencilRb); + } + } +} + + +void st_init_clear_functions(struct dd_function_table *functions) +{ + functions->Clear = st_clear; +} diff --git a/src/mesa/state_tracker/st_cb_clear.h b/src/mesa/state_tracker/st_cb_clear.h new file mode 100644 index 
0000000000..f49387747d --- /dev/null +++ b/src/mesa/state_tracker/st_cb_clear.h @@ -0,0 +1,46 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef ST_CB_CLEAR_H +#define ST_CB_CLEAR_H + + +extern void +st_init_clear(struct st_context *st); + + +extern void +st_destroy_clear(struct st_context *st); + + +extern void +st_init_clear_functions(struct dd_function_table *functions); + + +#endif /* ST_CB_CLEAR_H */ + diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c new file mode 100644 index 0000000000..5b24b9f068 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -0,0 +1,1104 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Brian Paul + */ + +#include "main/imports.h" +#include "main/image.h" +#include "main/bufferobj.h" +#include "main/macros.h" +#include "main/texformat.h" +#include "main/state.h" +#include "shader/program.h" +#include "shader/prog_parameter.h" +#include "shader/prog_print.h" + +#include "st_context.h" +#include "st_atom.h" +#include "st_atom_constbuf.h" +#include "st_draw.h" +#include "st_program.h" +#include "st_cb_drawpixels.h" +#include "st_cb_readpixels.h" +#include "st_cb_fbo.h" +#include "st_cb_texture.h" +#include "st_draw.h" +#include "st_format.h" +#include "st_mesa_to_tgsi.h" +#include "st_texture.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" +#include "util/u_draw_quad.h" +#include "shader/prog_instruction.h" +#include "cso_cache/cso_context.h" + + +/** + * Check if the given program is: + * 0: MOVE result.color, fragment.color; + * 1: END; + */ +static GLboolean +is_passthrough_program(const struct gl_fragment_program *prog) +{ + if (prog->Base.NumInstructions == 2) { + const struct prog_instruction *inst = prog->Base.Instructions; + if (inst[0].Opcode == OPCODE_MOV && + inst[1].Opcode == OPCODE_END && + inst[0].DstReg.File == PROGRAM_OUTPUT && + inst[0].DstReg.Index == FRAG_RESULT_COLR && + inst[0].DstReg.WriteMask == WRITEMASK_XYZW && + inst[0].SrcReg[0].File == PROGRAM_INPUT && + inst[0].SrcReg[0].Index == FRAG_ATTRIB_COL0 && + inst[0].SrcReg[0].Swizzle == SWIZZLE_XYZW) { + return GL_TRUE; + } + } + return GL_FALSE; +} + + + +/** + * Make fragment shader for glDraw/CopyPixels. This shader is made + * by combining the pixel transfer shader with the user-defined shader. 
+ */ +static struct st_fragment_program * +combined_drawpix_fragment_program(GLcontext *ctx) +{ + struct st_context *st = ctx->st; + struct st_fragment_program *stfp; + + if (st->pixel_xfer.program->serialNo == st->pixel_xfer.xfer_prog_sn + && st->fp->serialNo == st->pixel_xfer.user_prog_sn) { + /* the pixel tranfer program has not changed and the user-defined + * program has not changed, so re-use the combined program. + */ + stfp = st->pixel_xfer.combined_prog; + } + else { + /* Concatenate the pixel transfer program with the current user- + * defined program. + */ + if (is_passthrough_program(&st->fp->Base)) { + stfp = (struct st_fragment_program *) + _mesa_clone_program(ctx, &st->pixel_xfer.program->Base.Base); + } + else { +#if 0 + printf("Base program:\n"); + _mesa_print_program(&st->fp->Base.Base); + printf("DrawPix program:\n"); + _mesa_print_program(&st->pixel_xfer.program->Base.Base); +#endif + stfp = (struct st_fragment_program *) + _mesa_combine_programs(ctx, + &st->pixel_xfer.program->Base.Base, + &st->fp->Base.Base); + } + +#if 0 + { + struct gl_program *p = &stfp->Base.Base; + printf("Combined DrawPixels program:\n"); + _mesa_print_program(p); + printf("InputsRead: 0x%x\n", p->InputsRead); + printf("OutputsWritten: 0x%x\n", p->OutputsWritten); + _mesa_print_parameter_list(p->Parameters); + } +#endif + + /* translate to TGSI tokens */ + st_translate_fragment_program(st, stfp, NULL); + + /* save new program, update serial numbers */ + st->pixel_xfer.xfer_prog_sn = st->pixel_xfer.program->serialNo; + st->pixel_xfer.user_prog_sn = st->fp->serialNo; + st->pixel_xfer.combined_prog_sn = stfp->serialNo; + st->pixel_xfer.combined_prog = stfp; + } + + /* Ideally we'd have updated the pipe constants during the normal + * st/atom mechanism. But we can't since this is specific to glDrawPixels. 
+ */ + st_upload_constants(st, stfp->Base.Base.Parameters, PIPE_SHADER_FRAGMENT); + + return stfp; +} + + +/** + * Create fragment shader that does a TEX() instruction to get a Z + * value, then writes to FRAG_RESULT_DEPR. + * Pass fragment color through as-is. + */ +static struct st_fragment_program * +make_fragment_shader_z(struct st_context *st) +{ + GLcontext *ctx = st->ctx; + struct gl_program *p; + GLuint ic = 0; + + if (st->drawpix.z_shader) { + return st->drawpix.z_shader; + } + + /* + * Create shader now + */ + p = ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); + if (!p) + return NULL; + + p->NumInstructions = 3; + + p->Instructions = _mesa_alloc_instructions(p->NumInstructions); + if (!p->Instructions) { + ctx->Driver.DeleteProgram(ctx, p); + return NULL; + } + _mesa_init_instructions(p->Instructions, p->NumInstructions); + + /* TEX result.depth, fragment.texcoord[0], texture[0], 2D; */ + p->Instructions[ic].Opcode = OPCODE_TEX; + p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT; + p->Instructions[ic].DstReg.Index = FRAG_RESULT_DEPR; + p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Z; + p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; + p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0; + p->Instructions[ic].TexSrcUnit = 0; + p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX; + ic++; + + /* MOV result.color, fragment.color */ + p->Instructions[ic].Opcode = OPCODE_MOV; + p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT; + p->Instructions[ic].DstReg.Index = FRAG_RESULT_COLR; + p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; + p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_COL0; + ic++; + + /* END; */ + p->Instructions[ic++].Opcode = OPCODE_END; + + assert(ic == p->NumInstructions); + + p->InputsRead = FRAG_BIT_TEX0 | FRAG_BIT_COL0; + p->OutputsWritten = (1 << FRAG_RESULT_COLR) | (1 << FRAG_RESULT_DEPR); + p->SamplersUsed = 0x1; /* sampler 0 (bit 0) is used */ + + st->drawpix.z_shader = (struct st_fragment_program *) p; + 
st_translate_fragment_program(st, st->drawpix.z_shader, NULL); + + return st->drawpix.z_shader; +} + + + +/** + * Create a simple vertex shader that just passes through the + * vertex position and texcoord (and optionally, color). + */ +static struct st_vertex_program * +st_make_passthrough_vertex_shader(struct st_context *st, GLboolean passColor) +{ + GLcontext *ctx = st->ctx; + struct st_vertex_program *stvp; + struct gl_program *p; + GLuint ic = 0; + + if (st->drawpix.vert_shaders[passColor]) + return st->drawpix.vert_shaders[passColor]; + + /* + * Create shader now + */ + p = ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0); + if (!p) + return NULL; + + if (passColor) + p->NumInstructions = 4; + else + p->NumInstructions = 3; + + p->Instructions = _mesa_alloc_instructions(p->NumInstructions); + if (!p->Instructions) { + ctx->Driver.DeleteProgram(ctx, p); + return NULL; + } + _mesa_init_instructions(p->Instructions, p->NumInstructions); + /* MOV result.pos, vertex.pos; */ + p->Instructions[0].Opcode = OPCODE_MOV; + p->Instructions[0].DstReg.File = PROGRAM_OUTPUT; + p->Instructions[0].DstReg.Index = VERT_RESULT_HPOS; + p->Instructions[0].SrcReg[0].File = PROGRAM_INPUT; + p->Instructions[0].SrcReg[0].Index = VERT_ATTRIB_POS; + /* MOV result.texcoord0, vertex.texcoord0; */ + p->Instructions[1].Opcode = OPCODE_MOV; + p->Instructions[1].DstReg.File = PROGRAM_OUTPUT; + p->Instructions[1].DstReg.Index = VERT_RESULT_TEX0; + p->Instructions[1].SrcReg[0].File = PROGRAM_INPUT; + p->Instructions[1].SrcReg[0].Index = VERT_ATTRIB_TEX0; + ic = 2; + if (passColor) { + /* MOV result.color0, vertex.color0; */ + p->Instructions[ic].Opcode = OPCODE_MOV; + p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT; + p->Instructions[ic].DstReg.Index = VERT_RESULT_COL0; + p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT; + p->Instructions[ic].SrcReg[0].Index = VERT_ATTRIB_COLOR0; + ic++; + } + + /* END; */ + p->Instructions[ic].Opcode = OPCODE_END; + ic++; + + assert(ic == 
p->NumInstructions); + + p->InputsRead = VERT_BIT_POS | VERT_BIT_TEX0; + p->OutputsWritten = ((1 << VERT_RESULT_TEX0) | + (1 << VERT_RESULT_HPOS)); + if (passColor) { + p->InputsRead |= VERT_BIT_COLOR0; + p->OutputsWritten |= (1 << VERT_RESULT_COL0); + } + + stvp = (struct st_vertex_program *) p; + st_translate_vertex_program(st, stvp, NULL, NULL, NULL); + + st->drawpix.vert_shaders[passColor] = stvp; + + return stvp; +} + + +static GLenum +_mesa_base_format(GLenum format) +{ + switch (format) { + case GL_DEPTH_COMPONENT: + return GL_DEPTH_COMPONENT; + case GL_STENCIL_INDEX: + return GL_STENCIL_INDEX; + default: + return GL_RGBA; + } +} + + +/** + * Make texture containing an image for glDrawPixels image. + * If 'pixels' is NULL, leave the texture image data undefined. + */ +static struct pipe_texture * +make_texture(struct st_context *st, + GLsizei width, GLsizei height, GLenum format, GLenum type, + const struct gl_pixelstore_attrib *unpack, + const GLvoid *pixels) +{ + GLcontext *ctx = st->ctx; + struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; + const struct gl_texture_format *mformat; + struct pipe_texture *pt; + enum pipe_format pipeFormat; + GLuint cpp; + GLenum baseFormat; + + baseFormat = _mesa_base_format(format); + + mformat = st_ChooseTextureFormat(ctx, baseFormat, format, type); + assert(mformat); + + pipeFormat = st_mesa_format_to_pipe_format(mformat->MesaFormat); + assert(pipeFormat); + cpp = st_sizeof_format(pipeFormat); + + pixels = _mesa_map_drawpix_pbo(ctx, unpack, pixels); + if (!pixels) + return NULL; + + pt = st_texture_create(st, PIPE_TEXTURE_2D, pipeFormat, 0, width, height, + 1, 0, + PIPE_TEXTURE_USAGE_SAMPLER); + if (!pt) { + _mesa_unmap_drawpix_pbo(ctx, unpack); + return NULL; + } + + { + struct pipe_surface *surface; + static const GLuint dstImageOffsets = 0; + GLboolean success; + GLubyte *dest; + const GLbitfield imageTransferStateSave = ctx->_ImageTransferState; + + /* we'll do pixel transfer in a 
fragment shader */ + ctx->_ImageTransferState = 0x0; + + surface = screen->get_tex_surface(screen, pt, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); + + /* map texture surface */ + dest = screen->surface_map(screen, surface, + PIPE_BUFFER_USAGE_CPU_WRITE); + + /* Put image into texture surface. + * Note that the image is actually going to be upside down in + * the texture. We deal with that with texcoords. + */ + success = mformat->StoreImage(ctx, 2, /* dims */ + baseFormat, /* baseInternalFormat */ + mformat, /* gl_texture_format */ + dest, /* dest */ + 0, 0, 0, /* dstX/Y/Zoffset */ + surface->stride, /* dstRowStride, bytes */ + &dstImageOffsets, /* dstImageOffsets */ + width, height, 1, /* size */ + format, type, /* src format/type */ + pixels, /* data source */ + unpack); + + /* unmap */ + screen->surface_unmap(screen, surface); + pipe_surface_reference(&surface, NULL); + + assert(success); + + /* restore */ + ctx->_ImageTransferState = imageTransferStateSave; + } + + _mesa_unmap_drawpix_pbo(ctx, unpack); + + return pt; +} + + +/** + * Draw quad with texcoords and optional color. + * Coords are window coords with y=0=bottom. 
+ * \param color may be null + * \param invertTex if true, flip texcoords vertically + */ +static void +draw_quad(GLcontext *ctx, GLfloat x0, GLfloat y0, GLfloat z, + GLfloat x1, GLfloat y1, const GLfloat *color, + GLboolean invertTex) +{ + struct st_context *st = ctx->st; + struct pipe_context *pipe = ctx->st->pipe; + GLfloat verts[4][3][4]; /* four verts, three attribs, XYZW */ + + /* setup vertex data */ + { + const struct gl_framebuffer *fb = st->ctx->DrawBuffer; + const GLfloat fb_width = (GLfloat) fb->Width; + const GLfloat fb_height = (GLfloat) fb->Height; + const GLfloat clip_x0 = x0 / fb_width * 2.0f - 1.0f; + const GLfloat clip_y0 = y0 / fb_height * 2.0f - 1.0f; + const GLfloat clip_x1 = x1 / fb_width * 2.0f - 1.0f; + const GLfloat clip_y1 = y1 / fb_height * 2.0f - 1.0f; + const GLfloat sLeft = 0.0f, sRight = 1.0f; + const GLfloat tTop = invertTex, tBot = 1.0f - tTop; + GLuint tex, i; + + /* upper-left */ + verts[0][0][0] = clip_x0; /* v[0].attr[0].x */ + verts[0][0][1] = clip_y0; /* v[0].attr[0].y */ + + /* upper-right */ + verts[1][0][0] = clip_x1; + verts[1][0][1] = clip_y0; + + /* lower-right */ + verts[2][0][0] = clip_x1; + verts[2][0][1] = clip_y1; + + /* lower-left */ + verts[3][0][0] = clip_x0; + verts[3][0][1] = clip_y1; + + tex = color ? 
2 : 1; + verts[0][tex][0] = sLeft; /* v[0].attr[tex].s */ + verts[0][tex][1] = tTop; /* v[0].attr[tex].t */ + verts[1][tex][0] = sRight; + verts[1][tex][1] = tTop; + verts[2][tex][0] = sRight; + verts[2][tex][1] = tBot; + verts[3][tex][0] = sLeft; + verts[3][tex][1] = tBot; + + /* same for all verts: */ + if (color) { + for (i = 0; i < 4; i++) { + verts[i][0][2] = z; /*Z*/ + verts[i][0][3] = 1.0f; /*W*/ + verts[i][1][0] = color[0]; + verts[i][1][1] = color[1]; + verts[i][1][2] = color[2]; + verts[i][1][3] = color[3]; + verts[i][2][2] = 0.0f; /*R*/ + verts[i][2][3] = 1.0f; /*Q*/ + } + } + else { + for (i = 0; i < 4; i++) { + verts[i][0][2] = z; /*Z*/ + verts[i][0][3] = 1.0f; /*W*/ + verts[i][1][2] = 0.0f; /*R*/ + verts[i][1][3] = 1.0f; /*Q*/ + } + } + } + + { + struct pipe_buffer *buf; + ubyte *map; + + /* allocate/load buffer object with vertex data */ + buf = pipe_buffer_create(pipe->screen, 32, PIPE_BUFFER_USAGE_VERTEX, + sizeof(verts)); + map = pipe_buffer_map(pipe->screen, buf, PIPE_BUFFER_USAGE_CPU_WRITE); + memcpy(map, verts, sizeof(verts)); + pipe_buffer_unmap(pipe->screen, buf); + + util_draw_vertex_buffer(pipe, buf, + PIPE_PRIM_QUADS, + 4, /* verts */ + 3); /* attribs/vert */ + pipe_buffer_reference(pipe->screen, &buf, NULL); + } +} + + + +static void +draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, + GLsizei width, GLsizei height, + GLfloat zoomX, GLfloat zoomY, + struct pipe_texture *pt, + struct st_vertex_program *stvp, + struct st_fragment_program *stfp, + const GLfloat *color, + GLboolean invertTex) +{ + struct st_context *st = ctx->st; + struct pipe_context *pipe = ctx->st->pipe; + struct cso_context *cso = ctx->st->cso_context; + GLfloat x0, y0, x1, y1; + GLsizei maxSize; + + /* limit checks */ + /* XXX if DrawPixels image is larger than max texture size, break + * it up into chunks. 
+ */ + maxSize = 1 << (pipe->screen->get_param(pipe->screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS) - 1); + assert(width <= maxSize); + assert(height <= maxSize); + + cso_save_rasterizer(cso); + cso_save_viewport(cso); + cso_save_samplers(cso); + cso_save_sampler_textures(cso); + cso_save_fragment_shader(cso); + cso_save_vertex_shader(cso); + + /* rasterizer state: just scissor */ + { + struct pipe_rasterizer_state rasterizer; + memset(&rasterizer, 0, sizeof(rasterizer)); + rasterizer.gl_rasterization_rules = 1; + rasterizer.scissor = ctx->Scissor.Enabled; + cso_set_rasterizer(cso, &rasterizer); + } + + /* fragment shader state: TEX lookup program */ + cso_set_fragment_shader_handle(cso, stfp->driver_shader); + + /* vertex shader state: position + texcoord pass-through */ + cso_set_vertex_shader_handle(cso, stvp->driver_shader); + + + /* texture sampling state: */ + { + struct pipe_sampler_state sampler; + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.normalized_coords = 1; + + cso_single_sampler(cso, 0, &sampler); + if (st->pixel_xfer.pixelmap_enabled) { + cso_single_sampler(cso, 1, &sampler); + } + cso_single_sampler_done(cso); + } + + /* viewport state: viewport matching window dims */ + { + const float width = (float) ctx->DrawBuffer->Width; + const float height = (float) ctx->DrawBuffer->Height; + struct pipe_viewport_state vp; + vp.scale[0] = 0.5f * width; + vp.scale[1] = -0.5f * height; + vp.scale[2] = 1.0f; + vp.scale[3] = 1.0f; + vp.translate[0] = 0.5f * width; + vp.translate[1] = 0.5f * height; + vp.translate[2] = 0.0f; + vp.translate[3] = 0.0f; + cso_set_viewport(cso, &vp); + } + + /* texture state: */ + if (st->pixel_xfer.pixelmap_enabled) { + struct pipe_texture *textures[2]; + 
textures[0] = pt; + textures[1] = st->pixel_xfer.pixelmap_texture; + pipe->set_sampler_textures(pipe, 2, textures); + } + else { + pipe->set_sampler_textures(pipe, 1, &pt); + } + + /* Compute window coords (y=0=bottom) with pixel zoom. + * Recall that these coords are transformed by the current + * vertex shader and viewport transformation. + */ + x0 = (GLfloat) x; + x1 = x + width * ctx->Pixel.ZoomX; + y0 = (GLfloat) y; + y1 = y + height * ctx->Pixel.ZoomY; + //if(!color) + draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex); + //else + //printf("skip draw quad\n"); + /* restore state */ + cso_restore_rasterizer(cso); + cso_restore_viewport(cso); + cso_restore_samplers(cso); + cso_restore_sampler_textures(cso); + cso_restore_fragment_shader(cso); + cso_restore_vertex_shader(cso); +} + + +/** + * Check if a GL format/type combination is a match to the given pipe format. + * XXX probably move this to a re-usable place. + */ +static GLboolean +compatible_formats(GLenum format, GLenum type, enum pipe_format pipeFormat) +{ + static const GLuint one = 1; + GLubyte littleEndian = *((GLubyte *) &one); + + if (pipeFormat == PIPE_FORMAT_R8G8B8A8_UNORM && + format == GL_RGBA && + type == GL_UNSIGNED_BYTE && + !littleEndian) { + return GL_TRUE; + } + else if (pipeFormat == PIPE_FORMAT_R8G8B8A8_UNORM && + format == GL_ABGR_EXT && + type == GL_UNSIGNED_BYTE && + littleEndian) { + return GL_TRUE; + } + else if (pipeFormat == PIPE_FORMAT_A8R8G8B8_UNORM && + format == GL_BGRA && + type == GL_UNSIGNED_BYTE && + littleEndian) { + return GL_TRUE; + } + else if (pipeFormat == PIPE_FORMAT_R5G6B5_UNORM && + format == GL_RGB && + type == GL_UNSIGNED_SHORT_5_6_5) { + /* endian don't care */ + return GL_TRUE; + } + else if (pipeFormat == PIPE_FORMAT_R5G6B5_UNORM && + format == GL_BGR && + type == GL_UNSIGNED_SHORT_5_6_5_REV) { + /* endian don't care */ + return GL_TRUE; + } + else if (pipeFormat == PIPE_FORMAT_S8_UNORM && + format == GL_STENCIL_INDEX && + type == GL_UNSIGNED_BYTE) { + 
return GL_TRUE; + } + else if (pipeFormat == PIPE_FORMAT_Z32_UNORM && + format == GL_DEPTH_COMPONENT && + type == GL_UNSIGNED_INT) { + return GL_TRUE; + } + /* XXX add more cases */ + else { + return GL_FALSE; + } +} + + +/** + * Check if any per-fragment ops are enabled. + * XXX probably move this to a re-usable place. + */ +static GLboolean +any_fragment_ops(const struct st_context *st) +{ + if (st->state.depth_stencil.alpha.enabled || + st->state.depth_stencil.depth.enabled || + st->state.blend.blend_enable || + st->state.blend.logicop_enable) + /* XXX more checks */ + return GL_TRUE; + else + return GL_FALSE; +} + + +/** + * Check if any pixel transfer ops are enabled. + * XXX probably move this to a re-usable place. + */ +static GLboolean +any_pixel_transfer_ops(const struct st_context *st) +{ + if (st->ctx->Pixel.RedScale != 1.0 || + st->ctx->Pixel.RedBias != 0.0 || + st->ctx->Pixel.GreenScale != 1.0 || + st->ctx->Pixel.GreenBias != 0.0 || + st->ctx->Pixel.BlueScale != 1.0 || + st->ctx->Pixel.BlueBias != 0.0 || + st->ctx->Pixel.AlphaScale != 1.0 || + st->ctx->Pixel.AlphaBias != 0.0 || + st->ctx->Pixel.MapColorFlag) + /* XXX more checks */ + return GL_TRUE; + else + return GL_FALSE; +} + + +/** + * Draw image with a blit, or other non-textured quad method. 
+ */
+static void
+draw_blit(struct st_context *st,
+          GLsizei width, GLsizei height,
+          GLenum format, GLenum type, const GLvoid *pixels)
+{
+   /* Not implemented: intentionally empty.  st_DrawPixels() currently
+    * always takes the textured-quad path, so this is never reached.
+    */
+
+}
+
+
+/**
+ * glDrawPixels(GL_STENCIL_INDEX) path: unpack the user's image one span
+ * at a time and write it into the mapped stencil buffer with the CPU.
+ * Images wider than MAX_WIDTH are processed in column chunks.
+ * Pixel zoom is not implemented; zoomed draws write nothing.
+ */
+static void
+draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y,
+                    GLsizei width, GLsizei height, GLenum type,
+                    const struct gl_pixelstore_attrib *unpack,
+                    const GLvoid *pixels)
+{
+   struct st_context *st = ctx->st;
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   struct st_renderbuffer *strb;
+   struct pipe_surface *ps;
+   const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0;
+   GLint skipPixels;
+   ubyte *stmap;
+
+   pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+
+   strb = st_renderbuffer(ctx->DrawBuffer->
+                          Attachment[BUFFER_STENCIL].Renderbuffer);
+   ps = screen->get_tex_surface(screen, strb->texture, 0, 0, 0,
+                                PIPE_BUFFER_USAGE_CPU_WRITE);
+
+   /* map the stencil buffer */
+   stmap = screen->surface_map(screen, ps,
+                               PIPE_BUFFER_USAGE_CPU_WRITE);
+
+   /* if width > MAX_WIDTH, have to process image in chunks */
+   skipPixels = 0;
+   while (skipPixels < width) {
+      const GLint spanX = x + skipPixels;
+      const GLint spanWidth = MIN2(width - skipPixels, MAX_WIDTH);
+      GLint row;
+      for (row = 0; row < height; row++) {
+         GLint spanY = y + row;
+         GLubyte values[MAX_WIDTH];
+         GLenum destType = GL_UNSIGNED_BYTE;
+         const GLvoid *source = _mesa_image_address2d(unpack, pixels,
+                                                      width, height,
+                                                      GL_COLOR_INDEX, type,
+                                                      row, skipPixels);
+         _mesa_unpack_stencil_span(ctx, spanWidth, destType, values,
+                                   type, source, unpack,
+                                   ctx->_ImageTransferState);
+         if (zoom) {
+            /*
+            _swrast_write_zoomed_stencil_span(ctx, x, y, spanWidth,
+                                              spanX, spanY, values);
+            */
+         }
+         else {
+            if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) {
+               spanY = ctx->DrawBuffer->Height - spanY - 1;
+            }
+
+            switch (ps->format) {
+            case PIPE_FORMAT_S8_UNORM:
+               {
+                  ubyte *dest = stmap + spanY * ps->stride + spanX;
+                  memcpy(dest, values, spanWidth);
+               }
+               break;
+            case PIPE_FORMAT_S8Z24_UNORM:
+               {
+                  uint *dest = (uint *) (stmap + spanY * ps->stride + spanX*4);
+                  GLint k;
+                  for (k = 0; k < spanWidth; k++) {
+                     uint p = dest[k];
+                     /* Keep the low 24 bits, replace the top byte.
+                      * Shift as unsigned: a GLubyte promotes to signed
+                      * int, and shifting values >= 0x80 left by 24
+                      * would overflow it (undefined behaviour).
+                      */
+                     p = (p & 0xffffff) | ((uint) values[k] << 24);
+                     dest[k] = p;
+                  }
+               }
+               break;
+            default:
+               assert(0);
+            }
+         }
+      }
+      skipPixels += spanWidth;
+   }
+
+   /* unmap the stencil buffer */
+   screen->surface_unmap(screen, ps);
+   pipe_surface_reference(&ps, NULL);
+}
+
+
+/**
+ * Called via ctx->Driver.DrawPixels()
+ *
+ * Stencil images are written with the CPU by draw_stencil_pixels().
+ * Depth and color images are uploaded into a temporary texture and drawn
+ * as a textured quad; the texture reference is dropped afterwards.
+ */
+static void
+st_DrawPixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
+              GLenum format, GLenum type,
+              const struct gl_pixelstore_attrib *unpack, const GLvoid *pixels)
+{
+   struct st_fragment_program *stfp;
+   struct st_vertex_program *stvp;
+   struct st_context *st = ctx->st;
+   struct pipe_surface *ps;
+   GLuint bufferFormat;
+   const GLfloat *color;
+
+   if (format == GL_STENCIL_INDEX) {
+      /* special case - can't use texture map */
+      draw_stencil_pixels(ctx, x, y, width, height, type, unpack, pixels);
+      return;
+   }
+
+   _mesa_set_vp_override( ctx, TRUE );
+   _mesa_update_state( ctx );
+
+   st_validate_state(st);
+
+   /* GL_STENCIL_INDEX returned above, so only depth and color remain;
+    * both branches set the shaders used by the textured quad below.
+    */
+   if (format == GL_DEPTH_COMPONENT) {
+      ps = st->state.framebuffer.zsbuf;
+      stfp = make_fragment_shader_z(ctx->st);
+      stvp = st_make_passthrough_vertex_shader(ctx->st, GL_TRUE);
+      color = ctx->Current.RasterColor;
+   }
+   else {
+      ps = st->state.framebuffer.cbufs[0];
+      stfp = combined_drawpix_fragment_program(ctx);
+      stvp = st_make_passthrough_vertex_shader(ctx->st, GL_FALSE);
+      color = NULL;
+   }
+
+   bufferFormat = ps->format;
+
+   /* The blit path is disabled: the condition is hard-wired to true */
+   if (1/*any_fragment_ops(st) ||
+       any_pixel_transfer_ops(st) ||
+       !compatible_formats(format, type, ps->format)*/) {
+      /* textured quad */
+      struct pipe_texture *pt
+         = make_texture(ctx->st, width, height, format, type, unpack, pixels);
+      if (pt) {
+         draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2],
+                            width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY,
+                            pt, stvp, stfp, color, GL_FALSE);
+         pipe_texture_reference(&pt, NULL);
+      }
+   }
+   else {
+      /* blit */
+      draw_blit(st, width, height, format, type, pixels);
+   }
+
+   _mesa_set_vp_override( ctx, FALSE );
+}
+
+
+
+/**
+ * glCopyPixels(GL_STENCIL) path: read the source rectangle into a
+ * temporary byte buffer, then write it row by row into the mapped draw
+ * stencil buffer.  Records GL_OUT_OF_MEMORY and does nothing if the
+ * temporary buffer cannot be allocated.
+ */
+static void
+copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy,
+                    GLsizei width, GLsizei height,
+                    GLint dstx, GLint dsty)
+{
+   struct st_renderbuffer *rbDraw = st_renderbuffer(ctx->DrawBuffer->_StencilBuffer);
+   struct pipe_screen *screen = ctx->st->pipe->screen;
+   struct pipe_surface *psDraw;
+   ubyte *drawMap;
+   ubyte *buffer;
+   int i;
+
+   buffer = malloc(width * height * sizeof(ubyte));
+   if (!buffer) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyPixels(stencil)");
+      return;
+   }
+
+   /* this will do stencil pixel transfer ops */
+   st_read_stencil_pixels(ctx, srcx, srcy, width, height, GL_UNSIGNED_BYTE,
+                          &ctx->DefaultPacking, buffer);
+
+   psDraw = screen->get_tex_surface(screen, rbDraw->texture, 0, 0, 0,
+                                    PIPE_BUFFER_USAGE_CPU_WRITE);
+
+   assert(psDraw->block.width == 1);
+   assert(psDraw->block.height == 1);
+
+   /* map the stencil buffer */
+   drawMap = screen->surface_map(screen, psDraw, PIPE_BUFFER_USAGE_CPU_WRITE);
+
+   /* draw */
+   /* XXX PixelZoom not handled yet */
+   for (i = 0; i < height; i++) {
+      ubyte *dst;
+      const ubyte *src;
+      int y;
+
+      y = dsty + i;
+
+      if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) {
+         y = ctx->DrawBuffer->Height - y - 1;
+      }
+
+      dst = drawMap + y * psDraw->stride + dstx * psDraw->block.size;
+      src = buffer + i * width;
+
+      switch (psDraw->format) {
+      case PIPE_FORMAT_S8Z24_UNORM:
+         {
+            uint *dst4 = (uint *) dst;
+            int j;
+            for (j = 0; j < width; j++) {
+               /* unsigned shift: see draw_stencil_pixels() */
+               *dst4 = (*dst4 & 0xffffff) | ((uint) src[j] << 24);
+               dst4++;
+            }
+         }
+         break;
+      case PIPE_FORMAT_S8_UNORM:
+         memcpy(dst, src, width);
+         break;
+      default:
+         assert(0);
+      }
+   }
+
+   free(buffer);
+
+   /* unmap the stencil buffer */
+   screen->surface_unmap(screen, psDraw);
+   pipe_surface_reference(&psDraw, NULL);
+}
+
+
+/**
+ * glCopyPixels entry point (installed in st_init_drawpixels_functions()).
+ *
+ * Stencil copies go through copy_stencil_pixels().  Color and depth
+ * copies first copy the source rectangle into a temporary texture (with
+ * surface_copy() when the formats match, otherwise through a CPU tile
+ * buffer) and then draw that texture as a quad at (dstx, dsty).
+ * Records GL_OUT_OF_MEMORY if the CPU tile buffer cannot be allocated.
+ */
+static void
+st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
+              GLsizei width, GLsizei height,
+              GLint dstx, GLint dsty, GLenum type)
+{
+   struct st_context *st = ctx->st;
+   struct pipe_context *pipe = st->pipe;
+   struct pipe_screen *screen = pipe->screen;
+   struct st_renderbuffer *rbRead;
+   struct st_vertex_program *stvp;
+   struct st_fragment_program *stfp;
+   struct pipe_surface *psTex;
+   struct pipe_texture *pt;
+   GLfloat *color;
+   enum pipe_format srcFormat, texFormat;
+   GLboolean copied = GL_TRUE;
+
+   /* make sure rendering has completed */
+   pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+
+   st_validate_state(st);
+
+   if (type == GL_STENCIL) {
+      /* can't use texturing to do stencil */
+      copy_stencil_pixels(ctx, srcx, srcy, width, height, dstx, dsty);
+      return;
+   }
+
+   if (type == GL_COLOR) {
+      rbRead = st_get_color_read_renderbuffer(ctx);
+      color = NULL;
+      stfp = combined_drawpix_fragment_program(ctx);
+      stvp = st_make_passthrough_vertex_shader(ctx->st, GL_FALSE);
+   }
+   else {
+      assert(type == GL_DEPTH);
+      rbRead = st_renderbuffer(ctx->ReadBuffer->_DepthBuffer);
+      color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
+      stfp = make_fragment_shader_z(ctx->st);
+      stvp = st_make_passthrough_vertex_shader(ctx->st, GL_TRUE);
+   }
+
+   srcFormat = rbRead->texture->format;
+
+   if (screen->is_format_supported(screen, srcFormat, PIPE_TEXTURE_2D,
+                                   PIPE_TEXTURE_USAGE_SAMPLER, 0)) {
+      texFormat = srcFormat;
+   }
+   else {
+      /* srcFormat can't be used as a texture format */
+      if (type == GL_DEPTH) {
+         texFormat = st_choose_format(pipe, GL_DEPTH_COMPONENT, PIPE_TEXTURE_2D,
+                                      PIPE_TEXTURE_USAGE_DEPTH_STENCIL);
+         assert(texFormat != PIPE_FORMAT_NONE); /* XXX no depth texture formats??? */
+      }
+      else {
+         /* default color format */
+         texFormat = st_choose_format(pipe, GL_RGBA, PIPE_TEXTURE_2D,
+                                      PIPE_TEXTURE_USAGE_SAMPLER);
+         assert(texFormat != PIPE_FORMAT_NONE);
+      }
+   }
+
+   pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, texFormat, 0,
+                          width, height, 1, 0,
+                          PIPE_TEXTURE_USAGE_SAMPLER);
+   if (!pt)
+      return;
+
+   /* NOTE(review): the source Y flip uses the draw buffer's orientation
+    * and height although the pixels are read from the read buffer -
+    * confirm this is intended when the two differ.
+    */
+   if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) {
+      srcy = ctx->DrawBuffer->Height - srcy - height;
+   }
+
+   if (srcFormat == texFormat) {
+      /* copy source framebuffer surface into mipmap/texture */
+      struct pipe_surface *psRead = screen->get_tex_surface(screen,
+                                       rbRead->texture, 0, 0, 0,
+                                       PIPE_BUFFER_USAGE_GPU_READ);
+      psTex = screen->get_tex_surface(screen, pt, 0, 0, 0,
+                                      PIPE_BUFFER_USAGE_GPU_WRITE );
+      pipe->surface_copy(pipe,
+                         FALSE,
+                         psTex, /* dest */
+                         0, 0, /* destx/y */
+                         psRead,
+                         srcx, srcy, width, height);
+      pipe_surface_reference(&psRead, NULL);
+   }
+   else {
+      /* CPU-based fallback/conversion */
+      struct pipe_surface *psRead = screen->get_tex_surface(screen,
+                                       rbRead->texture, 0, 0, 0,
+                                       PIPE_BUFFER_USAGE_CPU_READ);
+
+      psTex = screen->get_tex_surface(screen, pt, 0, 0, 0,
+                                      PIPE_BUFFER_USAGE_CPU_WRITE );
+
+      if (type == GL_COLOR) {
+         /* alternate path using get/put_tile() */
+         GLfloat *buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat));
+
+         if (buf) {
+            pipe_get_tile_rgba(psRead, srcx, srcy, width, height, buf);
+            pipe_put_tile_rgba(psTex, 0, 0, width, height, buf);
+            free(buf);
+         }
+         else {
+            copied = GL_FALSE;
+         }
+      }
+      else {
+         /* GL_DEPTH */
+         GLuint *buf = (GLuint *) malloc(width * height * sizeof(GLuint));
+
+         if (buf) {
+            pipe_get_tile_z(psRead, srcx, srcy, width, height, buf);
+            pipe_put_tile_z(psTex, 0, 0, width, height, buf);
+            free(buf);
+         }
+         else {
+            copied = GL_FALSE;
+         }
+      }
+      pipe_surface_reference(&psRead, NULL);
+   }
+
+   pipe_surface_reference(&psTex, NULL);
+
+   if (!copied) {
+      /* The temporary tile buffer could not be allocated, so the texture
+       * holds no image: report the error and skip the draw.
+       */
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCopyPixels");
+      pipe_texture_reference(&pt, NULL);
+      return;
+   }
+
+   /* draw textured quad */
+   draw_textured_quad(ctx, dstx, dsty, ctx->Current.RasterPos[2],
+                      width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY,
+                      pt, stvp, stfp, color, GL_TRUE);
+
+   pipe_texture_reference(&pt, NULL);
+}
+
+
+
+/**
+ * Plug the DrawPixels/CopyPixels implementations of this file into the
+ * given device driver function table.
+ */
+void st_init_drawpixels_functions(struct dd_function_table *functions)
+{
+   functions->DrawPixels = st_DrawPixels;
+   functions->CopyPixels = st_CopyPixels;
+}
+
+
+/**
+ * Drop the references to the shader programs stored in st->drawpix and
+ * st->pixel_xfer.
+ */
+void
+st_destroy_drawpix(struct st_context *st)
+{
+   st_reference_fragprog(st, &st->drawpix.z_shader, NULL);
+   st_reference_fragprog(st, &st->pixel_xfer.combined_prog, NULL);
+   st_reference_vertprog(st, &st->drawpix.vert_shaders[0], NULL);
+   st_reference_vertprog(st, &st->drawpix.vert_shaders[1], NULL);
+}
+
+
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.h b/src/mesa/state_tracker/st_cb_drawpixels.h
new file mode 100644
index 0000000000..26fe864d18
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_drawpixels.h
@@ -0,0 +1,39 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef ST_CB_DRAWPIXELS_H +#define ST_CB_DRAWPIXELS_H + + +extern void st_init_drawpixels_functions(struct dd_function_table *functions); + +extern void +st_destroy_drawpix(struct st_context *st); + + +#endif /* ST_CB_DRAWPIXELS_H */ diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c new file mode 100644 index 0000000000..00076f61e0 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -0,0 +1,459 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/**
+ * Framebuffer/renderbuffer functions.
+ *
+ * \author Brian Paul
+ */
+
+
+#include "main/imports.h"
+#include "main/context.h"
+#include "main/fbobject.h"
+#include "main/framebuffer.h"
+#include "main/renderbuffer.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_winsys.h"
+#include "st_context.h"
+#include "st_cb_fbo.h"
+#include "st_cb_texture.h"
+#include "st_format.h"
+#include "st_public.h"
+#include "st_texture.h"
+
+
+
+/**
+ * Compute the renderbuffer's Red/Green/EtcBit fields from the pipe format.
+ *
+ * \param strb        renderbuffer whose Base fields are filled in
+ * \param pipeFormat  pipe format to describe
+ * \return the format's size as reported by st_get_format_info(), or 0
+ *         (leaving strb untouched) when the format is not recognized
+ */
+static int
+init_renderbuffer_bits(struct st_renderbuffer *strb,
+                       enum pipe_format pipeFormat)
+{
+   struct pipe_format_info info;
+
+   if (!st_get_format_info( pipeFormat, &info )) {
+      assert( 0 );
+      /* With asserts compiled out we must not read the uninitialized
+       * 'info' below.
+       */
+      return 0;
+   }
+
+   strb->Base._ActualFormat = info.base_format;
+   strb->Base.RedBits = info.red_bits;
+   strb->Base.GreenBits = info.green_bits;
+   strb->Base.BlueBits = info.blue_bits;
+   strb->Base.AlphaBits = info.alpha_bits;
+   strb->Base.DepthBits = info.depth_bits;
+   strb->Base.StencilBits = info.stencil_bits;
+   strb->Base.DataType = st_format_datatype(pipeFormat);
+
+   return info.size;
+}
+
+/**
+ * gl_renderbuffer::AllocStorage()
+ * This is called to allocate the original drawing surface, and
+ * during window resize.
+ */
+static GLboolean
+st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
+                              GLenum internalFormat,
+                              GLuint width, GLuint height)
+{
+   struct pipe_context *pipe = ctx->st->pipe;
+   struct st_renderbuffer *strb = st_renderbuffer(rb);
+   struct pipe_texture template;
+   unsigned surface_usage;
+
+   /* Free the old surface and texture
+    */
+   pipe_surface_reference( &strb->surface, NULL );
+   pipe_texture_reference( &strb->texture, NULL );
+
+
+   memset(&template, 0, sizeof(template));
+
+   /* A format fixed at creation time (window-system buffers) wins over
+    * the GL internalFormat.
+    */
+   if (strb->format != PIPE_FORMAT_NONE) {
+      template.format = strb->format;
+   }
+   else {
+      template.format = st_choose_renderbuffer_format(pipe, internalFormat);
+   }
+
+   strb->Base.Width = width;
+   strb->Base.Height = height;
+   init_renderbuffer_bits(strb, template.format);
+
+   template.target = PIPE_TEXTURE_2D;
+   template.compressed = 0;
+   pf_get_block(template.format, &template.block);
+   template.width[0] = width;
+   template.height[0] = height;
+   template.depth[0] = 1;
+   template.last_level = 0;
+   template.nr_samples = rb->Samples;
+
+   if (pf_is_depth_stencil(template.format)) {
+      template.tex_usage = PIPE_TEXTURE_USAGE_DEPTH_STENCIL;
+   }
+   else {
+      template.tex_usage = (PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
+                            PIPE_TEXTURE_USAGE_RENDER_TARGET);
+   }
+
+
+   /* Probably need dedicated flags for surface usage too:
+    */
+   surface_usage = (PIPE_BUFFER_USAGE_GPU_READ |
+                    PIPE_BUFFER_USAGE_GPU_WRITE);
+#if 0
+                    PIPE_BUFFER_USAGE_CPU_READ |
+                    PIPE_BUFFER_USAGE_CPU_WRITE);
+#endif
+
+   strb->texture = pipe->screen->texture_create( pipe->screen,
+                                                 &template );
+
+   /* Special path for accum buffers.
+    *
+    * Try a different surface format.  Since accum buffers are s/w
+    * only for now, the surface pixel format doesn't really matter,
+    * only that the buffer is large enough.
+    */
+   if (!strb->texture && template.format == DEFAULT_ACCUM_PIPE_FORMAT)
+   {
+      /* Actually, just setting this usage value should be sufficient
+       * to tell the driver to go ahead and allocate the buffer, even
+       * if HW doesn't support the format.
+       */
+      template.tex_usage = 0;
+      surface_usage = (PIPE_BUFFER_USAGE_CPU_READ |
+                       PIPE_BUFFER_USAGE_CPU_WRITE);
+
+      strb->texture = pipe->screen->texture_create( pipe->screen,
+                                                    &template );
+
+   }
+
+   if (!strb->texture)
+      return FALSE;
+
+   strb->surface = pipe->screen->get_tex_surface( pipe->screen,
+                                                  strb->texture,
+                                                  0, 0, 0,
+                                                  surface_usage );
+
+   /* Check for failure before the sanity asserts below, which
+    * dereference the surface.
+    */
+   if (!strb->surface)
+      return FALSE;
+
+   assert(strb->surface->texture);
+   assert(strb->surface->buffer);
+   assert(strb->surface->format);
+   assert(strb->surface->block.size);
+   assert(strb->surface->block.width);
+   assert(strb->surface->block.height);
+   assert(strb->surface->width == width);
+   assert(strb->surface->height == height);
+   assert(strb->surface->stride);
+
+
+   return GL_TRUE;
+}
+
+
+/**
+ * gl_renderbuffer::Delete()
+ * Drops the surface and texture references, then frees the renderbuffer.
+ */
+static void
+st_renderbuffer_delete(struct gl_renderbuffer *rb)
+{
+   struct st_renderbuffer *strb = st_renderbuffer(rb);
+   ASSERT(strb);
+   pipe_surface_reference(&strb->surface, NULL);
+   pipe_texture_reference(&strb->texture, NULL);
+   free(strb);
+}
+
+
+/**
+ * gl_renderbuffer::GetPointer()
+ * Always returns NULL.
+ */
+static void *
+null_get_pointer(GLcontext * ctx, struct gl_renderbuffer *rb,
+                 GLint x, GLint y)
+{
+   /* By returning NULL we force all software rendering to go through
+    * the span routines.
+    */
+#if 0
+   assert(0); /* Should never get called with softpipe */
+#endif
+   return NULL;
+}
+
+
+/**
+ * Called via ctx->Driver.NewFramebuffer()
+ */
+static struct gl_framebuffer *
+st_new_framebuffer(GLcontext *ctx, GLuint name)
+{
+   /* XXX not sure we need to subclass gl_framebuffer for pipe */
+   return _mesa_new_framebuffer(ctx, name);
+}
+
+
+/**
+ * Called via ctx->Driver.NewRenderbuffer()
+ * Returns NULL if the renderbuffer cannot be allocated.
+ */
+static struct gl_renderbuffer *
+st_new_renderbuffer(GLcontext *ctx, GLuint name)
+{
+   struct st_renderbuffer *strb = CALLOC_STRUCT(st_renderbuffer);
+   if (strb) {
+      _mesa_init_renderbuffer(&strb->Base, name);
+      strb->Base.Delete = st_renderbuffer_delete;
+      strb->Base.AllocStorage = st_renderbuffer_alloc_storage;
+      strb->Base.GetPointer = null_get_pointer;
+      /* no preferred format: AllocStorage picks one from internalFormat */
+      strb->format = PIPE_FORMAT_NONE;
+      return &strb->Base;
+   }
+   return NULL;
+}
+
+
+/**
+ * Allocate a renderbuffer for an on-screen window (not a user-created
+ * renderbuffer). The window system code determines the format.
+ *
+ * \return the new renderbuffer, or NULL on allocation failure or when
+ *         the pipe format is not one of the formats handled below
+ */
+struct gl_renderbuffer *
+st_new_renderbuffer_fb(enum pipe_format format, int samples)
+{
+   struct st_renderbuffer *strb;
+
+   strb = CALLOC_STRUCT(st_renderbuffer);
+   if (!strb) {
+      _mesa_error(NULL, GL_OUT_OF_MEMORY, "creating renderbuffer");
+      return NULL;
+   }
+
+   _mesa_init_renderbuffer(&strb->Base, 0);
+   strb->Base.ClassID = 0x4242; /* just a unique value */
+   strb->Base.Samples = samples;
+   strb->format = format;
+
+   switch (format) {
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_X8R8G8B8_UNORM:
+   case PIPE_FORMAT_B8G8R8X8_UNORM:
+   case PIPE_FORMAT_A1R5G5B5_UNORM:
+   case PIPE_FORMAT_A4R4G4B4_UNORM:
+   case PIPE_FORMAT_R5G6B5_UNORM:
+      strb->Base.InternalFormat = GL_RGBA;
+      strb->Base._BaseFormat = GL_RGBA;
+      break;
+   case PIPE_FORMAT_Z16_UNORM:
+      strb->Base.InternalFormat = GL_DEPTH_COMPONENT16;
+      strb->Base._BaseFormat = GL_DEPTH_COMPONENT;
+      break;
+   case PIPE_FORMAT_Z32_UNORM:
+      strb->Base.InternalFormat = GL_DEPTH_COMPONENT32;
+      strb->Base._BaseFormat = GL_DEPTH_COMPONENT;
+      break;
+   case PIPE_FORMAT_S8Z24_UNORM:
+   case PIPE_FORMAT_Z24S8_UNORM:
+   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_Z24X8_UNORM:
+      strb->Base.InternalFormat = GL_DEPTH24_STENCIL8_EXT;
+      strb->Base._BaseFormat = GL_DEPTH_STENCIL_EXT;
+      break;
+   case PIPE_FORMAT_S8_UNORM:
+      strb->Base.InternalFormat = GL_STENCIL_INDEX8_EXT;
+      strb->Base._BaseFormat = GL_STENCIL_INDEX;
+      break;
+   case DEFAULT_ACCUM_PIPE_FORMAT: /*PIPE_FORMAT_R16G16B16A16_SNORM*/
+      strb->Base.InternalFormat = GL_RGBA16;
+      strb->Base._BaseFormat = GL_RGBA;
+      break;
+   default:
+      _mesa_problem(NULL,
+                    "Unexpected format in st_new_renderbuffer_fb");
+      /* nothing else references strb yet: don't leak it */
+      free(strb);
+      return NULL;
+   }
+
+   /* st-specific methods */
+   strb->Base.Delete = st_renderbuffer_delete;
+   strb->Base.AllocStorage = st_renderbuffer_alloc_storage;
+   strb->Base.GetPointer = null_get_pointer;
+
+   /* surface is allocated in st_renderbuffer_alloc_storage() */
+   strb->surface = NULL;
+
+   return &strb->Base;
+}
+
+
+
+
+/**
+ * Called via ctx->Driver.BindFramebufferEXT().
+ * Intentionally empty.
+ */
+static void
+st_bind_framebuffer(GLcontext *ctx, GLenum target,
+                    struct gl_framebuffer *fb, struct gl_framebuffer *fbread)
+{
+
+}
+
+/**
+ * Called by ctx->Driver.FramebufferRenderbuffer
+ */
+static void
+st_framebuffer_renderbuffer(GLcontext *ctx,
+                            struct gl_framebuffer *fb,
+                            GLenum attachment,
+                            struct gl_renderbuffer *rb)
+{
+   /* XXX no need for derivation? */
+   _mesa_framebuffer_renderbuffer(ctx, fb, attachment, rb);
+}
+
+
+/**
+ * Called by ctx->Driver.RenderTexture
+ *
+ * Wraps the attached texture image in a new renderbuffer, stores it in
+ * att->Renderbuffer and points it at the texture object's pipe texture.
+ * Records GL_OUT_OF_MEMORY if the renderbuffer cannot be created.
+ */
+static void
+st_render_texture(GLcontext *ctx,
+                  struct gl_framebuffer *fb,
+                  struct gl_renderbuffer_attachment *att)
+{
+   struct st_renderbuffer *strb;
+   struct gl_renderbuffer *rb;
+   struct pipe_texture *pt;
+   struct st_texture_object *stObj;
+   const struct gl_texture_image *texImage =
+      att->Texture->Image[att->CubeMapFace][att->TextureLevel];
+
+
+   assert(!att->Renderbuffer);
+
+   /* create new renderbuffer which wraps the texture image */
+   rb = st_new_renderbuffer(ctx, 0);
+   if (!rb) {
+      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glFramebufferTexture()");
+      return;
+   }
+
+   _mesa_reference_renderbuffer(&att->Renderbuffer, rb);
+   assert(rb->RefCount == 1);
+   rb->AllocStorage = NULL; /* should not get called */
+   strb = st_renderbuffer(rb);
+
+   /* get the texture for the texture object */
+   stObj = st_texture_object(att->Texture);
+
+   /* point renderbuffer at texobject */
+   strb->rtt = stObj;
+   strb->rtt_level = att->TextureLevel;
+   strb->rtt_face = att->CubeMapFace;
+   strb->rtt_slice = att->Zoffset;
+
+   rb->Width = texImage->Width2;
+   rb->Height = texImage->Height2;
+   /*printf("***** render to texture level %d: %d x %d\n", att->TextureLevel, rb->Width, rb->Height);*/
+
+   pt = st_get_texobj_texture(att->Texture);
+   assert(pt);
+   /*printf("***** pipe texture %d x %d\n", pt->width[0], pt->height[0]);*/
+
+   pipe_texture_reference( &strb->texture, pt );
+
+   pipe_surface_reference(&strb->surface, NULL);
+
+   /* the new surface will be created during framebuffer validation */
+
+   init_renderbuffer_bits(strb, pt->format);
+
+   /*
+   printf("RENDER TO TEXTURE obj=%p pt=%p surf=%p  %d x %d\n",
+          att->Texture, pt, strb->surface, rb->Width, rb->Height);
+   */
+
+   /* Invalidate buffer state so that the pipe's framebuffer state
+    * gets updated.
+    * That's where the new renderbuffer (which we just created) gets
+    * passed to the pipe as a (color/depth) render target.
+    */
+   st_invalidate_state(ctx, _NEW_BUFFERS);
+}
+
+
+/**
+ * Called via ctx->Driver.FinishRenderTexture.
+ *
+ * Flushes rendering, releases the surface created for the texture and
+ * drops the attachment's renderbuffer reference.  Does nothing when the
+ * attachment has no renderbuffer.
+ */
+static void
+st_finish_render_texture(GLcontext *ctx,
+                         struct gl_renderbuffer_attachment *att)
+{
+   struct pipe_screen *screen = ctx->st->pipe->screen;
+   struct st_renderbuffer *strb = st_renderbuffer(att->Renderbuffer);
+
+   if (!strb)
+      return;
+
+   ctx->st->pipe->flush(ctx->st->pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+
+   if (strb->surface)
+      screen->tex_surface_release( screen, &strb->surface );
+
+   strb->rtt = NULL;
+
+   /*
+   printf("FINISH RENDER TO TEXTURE surf=%p\n", strb->surface);
+   */
+
+   _mesa_reference_renderbuffer(&att->Renderbuffer, NULL);
+
+   /* restore previous framebuffer state */
+   st_invalidate_state(ctx, _NEW_BUFFERS);
+}
+
+
+
+/**
+ * Plug the framebuffer/renderbuffer callbacks of this file into the
+ * given device driver function table.
+ */
+void st_init_fbo_functions(struct dd_function_table *functions)
+{
+   functions->NewFramebuffer = st_new_framebuffer;
+   functions->NewRenderbuffer = st_new_renderbuffer;
+   functions->BindFramebuffer = st_bind_framebuffer;
+   functions->FramebufferRenderbuffer = st_framebuffer_renderbuffer;
+   functions->RenderTexture = st_render_texture;
+   functions->FinishRenderTexture = st_finish_render_texture;
+   /* no longer needed by core Mesa, drivers handle resizes...
+   functions->ResizeBuffers = st_resize_buffers;
+   */
+}
diff --git a/src/mesa/state_tracker/st_cb_fbo.h b/src/mesa/state_tracker/st_cb_fbo.h
new file mode 100644
index 0000000000..44fa9fe9a4
--- /dev/null
+++ b/src/mesa/state_tracker/st_cb_fbo.h
@@ -0,0 +1,71 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef ST_CB_FBO_H +#define ST_CB_FBO_H + + +#define DEFAULT_ACCUM_PIPE_FORMAT PIPE_FORMAT_R16G16B16A16_SNORM + + + +/** + * Derived renderbuffer class. Just need to add a pointer to the + * pipe surface. 
+ */ +struct st_renderbuffer +{ + struct gl_renderbuffer Base; + struct pipe_texture *texture; + struct pipe_surface *surface; /* temporary view into texture */ + enum pipe_format format; /** preferred format, or PIPE_FORMAT_NONE */ + + struct st_texture_object *rtt; /**< GL render to texture's texture */ + int rtt_level, rtt_face, rtt_slice; + + /** Render to texture state */ + struct pipe_texture *texture_save; + struct pipe_surface *surface_save; +}; + + +static INLINE struct st_renderbuffer * +st_renderbuffer(struct gl_renderbuffer *rb) +{ + return (struct st_renderbuffer *) rb; +} + + +extern struct gl_renderbuffer * +st_new_renderbuffer_fb(enum pipe_format format, int samples); + +extern void +st_init_fbo_functions(struct dd_function_table *functions); + + +#endif /* ST_CB_FBO_H */ diff --git a/src/mesa/state_tracker/st_cb_feedback.c b/src/mesa/state_tracker/st_cb_feedback.c new file mode 100644 index 0000000000..19021411cf --- /dev/null +++ b/src/mesa/state_tracker/st_cb_feedback.c @@ -0,0 +1,310 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * GL_SELECT and GL_FEEDBACK render modes. + * Basically, we use a private instance of the 'draw' module for doing + * selection/feedback. It would be nice to use the transform_feedback + * hardware feature, but it's defined as happening pre-clip and we want + * post-clipped primitives. Also, there's concerns about the efficiency + * of using the hardware for this anyway. + * + * Authors: + * Brian Paul + */ + +#include "main/imports.h" +#include "main/context.h" +#include "main/feedback.h" +#include "main/macros.h" + +#include "vbo/vbo.h" + +#include "st_context.h" +#include "st_atom.h" +#include "st_draw.h" +#include "st_cb_feedback.h" +#include "st_cb_bufferobjects.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" +#include "cso_cache/cso_cache.h" + +#include "draw/draw_context.h" +#include "draw/draw_pipe.h" + + +/** + * This is actually used for both feedback and selection. 
+ */
+struct feedback_stage
+{
+   struct draw_stage stage;   /**< Base class */
+   GLcontext *ctx;            /**< Rendering context */
+   GLboolean reset_stipple_counter; /**< next line emits a reset token */
+};
+
+
+/**********************************************************************
+ * GL Feedback functions
+ **********************************************************************/
+
+/**
+ * Cast a generic draw stage to the feedback stage that embeds it.
+ */
+static INLINE struct feedback_stage *
+feedback_stage( struct draw_stage *stage )
+{
+   return (struct feedback_stage *)stage;
+}
+
+
+/**
+ * Hand one post-transform vertex to core Mesa's feedback buffer.
+ * Color and texcoord come from the vertex when the vertex program
+ * outputs them, otherwise from the context's current attributes.
+ */
+static void
+feedback_vertex(GLcontext *ctx, const struct draw_context *draw,
+                const struct vertex_header *v)
+{
+   const struct st_context *st = ctx->st;
+   /* XXX
+    * When we compute vertex layout, save info about position of the
+    * color and texcoord attribs to use here.
+    */
+   const GLuint colorSlot = st->vertex_result_to_slot[VERT_RESULT_COL0];
+   const GLuint texSlot = st->vertex_result_to_slot[VERT_RESULT_TEX0];
+   const GLfloat *color, *texcoord;
+   const GLfloat ci = 0;
+   GLfloat win[4];
+
+   /* a slot of ~0U means "no such output": use the current attribute */
+   color = (colorSlot != ~0U) ? v->data[colorSlot]
+                              : ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
+   texcoord = (texSlot != ~0U) ? v->data[texSlot]
+                               : ctx->Current.Attrib[VERT_ATTRIB_TEX0];
+
+   /* Recall that Y=0=Top of window for Gallium wincoords */
+   win[0] = v->data[0][0];
+   win[1] = ctx->DrawBuffer->Height - v->data[0][1];
+   win[2] = v->data[0][2];
+   win[3] = 1.0F / v->data[0][3];
+
+   _mesa_feedback_vertex(ctx, win, color, ci, texcoord);
+}
+
+
+/**
+ * Emit a triangle as a polygon token, a vertex count of 3 and its
+ * three vertices.
+ */
+static void
+feedback_tri( struct draw_stage *stage, struct prim_header *prim )
+{
+   struct feedback_stage *fs = feedback_stage(stage);
+   struct draw_context *draw = stage->draw;
+   GLuint i;
+
+   FEEDBACK_TOKEN(fs->ctx, (GLfloat) GL_POLYGON_TOKEN);
+   FEEDBACK_TOKEN(fs->ctx, (GLfloat) 3); /* three vertices */
+   for (i = 0; i < 3; i++) {
+      feedback_vertex(fs->ctx, draw, prim->v[i]);
+   }
+}
+
+
+/**
+ * Emit a line.  The first line after a stipple reset is tagged with the
+ * reset token, later ones with the plain line token.
+ */
+static void
+feedback_line( struct draw_stage *stage, struct prim_header *prim )
+{
+   struct feedback_stage *fs = feedback_stage(stage);
+   struct draw_context *draw = stage->draw;
+
+   if (!fs->reset_stipple_counter) {
+      FEEDBACK_TOKEN(fs->ctx, (GLfloat) GL_LINE_TOKEN);
+   }
+   else {
+      FEEDBACK_TOKEN(fs->ctx, (GLfloat) GL_LINE_RESET_TOKEN);
+      fs->reset_stipple_counter = GL_FALSE;
+   }
+
+   feedback_vertex(fs->ctx, draw, prim->v[0]);
+   feedback_vertex(fs->ctx, draw, prim->v[1]);
+}
+
+
+/**
+ * Emit a point token followed by the point's single vertex.
+ */
+static void
+feedback_point( struct draw_stage *stage, struct prim_header *prim )
+{
+   struct feedback_stage *fs = feedback_stage(stage);
+   struct draw_context *draw = stage->draw;
+
+   FEEDBACK_TOKEN(fs->ctx, (GLfloat) GL_POINT_TOKEN);
+   feedback_vertex(fs->ctx, draw, prim->v[0]);
+}
+
+
+/** Flush callback: nothing to do for feedback. */
+static void
+feedback_flush( struct draw_stage *stage, unsigned flags )
+{
+   /* no-op */
+}
+
+
+/** Remember that the next line must carry the stipple reset token. */
+static void
+feedback_reset_stipple_counter( struct draw_stage *stage )
+{
+   struct feedback_stage *fs = feedback_stage(stage);
+
+   fs->reset_stipple_counter = GL_TRUE;
+}
+
+
+/** Destroy callback: does nothing. */
+static void
+feedback_destroy( struct draw_stage *stage )
+{
+   /* no-op */
+}
+
+/**
+ * Create GL feedback drawing stage.
+ */ +static struct draw_stage * +draw_glfeedback_stage(GLcontext *ctx, struct draw_context *draw) +{ + struct feedback_stage *fs = CALLOC_STRUCT(feedback_stage); + + fs->stage.draw = draw; + fs->stage.next = NULL; + fs->stage.point = feedback_point; + fs->stage.line = feedback_line; + fs->stage.tri = feedback_tri; + fs->stage.flush = feedback_flush; + fs->stage.reset_stipple_counter = feedback_reset_stipple_counter; + fs->stage.destroy = feedback_destroy; + fs->ctx = ctx; + + return &fs->stage; +} + + + +/********************************************************************** + * GL Selection functions + **********************************************************************/ + +static void +select_tri( struct draw_stage *stage, struct prim_header *prim ) +{ + struct feedback_stage *fs = feedback_stage(stage); + _mesa_update_hitflag( fs->ctx, prim->v[0]->data[0][2] ); + _mesa_update_hitflag( fs->ctx, prim->v[1]->data[0][2] ); + _mesa_update_hitflag( fs->ctx, prim->v[2]->data[0][2] ); +} + +static void +select_line( struct draw_stage *stage, struct prim_header *prim ) +{ + struct feedback_stage *fs = feedback_stage(stage); + _mesa_update_hitflag( fs->ctx, prim->v[0]->data[0][2] ); + _mesa_update_hitflag( fs->ctx, prim->v[1]->data[0][2] ); +} + + +static void +select_point( struct draw_stage *stage, struct prim_header *prim ) +{ + struct feedback_stage *fs = feedback_stage(stage); + _mesa_update_hitflag( fs->ctx, prim->v[0]->data[0][2] ); +} + + +static void +select_flush( struct draw_stage *stage, unsigned flags ) +{ + /* no-op */ +} + + +static void +select_reset_stipple_counter( struct draw_stage *stage ) +{ + /* no-op */ +} + +static void +select_destroy( struct draw_stage *stage ) +{ + /* no-op */ +} + + +/** + * Create GL selection mode drawing stage. 
+ */ +static struct draw_stage * +draw_glselect_stage(GLcontext *ctx, struct draw_context *draw) +{ + struct feedback_stage *fs = CALLOC_STRUCT(feedback_stage); + + fs->stage.draw = draw; + fs->stage.next = NULL; + fs->stage.point = select_point; + fs->stage.line = select_line; + fs->stage.tri = select_tri; + fs->stage.flush = select_flush; + fs->stage.reset_stipple_counter = select_reset_stipple_counter; + fs->stage.destroy = select_destroy; + fs->ctx = ctx; + + return &fs->stage; +} + + +static void +st_RenderMode(GLcontext *ctx, GLenum newMode ) +{ + struct st_context *st = ctx->st; + struct draw_context *draw = st->draw; + + if (newMode == GL_RENDER) { + /* restore normal VBO draw function */ + vbo_set_draw_func(ctx, st_draw_vbo); + } + else if (newMode == GL_SELECT) { + if (!st->selection_stage) + st->selection_stage = draw_glselect_stage(ctx, draw); + draw_set_rasterize_stage(draw, st->selection_stage); + /* Plug in new vbo draw function */ + vbo_set_draw_func(ctx, st_feedback_draw_vbo); + } + else { + if (!st->feedback_stage) + st->feedback_stage = draw_glfeedback_stage(ctx, draw); + draw_set_rasterize_stage(draw, st->feedback_stage); + /* Plug in new vbo draw function */ + vbo_set_draw_func(ctx, st_feedback_draw_vbo); + /* need to generate/use a vertex program that emits pos/color/tex */ + st->dirty.st |= ST_NEW_VERTEX_PROGRAM; + } +} + + + +void st_init_feedback_functions(struct dd_function_table *functions) +{ + functions->RenderMode = st_RenderMode; +} diff --git a/src/mesa/state_tracker/st_cb_feedback.h b/src/mesa/state_tracker/st_cb_feedback.h new file mode 100644 index 0000000000..2559ba3817 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_feedback.h @@ -0,0 +1,37 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef ST_CB_FEEDBACK_H +#define ST_CB_FEEDBACK_H + + +extern void +st_init_feedback_functions(struct dd_function_table *functions); + + +#endif /* ST_CB_FEEDBACK_H */ diff --git a/src/mesa/state_tracker/st_cb_flush.c b/src/mesa/state_tracker/st_cb_flush.c new file mode 100644 index 0000000000..d8f9537d2d --- /dev/null +++ b/src/mesa/state_tracker/st_cb_flush.c @@ -0,0 +1,138 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/context.h" +#include "st_context.h" +#include "st_cb_bitmap.h" +#include "st_cb_flush.h" +#include "st_cb_fbo.h" +#include "st_public.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_winsys.h" + + +static INLINE GLboolean +is_front_buffer_dirty(struct st_context *st) +{ + return st->frontbuffer_status == FRONT_STATUS_DIRTY; +} + + +/** + * Tell the winsys to display the front color buffer on-screen. 
+ */ +static void +display_front_buffer(struct st_context *st) +{ + GLframebuffer *fb = st->ctx->DrawBuffer; + struct st_renderbuffer *strb + = st_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer); + struct pipe_surface *front_surf = strb->surface; + + /* Hook for copying "fake" frontbuffer if necessary: + */ + st->pipe->winsys->flush_frontbuffer( st->pipe->winsys, front_surf, + st->pipe->priv ); + + /* + st->frontbuffer_status = FRONT_STATUS_UNDEFINED; + */ +} + + +void st_flush( struct st_context *st, uint pipeFlushFlags, + struct pipe_fence_handle **fence ) +{ + FLUSH_VERTICES(st->ctx, 0); + + st_flush_bitmap_cache(st); + + st->pipe->flush( st->pipe, pipeFlushFlags, fence ); +} + + +/** + * Flush, and wait for completion. + */ +void st_finish( struct st_context *st ) +{ + struct pipe_fence_handle *fence = NULL; + + st_flush(st, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, &fence); + + st->pipe->winsys->fence_finish(st->pipe->winsys, fence, 0); + st->pipe->winsys->fence_reference(st->pipe->winsys, &fence, NULL); +} + + + +/** + * Called via ctx->Driver.Flush() + */ +static void st_glFlush(GLcontext *ctx) +{ + struct st_context *st = ctx->st; + + if (is_front_buffer_dirty(st)) { + st_finish(st); + display_front_buffer(st); + } + else { + st_flush(st, PIPE_FLUSH_RENDER_CACHE, NULL); + } +} + + +/** + * Called via ctx->Driver.Finish() + */ +static void st_glFinish(GLcontext *ctx) +{ + struct st_context *st = ctx->st; + + st_finish(st); + + if (is_front_buffer_dirty(st)) { + display_front_buffer(st); + } +} + + +void st_init_flush_functions(struct dd_function_table *functions) +{ + functions->Flush = st_glFlush; + functions->Finish = st_glFinish; +} diff --git a/src/mesa/state_tracker/st_cb_flush.h b/src/mesa/state_tracker/st_cb_flush.h new file mode 100644 index 0000000000..c26f779225 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_flush.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 
Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef ST_CB_FLUSH_H +#define ST_CB_FLUSH_H + + +extern void +st_init_flush_functions(struct dd_function_table *functions); + + +#endif /* ST_CB_FLUSH_H */ + diff --git a/src/mesa/state_tracker/st_cb_get.c b/src/mesa/state_tracker/st_cb_get.c new file mode 100644 index 0000000000..e7d7f03bc9 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_get.c @@ -0,0 +1,97 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * glGet functions + * + * \author Brian Paul + */ + +#include "main/imports.h" +#include "main/context.h" + +#include "pipe/p_defines.h" + +#include "st_cb_fbo.h" +#include "st_cb_get.h" + + + +/** + * Examine the current color read buffer format to determine + * which GL pixel format/type combo is the best match. + */ +static void +get_preferred_read_format_type(GLcontext *ctx, GLint *format, GLint *type) +{ + struct gl_framebuffer *fb = ctx->ReadBuffer; + struct st_renderbuffer *strb = st_renderbuffer(fb->_ColorReadBuffer); + + /* defaults */ + *format = ctx->Const.ColorReadFormat; + *type = ctx->Const.ColorReadType; + + if (strb) { + /* XXX could add more cases here... 
*/ + if (strb->format == PIPE_FORMAT_A8R8G8B8_UNORM) { + *format = GL_BGRA; + if (_mesa_little_endian()) + *type = GL_UNSIGNED_INT_8_8_8_8_REV; + else + *type = GL_UNSIGNED_INT_8_8_8_8; + } + } +} + + +/** + * We only intercept the OES preferred ReadPixels format/type. + * Everything else goes to the default _mesa_GetIntegerv. + */ +static GLboolean +st_GetIntegerv(GLcontext *ctx, GLenum pname, GLint *params) +{ + GLint dummy; + + switch (pname) { + case GL_IMPLEMENTATION_COLOR_READ_TYPE_OES: + get_preferred_read_format_type(ctx, &dummy, params); + return GL_TRUE; + case GL_IMPLEMENTATION_COLOR_READ_FORMAT_OES: + get_preferred_read_format_type(ctx, params, &dummy); + return GL_TRUE; + default: + return GL_FALSE; + } +} + + +void st_init_get_functions(struct dd_function_table *functions) +{ + functions->GetIntegerv = st_GetIntegerv; +} diff --git a/src/mesa/state_tracker/st_cb_get.h b/src/mesa/state_tracker/st_cb_get.h new file mode 100644 index 0000000000..8e9f3e9306 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_get.h @@ -0,0 +1,37 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef ST_CB_GET_H +#define ST_CB_GET_H + + +extern void +st_init_get_functions(struct dd_function_table *functions); + + +#endif diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c new file mode 100644 index 0000000000..ea0fa20012 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_program.c @@ -0,0 +1,266 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/enums.h" +#include "shader/prog_instruction.h" +#include "shader/prog_parameter.h" +#include "shader/program.h" +#include "shader/programopt.h" +#include "shader/shader_api.h" + +#include "cso_cache/cso_context.h" +#include "draw/draw_context.h" + +#include "st_context.h" +#include "st_program.h" +#include "st_atom_shader.h" +#include "st_cb_program.h" + + +static GLuint SerialNo = 1; + + +/** + * Called via ctx->Driver.BindProgram() to bind an ARB vertex or + * fragment program. + */ +static void st_bind_program( GLcontext *ctx, + GLenum target, + struct gl_program *prog ) +{ + struct st_context *st = st_context(ctx); + + switch (target) { + case GL_VERTEX_PROGRAM_ARB: + st->dirty.st |= ST_NEW_VERTEX_PROGRAM; + break; + case GL_FRAGMENT_PROGRAM_ARB: + st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM; + break; + } +} + + +/** + * Called via ctx->Driver.UseProgram() to bind a linked GLSL program + * (vertex shader + fragment shader). + */ +static void st_use_program( GLcontext *ctx, + GLuint program ) +{ + struct st_context *st = st_context(ctx); + + st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM; + st->dirty.st |= ST_NEW_VERTEX_PROGRAM; + + _mesa_use_program(ctx, program); +} + + + +/** + * Called via ctx->Driver.NewProgram() to allocate a new vertex or + * fragment program. 
+ */ +static struct gl_program *st_new_program( GLcontext *ctx, + GLenum target, + GLuint id ) +{ + switch (target) { + case GL_VERTEX_PROGRAM_ARB: { + struct st_vertex_program *prog = CALLOC_STRUCT(st_vertex_program); + + prog->serialNo = SerialNo++; + + return _mesa_init_vertex_program( ctx, + &prog->Base, + target, + id ); + } + + case GL_FRAGMENT_PROGRAM_ARB: + case GL_FRAGMENT_PROGRAM_NV: { + struct st_fragment_program *prog = CALLOC_STRUCT(st_fragment_program); + + prog->serialNo = SerialNo++; + + return _mesa_init_fragment_program( ctx, + &prog->Base, + target, + id ); + } + + default: + assert(0); + return NULL; + } +} + + +void +st_delete_program(GLcontext *ctx, struct gl_program *prog) +{ + struct st_context *st = st_context(ctx); + + switch( prog->Target ) { + case GL_VERTEX_PROGRAM_ARB: + { + struct st_vertex_program *stvp = (struct st_vertex_program *) prog; + + if (stvp->driver_shader) { + cso_delete_vertex_shader(st->cso_context, stvp->driver_shader); + stvp->driver_shader = NULL; + } + + if (stvp->draw_shader) { +#if FEATURE_feedback || FEATURE_drawpix + /* this would only have been allocated for the RasterPos path */ + draw_delete_vertex_shader(st->draw, stvp->draw_shader); + stvp->draw_shader = NULL; +#endif + } + + if (stvp->state.tokens) { + FREE((void *) stvp->state.tokens); + stvp->state.tokens = NULL; + } + } + break; + case GL_FRAGMENT_PROGRAM_ARB: + { + struct st_fragment_program *stfp = (struct st_fragment_program *) prog; + + if (stfp->driver_shader) { + cso_delete_fragment_shader(st->cso_context, stfp->driver_shader); + stfp->driver_shader = NULL; + } + + if (stfp->state.tokens) { + FREE((void *) stfp->state.tokens); + stfp->state.tokens = NULL; + } + + if (stfp->bitmap_program) { + struct gl_program *prg = &stfp->bitmap_program->Base.Base; + _mesa_reference_program(ctx, &prg, NULL); + stfp->bitmap_program = NULL; + } + + st_free_translated_vertex_programs(st, stfp->vertex_programs); + } + break; + default: + assert(0); /* problem */ + } 
+ + /* delete base class */ + _mesa_delete_program( ctx, prog ); +} + + +static GLboolean st_is_program_native( GLcontext *ctx, + GLenum target, + struct gl_program *prog ) +{ + return GL_TRUE; +} + + +static void st_program_string_notify( GLcontext *ctx, + GLenum target, + struct gl_program *prog ) +{ + struct st_context *st = st_context(ctx); + + if (target == GL_FRAGMENT_PROGRAM_ARB) { + struct st_fragment_program *stfp = (struct st_fragment_program *) prog; + + stfp->serialNo++; + + if (stfp->driver_shader) { + cso_delete_fragment_shader(st->cso_context, stfp->driver_shader); + stfp->driver_shader = NULL; + } + + if (stfp->state.tokens) { + FREE((void *) stfp->state.tokens); + stfp->state.tokens = NULL; + } + + stfp->param_state = stfp->Base.Base.Parameters->StateFlags; + + if (st->fp == stfp) + st->dirty.st |= ST_NEW_FRAGMENT_PROGRAM; + } + else if (target == GL_VERTEX_PROGRAM_ARB) { + struct st_vertex_program *stvp = (struct st_vertex_program *) prog; + + stvp->serialNo++; + + if (stvp->driver_shader) { + cso_delete_vertex_shader(st->cso_context, stvp->driver_shader); + stvp->driver_shader = NULL; + } + + if (stvp->draw_shader) { +#if FEATURE_feedback || FEATURE_drawpix + /* this would only have been allocated for the RasterPos path */ + draw_delete_vertex_shader(st->draw, stvp->draw_shader); + stvp->draw_shader = NULL; +#endif + } + + if (stvp->state.tokens) { + FREE((void *) stvp->state.tokens); + stvp->state.tokens = NULL; + } + + stvp->param_state = stvp->Base.Base.Parameters->StateFlags; + + if (st->vp == stvp) + st->dirty.st |= ST_NEW_VERTEX_PROGRAM; + } +} + + + +void st_init_program_functions(struct dd_function_table *functions) +{ + functions->BindProgram = st_bind_program; + functions->UseProgram = st_use_program; + functions->NewProgram = st_new_program; + functions->DeleteProgram = st_delete_program; + functions->IsProgramNative = st_is_program_native; + functions->ProgramStringNotify = st_program_string_notify; +} diff --git 
a/src/mesa/state_tracker/st_cb_program.h b/src/mesa/state_tracker/st_cb_program.h new file mode 100644 index 0000000000..0de96f2fd2 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_program.h @@ -0,0 +1,39 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef ST_CB_PROGRAM_H +#define ST_CB_PROGRAM_H + + +extern void +st_init_program_functions(struct dd_function_table *functions); + +extern void +st_delete_program(GLcontext *ctx, struct gl_program *prog); + + +#endif diff --git a/src/mesa/state_tracker/st_cb_queryobj.c b/src/mesa/state_tracker/st_cb_queryobj.c new file mode 100644 index 0000000000..21c2c7dd9f --- /dev/null +++ b/src/mesa/state_tracker/st_cb_queryobj.c @@ -0,0 +1,171 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +/** + * glBegin/EndQuery interface to pipe + * + * \author Brian Paul + */ + + +#include "main/imports.h" +#include "main/context.h" +#include "main/image.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "st_context.h" +#include "st_cb_queryobj.h" +#include "st_public.h" + + +struct st_query_object +{ + struct gl_query_object base; + struct pipe_query *pq; +}; + + +/** + * Cast wrapper + */ +static struct st_query_object * +st_query_object(struct gl_query_object *q) +{ + return (struct st_query_object *) q; +} + + +static struct gl_query_object * +st_NewQueryObject(GLcontext *ctx, GLuint id) +{ + struct st_query_object *stq = CALLOC_STRUCT(st_query_object); + if (stq) { + stq->base.Id = id; + stq->base.Ready = GL_TRUE; + stq->pq = NULL; + return &stq->base; + } + return NULL; +} + + + +static void +st_DeleteQuery(GLcontext *ctx, struct gl_query_object *q) +{ + struct pipe_context *pipe = ctx->st->pipe; + struct st_query_object *stq = st_query_object(q); + + if (stq->pq) { + pipe->destroy_query(pipe, stq->pq); + stq->pq = NULL; + } + + FREE(stq); +} + + +static void +st_BeginQuery(GLcontext *ctx, struct gl_query_object *q) +{ + struct pipe_context *pipe = ctx->st->pipe; + struct st_query_object *stq = st_query_object(q); + + switch (q->Target) { + case GL_SAMPLES_PASSED_ARB: + if (!stq->pq) + stq->pq = pipe->create_query( pipe, PIPE_QUERY_OCCLUSION_COUNTER ); + break; + default: + assert(0); + return; + } + + pipe->begin_query(pipe, stq->pq); +} + + +static void +st_EndQuery(GLcontext *ctx, struct gl_query_object *q) +{ + struct pipe_context *pipe = ctx->st->pipe; + struct st_query_object *stq = st_query_object(q); + + pipe->end_query(pipe, stq->pq); +} + + +static void +st_WaitQuery(GLcontext *ctx, struct gl_query_object *q) +{ + struct pipe_context *pipe = ctx->st->pipe; + struct st_query_object *stq = st_query_object(q); + + /* this function should only 
be called if we don't have a ready result */ + assert(!stq->base.Ready); + + while (!stq->base.Ready && + !pipe->get_query_result(pipe, + stq->pq, + TRUE, + &q->Result)) + { + /* nothing */ + } + + q->Ready = GL_TRUE; +} + + +static void +st_CheckQuery(GLcontext *ctx, struct gl_query_object *q) +{ + struct pipe_context *pipe = ctx->st->pipe; + struct st_query_object *stq = st_query_object(q); + + if (!q->Ready) { + q->Ready = pipe->get_query_result(pipe, + stq->pq, + FALSE, + &q->Result); + } +} + + + + +void st_init_query_functions(struct dd_function_table *functions) +{ + functions->NewQueryObject = st_NewQueryObject; + functions->DeleteQuery = st_DeleteQuery; + functions->BeginQuery = st_BeginQuery; + functions->EndQuery = st_EndQuery; + functions->WaitQuery = st_WaitQuery; + functions->CheckQuery = st_CheckQuery; +} diff --git a/src/mesa/state_tracker/st_cb_queryobj.h b/src/mesa/state_tracker/st_cb_queryobj.h new file mode 100644 index 0000000000..9220a212b6 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_queryobj.h @@ -0,0 +1,36 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef ST_CB_QUERYOBJ_H +#define ST_CB_QUERYOBJ_H + + +extern void +st_init_query_functions(struct dd_function_table *functions); + + +#endif diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c new file mode 100644 index 0000000000..3b30c2a61b --- /dev/null +++ b/src/mesa/state_tracker/st_cb_rasterpos.c @@ -0,0 +1,258 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * glRasterPos implementation. Basically render a GL_POINT with our + * private draw module. Plug in a special "rasterpos" stage at the end + * of the 'draw' pipeline to capture the results and update the current + * raster pos attributes. + * + * Authors: + * Brian Paul + */ + + +#include "main/imports.h" +#include "main/macros.h" +#include "main/feedback.h" + +#include "st_context.h" +#include "st_atom.h" +#include "st_draw.h" +#include "st_cb_rasterpos.h" +#include "st_draw.h" +#include "draw/draw_context.h" +#include "draw/draw_pipe.h" +#include "shader/prog_instruction.h" +#include "vbo/vbo.h" + + + +/** + * Our special drawing pipeline stage (replaces rasterization). 
+ */ +struct rastpos_stage +{ + struct draw_stage stage; /**< Base class */ + GLcontext *ctx; /**< Rendering context */ + + /* vertex attrib info we can setup once and re-use */ + struct gl_client_array array[VERT_ATTRIB_MAX]; + const struct gl_client_array *arrays[VERT_ATTRIB_MAX]; + struct _mesa_prim prim; +}; + + +static INLINE struct rastpos_stage * +rastpos_stage( struct draw_stage *stage ) +{ + return (struct rastpos_stage *) stage; +} + +static void +rastpos_flush( struct draw_stage *stage, unsigned flags ) +{ + /* no-op */ +} + +static void +rastpos_reset_stipple_counter( struct draw_stage *stage ) +{ + /* no-op */ +} + +static void +rastpos_tri( struct draw_stage *stage, struct prim_header *prim ) +{ + /* should never get here */ + assert(0); +} + +static void +rastpos_line( struct draw_stage *stage, struct prim_header *prim ) +{ + /* should never get here */ + assert(0); +} + +static void +rastpos_destroy(struct draw_stage *stage) +{ + free(stage); +} + + +/** + * Update a raster pos attribute from the vertex result if it's present, + * else copy the current attrib. + */ +static void +update_attrib(GLcontext *ctx, const GLuint *outputMapping, + const struct vertex_header *vert, + GLfloat *dest, + GLuint result, GLuint defaultAttrib) +{ + const GLfloat *src; + const GLuint k = outputMapping[result]; + if (k != ~0U) + src = vert->data[k]; + else + src = ctx->Current.Attrib[defaultAttrib]; + COPY_4V(dest, src); +} + + +/** + * Normally, this function would render a GL_POINT. 
+ */ +static void +rastpos_point(struct draw_stage *stage, struct prim_header *prim) +{ + struct rastpos_stage *rs = rastpos_stage(stage); + GLcontext *ctx = rs->ctx; + struct st_context *st = ctx->st; + const GLfloat height = (GLfloat) ctx->DrawBuffer->Height; + const GLuint *outputMapping = st->vertex_result_to_slot; + const GLfloat *pos; + GLuint i; + + /* if we get here, we didn't get clipped */ + ctx->Current.RasterPosValid = GL_TRUE; + + /* update raster pos */ + pos = prim->v[0]->data[0]; + ctx->Current.RasterPos[0] = pos[0]; + ctx->Current.RasterPos[1] = height - pos[1]; /* invert Y */ + ctx->Current.RasterPos[2] = pos[2]; + ctx->Current.RasterPos[3] = pos[3]; + + /* update other raster attribs */ + update_attrib(ctx, outputMapping, prim->v[0], + ctx->Current.RasterColor, + VERT_RESULT_COL0, VERT_ATTRIB_COLOR0); + + update_attrib(ctx, outputMapping, prim->v[0], + ctx->Current.RasterSecondaryColor, + VERT_RESULT_COL1, VERT_ATTRIB_COLOR1); + + for (i = 0; i < MAX_TEXTURE_UNITS; i++) { + update_attrib(ctx, outputMapping, prim->v[0], + ctx->Current.RasterTexCoords[i], + VERT_RESULT_TEX0 + i, VERT_ATTRIB_TEX0 + i); + } + + if (ctx->RenderMode == GL_SELECT) { + _mesa_update_hitflag( ctx, ctx->Current.RasterPos[2] ); + } +} + + +/** + * Create rasterpos "drawing" stage. 
+ */ +static struct rastpos_stage * +new_draw_rastpos_stage(GLcontext *ctx, struct draw_context *draw) +{ + struct rastpos_stage *rs = CALLOC_STRUCT(rastpos_stage); + GLuint i; + + rs->stage.draw = draw; + rs->stage.next = NULL; + rs->stage.point = rastpos_point; + rs->stage.line = rastpos_line; + rs->stage.tri = rastpos_tri; + rs->stage.flush = rastpos_flush; + rs->stage.destroy = rastpos_destroy; + rs->stage.reset_stipple_counter = rastpos_reset_stipple_counter; + rs->stage.destroy = rastpos_destroy; + rs->ctx = ctx; + + for (i = 0; i < VERT_ATTRIB_MAX; i++) { + rs->array[i].Size = 4; + rs->array[i].Type = GL_FLOAT; + rs->array[i].Stride = 0; + rs->array[i].StrideB = 0; + rs->array[i].Ptr = (GLubyte *) ctx->Current.Attrib[i]; + rs->array[i].Enabled = GL_TRUE; + rs->array[i].Normalized = GL_TRUE; + rs->array[i].BufferObj = NULL; + rs->arrays[i] = &rs->array[i]; + } + + rs->prim.mode = GL_POINTS; + rs->prim.indexed = 0; + rs->prim.begin = 1; + rs->prim.end = 1; + rs->prim.weak = 0; + rs->prim.start = 0; + rs->prim.count = 1; + + return rs; +} + + +static void +st_RasterPos(GLcontext *ctx, const GLfloat v[4]) +{ + struct st_context *st = ctx->st; + struct draw_context *draw = st->draw; + struct rastpos_stage *rs; + + if (st->rastpos_stage) { + /* get rastpos stage info */ + rs = rastpos_stage(st->rastpos_stage); + } + else { + /* create rastpos draw stage */ + rs = new_draw_rastpos_stage(ctx, draw); + st->rastpos_stage = &rs->stage; + } + + /* plug our rastpos stage into the draw module */ + draw_set_rasterize_stage(st->draw, st->rastpos_stage); + + /* make sure everything's up to date */ + st_validate_state(ctx->st); + + /* This will get set only if rastpos_point(), above, gets called */ + ctx->Current.RasterPosValid = GL_FALSE; + + /* All vertex attribs but position were previously initialized above. + * Just plug in position pointer now. 
+ */ + rs->array[0].Ptr = (GLubyte *) v; + + /* draw the point */ + st_feedback_draw_vbo(ctx, rs->arrays, &rs->prim, 1, NULL, 0, 1); +} + + + +void st_init_rasterpos_functions(struct dd_function_table *functions) +{ + functions->RasterPos = st_RasterPos; +} diff --git a/src/mesa/state_tracker/st_cb_rasterpos.h b/src/mesa/state_tracker/st_cb_rasterpos.h new file mode 100644 index 0000000000..2b992e1405 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_rasterpos.h @@ -0,0 +1,33 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef ST_CB_RASTERPOS_H +#define ST_CB_RASTERPOS_H + +extern void st_init_rasterpos_functions(struct dd_function_table *functions); + +#endif diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c new file mode 100644 index 0000000000..c801532788 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -0,0 +1,466 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +/** + * glReadPixels interface to pipe + * + * \author Brian Paul + */ + + +#include "main/imports.h" +#include "main/bufferobj.h" +#include "main/context.h" +#include "main/image.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" +#include "st_context.h" +#include "st_cb_bitmap.h" +#include "st_cb_readpixels.h" +#include "st_cb_fbo.h" +#include "st_format.h" +#include "st_public.h" + + +/** + * Special case for reading stencil buffer. + * For color/depth we use get_tile(). For stencil, map the stencil buffer. + */ +void +st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y, + GLsizei width, GLsizei height, GLenum type, + const struct gl_pixelstore_attrib *packing, + GLvoid *pixels) +{ + struct gl_framebuffer *fb = ctx->ReadBuffer; + struct pipe_screen *screen = ctx->st->pipe->screen; + struct st_renderbuffer *strb = st_renderbuffer(fb->_StencilBuffer); + struct pipe_surface *ps; + ubyte *stmap; + GLint j; + + /* Create a CPU-READ surface/view into the renderbuffer's texture */ + ps = screen->get_tex_surface(screen, strb->texture, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_READ); + + /* map the stencil buffer */ + stmap = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_READ); + + /* width should never be > MAX_WIDTH since we did clipping earlier */ + ASSERT(width <= MAX_WIDTH); + + /* process image row by row */ + for (j = 0; j < height; j++, y++) { + GLvoid *dest; + GLstencil values[MAX_WIDTH]; + GLint srcY; + + if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { + srcY = ctx->DrawBuffer->Height - y - 1; + } + else { + srcY = y; + } + + /* get stencil values */ + switch (ps->format) { + case PIPE_FORMAT_S8_UNORM: + { + const ubyte *src = stmap + srcY * ps->stride + x; + memcpy(values, src, width); + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + { + const uint *src = (uint *) (stmap + srcY * ps->stride + x*4); + 
GLint k; + for (k = 0; k < width; k++) { + values[k] = src[k] >> 24; + } + } + break; + case PIPE_FORMAT_Z24S8_UNORM: + { + const uint *src = (uint *) (stmap + srcY * ps->stride + x*4); + GLint k; + for (k = 0; k < width; k++) { + values[k] = src[k] & 0xff; + } + } + break; + default: + assert(0); + } + + /* store */ + dest = _mesa_image_address2d(packing, pixels, width, height, + GL_STENCIL_INDEX, type, j, 0); + + _mesa_pack_stencil_span(ctx, width, type, dest, values, packing); + } + + + /* unmap the stencil buffer */ + screen->surface_unmap(screen, ps); + pipe_surface_reference(&ps, NULL); +} + + +/** + * Return renderbuffer to use for reading color pixels for glRead/CopyPixel + * commands. + * Special care is needed for the front buffer. + */ +struct st_renderbuffer * +st_get_color_read_renderbuffer(GLcontext *ctx) +{ + struct gl_framebuffer *fb = ctx->ReadBuffer; + struct st_renderbuffer *strb = + st_renderbuffer(fb->_ColorReadBuffer); + struct st_renderbuffer *front = + st_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer); + + if (strb == front + && ctx->st->frontbuffer_status == FRONT_STATUS_COPY_OF_BACK) { + /* reading from front color buffer, which is a logical copy of the + * back color buffer. + */ + struct st_renderbuffer *back = + st_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer); + strb = back; + } + + return strb; +} + + +/** + * Try to do glReadPixels in a fast manner for common cases. 
+ * \return GL_TRUE for success, GL_FALSE for failure + */ +static GLboolean +st_fast_readpixels(GLcontext *ctx, struct st_renderbuffer *strb, + GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *pack, + GLvoid *dest) +{ + enum combination { + A8R8G8B8_UNORM_TO_RGBA_UBYTE, + A8R8G8B8_UNORM_TO_RGB_UBYTE, + A8R8G8B8_UNORM_TO_BGRA_UINT + } combo; + + if (ctx->_ImageTransferState) + return GL_FALSE; + + if (strb->format == PIPE_FORMAT_A8R8G8B8_UNORM && + format == GL_RGBA && type == GL_UNSIGNED_BYTE) { + combo = A8R8G8B8_UNORM_TO_RGBA_UBYTE; + } + else if (strb->format == PIPE_FORMAT_A8R8G8B8_UNORM && + format == GL_RGB && type == GL_UNSIGNED_BYTE) { + combo = A8R8G8B8_UNORM_TO_RGB_UBYTE; + } + else if (strb->format == PIPE_FORMAT_A8R8G8B8_UNORM && + format == GL_BGRA && type == GL_UNSIGNED_INT_8_8_8_8_REV) { + combo = A8R8G8B8_UNORM_TO_BGRA_UINT; + } + else { + return GL_FALSE; + } + + /*printf("st_fast_readpixels combo %d\n", (GLint) combo);*/ + + { + struct pipe_context *pipe = ctx->st->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_surface *surf; + const GLubyte *map; + GLubyte *dst; + GLint row, col, dy, dstStride; + + surf = screen->get_tex_surface(screen, strb->texture, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_READ); + if (!surf) { + return GL_FALSE; + } + + map = screen->surface_map(screen, surf, PIPE_BUFFER_USAGE_CPU_READ); + if (!map) { + pipe_surface_reference(&surf, NULL); + return GL_FALSE; + } + + if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) { + y = surf->height - y - 1; + dy = -1; + } + else { + dy = 1; + } + + dst = _mesa_image_address2d(pack, dest, width, height, + format, type, 0, 0); + dstStride = _mesa_image_row_stride(pack, width, format, type); + + switch (combo) { + case A8R8G8B8_UNORM_TO_RGBA_UBYTE: + for (row = 0; row < height; row++) { + const GLubyte *src = map + y * surf->stride + x * 4; + for (col = 0; col < width; col++) { + GLuint pixel = ((GLuint *) 
src)[col]; + dst[col*4+0] = (pixel >> 16) & 0xff; + dst[col*4+1] = (pixel >> 8) & 0xff; + dst[col*4+2] = (pixel >> 0) & 0xff; + dst[col*4+3] = (pixel >> 24) & 0xff; + } + dst += dstStride; + y += dy; + } + break; + case A8R8G8B8_UNORM_TO_RGB_UBYTE: + for (row = 0; row < height; row++) { + const GLubyte *src = map + y * surf->stride + x * 4; + for (col = 0; col < width; col++) { + GLuint pixel = ((GLuint *) src)[col]; + dst[col*3+0] = (pixel >> 16) & 0xff; + dst[col*3+1] = (pixel >> 8) & 0xff; + dst[col*3+2] = (pixel >> 0) & 0xff; + } + dst += dstStride; + y += dy; + } + break; + case A8R8G8B8_UNORM_TO_BGRA_UINT: + for (row = 0; row < height; row++) { + const GLubyte *src = map + y * surf->stride + x * 4; + memcpy(dst, src, 4 * width); + dst += dstStride; + y += dy; + } + break; + default: + ; /* nothing */ + } + + screen->surface_unmap(screen, surf); + pipe_surface_reference(&surf, NULL); + } + + return GL_TRUE; +} + + +/** + * Do glReadPixels by getting rows from the framebuffer surface with + * get_tile(). Convert to requested format/type with Mesa image routines. + * Image transfer ops are done in software too. 
+ */ +static void +st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, + GLenum format, GLenum type, + const struct gl_pixelstore_attrib *pack, + GLvoid *dest) +{ + struct pipe_context *pipe = ctx->st->pipe; + struct pipe_screen *screen = pipe->screen; + GLfloat temp[MAX_WIDTH][4]; + const GLbitfield transferOps = ctx->_ImageTransferState; + GLsizei i, j; + GLint yStep, dfStride; + GLfloat *df; + struct st_renderbuffer *strb; + struct gl_pixelstore_attrib clippedPacking = *pack; + struct pipe_surface *surf; + + assert(ctx->ReadBuffer->Width > 0); + + /* XXX convolution not done yet */ + assert((transferOps & IMAGE_CONVOLUTION_BIT) == 0); + + /* Do all needed clipping here, so that we can forget about it later */ + if (!_mesa_clip_readpixels(ctx, &x, &y, &width, &height, &clippedPacking)) { + /* The ReadPixels surface is totally outside the window bounds */ + return; + } + + dest = _mesa_map_readpix_pbo(ctx, &clippedPacking, dest); + if (!dest) + return; + + st_flush_bitmap_cache(ctx->st); + + /* make sure rendering has completed */ + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + + if (format == GL_STENCIL_INDEX) { + st_read_stencil_pixels(ctx, x, y, width, height, type, pack, dest); + return; + } + else if (format == GL_DEPTH_COMPONENT) { + strb = st_renderbuffer(ctx->ReadBuffer->_DepthBuffer); + } + else { + /* Read color buffer */ + strb = st_get_color_read_renderbuffer(ctx); + } + + if (!strb) + return; + + /* try a fast-path readpixels before anything else */ + if (st_fast_readpixels(ctx, strb, x, y, width, height, + format, type, pack, dest)) { + /* success! 
*/ + _mesa_unmap_readpix_pbo(ctx, &clippedPacking); + return; + } + + if (format == GL_RGBA && type == GL_FLOAT) { + /* write tile(row) directly into user's buffer */ + df = (GLfloat *) _mesa_image_address2d(&clippedPacking, dest, width, + height, format, type, 0, 0); + dfStride = width * 4; + } + else { + /* write tile(row) into temp row buffer */ + df = (GLfloat *) temp; + dfStride = 0; + } + + /* determine bottom-to-top vs. top-to-bottom order */ + if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) { + y = strb->Base.Height - 1 - y; + yStep = -1; + } + else { + yStep = 1; + } + + /* Create a CPU-READ surface/view into the renderbuffer's texture */ + surf = screen->get_tex_surface(screen, strb->texture, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_READ); + + /* + * Copy pixels from pipe_surface to user memory + */ + { + /* dest of first pixel in client memory */ + GLubyte *dst = _mesa_image_address2d(&clippedPacking, dest, width, + height, format, type, 0, 0); + /* dest row stride */ + const GLint dstStride = _mesa_image_row_stride(&clippedPacking, width, + format, type); + + if (surf->format == PIPE_FORMAT_S8Z24_UNORM || + surf->format == PIPE_FORMAT_X8Z24_UNORM) { + if (format == GL_DEPTH_COMPONENT) { + for (i = 0; i < height; i++) { + GLuint ztemp[MAX_WIDTH]; + GLfloat zfloat[MAX_WIDTH]; + const double scale = 1.0 / ((1 << 24) - 1); + pipe_get_tile_raw(surf, x, y, width, 1, ztemp, 0); + y += yStep; + for (j = 0; j < width; j++) { + zfloat[j] = (float) (scale * (ztemp[j] & 0xffffff)); + } + _mesa_pack_depth_span(ctx, width, dst, type, + zfloat, &clippedPacking); + dst += dstStride; + } + } + else { + /* untested, but simple: */ + assert(format == GL_DEPTH_STENCIL_EXT); + for (i = 0; i < height; i++) { + pipe_get_tile_raw(surf, x, y, width, 1, dst, 0); + y += yStep; + dst += dstStride; + } + } + } + else if (surf->format == PIPE_FORMAT_Z16_UNORM) { + for (i = 0; i < height; i++) { + GLushort ztemp[MAX_WIDTH]; + GLfloat zfloat[MAX_WIDTH]; + const double scale = 1.0 / 0xffff; 
+ pipe_get_tile_raw(surf, x, y, width, 1, ztemp, 0); + y += yStep; + for (j = 0; j < width; j++) { + zfloat[j] = (float) (scale * ztemp[j]); + } + _mesa_pack_depth_span(ctx, width, dst, type, + zfloat, &clippedPacking); + dst += dstStride; + } + } + else if (surf->format == PIPE_FORMAT_Z32_UNORM) { + for (i = 0; i < height; i++) { + GLuint ztemp[MAX_WIDTH]; + GLfloat zfloat[MAX_WIDTH]; + const double scale = 1.0 / 0xffffffff; + pipe_get_tile_raw(surf, x, y, width, 1, ztemp, 0); + y += yStep; + for (j = 0; j < width; j++) { + zfloat[j] = (float) (scale * ztemp[j]); + } + _mesa_pack_depth_span(ctx, width, dst, type, + zfloat, &clippedPacking); + dst += dstStride; + } + } + else { + /* RGBA format */ + /* Do a row at a time to flip image data vertically */ + for (i = 0; i < height; i++) { + pipe_get_tile_rgba(surf, x, y, width, 1, df); + y += yStep; + df += dfStride; + if (!dfStride) { + _mesa_pack_rgba_span_float(ctx, width, temp, format, type, dst, + &clippedPacking, transferOps); + dst += dstStride; + } + } + } + } + + pipe_surface_reference(&surf, NULL); + + _mesa_unmap_readpix_pbo(ctx, &clippedPacking); +} + + +void st_init_readpixels_functions(struct dd_function_table *functions) +{ + functions->ReadPixels = st_readpixels; +} diff --git a/src/mesa/state_tracker/st_cb_readpixels.h b/src/mesa/state_tracker/st_cb_readpixels.h new file mode 100644 index 0000000000..9e151be51f --- /dev/null +++ b/src/mesa/state_tracker/st_cb_readpixels.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef ST_CB_READPIXELS_H +#define ST_CB_READPIXELS_H + +extern struct st_renderbuffer * +st_get_color_read_renderbuffer(GLcontext *ctx); + +extern void +st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y, + GLsizei width, GLsizei height, GLenum type, + const struct gl_pixelstore_attrib *packing, + GLvoid *pixels); + +extern void +st_init_readpixels_functions(struct dd_function_table *functions); + + +#endif /* ST_CB_READPIXELS_H */ diff --git a/src/mesa/state_tracker/st_cb_strings.c b/src/mesa/state_tracker/st_cb_strings.c new file mode 100644 index 0000000000..09545aa8fb --- /dev/null +++ b/src/mesa/state_tracker/st_cb_strings.c @@ -0,0 +1,86 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + +#include "main/glheader.h" +#include "main/macros.h" +#include "main/version.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "pipe/p_winsys.h" +#include "st_context.h" +#include "st_cb_strings.h" + +#define ST_VERSION_STRING "0.2" + +static const GLubyte * +st_get_string(GLcontext * ctx, GLenum name) +{ + struct st_context *st = st_context(ctx); + struct pipe_screen *screen = st->pipe->screen; + + switch (name) { + case GL_VENDOR: { + const char *vendor = screen->get_vendor( screen ); + const char *tungsten = "Tungsten Graphics, Inc."; + + /* Tungsten Graphics, Inc. developed the state_tracker module + * (and much of Mesa), but the driver itself may come from elsewhere. + * The additional string allows "and XyzCorp" to reflect this. 
+ */ + if (vendor && strcmp(vendor, tungsten) != 0) + util_snprintf(st->vendor, sizeof(st->vendor), + "%s and %s", tungsten, vendor); + else + util_snprintf(st->vendor, sizeof(st->vendor), "%s", tungsten); + + return (GLubyte *) st->vendor; + } + + case GL_RENDERER: + util_snprintf(st->renderer, sizeof(st->renderer), "Gallium %s, %s on %s", + ST_VERSION_STRING, + screen->get_name( screen ), + screen->winsys->get_name( screen->winsys )); + + return (GLubyte *) st->renderer; + + default: + return NULL; + } +} + + +void st_init_string_functions(struct dd_function_table *functions) +{ + functions->GetString = st_get_string; +} diff --git a/src/mesa/state_tracker/st_cb_strings.h b/src/mesa/state_tracker/st_cb_strings.h new file mode 100644 index 0000000000..3b765aaa59 --- /dev/null +++ b/src/mesa/state_tracker/st_cb_strings.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef ST_CB_STRINGS_H +#define ST_CB_STRINGS_H + + +extern void +st_init_string_functions(struct dd_function_table *functions); + + +#endif /* ST_CB_CLEAR_H */ + diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c new file mode 100644 index 0000000000..d08229b57a --- /dev/null +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -0,0 +1,1551 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/imports.h" +#if FEATURE_convolve +#include "main/convolve.h" +#endif +#include "main/enums.h" +#include "main/image.h" +#include "main/macros.h" +#include "main/mipmap.h" +#include "main/pixel.h" +#include "main/texcompress.h" +#include "main/texformat.h" +#include "main/teximage.h" +#include "main/texobj.h" +#include "main/texstore.h" + +#include "state_tracker/st_context.h" +#include "state_tracker/st_cb_fbo.h" +#include "state_tracker/st_cb_texture.h" +#include "state_tracker/st_format.h" +#include "state_tracker/st_public.h" +#include "state_tracker/st_texture.h" +#include "state_tracker/st_gen_mipmap.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" +#include "util/u_blit.h" + + +#define DBG if (0) printf + + +static enum pipe_texture_target +gl_target_to_pipe(GLenum target) +{ + switch (target) { + case GL_TEXTURE_1D: + return PIPE_TEXTURE_1D; + + case GL_TEXTURE_2D: + case GL_TEXTURE_RECTANGLE_NV: + return PIPE_TEXTURE_2D; + + case GL_TEXTURE_3D: + return PIPE_TEXTURE_3D; + + case GL_TEXTURE_CUBE_MAP_ARB: + return PIPE_TEXTURE_CUBE; + + default: + assert(0); + return 0; + } +} + + +/** + * Return nominal bytes per texel for a compressed format, 0 for non-compressed + * format. 
+ */ +static int +compressed_num_bytes(GLuint mesaFormat) +{ + switch(mesaFormat) { +#if FEATURE_texture_fxt1 + case MESA_FORMAT_RGB_FXT1: + case MESA_FORMAT_RGBA_FXT1: +#endif +#if FEATURE_texture_s3tc + case MESA_FORMAT_RGB_DXT1: + case MESA_FORMAT_RGBA_DXT1: + return 2; + case MESA_FORMAT_RGBA_DXT3: + case MESA_FORMAT_RGBA_DXT5: + return 4; +#endif + default: + return 0; + } +} + + +/** called via ctx->Driver.NewTextureImage() */ +static struct gl_texture_image * +st_NewTextureImage(GLcontext * ctx) +{ + DBG("%s\n", __FUNCTION__); + (void) ctx; + return (struct gl_texture_image *) CALLOC_STRUCT(st_texture_image); +} + + +/** called via ctx->Driver.NewTextureObject() */ +static struct gl_texture_object * +st_NewTextureObject(GLcontext * ctx, GLuint name, GLenum target) +{ + struct st_texture_object *obj = CALLOC_STRUCT(st_texture_object); + + DBG("%s\n", __FUNCTION__); + _mesa_initialize_texture_object(&obj->base, name, target); + + return &obj->base; +} + +/** called via ctx->Driver.DeleteTextureImage() */ +static void +st_DeleteTextureObject(GLcontext *ctx, + struct gl_texture_object *texObj) +{ + struct st_texture_object *stObj = st_texture_object(texObj); + if (stObj->pt) + pipe_texture_reference(&stObj->pt, NULL); + + _mesa_delete_texture_object(ctx, texObj); +} + + +/** called via ctx->Driver.FreeTexImageData() */ +static void +st_FreeTextureImageData(GLcontext * ctx, struct gl_texture_image *texImage) +{ + struct st_texture_image *stImage = st_texture_image(texImage); + + DBG("%s\n", __FUNCTION__); + + if (stImage->pt) { + pipe_texture_reference(&stImage->pt, NULL); + } + + if (texImage->Data) { + _mesa_align_free(texImage->Data); + texImage->Data = NULL; + } +} + + +/** + * From linux kernel i386 header files, copes with odd sizes better + * than COPY_DWORDS would: + * XXX Put this in src/mesa/main/imports.h ??? 
/**
 * x86-only copy built from "rep movsl" plus a 2-byte and a 1-byte tail
 * move.  Every other target simply maps __memcpy() onto memcpy().
 */
#if defined(i386) || defined(__i386__)
static INLINE void *
__memcpy(void *to, const void *from, size_t n)
{
   int d0, d1, d2;
   __asm__ __volatile__("rep ; movsl\n\t"
                        "testb $2,%b4\n\t"
                        "je 1f\n\t"
                        "movsw\n"
                        "1:\ttestb $1,%b4\n\t"
                        "je 2f\n\t"
                        "movsb\n" "2:":"=&c"(d0), "=&D"(d1), "=&S"(d2)
                        :"0"(n / 4), "q"(n), "1"((long) to), "2"((long) from)
                        :"memory");
   return (to);
}
#else
#define __memcpy(a,b,c) memcpy(a,b,c)
#endif


/**
 * The system memcpy (at least on ubuntu 5.10) has problems copying
 * to agp (writecombined) memory from a source which isn't 64-byte
 * aligned - there is a 4x performance falloff.
 *
 * The x86 __memcpy is immune to this but is slightly slower
 * (10%-ish) than the system memcpy.
 *
 * The sse_memcpy seems to have a slight cliff at 64/32 bytes, but
 * isn't much faster than x86_memcpy for agp copies.
 *
 * TODO: switch dynamically.
 *
 * \return \p dest
 */
static void *
do_memcpy(void *dest, const void *src, size_t n)
{
   /* Only the low six address bits matter.  Go through size_t rather than
    * 'unsigned' so the pointer conversion is not a narrowing one on
    * 64-bit targets.
    */
   const size_t misaligned = ((size_t) src | (size_t) dest) & 63;

   if (misaligned)
      return __memcpy(dest, src, n);

   return memcpy(dest, src, n);
}


/**
 * Return the smallest k such that (1 << k) >= n, i.e. ceil(log2(n)).
 * Values of n <= 1 (including zero and negatives) yield 0.
 */
static int
logbase2(int n)
{
   /* Unsigned arithmetic: the power of two may have to reach 2^31 for
    * n > 2^30, which would overflow a signed int.
    */
   const unsigned limit = (n > 0) ? (unsigned) n : 0u;
   unsigned pow2 = 1;
   int levels = 0;

   while (pow2 < limit) {
      pow2 <<= 1;
      levels++;
   }
   return levels;
}


/**
 * Allocate a pipe_texture object for the given st_texture_object using
 * the given st_texture_image to guess the mipmap size/levels.
 *
 * [comments...]
 * Otherwise, store it in memory if (Border != 0) or (any dimension ==
 * 1).
 * 
 * Otherwise, if max_level >= level >= min_level, create texture with
 * space for images from min_level down to max_level.
 *
 * Otherwise, create texture with space for images from (level 0)..(1x1).
 * Consider pruning this texture at a validation if the saving is worth it.
 */
+ */ +static void +guess_and_alloc_texture(struct st_context *st, + struct st_texture_object *stObj, + const struct st_texture_image *stImage) +{ + GLuint firstLevel; + GLuint lastLevel; + GLuint width = stImage->base.Width2; /* size w/out border */ + GLuint height = stImage->base.Height2; + GLuint depth = stImage->base.Depth2; + GLuint i, comp_byte = 0; + enum pipe_format fmt; + + DBG("%s\n", __FUNCTION__); + + assert(!stObj->pt); + + if (stObj->pt && + (GLint) stImage->level > stObj->base.BaseLevel && + (stImage->base.Width == 1 || + (stObj->base.Target != GL_TEXTURE_1D && + stImage->base.Height == 1) || + (stObj->base.Target == GL_TEXTURE_3D && + stImage->base.Depth == 1))) + return; + + /* If this image disrespects BaseLevel, allocate from level zero. + * Usually BaseLevel == 0, so it's unlikely to happen. + */ + if ((GLint) stImage->level < stObj->base.BaseLevel) + firstLevel = 0; + else + firstLevel = stObj->base.BaseLevel; + + + /* Figure out image dimensions at start level. + */ + for (i = stImage->level; i > firstLevel; i--) { + if (width != 1) + width <<= 1; + if (height != 1) + height <<= 1; + if (depth != 1) + depth <<= 1; + } + + if (width == 0 || height == 0 || depth == 0) { + /* no texture needed */ + return; + } + + /* Guess a reasonable value for lastLevel. This is probably going + * to be wrong fairly often and might mean that we have to look at + * resizable buffers, or require that buffers implement lazy + * pagetable arrangements. 
+ */ + if ((stObj->base.MinFilter == GL_NEAREST || + stObj->base.MinFilter == GL_LINEAR) && + stImage->level == firstLevel) { + lastLevel = firstLevel; + } + else { + GLuint l2width = logbase2(width); + GLuint l2height = logbase2(height); + GLuint l2depth = logbase2(depth); + lastLevel = firstLevel + MAX2(MAX2(l2width, l2height), l2depth); + } + + if (stImage->base.IsCompressed) + comp_byte = compressed_num_bytes(stImage->base.TexFormat->MesaFormat); + + fmt = st_mesa_format_to_pipe_format(stImage->base.TexFormat->MesaFormat); + stObj->pt = st_texture_create(st, + gl_target_to_pipe(stObj->base.Target), + fmt, + lastLevel, + width, + height, + depth, + comp_byte, + ( (pf_is_depth_stencil(fmt) ? + PIPE_TEXTURE_USAGE_DEPTH_STENCIL : + PIPE_TEXTURE_USAGE_RENDER_TARGET) | + PIPE_TEXTURE_USAGE_SAMPLER )); + + DBG("%s - success\n", __FUNCTION__); +} + + +/** + * Adjust pixel unpack params and image dimensions to strip off the + * texture border. + * Gallium doesn't support texture borders. They've seldem been used + * and seldom been implemented correctly anyway. + * \param unpackNew returns the new pixel unpack parameters + */ +static void +strip_texture_border(GLint border, + GLint *width, GLint *height, GLint *depth, + const struct gl_pixelstore_attrib *unpack, + struct gl_pixelstore_attrib *unpackNew) +{ + assert(border > 0); /* sanity check */ + + *unpackNew = *unpack; + + if (unpackNew->RowLength == 0) + unpackNew->RowLength = *width; + + if (depth && unpackNew->ImageHeight == 0) + unpackNew->ImageHeight = *height; + + unpackNew->SkipPixels += border; + if (height) + unpackNew->SkipRows += border; + if (depth) + unpackNew->SkipImages += border; + + assert(*width >= 3); + *width = *width - 2 * border; + if (height && *height >= 3) + *height = *height - 2 * border; + if (depth && *depth >= 3) + *depth = *depth - 2 * border; +} + + +/** + * Do glTexImage1/2/3D(). 
+ */ +static void +st_TexImage(GLcontext * ctx, + GLint dims, + GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint depth, + GLint border, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *unpack, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage, + GLsizei imageSize, int compressed) +{ + struct st_texture_object *stObj = st_texture_object(texObj); + struct st_texture_image *stImage = st_texture_image(texImage); + GLint postConvWidth, postConvHeight; + GLint texelBytes, sizeInBytes; + GLuint dstRowStride; + struct gl_pixelstore_attrib unpackNB; + + DBG("%s target %s level %d %dx%dx%d border %d\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(target), level, width, height, depth, border); + + /* gallium does not support texture borders, strip it off */ + if (border) { + strip_texture_border(border, &width, &height, &depth, + unpack, &unpackNB); + unpack = &unpackNB; + texImage->Width = width; + texImage->Height = height; + texImage->Depth = depth; + texImage->Border = 0; + border = 0; + } + + postConvWidth = width; + postConvHeight = height; + + stImage->face = _mesa_tex_target_to_face(target); + stImage->level = level; + +#if FEATURE_convolve + if (ctx->_ImageTransferState & IMAGE_CONVOLUTION_BIT) { + _mesa_adjust_image_for_convolution(ctx, dims, &postConvWidth, + &postConvHeight); + } +#endif + + /* choose the texture format */ + texImage->TexFormat = st_ChooseTextureFormat(ctx, internalFormat, + format, type); + + _mesa_set_fetch_functions(texImage, dims); + + if (texImage->TexFormat->TexelBytes == 0) { + /* must be a compressed format */ + texelBytes = 0; + texImage->IsCompressed = GL_TRUE; + texImage->CompressedSize = + ctx->Driver.CompressedTextureSize(ctx, texImage->Width, + texImage->Height, texImage->Depth, + texImage->TexFormat->MesaFormat); + } + else { + texelBytes = texImage->TexFormat->TexelBytes; + + /* Minimum pitch of 32 bytes */ + if (postConvWidth * texelBytes 
< 32) { + postConvWidth = 32 / texelBytes; + texImage->RowStride = postConvWidth; + } + + /* we'll set RowStride elsewhere when the texture is a "mapped" state */ + /*assert(texImage->RowStride == postConvWidth);*/ + } + + /* Release the reference to a potentially orphaned buffer. + * Release any old malloced memory. + */ + if (stImage->pt) { + pipe_texture_reference(&stImage->pt, NULL); + assert(!texImage->Data); + } + else if (texImage->Data) { + _mesa_align_free(texImage->Data); + } + + if (width == 0 || height == 0 || depth == 0) { + /* stop after freeing old image */ + return; + } + + /* If this is the only mipmap level in the texture, could call + * bmBufferData with NULL data to free the old block and avoid + * waiting on any outstanding fences. + */ + if (stObj->pt && + (stObj->teximage_realloc || + (/*stObj->pt->first_level == level &&*/ + stObj->pt->last_level == level && + stObj->pt->target != PIPE_TEXTURE_CUBE && + !st_texture_match_image(stObj->pt, &stImage->base, + stImage->face, stImage->level)))) { + + DBG("release it\n"); + pipe_texture_reference(&stObj->pt, NULL); + assert(!stObj->pt); + stObj->teximage_realloc = FALSE; + } + + if (!stObj->pt) { + guess_and_alloc_texture(ctx->st, stObj, stImage); + if (!stObj->pt) { + /* Probably out of memory. + * Try flushing any pending rendering, then retry. + */ + st_finish(ctx->st); + guess_and_alloc_texture(ctx->st, stObj, stImage); + if (!stObj->pt) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); + return; + } + } + } + + assert(!stImage->pt); + + if (stObj->pt && + st_texture_match_image(stObj->pt, &stImage->base, + stImage->face, stImage->level)) { + + pipe_texture_reference(&stImage->pt, stObj->pt); + assert(stImage->pt); + } + + if (!stImage->pt) + DBG("XXX: Image did not fit into texture - storing in local memory!\n"); + + /* st_CopyTexImage calls this function with pixels == NULL, with + * the expectation that the texture will be set up but nothing + * more will be done. 
This is where those calls return: + */ + if (compressed) { + pixels = _mesa_validate_pbo_compressed_teximage(ctx, imageSize, pixels, + unpack, + "glCompressedTexImage"); + } else { + pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, 1, + format, type, + pixels, unpack, "glTexImage"); + } + if (!pixels) + return; + + if (stImage->pt) { + texImage->Data = st_texture_image_map(ctx->st, stImage, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); + if (stImage->surface) + dstRowStride = stImage->surface->stride; + } + else { + /* Allocate regular memory and store the image there temporarily. */ + if (texImage->IsCompressed) { + sizeInBytes = texImage->CompressedSize; + dstRowStride = + _mesa_compressed_row_stride(texImage->TexFormat->MesaFormat, width); + assert(dims != 3); + } + else { + dstRowStride = postConvWidth * texelBytes; + sizeInBytes = depth * dstRowStride * postConvHeight; + } + + texImage->Data = _mesa_align_malloc(sizeInBytes, 16); + } + + if (!texImage->Data) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); + return; + } + + DBG("Upload image %dx%dx%d row_len %x pitch %x\n", + width, height, depth, width * texelBytes, dstRowStride); + + /* Copy data. Would like to know when it's ok for us to eg. use + * the blitter to copy. 
Or, use the hardware to do the format + * conversion and copy: + */ + if (compressed) { + memcpy(texImage->Data, pixels, imageSize); + } + else { + GLuint srcImageStride = _mesa_image_image_stride(unpack, width, height, + format, type); + int i; + const GLubyte *src = (const GLubyte *) pixels; + + for (i = 0; i++ < depth;) { + if (!texImage->TexFormat->StoreImage(ctx, dims, + texImage->_BaseFormat, + texImage->TexFormat, + texImage->Data, + 0, 0, 0, /* dstX/Y/Zoffset */ + dstRowStride, + texImage->ImageOffsets, + width, height, 1, + format, type, src, unpack)) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); + } + + if (stImage->pt && i < depth) { + st_texture_image_unmap(ctx->st, stImage); + texImage->Data = st_texture_image_map(ctx->st, stImage, i, + PIPE_BUFFER_USAGE_CPU_WRITE); + src += srcImageStride; + } + } + } + + _mesa_unmap_teximage_pbo(ctx, unpack); + + if (stImage->pt) { + st_texture_image_unmap(ctx->st, stImage); + texImage->Data = NULL; + } + + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + ctx->Driver.GenerateMipmap(ctx, target, texObj); + } +} + + +static void +st_TexImage3D(GLcontext * ctx, + GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint depth, + GLint border, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *unpack, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + st_TexImage(ctx, 3, target, level, + internalFormat, width, height, depth, border, + format, type, pixels, unpack, texObj, texImage, 0, 0); +} + + +static void +st_TexImage2D(GLcontext * ctx, + GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *unpack, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + st_TexImage(ctx, 2, target, level, + internalFormat, width, height, 1, border, + format, type, pixels, 
unpack, texObj, texImage, 0, 0); +} + + +static void +st_TexImage1D(GLcontext * ctx, + GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint border, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *unpack, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + st_TexImage(ctx, 1, target, level, + internalFormat, width, 1, 1, border, + format, type, pixels, unpack, texObj, texImage, 0, 0); +} + + +static void +st_CompressedTexImage2D(GLcontext *ctx, GLenum target, GLint level, + GLint internalFormat, + GLint width, GLint height, GLint border, + GLsizei imageSize, const GLvoid *data, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + st_TexImage(ctx, 2, target, level, + internalFormat, width, height, 1, border, + 0, 0, data, &ctx->Unpack, texObj, texImage, imageSize, 1); +} + + +/** + * Need to map texture image into memory before copying image data, + * then unmap it. + */ +static void +st_get_tex_image(GLcontext * ctx, GLenum target, GLint level, + GLenum format, GLenum type, GLvoid * pixels, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage, int compressed) +{ + struct st_texture_image *stImage = st_texture_image(texImage); + GLuint dstImageStride = _mesa_image_image_stride(&ctx->Pack, + texImage->Width, + texImage->Height, + format, type); + GLuint depth; + GLuint i; + GLubyte *dest; + + /* Map */ + if (stImage->pt) { + /* Image is stored in hardware format in a buffer managed by the + * kernel. Need to explicitly map and unmap it. + */ + texImage->Data = st_texture_image_map(ctx->st, stImage, 0, + PIPE_BUFFER_USAGE_CPU_READ); + texImage->RowStride = stImage->surface->stride / stImage->pt->block.size; + } + else { + /* Otherwise, the image should actually be stored in + * texImage->Data. 
This is pretty confusing for + * everybody, I'd much prefer to separate the two functions of + * texImage->Data - storage for texture images in main memory + * and access (ie mappings) of images. In other words, we'd + * create a new texImage->Map field and leave Data simply for + * storage. + */ + assert(texImage->Data); + } + + depth = texImage->Depth; + texImage->Depth = 1; + + dest = (GLubyte *) pixels; + + for (i = 0; i++ < depth;) { + if (compressed) { + _mesa_get_compressed_teximage(ctx, target, level, dest, + texObj, texImage); + } else { + _mesa_get_teximage(ctx, target, level, format, type, dest, + texObj, texImage); + } + + if (stImage->pt && i < depth) { + st_texture_image_unmap(ctx->st, stImage); + texImage->Data = st_texture_image_map(ctx->st, stImage, i, + PIPE_BUFFER_USAGE_CPU_READ); + dest += dstImageStride; + } + } + + texImage->Depth = depth; + + /* Unmap */ + if (stImage->pt) { + st_texture_image_unmap(ctx->st, stImage); + texImage->Data = NULL; + } +} + + +static void +st_GetTexImage(GLcontext * ctx, GLenum target, GLint level, + GLenum format, GLenum type, GLvoid * pixels, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + st_get_tex_image(ctx, target, level, format, type, pixels, + texObj, texImage, 0); +} + + +static void +st_GetCompressedTexImage(GLcontext *ctx, GLenum target, GLint level, + GLvoid *pixels, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + st_get_tex_image(ctx, target, level, 0, 0, pixels, + (struct gl_texture_object *) texObj, + (struct gl_texture_image *) texImage, 1); +} + + + +static void +st_TexSubimage(GLcontext * ctx, + GLint dims, + GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLint width, GLint height, GLint depth, + GLenum format, GLenum type, const void *pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + struct st_texture_image *stImage = 
st_texture_image(texImage); + GLuint dstRowStride; + GLuint srcImageStride = _mesa_image_image_stride(packing, width, height, + format, type); + int i; + const GLubyte *src; + + DBG("%s target %s level %d offset %d,%d %dx%d\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(target), + level, xoffset, yoffset, width, height); + + pixels = + _mesa_validate_pbo_teximage(ctx, dims, width, height, depth, format, + type, pixels, packing, "glTexSubImage2D"); + if (!pixels) + return; + + /* Map buffer if necessary. Need to lock to prevent other contexts + * from uploading the buffer under us. + */ + if (stImage->pt) { + texImage->Data = st_texture_image_map(ctx->st, stImage, zoffset, + PIPE_BUFFER_USAGE_CPU_WRITE); + if (stImage->surface) + dstRowStride = stImage->surface->stride; + } + + if (!texImage->Data) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage"); + return; + } + + src = (const GLubyte *) pixels; + + for (i = 0; i++ < depth;) { + if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat, + texImage->TexFormat, + texImage->Data, + xoffset, yoffset, 0, + dstRowStride, + texImage->ImageOffsets, + width, height, 1, + format, type, src, packing)) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage"); + } + + if (stImage->pt && i < depth) { + /* map next slice of 3D texture */ + st_texture_image_unmap(ctx->st, stImage); + texImage->Data = st_texture_image_map(ctx->st, stImage, zoffset + i, + PIPE_BUFFER_USAGE_CPU_WRITE); + src += srcImageStride; + } + } + + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + ctx->Driver.GenerateMipmap(ctx, target, texObj); + } + + _mesa_unmap_teximage_pbo(ctx, packing); + + if (stImage->pt) { + st_texture_image_unmap(ctx->st, stImage); + texImage->Data = NULL; + } +} + + + +static void +st_TexSubImage3D(GLcontext * ctx, + GLenum target, + GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLsizei width, GLsizei height, GLsizei depth, + GLenum format, GLenum type, + const GLvoid * pixels, + const 
struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + st_TexSubimage(ctx, 3, target, level, + xoffset, yoffset, zoffset, + width, height, depth, + format, type, pixels, packing, texObj, texImage); +} + + +static void +st_TexSubImage2D(GLcontext * ctx, + GLenum target, + GLint level, + GLint xoffset, GLint yoffset, + GLsizei width, GLsizei height, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + st_TexSubimage(ctx, 2, target, level, + xoffset, yoffset, 0, + width, height, 1, + format, type, pixels, packing, texObj, texImage); +} + + +static void +st_TexSubImage1D(GLcontext * ctx, + GLenum target, + GLint level, + GLint xoffset, + GLsizei width, + GLenum format, GLenum type, + const GLvoid * pixels, + const struct gl_pixelstore_attrib *packing, + struct gl_texture_object *texObj, + struct gl_texture_image *texImage) +{ + st_TexSubimage(ctx, 1, target, level, + xoffset, 0, 0, + width, 1, 1, + format, type, pixels, packing, texObj, texImage); +} + + + +/** + * Return 0 for GL_TEXTURE_CUBE_MAP_POSITIVE_X, + * 1 for GL_TEXTURE_CUBE_MAP_NEGATIVE_X, + * etc. + * XXX duplicated from main/teximage.c + */ +static uint +texture_face(GLenum target) +{ + if (target >= GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB && + target <= GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB) + return (GLuint) target - (GLuint) GL_TEXTURE_CUBE_MAP_POSITIVE_X; + else + return 0; +} + + + +/** + * Do a CopyTexSubImage operation by mapping the source surface and + * dest surface and using get_tile()/put_tile() to access the pixels/texels. 
+ * + * Note: srcY=0=TOP of renderbuffer + */ +static void +fallback_copy_texsubimage(GLcontext *ctx, + GLenum target, + GLint level, + struct st_renderbuffer *strb, + struct st_texture_image *stImage, + GLenum baseFormat, + GLint destX, GLint destY, GLint destZ, + GLint srcX, GLint srcY, + GLsizei width, GLsizei height) +{ + struct pipe_context *pipe = ctx->st->pipe; + struct pipe_screen *screen = pipe->screen; + const uint face = texture_face(target); + struct pipe_texture *pt = stImage->pt; + struct pipe_surface *src_surf, *dest_surf; + + /* We'd use strb->surface, here but it's created for GPU read/write only */ + src_surf = pipe->screen->get_tex_surface( pipe->screen, + strb->texture, + 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_READ); + + dest_surf = screen->get_tex_surface(screen, pt, face, level, destZ, + PIPE_BUFFER_USAGE_CPU_WRITE); + + assert(width <= MAX_WIDTH); + + if (baseFormat == GL_DEPTH_COMPONENT) { + const GLboolean scaleOrBias = (ctx->Pixel.DepthScale != 1.0F || + ctx->Pixel.DepthBias != 0.0F); + GLint row, yStep; + + /* determine bottom-to-top vs. 
top-to-bottom order for src buffer */ + if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) { + srcY = strb->Base.Height - 1 - srcY; + yStep = -1; + } + else { + yStep = 1; + } + + /* To avoid a large temp memory allocation, do copy row by row */ + for (row = 0; row < height; row++, srcY += yStep, destY++) { + uint data[MAX_WIDTH]; + pipe_get_tile_z(src_surf, srcX, srcY, width, 1, data); + if (scaleOrBias) { + _mesa_scale_and_bias_depth_uint(ctx, width, data); + } + pipe_put_tile_z(dest_surf, destX, destY, width, 1, data); + } + } + else { + /* RGBA format */ + GLfloat *tempSrc = + (GLfloat *) _mesa_malloc(width * height * 4 * sizeof(GLfloat)); + GLvoid *texDest = + st_texture_image_map(ctx->st, stImage, 0,PIPE_BUFFER_USAGE_CPU_WRITE); + + if (tempSrc && texDest) { + const GLint dims = 2; + struct gl_texture_image *texImage = &stImage->base; + GLint dstRowStride = stImage->surface->stride; + struct gl_pixelstore_attrib unpack = ctx->DefaultPacking; + + if (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP) { + /* need to invert src */ + srcY = strb->Base.Height - srcY - height; + unpack.Invert = GL_TRUE; + } + + /* get float/RGBA image from framebuffer */ + /* XXX this usually involves a lot of int/float conversion. + * try to avoid that someday. + */ + pipe_get_tile_rgba(src_surf, srcX, srcY, width, height, tempSrc); + + /* Store into texture memory. + * Note that this does some special things such as pixel transfer + * ops and format conversion. In particular, if the dest tex format + * is actually RGBA but the user created the texture as GL_RGB we + * need to fill-in/override the alpha channel with 1.0. 
+ */ + texImage->TexFormat->StoreImage(ctx, dims, + texImage->_BaseFormat, + texImage->TexFormat, + texDest, + destX, destY, destZ, + dstRowStride, + texImage->ImageOffsets, + width, height, 1, + GL_RGBA, GL_FLOAT, tempSrc, /* src */ + &unpack); + } + else { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexSubImage"); + } + + if (tempSrc) + _mesa_free(tempSrc); + if (texDest) + st_texture_image_unmap(ctx->st, stImage); + } + + screen->tex_surface_release(screen, &dest_surf); + screen->tex_surface_release(screen, &src_surf); +} + + +/** + * Do a CopyTex[Sub]Image1/2/3D() using a hardware (blit) path if possible. + * Note that the region to copy has already been clipped so we know we + * won't read from outside the source renderbuffer's bounds. + * + * Note: srcY=0=Bottom of renderbuffer (GL convention) + */ +static void +st_copy_texsubimage(GLcontext *ctx, + GLenum target, GLint level, + GLint destX, GLint destY, GLint destZ, + GLint srcX, GLint srcY, + GLsizei width, GLsizei height) +{ + struct gl_texture_unit *texUnit = + &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; + struct gl_texture_object *texObj = + _mesa_select_tex_object(ctx, texUnit, target); + struct gl_texture_image *texImage = + _mesa_select_tex_image(ctx, texObj, target, level); + struct st_texture_image *stImage = st_texture_image(texImage); + const GLenum texBaseFormat = texImage->InternalFormat; + struct gl_framebuffer *fb = ctx->ReadBuffer; + struct st_renderbuffer *strb; + struct pipe_context *pipe = ctx->st->pipe; + struct pipe_screen *screen = pipe->screen; + enum pipe_format dest_format, src_format; + GLboolean use_fallback = GL_TRUE; + GLboolean matching_base_formats; + + /* any rendering in progress must complete before we grab the fb image */ + st_finish(ctx->st); + + /* determine if copying depth or color data */ + if (texBaseFormat == GL_DEPTH_COMPONENT) { + strb = st_renderbuffer(fb->_DepthBuffer); + } + else if (texBaseFormat == GL_DEPTH_STENCIL_EXT) { + strb = 
st_renderbuffer(fb->_StencilBuffer); + } + else { + /* texBaseFormat == GL_RGB, GL_RGBA, GL_ALPHA, etc */ + strb = st_renderbuffer(fb->_ColorReadBuffer); + } + + assert(strb); + assert(strb->surface); + assert(stImage->pt); + + src_format = strb->surface->format; + dest_format = stImage->pt->format; + + /* + * Determine if the src framebuffer and dest texture have the same + * base format. We need this to detect a case such as the framebuffer + * being GL_RGBA but the texture being GL_RGB. If the actual hardware + * texture format stores RGBA we need to set A=1 (overriding the + * framebuffer's alpha values). We can't do that with the blit or + * textured-quad paths. + */ + matching_base_formats = (strb->Base._BaseFormat == texImage->_BaseFormat); + + if (matching_base_formats && ctx->_ImageTransferState == 0x0) { + /* try potential hardware path */ + struct pipe_surface *dest_surface = NULL; + + if (src_format == dest_format) { + /* use surface_copy() / blit */ + boolean do_flip = (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP); + + dest_surface = screen->get_tex_surface(screen, stImage->pt, + stImage->face, stImage->level, + destZ, + PIPE_BUFFER_USAGE_GPU_WRITE); + if (do_flip) + srcY = strb->surface->height - srcY - height; + + /* for surface_copy(), y=0=top, always */ + pipe->surface_copy(pipe, + do_flip, + /* dest */ + dest_surface, + destX, destY, + /* src */ + strb->surface, + srcX, srcY, + /* size */ + width, height); + use_fallback = GL_FALSE; + } + else if (screen->is_format_supported(screen, src_format, + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, + 0) && + screen->is_format_supported(screen, dest_format, + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_RENDER_TARGET, + 0)) { + /* draw textured quad to do the copy */ + boolean do_flip = (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP); + int srcY0, srcY1; + + dest_surface = screen->get_tex_surface(screen, stImage->pt, + stImage->face, stImage->level, + destZ, + PIPE_BUFFER_USAGE_GPU_WRITE); + + if (do_flip) { 
+ srcY1 = strb->Base.Height - srcY - height; + srcY0 = srcY1 + height; + } + else { + srcY0 = srcY; + srcY1 = srcY0 + height; + } + util_blit_pixels(ctx->st->blit, + strb->surface, + srcX, srcY0, + srcX + width, srcY1, + dest_surface, + destX, destY, + destX + width, destY + height, + 0.0, PIPE_TEX_MIPFILTER_NEAREST); + use_fallback = GL_FALSE; + } + + if (dest_surface) + pipe_surface_reference(&dest_surface, NULL); + } + + if (use_fallback) { + /* software fallback */ + fallback_copy_texsubimage(ctx, target, level, + strb, stImage, texBaseFormat, + destX, destY, destZ, + srcX, srcY, width, height); + } + + if (level == texObj->BaseLevel && texObj->GenerateMipmap) { + ctx->Driver.GenerateMipmap(ctx, target, texObj); + } +} + + + +static void +st_CopyTexImage1D(GLcontext * ctx, GLenum target, GLint level, + GLenum internalFormat, + GLint x, GLint y, GLsizei width, GLint border) +{ + struct gl_texture_unit *texUnit = + &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; + struct gl_texture_object *texObj = + _mesa_select_tex_object(ctx, texUnit, target); + struct gl_texture_image *texImage = + _mesa_select_tex_image(ctx, texObj, target, level); + +#if 0 + if (border) + goto fail; +#endif + + /* Setup or redefine the texture object, texture and texture + * image. Don't populate yet. 
+ */ + ctx->Driver.TexImage1D(ctx, target, level, internalFormat, + width, border, + GL_RGBA, CHAN_TYPE, NULL, + &ctx->DefaultPacking, texObj, texImage); + + st_copy_texsubimage(ctx, target, level, + 0, 0, 0, /* destX,Y,Z */ + x, y, width, 1); /* src X, Y, size */ +} + + +static void +st_CopyTexImage2D(GLcontext * ctx, GLenum target, GLint level, + GLenum internalFormat, + GLint x, GLint y, GLsizei width, GLsizei height, + GLint border) +{ + struct gl_texture_unit *texUnit = + &ctx->Texture.Unit[ctx->Texture.CurrentUnit]; + struct gl_texture_object *texObj = + _mesa_select_tex_object(ctx, texUnit, target); + struct gl_texture_image *texImage = + _mesa_select_tex_image(ctx, texObj, target, level); + + /* Setup or redefine the texture object, texture and texture + * image. Don't populate yet. + */ + ctx->Driver.TexImage2D(ctx, target, level, internalFormat, + width, height, border, + GL_RGBA, CHAN_TYPE, NULL, + &ctx->DefaultPacking, texObj, texImage); + + st_copy_texsubimage(ctx, target, level, + 0, 0, 0, /* destX,Y,Z */ + x, y, width, height); /* src X, Y, size */ +} + + +static void +st_CopyTexSubImage1D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, GLint x, GLint y, GLsizei width) +{ + const GLint yoffset = 0, zoffset = 0; + const GLsizei height = 1; + st_copy_texsubimage(ctx, target, level, + xoffset, yoffset, zoffset, /* destX,Y,Z */ + x, y, width, height); /* src X, Y, size */ +} + + +static void +st_CopyTexSubImage2D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, + GLint x, GLint y, GLsizei width, GLsizei height) +{ + const GLint zoffset = 0; + st_copy_texsubimage(ctx, target, level, + xoffset, yoffset, zoffset, /* destX,Y,Z */ + x, y, width, height); /* src X, Y, size */ +} + + +static void +st_CopyTexSubImage3D(GLcontext * ctx, GLenum target, GLint level, + GLint xoffset, GLint yoffset, GLint zoffset, + GLint x, GLint y, GLsizei width, GLsizei height) +{ + st_copy_texsubimage(ctx, target, level, + xoffset, 
yoffset, zoffset, /* destX,Y,Z */ + x, y, width, height); /* src X, Y, size */ +} + + +/** + * Compute which mipmap levels that really need to be sent to the hardware. + * This depends on the base image size, GL_TEXTURE_MIN_LOD, + * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL. + */ +static void +calculate_first_last_level(struct st_texture_object *stObj) +{ + struct gl_texture_object *tObj = &stObj->base; + + /* These must be signed values. MinLod and MaxLod can be negative numbers, + * and having firstLevel and lastLevel as signed prevents the need for + * extra sign checks. + */ + int firstLevel; + int lastLevel; + + /* Yes, this looks overly complicated, but it's all needed. + */ + switch (tObj->Target) { + case GL_TEXTURE_1D: + case GL_TEXTURE_2D: + case GL_TEXTURE_3D: + case GL_TEXTURE_CUBE_MAP: + if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) { + /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL. + */ + firstLevel = lastLevel = tObj->BaseLevel; + } + else { + firstLevel = 0; + lastLevel = MIN2(tObj->MaxLevel, + (int) tObj->Image[0][tObj->BaseLevel]->WidthLog2); + } + break; + case GL_TEXTURE_RECTANGLE_NV: + case GL_TEXTURE_4D_SGIS: + firstLevel = lastLevel = 0; + break; + default: + return; + } + + stObj->lastLevel = lastLevel; +} + + +static void +copy_image_data_to_texture(struct st_context *st, + struct st_texture_object *stObj, + GLuint dstLevel, + struct st_texture_image *stImage) +{ + if (stImage->pt) { + /* Copy potentially with the blitter: + */ + st_texture_image_copy(st->pipe, + stObj->pt, dstLevel, /* dest texture, level */ + stImage->pt, /* src texture */ + stImage->face + ); + + pipe_texture_reference(&stImage->pt, NULL); + } + else if (stImage->base.Data) { + assert(stImage->base.Data != NULL); + + /* More straightforward upload. 
+ */ + st_texture_image_data(st->pipe, + stObj->pt, + stImage->face, + dstLevel, + stImage->base.Data, + stImage->base.RowStride * + stObj->pt->block.size, + stImage->base.RowStride * + stImage->base.Height * + stObj->pt->block.size); + _mesa_align_free(stImage->base.Data); + stImage->base.Data = NULL; + } + + pipe_texture_reference(&stImage->pt, stObj->pt); +} + + +/** + * Called during state validation. When this function is finished, + * the texture object should be ready for rendering. + * \return GL_TRUE for success, GL_FALSE for failure (out of mem) + */ +GLboolean +st_finalize_texture(GLcontext *ctx, + struct pipe_context *pipe, + struct gl_texture_object *tObj, + GLboolean *needFlush) +{ + struct st_texture_object *stObj = st_texture_object(tObj); + const GLuint nr_faces = (stObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; + int comp_byte = 0; + int cpp; + GLuint face; + struct st_texture_image *firstImage; + + *needFlush = GL_FALSE; + + /* We know/require this is true by now: + */ + assert(stObj->base._Complete); + + /* What levels must the texture include at a minimum? + */ + calculate_first_last_level(stObj); + firstImage = st_texture_image(stObj->base.Image[0][stObj->base.BaseLevel]); + + /* If both firstImage and stObj point to a texture which can contain + * all active images, favour firstImage. Note that because of the + * completeness requirement, we know that the image dimensions + * will match. + */ + if (firstImage->pt && + firstImage->pt != stObj->pt && + firstImage->pt->last_level >= stObj->lastLevel) { + + pipe_texture_reference(&stObj->pt, firstImage->pt); + } + + /* FIXME: determine format block instead of cpp */ + if (firstImage->base.IsCompressed) { + comp_byte = compressed_num_bytes(firstImage->base.TexFormat->MesaFormat); + cpp = comp_byte; + } + else { + cpp = firstImage->base.TexFormat->TexelBytes; + } + + /* If we already have a gallium texture, check that it matches the texture + * object's format, target, size, num_levels, etc. 
+ */ + if (stObj->pt) { + const enum pipe_format fmt = + st_mesa_format_to_pipe_format(firstImage->base.TexFormat->MesaFormat); + if (stObj->pt->target != gl_target_to_pipe(stObj->base.Target) || + stObj->pt->format != fmt || + stObj->pt->last_level < stObj->lastLevel || + stObj->pt->width[0] != firstImage->base.Width2 || + stObj->pt->height[0] != firstImage->base.Height2 || + stObj->pt->depth[0] != firstImage->base.Depth2 || + stObj->pt->block.size != cpp || + stObj->pt->block.width != 1 || + stObj->pt->block.height != 1 || + stObj->pt->compressed != firstImage->base.IsCompressed) { + pipe_texture_release(&stObj->pt); + ctx->st->dirty.st |= ST_NEW_FRAMEBUFFER; + } + } + + /* May need to create a new gallium texture: + */ + if (!stObj->pt) { + const enum pipe_format fmt = + st_mesa_format_to_pipe_format(firstImage->base.TexFormat->MesaFormat); + stObj->pt = st_texture_create(ctx->st, + gl_target_to_pipe(stObj->base.Target), + fmt, + stObj->lastLevel, + firstImage->base.Width2, + firstImage->base.Height2, + firstImage->base.Depth2, + comp_byte, + ( (pf_is_depth_stencil(fmt) ? + PIPE_TEXTURE_USAGE_DEPTH_STENCIL : + PIPE_TEXTURE_USAGE_RENDER_TARGET) | + PIPE_TEXTURE_USAGE_SAMPLER )); + + if (!stObj->pt) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glTexImage"); + return GL_FALSE; + } + } + + /* Pull in any images not in the object's texture: + */ + for (face = 0; face < nr_faces; face++) { + GLuint level; + for (level = 0; level <= stObj->lastLevel; level++) { + struct st_texture_image *stImage = + st_texture_image(stObj->base.Image[face][stObj->base.BaseLevel + level]); + + /* Need to import images in main memory or held in other textures. + */ + if (stImage && stObj->pt != stImage->pt) { + copy_image_data_to_texture(ctx->st, stObj, level, stImage); + *needFlush = GL_TRUE; + } + } + } + + return GL_TRUE; +} + + +/** + * Returns pointer to a default/dummy texture. 
+ * This is typically used when the current shader has tex/sample instructions + * but the user has not provided a (any) texture(s). + */ +struct gl_texture_object * +st_get_default_texture(struct st_context *st) +{ + if (!st->default_texture) { + static const GLenum target = GL_TEXTURE_2D; + GLubyte pixels[16][16][4]; + struct gl_texture_object *texObj; + struct gl_texture_image *texImg; + + /* init image to gray */ + memset(pixels, 127, sizeof(pixels)); + + texObj = st->ctx->Driver.NewTextureObject(st->ctx, 0, target); + + texImg = _mesa_get_tex_image(st->ctx, texObj, target, 0); + + _mesa_init_teximage_fields(st->ctx, target, texImg, + 16, 16, 1, 0, /* w, h, d, border */ + GL_RGBA); + + st_TexImage(st->ctx, 2, target, + 0, GL_RGBA, /* level, intformat */ + 16, 16, 1, 0, /* w, h, d, border */ + GL_RGBA, GL_UNSIGNED_BYTE, pixels, + &st->ctx->DefaultPacking, + texObj, texImg, + 0, 0); + + texObj->MinFilter = GL_NEAREST; + texObj->MagFilter = GL_NEAREST; + texObj->_Complete = GL_TRUE; + + st->default_texture = texObj; + } + return st->default_texture; +} + + +void +st_init_texture_functions(struct dd_function_table *functions) +{ + functions->ChooseTextureFormat = st_ChooseTextureFormat; + functions->TexImage1D = st_TexImage1D; + functions->TexImage2D = st_TexImage2D; + functions->TexImage3D = st_TexImage3D; + functions->TexSubImage1D = st_TexSubImage1D; + functions->TexSubImage2D = st_TexSubImage2D; + functions->TexSubImage3D = st_TexSubImage3D; + functions->CopyTexImage1D = st_CopyTexImage1D; + functions->CopyTexImage2D = st_CopyTexImage2D; + functions->CopyTexSubImage1D = st_CopyTexSubImage1D; + functions->CopyTexSubImage2D = st_CopyTexSubImage2D; + functions->CopyTexSubImage3D = st_CopyTexSubImage3D; + functions->GenerateMipmap = st_generate_mipmap; + + functions->GetTexImage = st_GetTexImage; + + /* compressed texture functions */ + functions->CompressedTexImage2D = st_CompressedTexImage2D; + functions->GetCompressedTexImage = st_GetCompressedTexImage; + 
functions->CompressedTextureSize = _mesa_compressed_texture_size; + + functions->NewTextureObject = st_NewTextureObject; + functions->NewTextureImage = st_NewTextureImage; + functions->DeleteTexture = st_DeleteTextureObject; + functions->FreeTexImageData = st_FreeTextureImageData; + functions->UpdateTexturePalette = 0; + + functions->TextureMemCpy = do_memcpy; + + /* XXX Temporary until we can query pipe's texture sizes */ + functions->TestProxyTexImage = _mesa_test_proxy_teximage; +} diff --git a/src/mesa/state_tracker/st_cb_texture.h b/src/mesa/state_tracker/st_cb_texture.h new file mode 100644 index 0000000000..f1fe0339cd --- /dev/null +++ b/src/mesa/state_tracker/st_cb_texture.h @@ -0,0 +1,48 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef ST_CB_TEXTURE_H +#define ST_CB_TEXTURE_H + + +extern GLboolean +st_finalize_texture(GLcontext *ctx, + struct pipe_context *pipe, + struct gl_texture_object *tObj, + GLboolean *needFlush); + + +extern struct gl_texture_object * +st_get_default_texture(struct st_context *st); + + +extern void +st_init_texture_functions(struct dd_function_table *functions); + + +#endif /* ST_CB_TEXTURE_H */ diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c new file mode 100644 index 0000000000..cca808d328 --- /dev/null +++ b/src/mesa/state_tracker/st_context.c @@ -0,0 +1,326 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/imports.h" +#include "main/context.h" +#include "main/extensions.h" +#include "main/matrix.h" +#include "main/buffers.h" +#include "main/scissor.h" +#include "vbo/vbo.h" +#include "shader/shader_api.h" +#include "glapi/glapi.h" +#include "st_public.h" +#include "st_context.h" +#include "st_cb_accum.h" +#include "st_cb_bitmap.h" +#include "st_cb_blit.h" +#include "st_cb_bufferobjects.h" +#include "st_cb_clear.h" +#if FEATURE_drawpix +#include "st_cb_drawpixels.h" +#include "st_cb_rasterpos.h" +#endif +#ifdef FEATURE_OES_draw_texture +#include "st_cb_drawtex.h" +#endif +#include "st_cb_fbo.h" +#include "st_cb_get.h" +#if FEATURE_feedback +#include "st_cb_feedback.h" +#endif +#include "st_cb_program.h" +#include "st_cb_queryobj.h" +#include "st_cb_readpixels.h" +#include "st_cb_texture.h" +#include "st_cb_flush.h" +#include "st_cb_strings.h" +#include "st_atom.h" +#include "st_draw.h" +#include "st_extensions.h" +#include "st_gen_mipmap.h" +#include "st_program.h" +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" +#include "draw/draw_context.h" +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_context.h" + + +/** + * Called via ctx->Driver.UpdateState() + */ +void st_invalidate_state(GLcontext * ctx, GLuint new_state) +{ + struct st_context *st = st_context(ctx); + + st->dirty.mesa |= new_state; + st->dirty.st |= ST_NEW_MESA; + + /* This is the only core Mesa 
module we depend upon. + * No longer use swrast, swsetup, tnl. + */ + _vbo_InvalidateState(ctx, new_state); +} + + +/** + * Check for multisample env var override. + */ +int +st_get_msaa(void) +{ + const char *msaa = _mesa_getenv("__GL_FSAA_MODE"); + if (msaa) + return atoi(msaa); + return 0; +} + + +static struct st_context * +st_create_context_priv( GLcontext *ctx, struct pipe_context *pipe ) +{ + uint i; + struct st_context *st = CALLOC_STRUCT( st_context ); + + ctx->st = st; + + st->ctx = ctx; + st->pipe = pipe; + + /* state tracker needs the VBO module */ + _vbo_CreateContext(ctx); + +#if FEATURE_feedback || FEATURE_drawpix + st->draw = draw_create(); /* for selection/feedback */ + + /* Disable draw options that might convert points/lines to tris, etc. + * as that would foul-up feedback/selection mode. + */ + draw_wide_line_threshold(st->draw, 1000.0f); + draw_wide_point_threshold(st->draw, 1000.0f); + draw_enable_line_stipple(st->draw, FALSE); + draw_enable_point_sprites(st->draw, FALSE); +#endif + + st->dirty.mesa = ~0; + st->dirty.st = ~0; + + st->cso_context = cso_create_context(pipe); + + st_init_atoms( st ); + st_init_bitmap(st); + st_init_clear(st); + st_init_draw( st ); + st_init_generate_mipmap(st); + st_init_blit(st); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + st->state.sampler_list[i] = &st->state.samplers[i]; + + /* we want all vertex data to be placed in buffer objects */ + vbo_use_buffer_objects(ctx); + + /* Need these flags: + */ + st->ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; + st->ctx->FragmentProgram._UseTexEnvProgram = GL_TRUE; + + st->ctx->VertexProgram._MaintainTnlProgram = GL_TRUE; + + st->pixel_xfer.cache = _mesa_new_program_cache(); + + st->force_msaa = st_get_msaa(); + + /* GL limits and extensions */ + st_init_limits(st); + st_init_extensions(st); + + return st; +} + + +struct st_context *st_create_context(struct pipe_context *pipe, + const __GLcontextModes *visual, + struct st_context *share) +{ + GLcontext *ctx; + 
GLcontext *shareCtx = share ? share->ctx : NULL; + struct dd_function_table funcs; + + memset(&funcs, 0, sizeof(funcs)); + st_init_driver_functions(&funcs); + + ctx = _mesa_create_context(visual, shareCtx, &funcs, NULL); + + return st_create_context_priv(ctx, pipe); +} + + +static void st_destroy_context_priv( struct st_context *st ) +{ + uint i; + +#if FEATURE_feedback || FEATURE_drawpix + draw_destroy(st->draw); +#endif + st_destroy_atoms( st ); + st_destroy_draw( st ); + st_destroy_generate_mipmap(st); +#if FEATURE_EXT_framebuffer_blit + st_destroy_blit(st); +#endif + st_destroy_clear(st); +#if FEATURE_drawpix + st_destroy_bitmap(st); + st_destroy_drawpix(st); +#endif +#ifdef FEATURE_OES_draw_texture + st_destroy_drawtex(st); +#endif + + for (i = 0; i < Elements(st->state.sampler_texture); i++) { + pipe_texture_reference(&st->state.sampler_texture[i], NULL); + } + + for (i = 0; i < Elements(st->state.constants); i++) { + if (st->state.constants[i].buffer) { + pipe_buffer_reference(st->pipe->screen, &st->state.constants[i].buffer, NULL); + } + } + + if (st->default_texture) { + st->ctx->Driver.DeleteTexture(st->ctx, st->default_texture); + st->default_texture = NULL; + } + + free( st ); +} + + +void st_destroy_context( struct st_context *st ) +{ + struct pipe_context *pipe = st->pipe; + struct cso_context *cso = st->cso_context; + GLcontext *ctx = st->ctx; + + /* need to unbind and destroy CSO objects before anything else */ + cso_release_all(st->cso_context); + + st_reference_fragprog(st, &st->fp, NULL); + st_reference_vertprog(st, &st->vp, NULL); + + _mesa_delete_program_cache(st->ctx, st->pixel_xfer.cache); + + _vbo_DestroyContext(st->ctx); + + _mesa_free_context_data(ctx); + + st_destroy_context_priv(st); + + cso_destroy_context(cso); + + pipe->destroy( pipe ); + + free(ctx); +} + + +void st_make_current(struct st_context *st, + struct st_framebuffer *draw, + struct st_framebuffer *read) +{ + if (st) { + GLboolean firstTime = st->ctx->FirstTimeCurrent; + 
_mesa_make_current(st->ctx, &draw->Base, &read->Base); + /* Need to initialize viewport here since draw->Base->Width/Height + * will still be zero at this point. + * This could be improved, but would require rather extensive work + * elsewhere (allocate rb surface storage sooner) + */ + if (firstTime) { + GLuint w = draw->InitWidth, h = draw->InitHeight; + _mesa_set_viewport(st->ctx, 0, 0, w, h); + _mesa_set_scissor(st->ctx, 0, 0, w, h); + + } + } + else { + _mesa_make_current(NULL, NULL, NULL); + } +} + + +void st_copy_context_state(struct st_context *dst, + struct st_context *src, + uint mask) +{ + _mesa_copy_context(dst->ctx, src->ctx, mask); +} + + + +st_proc st_get_proc_address(const char *procname) +{ + return (st_proc) _glapi_get_proc_address(procname); +} + + + +void st_init_driver_functions(struct dd_function_table *functions) +{ + _mesa_init_glsl_driver_functions(functions); + +#if FEATURE_accum + st_init_accum_functions(functions); +#endif +#if FEATURE_EXT_framebuffer_blit + st_init_blit_functions(functions); +#endif + st_init_bufferobject_functions(functions); + st_init_clear_functions(functions); +#if FEATURE_drawpix + st_init_bitmap_functions(functions); + st_init_drawpixels_functions(functions); + st_init_rasterpos_functions(functions); +#endif + st_init_fbo_functions(functions); + st_init_get_functions(functions); +#if FEATURE_feedback + st_init_feedback_functions(functions); +#endif + st_init_program_functions(functions); +#if FEATURE_ARB_occlusion_query + st_init_query_functions(functions); +#endif + st_init_readpixels_functions(functions); + st_init_texture_functions(functions); + st_init_flush_functions(functions); + st_init_string_functions(functions); + + functions->UpdateState = st_invalidate_state; +} diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h new file mode 100644 index 0000000000..1d1aca3111 --- /dev/null +++ b/src/mesa/state_tracker/st_context.h @@ -0,0 +1,247 @@ 
+/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef ST_CONTEXT_H +#define ST_CONTEXT_H + +#include "main/mtypes.h" +#include "shader/prog_cache.h" +#include "pipe/p_state.h" + + +struct st_context; +struct st_texture_object; +struct st_fragment_program; +struct draw_context; +struct draw_stage; +struct cso_cache; +struct cso_blend; +struct gen_mipmap_state; +struct blit_state; +struct bitmap_cache; + + +#define FRONT_STATUS_UNDEFINED 0 +#define FRONT_STATUS_DIRTY 1 +#define FRONT_STATUS_COPY_OF_BACK 2 + + +#define ST_NEW_MESA 0x1 /* Mesa state has changed */ +#define ST_NEW_FRAGMENT_PROGRAM 0x2 +#define ST_NEW_VERTEX_PROGRAM 0x4 +#define ST_NEW_FRAMEBUFFER 0x8 + + +struct st_state_flags { + GLuint mesa; + GLuint st; +}; + +struct st_tracked_state { + const char *name; + struct st_state_flags dirty; + void (*update)( struct st_context *st ); +}; + + + +struct st_context +{ + GLcontext *ctx; + + struct pipe_context *pipe; + + struct draw_context *draw; /**< For selection/feedback/rastpos only */ + struct draw_stage *feedback_stage; /**< For GL_FEEDBACK rendermode */ + struct draw_stage *selection_stage; /**< For GL_SELECT rendermode */ + struct draw_stage *rastpos_stage; /**< For glRasterPos */ + + /* Some state is contained in constant objects. + * Other state is just parameter values. 
+ */ + struct { + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state depth_stencil; + struct pipe_rasterizer_state rasterizer; + struct pipe_sampler_state samplers[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_state *sampler_list[PIPE_MAX_SAMPLERS]; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[2]; + struct pipe_framebuffer_state framebuffer; + struct pipe_texture *sampler_texture[PIPE_MAX_SAMPLERS]; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct pipe_viewport_state viewport; + + GLuint num_samplers; + GLuint num_textures; + } state; + + struct { + struct st_tracked_state tracked_state[2]; + } constants; + + /* XXX unused: */ + struct { + struct gl_fragment_program *fragment_program; + } cb; + + GLuint frontbuffer_status; /**< one of FRONT_STATUS_ */ + + char vendor[100]; + char renderer[100]; + + /* State to be validated: + */ + struct st_tracked_state **atoms; + GLuint nr_atoms; + + struct st_state_flags dirty; + + GLboolean missing_textures; + + GLfloat polygon_offset_scale; /* ?? */ + + /** Mapping from VERT_RESULT_x to post-transformed vertex slot */ + const GLuint *vertex_result_to_slot; + + struct st_vertex_program *vp; /**< Currently bound vertex program */ + struct st_fragment_program *fp; /**< Currently bound fragment program */ + + struct gl_texture_object *default_texture; + + struct { + struct gl_program_cache *cache; + struct st_fragment_program *program; /**< cur pixel transfer prog */ + GLuint xfer_prog_sn; /**< pixel xfer program serial no. */ + GLuint user_prog_sn; /**< user fragment program serial no. */ + struct st_fragment_program *combined_prog; + GLuint combined_prog_sn; + struct pipe_texture *pixelmap_texture; + boolean pixelmap_enabled; /**< use the pixelmap texture? 
*/ + } pixel_xfer; + + /** for glBitmap */ + struct { + struct pipe_rasterizer_state rasterizer; + struct pipe_sampler_state sampler; + struct pipe_shader_state vert_shader; + enum pipe_format tex_format; + void *vs; + float vertices[4][3][4]; /**< vertex pos + color + texcoord */ + struct pipe_buffer *vbuf; + struct bitmap_cache *cache; + } bitmap; + + /** for glDraw/CopyPixels */ + struct { + struct st_fragment_program *z_shader; + struct st_vertex_program *vert_shaders[2]; + } drawpix; + + /** for glClear */ + struct { + struct pipe_shader_state vert_shader; + struct pipe_shader_state frag_shader; + struct pipe_rasterizer_state raster; + struct pipe_viewport_state viewport; + void *vs; + void *fs; + float vertices[4][2][4]; /**< vertex pos + color */ + struct pipe_buffer *vbuf; + } clear; + + void *passthrough_fs; /**< simple pass-through frag shader */ + + struct gen_mipmap_state *gen_mipmap; + struct blit_state *blit; + + struct cso_context *cso_context; + + int force_msaa; +}; + + +/* Need this so that we can implement Mesa callbacks in this module. + */ +static INLINE struct st_context *st_context(GLcontext *ctx) +{ + return ctx->st; +} + + +/** + * Wrapper for GLframebuffer. + * This is an opaque type to the outside world. + */ +struct st_framebuffer +{ + GLframebuffer Base; + void *Private; + GLuint InitWidth, InitHeight; +}; + + +extern void st_init_driver_functions(struct dd_function_table *functions); + +void st_invalidate_state(GLcontext * ctx, GLuint new_state); + + + +#define Y_0_TOP 1 +#define Y_0_BOTTOM 2 + +static INLINE GLuint +st_fb_orientation(const struct gl_framebuffer *fb) +{ + if (fb && fb->Name == 0) { + /* Drawing into a window (on-screen buffer). + * + * Negate Y scale to flip image vertically. + * The NDC Y coords prior to viewport transformation are in the range + * [y=-1=bottom, y=1=top] + * Hardware window coords are in the range [y=0=top, y=H-1=bottom] where + * H is the window height. 
+ * Use the viewport transformation to invert Y. + */ + return Y_0_TOP; + } + else { + /* Drawing into user-created FBO (very likely a texture). + * + * For textures, T=0=Bottom, so by extension Y=0=Bottom for rendering. + */ + return Y_0_BOTTOM; + } +} + + +extern int +st_get_msaa(void); + + +#endif diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c new file mode 100644 index 0000000000..c7d26ce33c --- /dev/null +++ b/src/mesa/state_tracker/st_debug.c @@ -0,0 +1,70 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "main/context.h" +#include "shader/prog_print.h" + +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_dump.h" + +#include "cso_cache/cso_cache.h" + +#include "st_context.h" +#include "st_debug.h" +#include "st_program.h" + + + +/** + * Print current state. May be called from inside gdb to see currently + * bound vertex/fragment shaders and associated constants. + */ +void +st_print_current(void) +{ + GET_CURRENT_CONTEXT(ctx); + struct st_context *st = ctx->st; + +#if 0 + int i; + + printf("Vertex Transform Inputs:\n"); + for (i = 0; i < st->vp->state.num_inputs; i++) { + printf(" Slot %d: VERT_ATTRIB_%d\n", i, st->vp->index_to_input[i]); + } +#endif + + tgsi_dump( st->vp->state.tokens, 0 ); + if (st->vp->Base.Base.Parameters) + _mesa_print_parameter_list(st->vp->Base.Base.Parameters); + + tgsi_dump( st->fp->state.tokens, 0 ); + if (st->fp->Base.Base.Parameters) + _mesa_print_parameter_list(st->fp->Base.Base.Parameters); +} diff --git a/src/mesa/state_tracker/st_debug.h b/src/mesa/state_tracker/st_debug.h new file mode 100644 index 0000000000..49d752e1b2 --- /dev/null +++ b/src/mesa/state_tracker/st_debug.h @@ -0,0 +1,36 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef ST_DEBUG_H +#define ST_DEBUG_H + +extern void +st_print_current(void); + + +#endif /* ST_DEBUG_H */ diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c new file mode 100644 index 0000000000..61949a9388 --- /dev/null +++ b/src/mesa/state_tracker/st_draw.c @@ -0,0 +1,875 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#include "main/imports.h" +#include "main/image.h" +#include "main/macros.h" + +#include "vbo/vbo.h" + +#include "st_context.h" +#include "st_atom.h" +#include "st_cb_bufferobjects.h" +#include "st_draw.h" +#include "st_program.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "draw/draw_private.h" +#include "draw/draw_context.h" + + +static GLuint double_types[4] = { + PIPE_FORMAT_R64_FLOAT, + PIPE_FORMAT_R64G64_FLOAT, + PIPE_FORMAT_R64G64B64_FLOAT, + PIPE_FORMAT_R64G64B64A64_FLOAT +}; + +static GLuint float_types[4] = { + PIPE_FORMAT_R32_FLOAT, + PIPE_FORMAT_R32G32_FLOAT, + PIPE_FORMAT_R32G32B32_FLOAT, + PIPE_FORMAT_R32G32B32A32_FLOAT +}; + +static GLuint uint_types_norm[4] = { + PIPE_FORMAT_R32_UNORM, + PIPE_FORMAT_R32G32_UNORM, + PIPE_FORMAT_R32G32B32_UNORM, + PIPE_FORMAT_R32G32B32A32_UNORM +}; + +static GLuint uint_types_scale[4] = { + PIPE_FORMAT_R32_USCALED, + PIPE_FORMAT_R32G32_USCALED, + PIPE_FORMAT_R32G32B32_USCALED, + PIPE_FORMAT_R32G32B32A32_USCALED +}; + +static GLuint int_types_norm[4] = { + PIPE_FORMAT_R32_SNORM, + PIPE_FORMAT_R32G32_SNORM, + PIPE_FORMAT_R32G32B32_SNORM, + PIPE_FORMAT_R32G32B32A32_SNORM +}; + +static GLuint int_types_scale[4] = { + PIPE_FORMAT_R32_SSCALED, + PIPE_FORMAT_R32G32_SSCALED, + PIPE_FORMAT_R32G32B32_SSCALED, + PIPE_FORMAT_R32G32B32A32_SSCALED +}; + +static GLuint ushort_types_norm[4] = { + PIPE_FORMAT_R16_UNORM, + PIPE_FORMAT_R16G16_UNORM, + PIPE_FORMAT_R16G16B16_UNORM, + PIPE_FORMAT_R16G16B16A16_UNORM +}; + +static GLuint ushort_types_scale[4] = { + PIPE_FORMAT_R16_USCALED, + PIPE_FORMAT_R16G16_USCALED, + PIPE_FORMAT_R16G16B16_USCALED, + PIPE_FORMAT_R16G16B16A16_USCALED +}; + +static GLuint short_types_norm[4] = { + PIPE_FORMAT_R16_SNORM, + PIPE_FORMAT_R16G16_SNORM, + PIPE_FORMAT_R16G16B16_SNORM, + 
PIPE_FORMAT_R16G16B16A16_SNORM +}; + +static GLuint short_types_scale[4] = { + PIPE_FORMAT_R16_SSCALED, + PIPE_FORMAT_R16G16_SSCALED, + PIPE_FORMAT_R16G16B16_SSCALED, + PIPE_FORMAT_R16G16B16A16_SSCALED +}; + +static GLuint ubyte_types_norm[4] = { + PIPE_FORMAT_R8_UNORM, + PIPE_FORMAT_R8G8_UNORM, + PIPE_FORMAT_R8G8B8_UNORM, + PIPE_FORMAT_R8G8B8A8_UNORM +}; + +static GLuint ubyte_types_scale[4] = { + PIPE_FORMAT_R8_USCALED, + PIPE_FORMAT_R8G8_USCALED, + PIPE_FORMAT_R8G8B8_USCALED, + PIPE_FORMAT_R8G8B8A8_USCALED +}; + +static GLuint byte_types_norm[4] = { + PIPE_FORMAT_R8_SNORM, + PIPE_FORMAT_R8G8_SNORM, + PIPE_FORMAT_R8G8B8_SNORM, + PIPE_FORMAT_R8G8B8A8_SNORM +}; + +static GLuint byte_types_scale[4] = { + PIPE_FORMAT_R8_SSCALED, + PIPE_FORMAT_R8G8_SSCALED, + PIPE_FORMAT_R8G8B8_SSCALED, + PIPE_FORMAT_R8G8B8A8_SSCALED +}; + +static GLuint fixed_types[4] = { + PIPE_FORMAT_R32_FIXED, + PIPE_FORMAT_R32G32_FIXED, + PIPE_FORMAT_R32G32B32_FIXED, + PIPE_FORMAT_R32G32B32A32_FIXED +}; + + + +/** + * Return a PIPE_FORMAT_x for the given GL datatype and size. 
+ */ +static GLuint +pipe_vertex_format(GLenum type, GLuint size, GLboolean normalized) +{ + assert((type >= GL_BYTE && type <= GL_DOUBLE) || + type == GL_FIXED); + assert(size >= 1); + assert(size <= 4); + + if (normalized) { + switch (type) { + case GL_DOUBLE: return double_types[size-1]; + case GL_FLOAT: return float_types[size-1]; + case GL_INT: return int_types_norm[size-1]; + case GL_SHORT: return short_types_norm[size-1]; + case GL_BYTE: return byte_types_norm[size-1]; + case GL_UNSIGNED_INT: return uint_types_norm[size-1]; + case GL_UNSIGNED_SHORT: return ushort_types_norm[size-1]; + case GL_UNSIGNED_BYTE: return ubyte_types_norm[size-1]; + case GL_FIXED: return fixed_types[size-1]; + default: assert(0); return 0; + } + } + else { + switch (type) { + case GL_DOUBLE: return double_types[size-1]; + case GL_FLOAT: return float_types[size-1]; + case GL_INT: return int_types_scale[size-1]; + case GL_SHORT: return short_types_scale[size-1]; + case GL_BYTE: return byte_types_scale[size-1]; + case GL_UNSIGNED_INT: return uint_types_scale[size-1]; + case GL_UNSIGNED_SHORT: return ushort_types_scale[size-1]; + case GL_UNSIGNED_BYTE: return ubyte_types_scale[size-1]; + case GL_FIXED: return fixed_types[size-1]; + default: assert(0); return 0; + } + } + return 0; /* silence compiler warning */ +} + + +/* + * If edge flags are needed, setup an bitvector of flags and call + * pipe->set_edgeflags(). 
+ * XXX memleak: need to free the returned pointer at some point + */ +static void * +setup_edgeflags(GLcontext *ctx, GLenum primMode, GLint start, GLint count, + const struct gl_client_array *array) +{ + struct pipe_context *pipe = ctx->st->pipe; + + if ((primMode == GL_TRIANGLES || + primMode == GL_QUADS || + primMode == GL_POLYGON) && + (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL)) { + /* need edge flags */ + GLint i; + unsigned *vec; + struct st_buffer_object *stobj = st_buffer_object(array->BufferObj); + ubyte *map; + + if (!stobj) + return NULL; + + vec = (unsigned *) calloc(sizeof(unsigned), (count + 31) / 32); + if (!vec) + return NULL; + + map = pipe_buffer_map(pipe->screen, stobj->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map = ADD_POINTERS(map, array->Ptr); + + for (i = 0; i < count; i++) { + if (*((float *) map)) + vec[i/32] |= 1 << (i % 32); + + map += array->StrideB; + } + + pipe_buffer_unmap(pipe->screen, stobj->buffer); + + pipe->set_edgeflags(pipe, vec); + + return vec; + } + else { + /* edge flags not needed */ + pipe->set_edgeflags(pipe, NULL); + return NULL; + } +} + + +/** + * Examine the active arrays to determine if we have interleaved + * vertex arrays all living in one VBO, or all living in user space. + * \param userSpace returns whether the arrays are in user space. 
+ */ +static GLboolean +is_interleaved_arrays(const struct st_vertex_program *vp, + const struct gl_client_array **arrays, + GLboolean *userSpace) +{ + GLuint attr; + const struct gl_buffer_object *firstBufObj = NULL; + GLint firstStride = -1; + GLuint num_client_arrays = 0; + const GLubyte *client_addr = NULL; + + for (attr = 0; attr < vp->num_inputs; attr++) { + const GLuint mesaAttr = vp->index_to_input[attr]; + const struct gl_buffer_object *bufObj = arrays[mesaAttr]->BufferObj; + const GLsizei stride = arrays[mesaAttr]->StrideB; /* in bytes */ + + if (firstStride < 0) { + firstStride = stride; + } + else if (firstStride != stride) { + return GL_FALSE; + } + + if (!bufObj || !bufObj->Name) { + num_client_arrays++; + /* Try to detect if the client-space arrays are + * "close" to each other. + */ + if (!client_addr) { + client_addr = arrays[mesaAttr]->Ptr; + } + else if (abs(arrays[mesaAttr]->Ptr - client_addr) > firstStride) { + /* arrays start too far apart */ + return GL_FALSE; + } + } + else if (!firstBufObj) { + firstBufObj = bufObj; + } + else if (bufObj != firstBufObj) { + return GL_FALSE; + } + } + + *userSpace = (num_client_arrays == vp->num_inputs); + /*printf("user space: %d\n", (int) *userSpace);*/ + + return GL_TRUE; +} + + +/** + * Once we know all the arrays are in user space, this function + * computes the memory range occupied by the arrays. 
+ */ +static void +get_user_arrays_bounds(const struct st_vertex_program *vp, + const struct gl_client_array **arrays, + GLuint max_index, + const GLubyte **low, const GLubyte **high) +{ + const GLubyte *low_addr = NULL; + GLuint attr; + GLint stride; + + for (attr = 0; attr < vp->num_inputs; attr++) { + const GLuint mesaAttr = vp->index_to_input[attr]; + const GLubyte *start = arrays[mesaAttr]->Ptr; + stride = arrays[mesaAttr]->StrideB; + if (attr == 0) { + low_addr = start; + } + else { + low_addr = MIN2(low_addr, start); + } + } + + *low = low_addr; + *high = low_addr + (max_index + 1) * stride; +} + + +/** + * Set up for drawing interleaved arrays that all live in one VBO + * or all live in user space. + * \param vbuffer returns vertex buffer info + * \param velements returns vertex element info + */ +static void +setup_interleaved_attribs(GLcontext *ctx, + const struct st_vertex_program *vp, + const struct gl_client_array **arrays, + GLuint max_index, + GLboolean userSpace, + struct pipe_vertex_buffer *vbuffer, + struct pipe_vertex_element velements[]) +{ + struct pipe_context *pipe = ctx->st->pipe; + GLuint attr; + const GLubyte *offset0; + + for (attr = 0; attr < vp->num_inputs; attr++) { + const GLuint mesaAttr = vp->index_to_input[attr]; + struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj; + struct st_buffer_object *stobj = st_buffer_object(bufobj); + GLsizei stride = arrays[mesaAttr]->StrideB; + + /*printf("stobj %u = %p\n", attr, (void*)stobj);*/ + + if (attr == 0) { + if (userSpace) { + const GLubyte *low, *high; + get_user_arrays_bounds(vp, arrays, max_index, &low, &high); + /*printf("user buffer range: %p %p %d\n", low, high, high-low);*/ + vbuffer->buffer = + pipe_user_buffer_create(pipe->screen, (void *) low, high - low); + vbuffer->buffer_offset = 0; + offset0 = low; + } + else { + vbuffer->buffer = NULL; + pipe_buffer_reference(pipe->screen, &vbuffer->buffer, stobj->buffer); + vbuffer->buffer_offset = (unsigned) arrays[mesaAttr]->Ptr; 
+ offset0 = arrays[mesaAttr]->Ptr; + } + vbuffer->pitch = stride; /* in bytes */ + vbuffer->max_index = max_index; + } + + velements[attr].src_offset = + (unsigned) (arrays[mesaAttr]->Ptr - offset0); + velements[attr].vertex_buffer_index = 0; + velements[attr].nr_components = arrays[mesaAttr]->Size; + velements[attr].src_format = + pipe_vertex_format(arrays[mesaAttr]->Type, + arrays[mesaAttr]->Size, + arrays[mesaAttr]->Normalized); + assert(velements[attr].src_format); + } +} + + +/** + * Set up a separate pipe_vertex_buffer and pipe_vertex_element for each + * vertex attribute. + * \param vbuffer returns vertex buffer info + * \param velements returns vertex element info + */ +static void +setup_non_interleaved_attribs(GLcontext *ctx, + const struct st_vertex_program *vp, + const struct gl_client_array **arrays, + GLuint max_index, + struct pipe_vertex_buffer vbuffer[], + struct pipe_vertex_element velements[]) +{ + struct pipe_context *pipe = ctx->st->pipe; + GLuint attr; + + for (attr = 0; attr < vp->num_inputs; attr++) { + const GLuint mesaAttr = vp->index_to_input[attr]; + struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj; + GLsizei stride = arrays[mesaAttr]->StrideB; + + if (bufobj && bufobj->Name) { + /* Attribute data is in a VBO. + * Recall that for VBOs, the gl_client_array->Ptr field is + * really an offset from the start of the VBO, not a pointer. 
+ */ + struct st_buffer_object *stobj = st_buffer_object(bufobj); + assert(stobj->buffer); + /*printf("stobj %u = %p\n", attr, (void*) stobj);*/ + + vbuffer[attr].buffer = NULL; + pipe_buffer_reference(pipe->screen, &vbuffer[attr].buffer, stobj->buffer); + vbuffer[attr].buffer_offset = (unsigned) arrays[mesaAttr]->Ptr; + velements[attr].src_offset = 0; + } + else { + /* attribute data is in user-space memory, not a VBO */ + uint bytes; + /*printf("user-space array %d stride %d\n", attr, stride);*/ + + /* wrap user data */ + if (arrays[mesaAttr]->Ptr) { + /* user's vertex array */ + if (arrays[mesaAttr]->StrideB) { + bytes = arrays[mesaAttr]->StrideB * (max_index + 1); + } + else { + bytes = arrays[mesaAttr]->Size + * _mesa_sizeof_type(arrays[mesaAttr]->Type); + } + vbuffer[attr].buffer = pipe_user_buffer_create(pipe->screen, + (void *) arrays[mesaAttr]->Ptr, bytes); + } + else { + /* no array, use ctx->Current.Attrib[] value */ + bytes = sizeof(ctx->Current.Attrib[0]); + vbuffer[attr].buffer = pipe_user_buffer_create(pipe->screen, + (void *) ctx->Current.Attrib[mesaAttr], bytes); + stride = 0; + } + + vbuffer[attr].buffer_offset = 0; + velements[attr].src_offset = 0; + } + + assert(velements[attr].src_offset <= 2048); /* 11-bit field */ + + /* common-case setup */ + vbuffer[attr].pitch = stride; /* in bytes */ + vbuffer[attr].max_index = max_index; + velements[attr].vertex_buffer_index = attr; + velements[attr].nr_components = arrays[mesaAttr]->Size; + velements[attr].src_format + = pipe_vertex_format(arrays[mesaAttr]->Type, + arrays[mesaAttr]->Size, + arrays[mesaAttr]->Normalized); + assert(velements[attr].src_format); + } +} + + + + +/** + * This function gets plugged into the VBO module and is called when + * we have something to render. + * Basically, translate the information into the format expected by gallium. 
+ */ +void +st_draw_vbo(GLcontext *ctx, + const struct gl_client_array **arrays, + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index) +{ + struct pipe_context *pipe = ctx->st->pipe; + const struct st_vertex_program *vp; + const struct pipe_shader_state *vs; + struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS]; + GLuint attr; + struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; + unsigned num_vbuffers, num_velements; + GLboolean userSpace; + + /* sanity check for pointer arithmetic below */ + assert(sizeof(arrays[0]->Ptr[0]) == 1); + + st_validate_state(ctx->st); + + /* must get these after state validation! */ + vp = ctx->st->vp; + vs = &ctx->st->vp->state; + + /* + * Setup the vbuffer[] and velements[] arrays. + */ + if (is_interleaved_arrays(vp, arrays, &userSpace)) { + /*printf("Draw interleaved\n");*/ + setup_interleaved_attribs(ctx, vp, arrays, max_index, userSpace, + vbuffer, velements); + num_vbuffers = 1; + num_velements = vp->num_inputs; + if (num_velements == 0) + num_vbuffers = 0; + } + else { + /*printf("Draw non-interleaved\n");*/ + setup_non_interleaved_attribs(ctx, vp, arrays, max_index, + vbuffer, velements); + num_vbuffers = vp->num_inputs; + num_velements = vp->num_inputs; + } + +#if 0 + { + GLuint i; + for (i = 0; i < num_vbuffers; i++) { + printf("buffers[%d].pitch = %u\n", i, vbuffer[i].pitch); + printf("buffers[%d].max_index = %u\n", i, vbuffer[i].max_index); + printf("buffers[%d].buffer_offset = %u\n", i, vbuffer[i].buffer_offset); + printf("buffers[%d].buffer = %p\n", i, (void*) vbuffer[i].buffer); + } + for (i = 0; i < num_velements; i++) { + printf("vlements[%d].vbuffer_index = %u\n", i, velements[i].vertex_buffer_index); + printf("vlements[%d].src_offset = %u\n", i, velements[i].src_offset); + printf("vlements[%d].nr_comps = %u\n", i, velements[i].nr_components); + printf("vlements[%d].format = %s\n", i, pf_name(velements[i].src_format)); + } + } 
+#endif + + pipe->set_vertex_buffers(pipe, num_vbuffers, vbuffer); + pipe->set_vertex_elements(pipe, num_velements, velements); + + /* do actual drawing */ + if (ib) { + /* indexed primitive */ + struct gl_buffer_object *bufobj = ib->obj; + struct pipe_buffer *indexBuf = NULL; + unsigned indexSize, indexOffset, i; + + switch (ib->type) { + case GL_UNSIGNED_INT: + indexSize = 4; + break; + case GL_UNSIGNED_SHORT: + indexSize = 2; + break; + case GL_UNSIGNED_BYTE: + indexSize = 1; + break; + default: + assert(0); + return; + } + + /* get/create the index buffer object */ + if (bufobj && bufobj->Name) { + /* elements/indexes are in a real VBO */ + struct st_buffer_object *stobj = st_buffer_object(bufobj); + pipe_buffer_reference(pipe->screen, &indexBuf, stobj->buffer); + indexOffset = (unsigned) ib->ptr / indexSize; + } + else { + /* element/indicies are in user space memory */ + indexBuf = pipe_user_buffer_create(pipe->screen, (void *) ib->ptr, + ib->count * indexSize); + indexOffset = 0; + } + + /* draw */ + if (nr_prims == 1 && pipe->draw_range_elements != NULL) { + i = 0; + + /* XXX: exercise temporary path to pass min/max directly + * through to driver & draw module. These interfaces still + * need a bit of work... 
+ */ + setup_edgeflags(ctx, prims[i].mode, + prims[i].start + indexOffset, prims[i].count, + arrays[VERT_ATTRIB_EDGEFLAG]); + + pipe->draw_range_elements(pipe, indexBuf, indexSize, + min_index, + max_index, + prims[i].mode, + prims[i].start + indexOffset, prims[i].count); + } + else { + for (i = 0; i < nr_prims; i++) { + setup_edgeflags(ctx, prims[i].mode, + prims[i].start + indexOffset, prims[i].count, + arrays[VERT_ATTRIB_EDGEFLAG]); + + pipe->draw_elements(pipe, indexBuf, indexSize, + prims[i].mode, + prims[i].start + indexOffset, prims[i].count); + } + } + + pipe_buffer_reference(pipe->screen, &indexBuf, NULL); + } + else { + /* non-indexed */ + GLuint i; + for (i = 0; i < nr_prims; i++) { + setup_edgeflags(ctx, prims[i].mode, + prims[i].start, prims[i].count, + arrays[VERT_ATTRIB_EDGEFLAG]); + + pipe->draw_arrays(pipe, prims[i].mode, prims[i].start, prims[i].count); + } + } + + /* unreference buffers (frees wrapped user-space buffer objects) */ + for (attr = 0; attr < num_vbuffers; attr++) { + pipe_buffer_reference(pipe->screen, &vbuffer[attr].buffer, NULL); + assert(!vbuffer[attr].buffer); + } + pipe->set_vertex_buffers(pipe, vp->num_inputs, vbuffer); +} + + +#if FEATURE_feedback || FEATURE_drawpix + +/** + * Set the (private) draw module's post-transformed vertex format when in + * GL_SELECT or GL_FEEDBACK mode or for glRasterPos. 
+ */ +static void +set_feedback_vertex_format(GLcontext *ctx) +{ +#if 0 + struct st_context *st = ctx->st; + struct vertex_info vinfo; + GLuint i; + + memset(&vinfo, 0, sizeof(vinfo)); + + if (ctx->RenderMode == GL_SELECT) { + assert(ctx->RenderMode == GL_SELECT); + vinfo.num_attribs = 1; + vinfo.format[0] = FORMAT_4F; + vinfo.interp_mode[0] = INTERP_LINEAR; + } + else { + /* GL_FEEDBACK, or glRasterPos */ + /* emit all attribs (pos, color, texcoord) as GLfloat[4] */ + vinfo.num_attribs = st->state.vs->cso->state.num_outputs; + for (i = 0; i < vinfo.num_attribs; i++) { + vinfo.format[i] = FORMAT_4F; + vinfo.interp_mode[i] = INTERP_LINEAR; + } + } + + draw_set_vertex_info(st->draw, &vinfo); +#endif +} + + +/** + * Called by VBO to draw arrays when in selection or feedback mode and + * to implement glRasterPos. + * This is very much like the normal draw_vbo() function above. + * Look at code refactoring some day. + * Might move this into the failover module some day. + */ +void +st_feedback_draw_vbo(GLcontext *ctx, + const struct gl_client_array **arrays, + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index) +{ + struct st_context *st = ctx->st; + struct pipe_context *pipe = st->pipe; + struct draw_context *draw = st->draw; + const struct st_vertex_program *vp; + const struct pipe_shader_state *vs; + struct pipe_buffer *index_buffer_handle = 0; + struct pipe_vertex_buffer vbuffers[PIPE_MAX_SHADER_INPUTS]; + struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; + GLuint attr, i; + ubyte *mapped_constants; + + assert(draw); + + st_validate_state(ctx->st); + + /* must get these after state validation! */ + vp = ctx->st->vp; + vs = &st->vp->state; + + if (!st->vp->draw_shader) { + st->vp->draw_shader = draw_create_vertex_shader(draw, vs); + } + + /* + * Set up the draw module's state. 
+ * + * We'd like to do this less frequently, but the normal state-update + * code sends state updates to the pipe, not to our private draw module. + */ + assert(draw); + draw_set_viewport_state(draw, &st->state.viewport); + draw_set_clip_state(draw, &st->state.clip); + draw_set_rasterizer_state(draw, &st->state.rasterizer); + draw_bind_vertex_shader(draw, st->vp->draw_shader); + set_feedback_vertex_format(ctx); + + /* loop over TGSI shader inputs to determine vertex buffer + * and attribute info + */ + for (attr = 0; attr < vp->num_inputs; attr++) { + const GLuint mesaAttr = vp->index_to_input[attr]; + struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj; + void *map; + + if (bufobj && bufobj->Name) { + /* Attribute data is in a VBO. + * Recall that for VBOs, the gl_client_array->Ptr field is + * really an offset from the start of the VBO, not a pointer. + */ + struct st_buffer_object *stobj = st_buffer_object(bufobj); + assert(stobj->buffer); + + vbuffers[attr].buffer = NULL; + pipe_buffer_reference(pipe->screen, &vbuffers[attr].buffer, stobj->buffer); + vbuffers[attr].buffer_offset = (unsigned) arrays[0]->Ptr;/* in bytes */ + velements[attr].src_offset = arrays[mesaAttr]->Ptr - arrays[0]->Ptr; + } + else { + /* attribute data is in user-space memory, not a VBO */ + uint bytes = (arrays[mesaAttr]->Size + * _mesa_sizeof_type(arrays[mesaAttr]->Type) + * (max_index + 1)); + + /* wrap user data */ + vbuffers[attr].buffer + = pipe_user_buffer_create(pipe->screen, (void *) arrays[mesaAttr]->Ptr, + bytes); + vbuffers[attr].buffer_offset = 0; + velements[attr].src_offset = 0; + } + + /* common-case setup */ + vbuffers[attr].pitch = arrays[mesaAttr]->StrideB; /* in bytes */ + vbuffers[attr].max_index = max_index; + velements[attr].vertex_buffer_index = attr; + velements[attr].nr_components = arrays[mesaAttr]->Size; + velements[attr].src_format = pipe_vertex_format(arrays[mesaAttr]->Type, + arrays[mesaAttr]->Size, + arrays[mesaAttr]->Normalized); + 
assert(velements[attr].src_format); + + /* tell draw about this attribute */ +#if 0 + draw_set_vertex_buffer(draw, attr, &vbuffer[attr]); +#endif + + /* map the attrib buffer */ + map = pipe_buffer_map(pipe->screen, vbuffers[attr].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, attr, map); + } + + draw_set_vertex_buffers(draw, vp->num_inputs, vbuffers); + draw_set_vertex_elements(draw, vp->num_inputs, velements); + + if (ib) { + unsigned indexSize; + struct gl_buffer_object *bufobj = ib->obj; + struct st_buffer_object *stobj = st_buffer_object(bufobj); + void *map; + + index_buffer_handle = stobj->buffer; + + switch (ib->type) { + case GL_UNSIGNED_INT: + indexSize = 4; + break; + case GL_UNSIGNED_SHORT: + indexSize = 2; + break; + default: + assert(0); + return; + } + + map = pipe_buffer_map(pipe->screen, index_buffer_handle, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(draw, indexSize, map); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + + /* map constant buffers */ + mapped_constants = pipe_buffer_map(pipe->screen, + st->state.constants[PIPE_SHADER_VERTEX].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_constant_buffer(st->draw, mapped_constants, + st->state.constants[PIPE_SHADER_VERTEX].buffer->size); + + + /* draw here */ + for (i = 0; i < nr_prims; i++) { + draw_arrays(draw, prims[i].mode, prims[i].start, prims[i].count); + } + + + /* unmap constant buffers */ + pipe_buffer_unmap(pipe->screen, st->state.constants[PIPE_SHADER_VERTEX].buffer); + + /* + * unmap vertex/index buffers + */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (draw->pt.vertex_buffer[i].buffer) { + pipe_buffer_unmap(pipe->screen, draw->pt.vertex_buffer[i].buffer); + pipe_buffer_reference(pipe->screen, &draw->pt.vertex_buffer[i].buffer, NULL); + draw_set_mapped_vertex_buffer(draw, i, NULL); + } + } + if (ib) { + pipe_buffer_unmap(pipe->screen, index_buffer_handle); + 
draw_set_mapped_element_buffer(draw, 0, NULL); + } +} + +#endif /* FEATURE_feedback || FEATURE_drawpix */ + + +void st_init_draw( struct st_context *st ) +{ + GLcontext *ctx = st->ctx; + + vbo_set_draw_func(ctx, st_draw_vbo); +} + + +void st_destroy_draw( struct st_context *st ) +{ +} + + diff --git a/src/mesa/state_tracker/st_draw.h b/src/mesa/state_tracker/st_draw.h new file mode 100644 index 0000000000..c81f2b25da --- /dev/null +++ b/src/mesa/state_tracker/st_draw.h @@ -0,0 +1,62 @@ +/************************************************************************** + * + * Copyright 2004 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef ST_DRAW_H +#define ST_DRAW_H + +struct _mesa_prim; +struct _mesa_index_buffer; + +void st_init_draw( struct st_context *st ); + +void st_destroy_draw( struct st_context *st ); + +extern void +st_draw_vbo(GLcontext *ctx, + const struct gl_client_array **arrays, + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index); + +extern void +st_feedback_draw_vbo(GLcontext *ctx, + const struct gl_client_array **arrays, + const struct _mesa_prim *prims, + GLuint nr_prims, + const struct _mesa_index_buffer *ib, + GLuint min_index, + GLuint max_index); + +#endif diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c new file mode 100644 index 0000000000..60fd440ef7 --- /dev/null +++ b/src/mesa/state_tracker/st_extensions.c @@ -0,0 +1,263 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "main/imports.h" +#include "main/context.h" +#include "main/extensions.h" +#include "main/macros.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" + +#include "st_context.h" +#include "st_extensions.h" + + +static int _min(int a, int b) +{ + return (a < b) ? a : b; +} + +static float _maxf(float a, float b) +{ + return (a > b) ? a : b; +} + +static int _clamp(int a, int min, int max) +{ + if (a < min) + return min; + else if (a > max) + return max; + else + return a; +} + + +/** + * Query driver to get implementation limits. + * Note that we have to limit/clamp against Mesa's internal limits too. 
+ */ +void st_init_limits(struct st_context *st) +{ + struct pipe_screen *screen = st->pipe->screen; + struct gl_constants *c = &st->ctx->Const; + + c->MaxTextureLevels + = _min(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_LEVELS), + MAX_TEXTURE_LEVELS); + + c->Max3DTextureLevels + = _min(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_3D_LEVELS), + MAX_3D_TEXTURE_LEVELS); + + c->MaxCubeTextureLevels + = _min(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS), + MAX_CUBE_TEXTURE_LEVELS); + + c->MaxTextureRectSize + = _min(1 << (c->MaxTextureLevels - 1), MAX_TEXTURE_RECT_SIZE); + + c->MaxTextureUnits + = c->MaxTextureImageUnits + = c->MaxTextureCoordUnits + = _min(screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS), + MAX_TEXTURE_IMAGE_UNITS); + + c->MaxDrawBuffers + = _clamp(screen->get_param(screen, PIPE_CAP_MAX_RENDER_TARGETS), + 1, MAX_DRAW_BUFFERS); + + c->MaxLineWidth + = _maxf(1.0f, screen->get_paramf(screen, PIPE_CAP_MAX_LINE_WIDTH)); + c->MaxLineWidthAA + = _maxf(1.0f, screen->get_paramf(screen, PIPE_CAP_MAX_LINE_WIDTH_AA)); + + c->MaxPointSize + = _maxf(1.0f, screen->get_paramf(screen, PIPE_CAP_MAX_POINT_WIDTH)); + c->MaxPointSizeAA + = _maxf(1.0f, screen->get_paramf(screen, PIPE_CAP_MAX_POINT_WIDTH_AA)); + + c->MaxTextureMaxAnisotropy + = _maxf(2.0f, screen->get_paramf(screen, PIPE_CAP_MAX_TEXTURE_ANISOTROPY)); + + c->MaxTextureLodBias + = screen->get_paramf(screen, PIPE_CAP_MAX_TEXTURE_LOD_BIAS); + + c->MaxDrawBuffers + = CLAMP(screen->get_param(screen, PIPE_CAP_MAX_RENDER_TARGETS), + 1, MAX_DRAW_BUFFERS); +} + + +/** + * XXX this needs careful review + */ +void st_init_extensions(struct st_context *st) +{ + struct pipe_screen *screen = st->pipe->screen; + GLcontext *ctx = st->ctx; + + /* + * Extensions that are supported by all Gallium drivers: + */ + ctx->Extensions.ARB_multisample = GL_TRUE; /* API support */ + ctx->Extensions.ARB_fragment_program = GL_TRUE; + ctx->Extensions.ARB_texture_border_clamp = GL_TRUE; /* XXX temp */ + 
ctx->Extensions.ARB_texture_compression = GL_TRUE; /* API support only */ + ctx->Extensions.ARB_texture_cube_map = GL_TRUE; + ctx->Extensions.ARB_texture_env_combine = GL_TRUE; + ctx->Extensions.ARB_texture_env_crossbar = GL_TRUE; + ctx->Extensions.ARB_texture_env_dot3 = GL_TRUE; + ctx->Extensions.ARB_vertex_program = GL_TRUE; + ctx->Extensions.ARB_vertex_buffer_object = GL_TRUE; + + ctx->Extensions.EXT_blend_color = GL_TRUE; + ctx->Extensions.EXT_blend_equation_separate = GL_TRUE; + ctx->Extensions.EXT_blend_func_separate = GL_TRUE; + ctx->Extensions.EXT_blend_logic_op = GL_TRUE; + ctx->Extensions.EXT_blend_minmax = GL_TRUE; + ctx->Extensions.EXT_blend_subtract = GL_TRUE; + ctx->Extensions.EXT_framebuffer_blit = GL_TRUE; + ctx->Extensions.EXT_framebuffer_object = GL_TRUE; + ctx->Extensions.EXT_fog_coord = GL_TRUE; + ctx->Extensions.EXT_multi_draw_arrays = GL_TRUE; + ctx->Extensions.EXT_pixel_buffer_object = GL_TRUE; + ctx->Extensions.EXT_point_parameters = GL_TRUE; + ctx->Extensions.EXT_secondary_color = GL_TRUE; + ctx->Extensions.EXT_stencil_wrap = GL_TRUE; + ctx->Extensions.EXT_texture_env_add = GL_TRUE; + ctx->Extensions.EXT_texture_env_combine = GL_TRUE; + ctx->Extensions.EXT_texture_env_dot3 = GL_TRUE; + ctx->Extensions.EXT_texture_lod_bias = GL_TRUE; + + ctx->Extensions.NV_blend_square = GL_TRUE; + ctx->Extensions.NV_texgen_reflection = GL_TRUE; + + ctx->Extensions.SGI_color_matrix = GL_TRUE; + ctx->Extensions.SGIS_generate_mipmap = GL_TRUE; /* XXX temp */ + + /* + * Extensions that depend on the driver/hardware: + */ + if (screen->get_param(screen, PIPE_CAP_MAX_RENDER_TARGETS) > 0) { + ctx->Extensions.ARB_draw_buffers = GL_TRUE; + } + + if (screen->get_param(screen, PIPE_CAP_GLSL)) { + ctx->Extensions.ARB_fragment_shader = GL_TRUE; + ctx->Extensions.ARB_vertex_shader = GL_TRUE; + ctx->Extensions.ARB_shader_objects = GL_TRUE; + ctx->Extensions.ARB_shading_language_100 = GL_TRUE; + ctx->Extensions.ARB_shading_language_120 = GL_TRUE; + } + + if 
(screen->get_param(screen, PIPE_CAP_TEXTURE_MIRROR_REPEAT) > 0) { + ctx->Extensions.ARB_texture_mirrored_repeat = GL_TRUE; + } + + if (screen->get_param(screen, PIPE_CAP_TEXTURE_MIRROR_CLAMP) > 0) { + ctx->Extensions.EXT_texture_mirror_clamp = GL_TRUE; + } + + if (screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES)) { + ctx->Extensions.ARB_texture_non_power_of_two = GL_TRUE; + ctx->Extensions.NV_texture_rectangle = GL_TRUE; + } + + if (screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS) > 1) { + ctx->Extensions.ARB_multitexture = GL_TRUE; + } + + if (screen->get_param(screen, PIPE_CAP_TWO_SIDED_STENCIL)) { + ctx->Extensions.ATI_separate_stencil = GL_TRUE; + } + + if (screen->get_param(screen, PIPE_CAP_ANISOTROPIC_FILTER)) { + ctx->Extensions.EXT_texture_filter_anisotropic = GL_TRUE; + } + + if (screen->get_param(screen, PIPE_CAP_POINT_SPRITE)) { + ctx->Extensions.ARB_point_sprite = GL_TRUE; + ctx->Extensions.NV_point_sprite = GL_TRUE; + } + + if (screen->get_param(screen, PIPE_CAP_OCCLUSION_QUERY)) { + ctx->Extensions.ARB_occlusion_query = GL_TRUE; + } + + if (screen->get_param(screen, PIPE_CAP_TEXTURE_SHADOW_MAP)) { + ctx->Extensions.ARB_depth_texture = GL_TRUE; + ctx->Extensions.ARB_shadow = GL_TRUE; + ctx->Extensions.EXT_shadow_funcs = GL_TRUE; + /*ctx->Extensions.ARB_shadow_ambient = GL_TRUE;*/ + } + + /* GL_EXT_packed_depth_stencil requires both the ability to render to + * a depth/stencil buffer and texture from depth/stencil source. 
+ */ + if (screen->is_format_supported(screen, PIPE_FORMAT_Z24S8_UNORM, + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_DEPTH_STENCIL, 0) && + screen->is_format_supported(screen, PIPE_FORMAT_Z24S8_UNORM, + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { + ctx->Extensions.EXT_packed_depth_stencil = GL_TRUE; + } + else if (screen->is_format_supported(screen, PIPE_FORMAT_S8Z24_UNORM, + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_DEPTH_STENCIL, 0) && + screen->is_format_supported(screen, PIPE_FORMAT_S8Z24_UNORM, + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { + ctx->Extensions.EXT_packed_depth_stencil = GL_TRUE; + } + + if (screen->is_format_supported(screen, PIPE_FORMAT_R8G8B8A8_SRGB, + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { + ctx->Extensions.EXT_texture_sRGB = GL_TRUE; + } + +#if 01 + if (screen->is_format_supported(screen, PIPE_FORMAT_DXT5_RGBA, + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { + ctx->Extensions.EXT_texture_compression_s3tc = GL_TRUE; + } +#endif + if (screen->is_format_supported(screen, PIPE_FORMAT_YCBCR, + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0) || + screen->is_format_supported(screen, PIPE_FORMAT_YCBCR_REV, + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { + ctx->Extensions.MESA_ycbcr_texture = GL_TRUE; + } + +} diff --git a/src/mesa/state_tracker/st_extensions.h b/src/mesa/state_tracker/st_extensions.h new file mode 100644 index 0000000000..2994f16dd3 --- /dev/null +++ b/src/mesa/state_tracker/st_extensions.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef ST_EXTENSIONS_H +#define ST_EXTENSIONS_H + + +extern void st_init_limits(struct st_context *st); + +extern void st_init_extensions(struct st_context *st); + + +#endif /* ST_EXTENSIONS_H */ diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c new file mode 100644 index 0000000000..a8ae30a454 --- /dev/null +++ b/src/mesa/state_tracker/st_format.c @@ -0,0 +1,621 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Texture Image-related functions. 
+ * \author Brian Paul + */ + +#include "main/imports.h" +#include "main/context.h" +#include "main/texstore.h" +#include "main/texformat.h" +#include "main/enums.h" +#include "main/macros.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_screen.h" +#include "st_context.h" +#include "st_format.h" + +/** + * Return the number of bits used by component 'comp' (one of + * PIPE_FORMAT_COMP_x) in the given format, as reported by + * pf_get_component_bits(). + */ +static GLuint +format_bits( + pipe_format_rgbazs_t info, + GLuint comp ) +{ + return pf_get_component_bits( (enum pipe_format) info, comp ); +} + +/** + * Return the largest per-component bit count among the R, G, B, A, Z and S + * components of the given format. + */ +static GLuint +format_max_bits( + pipe_format_rgbazs_t info ) +{ + GLuint size = format_bits( info, PIPE_FORMAT_COMP_R ); + + size = MAX2( size, format_bits( info, PIPE_FORMAT_COMP_G ) ); + size = MAX2( size, format_bits( info, PIPE_FORMAT_COMP_B ) ); + size = MAX2( size, format_bits( info, PIPE_FORMAT_COMP_A ) ); + size = MAX2( size, format_bits( info, PIPE_FORMAT_COMP_Z ) ); + size = MAX2( size, format_bits( info, PIPE_FORMAT_COMP_S ) ); + return size; +} + +/** + * Return the sum of the bit counts of the R, G, B, A, Z and S components. + * st_get_format_info() divides this by 8 to obtain the size in bytes. + */ +static GLuint +format_size( + pipe_format_rgbazs_t info ) +{ + return + format_bits( info, PIPE_FORMAT_COMP_R ) + + format_bits( info, PIPE_FORMAT_COMP_G ) + + format_bits( info, PIPE_FORMAT_COMP_B ) + + format_bits( info, PIPE_FORMAT_COMP_A ) + + format_bits( info, PIPE_FORMAT_COMP_Z ) + + format_bits( info, PIPE_FORMAT_COMP_S ); +} + +/** + * Fill in *pinfo with a GL-level description of the given pipe format: + * base format, datatype, per-component bit counts and size in bytes. + * Only the PIPE_FORMAT_LAYOUT_RGBAZS and PIPE_FORMAT_LAYOUT_YCBCR layouts + * are handled. + * + * \return GL_TRUE on success; GL_FALSE (after asserting) for any other + * layout, in which case *pinfo is left zeroed. + * + * XXX temporary here + */ +GLboolean +st_get_format_info(enum pipe_format format, struct pipe_format_info *pinfo) +{ + static const struct pipe_format_info zero_info = { 0 }; + + /* Start from a fully defined result so that no field is left + * uninitialized on the YCbCr and unsupported-layout paths. + */ + *pinfo = zero_info; + + if (pf_layout(format) == PIPE_FORMAT_LAYOUT_RGBAZS) { + pipe_format_rgbazs_t info; + + info = format; + +#if 0 + printf("%s\n", pf_name( format ) ); +#endif + + /* Data type */ + if (format == PIPE_FORMAT_A1R5G5B5_UNORM || format == PIPE_FORMAT_R5G6B5_UNORM) { + /* packed 16-bit formats: one GLushort holds the whole pixel */ + pinfo->datatype = GL_UNSIGNED_SHORT; + } + else { + GLuint size; + + /* otherwise pick the datatype from the widest component */ + size = format_max_bits( info ); + if (size == 8) { + if (pf_type(info) == PIPE_FORMAT_TYPE_UNORM) + pinfo->datatype = GL_UNSIGNED_BYTE; + else + pinfo->datatype = GL_BYTE; + } + else if (size == 16) { + if (pf_type(info) == PIPE_FORMAT_TYPE_UNORM) + 
pinfo->datatype = GL_UNSIGNED_SHORT; + else + pinfo->datatype = GL_SHORT; + } + else { + assert( size <= 32 ); + if (pf_type(info) == PIPE_FORMAT_TYPE_UNORM) + pinfo->datatype = GL_UNSIGNED_INT; + else + pinfo->datatype = GL_INT; + } + } + + /* Component bits */ + pinfo->red_bits = format_bits( info, PIPE_FORMAT_COMP_R ); + pinfo->green_bits = format_bits( info, PIPE_FORMAT_COMP_G ); + pinfo->blue_bits = format_bits( info, PIPE_FORMAT_COMP_B ); + pinfo->alpha_bits = format_bits( info, PIPE_FORMAT_COMP_A ); + pinfo->depth_bits = format_bits( info, PIPE_FORMAT_COMP_Z ); + pinfo->stencil_bits = format_bits( info, PIPE_FORMAT_COMP_S ); + pinfo->luminance_bits = 0; + pinfo->intensity_bits = 0; + + /* Format size */ + pinfo->size = format_size( info ) / 8; + + /* Luminance & Intensity bits: when X, Y and Z all swizzle to R the + * red bits are reported as luminance, or as intensity when W does too. + */ + if( pf_swizzle_x(info) == PIPE_FORMAT_COMP_R && + pf_swizzle_y(info) == PIPE_FORMAT_COMP_R && + pf_swizzle_z(info) == PIPE_FORMAT_COMP_R ) { + if( pf_swizzle_w(info) == PIPE_FORMAT_COMP_R ) { + pinfo->intensity_bits = pinfo->red_bits; + } + else { + pinfo->luminance_bits = pinfo->red_bits; + } + pinfo->red_bits = 0; + } + + /* Base format */ + if (pinfo->depth_bits) { + if (pinfo->stencil_bits) { + pinfo->base_format = GL_DEPTH_STENCIL_EXT; + } + else { + pinfo->base_format = GL_DEPTH_COMPONENT; + } + } + else if (pinfo->stencil_bits) { + pinfo->base_format = GL_STENCIL_INDEX; + } + else { + pinfo->base_format = GL_RGBA; + } + } + else if (pf_layout(format) == PIPE_FORMAT_LAYOUT_YCBCR) { + pinfo->base_format = GL_YCBCR_MESA; + pinfo->datatype = GL_UNSIGNED_SHORT; + pinfo->size = 2; /* two bytes per "texel" */ + } + else { + /* compressed format? */ + assert(0); + /* report failure so callers' return-value checks are meaningful */ + return GL_FALSE; + } + +#if 0 + printf( + "ST_FORMAT: R(%u), G(%u), B(%u), A(%u), Z(%u), S(%u)\n", + pinfo->red_bits, + pinfo->green_bits, + pinfo->blue_bits, + pinfo->alpha_bits, + pinfo->depth_bits, + pinfo->stencil_bits ); +#endif + + pinfo->format = format; + + return GL_TRUE; +} + + +/** + * Return bytes per pixel for the given format. 
+ */ +GLuint +st_sizeof_format(enum pipe_format format) +{ + struct pipe_format_info info; + if (!st_get_format_info( format, &info )) { + assert( 0 ); + return 0; + } + return info.size; +} + + +/** + * Return the GL datatype (the 'datatype' field filled in by + * st_get_format_info()) for the given format, or 0 if the lookup fails. + */ +GLenum +st_format_datatype(enum pipe_format format) +{ + struct pipe_format_info info; + if (!st_get_format_info( format, &info )) { + assert( 0 ); + return 0; + } + return info.datatype; +} + + +/** + * Translate a MESA_FORMAT_x token into the corresponding PIPE_FORMAT_x. + * Unhandled formats trigger an assertion and yield 0. + */ +enum pipe_format +st_mesa_format_to_pipe_format(GLuint mesaFormat) +{ + switch (mesaFormat) { + /* fix this: the _REV variant is mapped to the same pipe format */ + case MESA_FORMAT_ARGB8888_REV: + case MESA_FORMAT_ARGB8888: + return PIPE_FORMAT_A8R8G8B8_UNORM; + case MESA_FORMAT_ARGB1555: + return PIPE_FORMAT_A1R5G5B5_UNORM; + case MESA_FORMAT_ARGB4444: + return PIPE_FORMAT_A4R4G4B4_UNORM; + case MESA_FORMAT_RGB565: + return PIPE_FORMAT_R5G6B5_UNORM; + case MESA_FORMAT_AL88: + return PIPE_FORMAT_A8L8_UNORM; + case MESA_FORMAT_A8: + return PIPE_FORMAT_A8_UNORM; + case MESA_FORMAT_L8: + return PIPE_FORMAT_L8_UNORM; + case MESA_FORMAT_I8: + return PIPE_FORMAT_I8_UNORM; + case MESA_FORMAT_Z16: + return PIPE_FORMAT_Z16_UNORM; + case MESA_FORMAT_Z32: + return PIPE_FORMAT_Z32_UNORM; + case MESA_FORMAT_Z24_S8: + return PIPE_FORMAT_Z24S8_UNORM; + case MESA_FORMAT_S8_Z24: + return PIPE_FORMAT_S8Z24_UNORM; + case MESA_FORMAT_YCBCR: + return PIPE_FORMAT_YCBCR; +#if FEATURE_texture_s3tc + case MESA_FORMAT_RGB_DXT1: + return PIPE_FORMAT_DXT1_RGB; + case MESA_FORMAT_RGBA_DXT1: + return PIPE_FORMAT_DXT1_RGBA; + case MESA_FORMAT_RGBA_DXT3: + return PIPE_FORMAT_DXT3_RGBA; + case MESA_FORMAT_RGBA_DXT5: + return PIPE_FORMAT_DXT5_RGBA; +#endif + default: + assert(0); + return 0; + } +} + +/** + * Find an RGBA format supported by the context/winsys. 
+ * The candidates in colorFormats[] are tried in array order and the first + * one accepted by screen->is_format_supported() is returned. + * \return the chosen format, or PIPE_FORMAT_NONE if none is supported + */ +static enum pipe_format +default_rgba_format(struct pipe_screen *screen, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags) +{ + static const enum pipe_format colorFormats[] = { + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_B8G8R8A8_UNORM, + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM + }; + uint i; + for (i = 0; i < Elements(colorFormats); i++) { + if (screen->is_format_supported( screen, colorFormats[i], target, tex_usage, geom_flags )) { + return colorFormats[i]; + } + } + return PIPE_FORMAT_NONE; +} + + +/** + * Choose an RGBA format with >8 bits/channel. + * Returns PIPE_FORMAT_R16G16B16A16_SNORM if the screen supports it. + * Otherwise falls back to default_rgba_format() when tex_usage includes + * PIPE_TEXTURE_USAGE_RENDER_TARGET, and returns PIPE_FORMAT_NONE if not. + */ +static enum pipe_format +default_deep_rgba_format(struct pipe_screen *screen, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags) +{ + if (screen->is_format_supported(screen, PIPE_FORMAT_R16G16B16A16_SNORM, target, tex_usage, geom_flags)) { + return PIPE_FORMAT_R16G16B16A16_SNORM; + } + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) + return default_rgba_format(screen, target, tex_usage, geom_flags); + else + return PIPE_FORMAT_NONE; +} + + +/** + * Find a Z format supported by the context/winsys. + * The candidates in zFormats[] are tried in array order. + * \return the chosen format, or PIPE_FORMAT_NONE if none is supported + */ +static enum pipe_format +default_depth_format(struct pipe_screen *screen, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags) +{ + static const enum pipe_format zFormats[] = { + PIPE_FORMAT_Z16_UNORM, + PIPE_FORMAT_Z32_UNORM, + PIPE_FORMAT_S8Z24_UNORM, + PIPE_FORMAT_Z24S8_UNORM + }; + uint i; + for (i = 0; i < Elements(zFormats); i++) { + if (screen->is_format_supported( screen, zFormats[i], target, tex_usage, geom_flags )) { + return zFormats[i]; + } + } + return PIPE_FORMAT_NONE; +} + + +/** + * Given an OpenGL internalFormat value for a texture or surface, return + * the best matching PIPE_FORMAT_x, or PIPE_FORMAT_NONE if there's no match. 
+ * \param pipe context whose screen is queried for format support + * \param internalFormat GL internal format (sized, generic or 1..4) + * \param target one of PIPE_TEXTURE_x + * \param tex_usage PIPE_TEXTURE_USAGE_x flags; passed unchanged to + * screen->is_format_supported() + */ +enum pipe_format +st_choose_format(struct pipe_context *pipe, GLint internalFormat, + enum pipe_texture_target target, unsigned tex_usage) +{ + struct pipe_screen *screen = pipe->screen; + unsigned geom_flags = 0; + + switch (internalFormat) { + case 4: + case GL_RGBA: + case GL_COMPRESSED_RGBA: + case 3: + case GL_RGB: + case GL_COMPRESSED_RGB: + case GL_RGBA8: + case GL_RGB10_A2: + case GL_RGBA12: + return default_rgba_format( screen, target, tex_usage, geom_flags ); + case GL_RGBA16: + /* only render targets are offered the deep (16 bits/channel) format */ + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) + return default_deep_rgba_format( screen, target, tex_usage, geom_flags ); + else + return default_rgba_format( screen, target, tex_usage, geom_flags ); + + case GL_RGBA4: + case GL_RGBA2: + if (screen->is_format_supported( screen, PIPE_FORMAT_A4R4G4B4_UNORM, target, tex_usage, geom_flags )) + return PIPE_FORMAT_A4R4G4B4_UNORM; + return default_rgba_format( screen, target, tex_usage, geom_flags ); + + case GL_RGB5_A1: + if (screen->is_format_supported( screen, PIPE_FORMAT_A1R5G5B5_UNORM, target, tex_usage, geom_flags )) + return PIPE_FORMAT_A1R5G5B5_UNORM; + return default_rgba_format( screen, target, tex_usage, geom_flags ); + + case GL_RGB8: + case GL_RGB10: + case GL_RGB12: + case GL_RGB16: + return default_rgba_format( screen, target, tex_usage, geom_flags ); + + case GL_RGB5: + case GL_RGB4: + case GL_R3_G3_B2: + if (screen->is_format_supported( screen, PIPE_FORMAT_A1R5G5B5_UNORM, target, tex_usage, geom_flags )) + return PIPE_FORMAT_A1R5G5B5_UNORM; + if (screen->is_format_supported( screen, PIPE_FORMAT_R5G6B5_UNORM, target, tex_usage, geom_flags )) + return PIPE_FORMAT_R5G6B5_UNORM; + return default_rgba_format( screen, target, tex_usage, geom_flags ); + + /* alpha, luminance, luminance-alpha and intensity: try the matching + * 8 bits/channel format first, else fall back to an RGBA format. + */ + case GL_ALPHA: + case GL_ALPHA4: + case GL_ALPHA8: + case GL_ALPHA12: + case GL_ALPHA16: + case GL_COMPRESSED_ALPHA: + if 
(screen->is_format_supported( screen, PIPE_FORMAT_A8_UNORM, target, tex_usage, geom_flags )) + return PIPE_FORMAT_A8_UNORM; + return default_rgba_format( screen, target, tex_usage, geom_flags ); + + case 1: + case GL_LUMINANCE: + case GL_LUMINANCE4: + case GL_LUMINANCE8: + case GL_LUMINANCE12: + case GL_LUMINANCE16: + case GL_COMPRESSED_LUMINANCE: + if (screen->is_format_supported( screen, PIPE_FORMAT_L8_UNORM, target, tex_usage, geom_flags )) + return PIPE_FORMAT_L8_UNORM; + return default_rgba_format( screen, target, tex_usage, geom_flags ); + + case 2: + case GL_LUMINANCE_ALPHA: + case GL_LUMINANCE4_ALPHA4: + case GL_LUMINANCE6_ALPHA2: + case GL_LUMINANCE8_ALPHA8: + case GL_LUMINANCE12_ALPHA4: + case GL_LUMINANCE12_ALPHA12: + case GL_LUMINANCE16_ALPHA16: + case GL_COMPRESSED_LUMINANCE_ALPHA: + if (screen->is_format_supported( screen, PIPE_FORMAT_A8L8_UNORM, target, tex_usage, geom_flags )) + return PIPE_FORMAT_A8L8_UNORM; + return default_rgba_format( screen, target, tex_usage, geom_flags ); + + case GL_INTENSITY: + case GL_INTENSITY4: + case GL_INTENSITY8: + case GL_INTENSITY12: + case GL_INTENSITY16: + case GL_COMPRESSED_INTENSITY: + if (screen->is_format_supported( screen, PIPE_FORMAT_I8_UNORM, target, tex_usage, geom_flags )) + return PIPE_FORMAT_I8_UNORM; + return default_rgba_format( screen, target, tex_usage, geom_flags ); + + case GL_YCBCR_MESA: + if (screen->is_format_supported(screen, PIPE_FORMAT_YCBCR, + target, tex_usage, geom_flags)) { + return PIPE_FORMAT_YCBCR; + } + if (screen->is_format_supported(screen, PIPE_FORMAT_YCBCR_REV, + target, tex_usage, geom_flags)) { + return PIPE_FORMAT_YCBCR_REV; + } + return PIPE_FORMAT_NONE; + + /* NOTE(review): the S3TC/DXT cases below return their format without + * querying screen->is_format_supported(). + */ + case GL_RGB_S3TC: + case GL_RGB4_S3TC: + case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: + return PIPE_FORMAT_DXT1_RGB; + + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + return PIPE_FORMAT_DXT1_RGBA; + + case GL_RGBA_S3TC: + case GL_RGBA4_S3TC: + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + return PIPE_FORMAT_DXT3_RGBA; + + case 
GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + return PIPE_FORMAT_DXT5_RGBA; + +#if 0 + case GL_COMPRESSED_RGB_FXT1_3DFX: + return PIPE_FORMAT_RGB_FXT1; + case GL_COMPRESSED_RGBA_FXT1_3DFX: + return PIPE_FORMAT_RGB_FXT1; +#endif + + /* depth: each sized format tries its own pipe format(s) and then + * falls through to the next case when they are not supported. + */ + case GL_DEPTH_COMPONENT16: + if (screen->is_format_supported( screen, PIPE_FORMAT_Z16_UNORM, target, tex_usage, geom_flags )) + return PIPE_FORMAT_Z16_UNORM; + /* fall-through */ + case GL_DEPTH_COMPONENT24: + if (screen->is_format_supported( screen, PIPE_FORMAT_S8Z24_UNORM, target, tex_usage, geom_flags )) + return PIPE_FORMAT_S8Z24_UNORM; + if (screen->is_format_supported( screen, PIPE_FORMAT_Z24S8_UNORM, target, tex_usage, geom_flags )) + return PIPE_FORMAT_Z24S8_UNORM; + /* fall-through */ + case GL_DEPTH_COMPONENT32: + if (screen->is_format_supported( screen, PIPE_FORMAT_Z32_UNORM, target, tex_usage, geom_flags )) + return PIPE_FORMAT_Z32_UNORM; + /* fall-through */ + case GL_DEPTH_COMPONENT: + return default_depth_format( screen, target, tex_usage, geom_flags ); + + /* stencil: a pure S8 format is tried first, then the combined + * depth/stencil formats. + */ + case GL_STENCIL_INDEX: + case GL_STENCIL_INDEX1_EXT: + case GL_STENCIL_INDEX4_EXT: + case GL_STENCIL_INDEX8_EXT: + case GL_STENCIL_INDEX16_EXT: + if (screen->is_format_supported( screen, PIPE_FORMAT_S8_UNORM, target, tex_usage, geom_flags )) + return PIPE_FORMAT_S8_UNORM; + if (screen->is_format_supported( screen, PIPE_FORMAT_S8Z24_UNORM, target, tex_usage, geom_flags )) + return PIPE_FORMAT_S8Z24_UNORM; + if (screen->is_format_supported( screen, PIPE_FORMAT_Z24S8_UNORM, target, tex_usage, geom_flags )) + return PIPE_FORMAT_Z24S8_UNORM; + return PIPE_FORMAT_NONE; + + case GL_DEPTH_STENCIL_EXT: + case GL_DEPTH24_STENCIL8_EXT: + if (screen->is_format_supported( screen, PIPE_FORMAT_S8Z24_UNORM, target, tex_usage, geom_flags )) + return PIPE_FORMAT_S8Z24_UNORM; + if (screen->is_format_supported( screen, PIPE_FORMAT_Z24S8_UNORM, target, tex_usage, geom_flags )) + return PIPE_FORMAT_Z24S8_UNORM; + return PIPE_FORMAT_NONE; + + default: + return PIPE_FORMAT_NONE; + } +} + + +static 
GLboolean +is_depth_or_stencil_format(GLenum format) +{ + /* True for every internal format that names a depth, stencil or + * combined depth/stencil buffer rather than a colour buffer. + */ + switch (format) { + case GL_DEPTH_COMPONENT: + case GL_DEPTH_COMPONENT16: + case GL_DEPTH_COMPONENT24: + case GL_DEPTH_COMPONENT32: + case GL_STENCIL_INDEX: + case GL_STENCIL_INDEX1_EXT: + case GL_STENCIL_INDEX4_EXT: + case GL_STENCIL_INDEX8_EXT: + case GL_STENCIL_INDEX16_EXT: + case GL_DEPTH_STENCIL_EXT: + case GL_DEPTH24_STENCIL8_EXT: + return GL_TRUE; + default: + return GL_FALSE; + } +} + +/** + * Called by FBO code to choose a PIPE_FORMAT_ for drawing surfaces. + * Depth and stencil internal formats are requested with + * PIPE_TEXTURE_USAGE_DEPTH_STENCIL, everything else with + * PIPE_TEXTURE_USAGE_RENDER_TARGET. + * \return the chosen format, or PIPE_FORMAT_NONE if there is no match + */ +enum pipe_format +st_choose_renderbuffer_format(struct pipe_context *pipe, GLint internalFormat) +{ + uint usage; + if (is_depth_or_stencil_format(internalFormat)) + usage = PIPE_TEXTURE_USAGE_DEPTH_STENCIL; + else + usage = PIPE_TEXTURE_USAGE_RENDER_TARGET; + return st_choose_format(pipe, internalFormat, PIPE_TEXTURE_2D, usage); +} + + +/** + * Map a PIPE_FORMAT_x to the Mesa gl_texture_format describing it. + * Unhandled formats trigger an assertion and yield NULL. + */ +static const struct gl_texture_format * +translate_gallium_format_to_mesa_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + return &_mesa_texformat_argb8888; + case PIPE_FORMAT_A1R5G5B5_UNORM: + return &_mesa_texformat_argb1555; + case PIPE_FORMAT_A4R4G4B4_UNORM: + return &_mesa_texformat_argb4444; + case PIPE_FORMAT_R5G6B5_UNORM: + return &_mesa_texformat_rgb565; + case PIPE_FORMAT_A8L8_UNORM: + return &_mesa_texformat_al88; + case PIPE_FORMAT_A8_UNORM: + return &_mesa_texformat_a8; + case PIPE_FORMAT_L8_UNORM: + return &_mesa_texformat_l8; + case PIPE_FORMAT_I8_UNORM: + return &_mesa_texformat_i8; + case PIPE_FORMAT_Z16_UNORM: + return &_mesa_texformat_z16; + case PIPE_FORMAT_Z32_UNORM: + return &_mesa_texformat_z32; + case PIPE_FORMAT_Z24S8_UNORM: + return &_mesa_texformat_z24_s8; + case PIPE_FORMAT_S8Z24_UNORM: + return &_mesa_texformat_s8_z24; + case PIPE_FORMAT_YCBCR: + return &_mesa_texformat_ycbcr; + case PIPE_FORMAT_YCBCR_REV: + return &_mesa_texformat_ycbcr_rev; +#if FEATURE_texture_s3tc + case PIPE_FORMAT_DXT1_RGB: + return &_mesa_texformat_rgb_dxt1; + case PIPE_FORMAT_DXT1_RGBA: + return &_mesa_texformat_rgba_dxt1; + case PIPE_FORMAT_DXT3_RGBA: + return &_mesa_texformat_rgba_dxt3; + 
case PIPE_FORMAT_DXT5_RGBA: + return &_mesa_texformat_rgba_dxt5; +#endif + /* XXX add additional cases */ + /* NOTE(review): st_choose_format() can also return formats that are + * not listed here (e.g. PIPE_FORMAT_B8G8R8A8_UNORM or + * PIPE_FORMAT_R8G8B8A8_UNORM); those end up in the default case. + */ + default: + assert(0); + return NULL; + } +} + + +/** + * Called via ctx->Driver.chooseTextureFormat(). + * The 'format' and 'type' arguments are ignored; the choice is made from + * internalFormat alone, for a 2D sampler texture. + * \return the Mesa texture format, or NULL if no pipe format matches + */ +const struct gl_texture_format * +st_ChooseTextureFormat(GLcontext *ctx, GLint internalFormat, + GLenum format, GLenum type) +{ + enum pipe_format pFormat; + + (void) format; + (void) type; + + pFormat = st_choose_format(ctx->st->pipe, internalFormat, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER); + if (pFormat == PIPE_FORMAT_NONE) + return NULL; + + return translate_gallium_format_to_mesa_format(pFormat); +} diff --git a/src/mesa/state_tracker/st_format.h b/src/mesa/state_tracker/st_format.h new file mode 100644 index 0000000000..3f5ac3201b --- /dev/null +++ b/src/mesa/state_tracker/st_format.h @@ -0,0 +1,79 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef ST_FORMAT_H +#define ST_FORMAT_H + + +/** + * GL-level description of a pipe format, as filled in by + * st_get_format_info(). + */ +struct pipe_format_info +{ + enum pipe_format format; /**< the pipe format being described */ + GLenum base_format; /**< GL base format, e.g. GL_RGBA */ + GLenum datatype; /**< GL datatype, e.g. GL_UNSIGNED_BYTE */ + GLubyte red_bits; + GLubyte green_bits; + GLubyte blue_bits; + GLubyte alpha_bits; + GLubyte luminance_bits; + GLubyte intensity_bits; + GLubyte depth_bits; + GLubyte stencil_bits; + GLubyte size; /**< in bytes */ +}; + + +/** Fill in *pinfo with the description of the given pipe format. */ +GLboolean +st_get_format_info(enum pipe_format format, struct pipe_format_info *pinfo); + + +/** Return the size in bytes recorded for the given pipe format. */ +extern GLuint +st_sizeof_format(enum pipe_format format); + + +/** Return the GL datatype recorded for the given pipe format. */ +extern GLenum +st_format_datatype(enum pipe_format format); + + +/** Translate a MESA_FORMAT_x token to a PIPE_FORMAT_x. */ +extern enum pipe_format +st_mesa_format_to_pipe_format(GLuint mesaFormat); + + +/** Choose a PIPE_FORMAT_x for a GL internalFormat, target and usage. */ +extern enum pipe_format +st_choose_format(struct pipe_context *pipe, GLint internalFormat, + enum pipe_texture_target target, unsigned tex_usage); + +/** Choose a PIPE_FORMAT_x for a renderbuffer with the given internalFormat. */ +extern enum pipe_format +st_choose_renderbuffer_format(struct pipe_context *pipe, GLint internalFormat); + + +/** Driver hook: choose the Mesa texture format for an internalFormat. */ +extern const struct gl_texture_format * +st_ChooseTextureFormat(GLcontext * ctx, GLint internalFormat, + GLenum format, GLenum type); + + +#endif /* ST_FORMAT_H */ diff --git a/src/mesa/state_tracker/st_framebuffer.c b/src/mesa/state_tracker/st_framebuffer.c new file mode 100644 index 0000000000..1ff7009382 --- /dev/null +++ b/src/mesa/state_tracker/st_framebuffer.c @@ -0,0 +1,310 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#include "main/imports.h" +#include "main/buffers.h" +#include "main/context.h" +#include "main/framebuffer.h" +#include "main/matrix.h" +#include "main/renderbuffer.h" +#include "main/scissor.h" +#include "st_public.h" +#include "st_context.h" +#include "st_cb_fbo.h" +#include "pipe/p_defines.h" +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" + + +/** + * Create a new st_framebuffer for the given visual. + * Renderbuffers are added for the front-left colour buffer, the back-left + * colour buffer (double-buffered visuals only), depth and/or stencil, and + * accumulation (when the visual has accum bits). + * \param colorFormat pipe format used for the colour renderbuffers + * \param depthFormat pipe format used for the depth renderbuffer + * \param stencilFormat pipe format used for the stencil renderbuffer; + * when equal to depthFormat (and not PIPE_FORMAT_NONE) + * one renderbuffer is shared by both attachments + * \param width, height stored as the framebuffer's InitWidth/InitHeight + * \param private opaque pointer stored in the framebuffer's Private field + * \return the new framebuffer, or NULL if CALLOC_STRUCT() returned NULL + */ +struct st_framebuffer * +st_create_framebuffer( const __GLcontextModes *visual, + enum pipe_format colorFormat, + enum pipe_format depthFormat, + enum pipe_format stencilFormat, + uint width, uint height, + void *private) +{ + struct st_framebuffer *stfb = CALLOC_STRUCT(st_framebuffer); + if (stfb) { + int samples = st_get_msaa(); + + /* a multisampled visual overrides the st_get_msaa() sample count */ + if (visual->sampleBuffers) + samples = visual->samples; + + _mesa_initialize_framebuffer(&stfb->Base, visual); + + { + /* fake frontbuffer */ + /* XXX allocation should only happen in the unusual case + it's actually needed */ + struct gl_renderbuffer *rb + = st_new_renderbuffer_fb(colorFormat, samples); + _mesa_add_renderbuffer(&stfb->Base, BUFFER_FRONT_LEFT, rb); + } + + if (visual->doubleBufferMode) { + struct gl_renderbuffer *rb + = st_new_renderbuffer_fb(colorFormat, samples); + _mesa_add_renderbuffer(&stfb->Base, BUFFER_BACK_LEFT, rb); + } + + if (depthFormat == stencilFormat && depthFormat != PIPE_FORMAT_NONE) { + /* combined depth/stencil buffer */ + struct gl_renderbuffer *depthStencilRb + = st_new_renderbuffer_fb(depthFormat, samples); + /* note: bind RB to two attachment points */ + _mesa_add_renderbuffer(&stfb->Base, BUFFER_DEPTH, depthStencilRb); + _mesa_add_renderbuffer(&stfb->Base, BUFFER_STENCIL, depthStencilRb); + } + else { + /* separate depth and/or stencil */ + + /* NOTE: the three depth cases below all create the renderbuffer + * from depthFormat in the same way. + */ + if (visual->depthBits == 32) { + /* 32-bit depth buffer */ + struct gl_renderbuffer *depthRb + = st_new_renderbuffer_fb(depthFormat, samples); + _mesa_add_renderbuffer(&stfb->Base, BUFFER_DEPTH, depthRb); + } + else 
if (visual->depthBits == 24) { + /* 24-bit depth buffer, ignore stencil bits */ + struct gl_renderbuffer *depthRb + = st_new_renderbuffer_fb(depthFormat, samples); + _mesa_add_renderbuffer(&stfb->Base, BUFFER_DEPTH, depthRb); + } + else if (visual->depthBits > 0) { + /* 16-bit depth buffer */ + struct gl_renderbuffer *depthRb + = st_new_renderbuffer_fb(depthFormat, samples); + _mesa_add_renderbuffer(&stfb->Base, BUFFER_DEPTH, depthRb); + } + + if (visual->stencilBits > 0) { + /* 8-bit stencil */ + struct gl_renderbuffer *stencilRb + = st_new_renderbuffer_fb(stencilFormat, samples); + _mesa_add_renderbuffer(&stfb->Base, BUFFER_STENCIL, stencilRb); + } + } + + if (visual->accumRedBits > 0) { + /* 16-bit/channel accum */ + struct gl_renderbuffer *accumRb + = st_new_renderbuffer_fb(DEFAULT_ACCUM_PIPE_FORMAT, 0); /* XXX accum isn't multisampled right? */ + _mesa_add_renderbuffer(&stfb->Base, BUFFER_ACCUM, accumRb); + } + + stfb->Base.Initialized = GL_TRUE; + stfb->InitWidth = width; + stfb->InitHeight = height; + stfb->Private = private; + } + return stfb; +} + + +/** + * Resize the framebuffer to width x height if that differs from its + * current size. Nothing happens when there is no current context. + * The first time a size is supplied for a framebuffer created with a 0x0 + * initial size, that size is recorded as the initial size and, if the + * context's viewport width is still <= 1, the viewport and scissor are + * set to cover it. + */ +void st_resize_framebuffer( struct st_framebuffer *stfb, + uint width, uint height ) +{ + if (stfb->Base.Width != width || stfb->Base.Height != height) { + GET_CURRENT_CONTEXT(ctx); + if (ctx) { + if (stfb->InitWidth == 0 && stfb->InitHeight == 0) { + /* didn't have a valid size until now */ + stfb->InitWidth = width; + stfb->InitHeight = height; + if (ctx->Viewport.Width <= 1) { + /* set context's initial viewport/scissor size */ + _mesa_set_viewport(ctx, 0, 0, width, height); + _mesa_set_scissor(ctx, 0, 0, width, height); + } + } + + _mesa_resize_framebuffer(ctx, &stfb->Base, width, height); + + assert(stfb->Base.Width == width); + assert(stfb->Base.Height == height); + } + } +} + + +/** + * Release the caller's reference to the framebuffer through + * _mesa_unreference_framebuffer(). The address of the by-value parameter + * is passed, so the caller's own pointer variable is not modified. + */ +void st_unreference_framebuffer( struct st_framebuffer *stfb ) +{ + _mesa_unreference_framebuffer((struct gl_framebuffer **) &stfb); +} + + + +/** + * Set/replace a framebuffer surface. 
+ * The user of the state tracker can use this instead of + * st_resize_framebuffer() to provide new surfaces when a window is resized. + */ +void +st_set_framebuffer_surface(struct st_framebuffer *stfb, + uint surfIndex, struct pipe_surface *surf) +{ + static const GLuint invalid_size = 9999999; + struct st_renderbuffer *strb; + GLuint width, height, i; + + assert(surfIndex < BUFFER_COUNT); + + strb = st_renderbuffer(stfb->Base.Attachment[surfIndex].Renderbuffer); + assert(strb); + + /* replace the renderbuffer's surface/texture pointers */ + pipe_surface_reference( &strb->surface, surf ); + pipe_texture_reference( &strb->texture, surf->texture ); + + /* update renderbuffer's width/height */ + strb->Base.Width = surf->width; + strb->Base.Height = surf->height; + + /* Try to update the framebuffer's width/height from the renderbuffer + * sizes. Before we start drawing, all the rbs _should_ be the same size. + */ + width = height = invalid_size; + for (i = 0; i < BUFFER_COUNT; i++) { + if (stfb->Base.Attachment[i].Renderbuffer) { + if (width == invalid_size) { + width = stfb->Base.Attachment[i].Renderbuffer->Width; + height = stfb->Base.Attachment[i].Renderbuffer->Height; + } + else if (width != stfb->Base.Attachment[i].Renderbuffer->Width || + height != stfb->Base.Attachment[i].Renderbuffer->Height) { + /* inconsistant renderbuffer sizes, bail out */ + return; + } + } + } + + if (width != invalid_size) { + /* OK, the renderbuffers are of a consistant size, so update the + * parent framebuffer's size. + */ + stfb->Base.Width = width; + stfb->Base.Height = height; + } +} + + + +/** + * Return the pipe_surface for the given renderbuffer. 
+ * \param surfIndex attachment index, must be <= ST_SURFACE_DEPTH + * \return the surface, or NULL if the attachment has no renderbuffer + */ +struct pipe_surface * +st_get_framebuffer_surface(struct st_framebuffer *stfb, uint surfIndex) +{ + struct st_renderbuffer *strb; + + assert(surfIndex <= ST_SURFACE_DEPTH); + + /* sanity checks, ST tokens should match Mesa tokens */ + assert(ST_SURFACE_FRONT_LEFT == BUFFER_FRONT_LEFT); + assert(ST_SURFACE_BACK_RIGHT == BUFFER_BACK_RIGHT); + + strb = st_renderbuffer(stfb->Base.Attachment[surfIndex].Renderbuffer); + if (strb) + return strb->surface; + return NULL; +} + +/** + * Return the pipe_texture for the given renderbuffer. + * \param surfIndex attachment index, must be <= ST_SURFACE_DEPTH + * \return the texture, or NULL if the attachment has no renderbuffer + */ +struct pipe_texture * +st_get_framebuffer_texture(struct st_framebuffer *stfb, uint surfIndex) +{ + struct st_renderbuffer *strb; + + assert(surfIndex <= ST_SURFACE_DEPTH); + + /* sanity checks, ST tokens should match Mesa tokens */ + assert(ST_SURFACE_FRONT_LEFT == BUFFER_FRONT_LEFT); + assert(ST_SURFACE_BACK_RIGHT == BUFFER_BACK_RIGHT); + + strb = st_renderbuffer(stfb->Base.Attachment[surfIndex].Renderbuffer); + if (strb) + return strb->texture; + return NULL; +} + +/** + * This function is to be called prior to SwapBuffers on the given + * framebuffer. It checks if the current context is bound to the framebuffer + * and flushes rendering if needed. + * After the flush the context's front buffer is marked as being a copy of + * the back buffer. + */ +void +st_notify_swapbuffers(struct st_framebuffer *stfb) +{ + GET_CURRENT_CONTEXT(ctx); + + if (ctx && ctx->DrawBuffer == &stfb->Base) { + st_flush( ctx->st, + PIPE_FLUSH_RENDER_CACHE | + PIPE_FLUSH_SWAPBUFFERS | + PIPE_FLUSH_FRAME, + NULL ); + ctx->st->frontbuffer_status = FRONT_STATUS_COPY_OF_BACK; + } +} + + +/** + * Quick hack - allows the winsys to inform the driver that surface + * states are now undefined after a glXSwapBuffers or similar. 
+ * Only acts when the current context is drawing to this framebuffer. + * Attachments whose renderbuffer has no surface yet are skipped. + */ +void +st_notify_swapbuffers_complete(struct st_framebuffer *stfb) +{ + GET_CURRENT_CONTEXT(ctx); + + if (ctx && ctx->DrawBuffer == &stfb->Base) { + struct st_renderbuffer *strb; + int i; + + for (i = 0; i < BUFFER_COUNT; i++) { + if (stfb->Base.Attachment[i].Renderbuffer) { + strb = st_renderbuffer(stfb->Base.Attachment[i].Renderbuffer); + /* a renderbuffer may exist before a surface is attached to it */ + if (strb->surface) + strb->surface->status = PIPE_SURFACE_STATUS_UNDEFINED; + } + } + } +} + + +/** + * Return the opaque pointer that was passed to st_create_framebuffer(). + */ +void *st_framebuffer_private( struct st_framebuffer *stfb ) +{ + return stfb->Private; +} + +/** + * Return the framebuffer's current width and height through the two + * output pointers, which must both be non-NULL. + */ +void st_get_framebuffer_dimensions( struct st_framebuffer *stfb, + uint *width, + uint *height) +{ + *width = stfb->Base.Width; + *height = stfb->Base.Height; +} diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c new file mode 100644 index 0000000000..a15faf732c --- /dev/null +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -0,0 +1,204 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "main/imports.h" +#include "main/mipmap.h" +#include "main/teximage.h" +#include "main/texformat.h" + +#include "shader/prog_instruction.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "util/u_gen_mipmap.h" + +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_context.h" + +#include "st_context.h" +#include "st_draw.h" +#include "st_gen_mipmap.h" +#include "st_program.h" +#include "st_texture.h" +#include "st_cb_texture.h" + + +/** + * one-time init for generate mipmap + * Creates the util gen-mipmap context and stores it in st->gen_mipmap. + * XXX Note: there may be other times we need no-op/simple state like this. + * In that case, some code refactoring would be good. + */ +void +st_init_generate_mipmap(struct st_context *st) +{ + st->gen_mipmap = util_create_gen_mipmap(st->pipe, st->cso_context); +} + + +/** + * Destroy the gen-mipmap context created by st_init_generate_mipmap() + * and clear st->gen_mipmap. + */ +void +st_destroy_generate_mipmap(struct st_context *st) +{ + util_destroy_gen_mipmap(st->gen_mipmap); + st->gen_mipmap = NULL; +} + + +/** + * Generate mipmap levels using hardware rendering. 
+ * \param target GL texture target, used to select the cube face + * \param pt the texture whose levels are generated + * \param baseLevel source level + * \param lastLevel last level to generate + * \return TRUE if successful, FALSE if not possible + */ +static boolean +st_render_mipmap(struct st_context *st, + GLenum target, + struct pipe_texture *pt, + uint baseLevel, uint lastLevel) +{ + struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; + const uint face = _mesa_tex_target_to_face(target); + + assert(target != GL_TEXTURE_3D); /* not done yet */ + + /* check if we can render in the texture's format; the screen expects a + * PIPE_TEXTURE_x target here, not the GL target enum. + */ + if (!screen->is_format_supported(screen, pt->format, pt->target, + PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) { + return FALSE; + } + + util_gen_mipmap(st->gen_mipmap, pt, face, baseLevel, lastLevel, + PIPE_TEX_FILTER_LINEAR); + + return TRUE; +} + + +/** + * Software fallback for mipmap generation. + * For every level after the texture object's BaseLevel, the previous level + * is mapped for CPU reading and the new level for CPU writing, and + * _mesa_generate_mipmap_level() builds the new level from the previous one. + */ +static void +fallback_generate_mipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + struct pipe_context *pipe = ctx->st->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_texture *pt = st_get_texobj_texture(texObj); + const uint baseLevel = texObj->BaseLevel; + const uint lastLevel = pt->last_level; + const uint face = _mesa_tex_target_to_face(target), zslice = 0; + uint dstLevel; + GLenum datatype; + GLuint comps; + + assert(target != GL_TEXTURE_3D); /* not done yet */ + + _mesa_format_to_type_and_comps(texObj->Image[face][baseLevel]->TexFormat, + &datatype, &comps); + + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { + const uint srcLevel = dstLevel - 1; + struct pipe_surface *srcSurf, *dstSurf; + const ubyte *srcData; + ubyte *dstData; + + srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice, + PIPE_BUFFER_USAGE_CPU_READ); + dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, + PIPE_BUFFER_USAGE_CPU_WRITE); + + /* NOTE(review): the surface and mapping results are used unchecked */ + srcData = (ubyte *) pipe_buffer_map(pipe->screen, srcSurf->buffer, + PIPE_BUFFER_USAGE_CPU_READ) + + srcSurf->offset; + dstData = (ubyte *) pipe_buffer_map(pipe->screen, dstSurf->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE) + + dstSurf->offset; + + _mesa_generate_mipmap_level(target, 
datatype, comps, + 0 /*border*/, + pt->width[srcLevel], pt->height[srcLevel], pt->depth[srcLevel], + srcData, + srcSurf->stride, /* stride in bytes */ + pt->width[dstLevel], pt->height[dstLevel], pt->depth[dstLevel], + dstData, + dstSurf->stride); /* stride in bytes */ + + pipe_buffer_unmap(pipe->screen, srcSurf->buffer); + pipe_buffer_unmap(pipe->screen, dstSurf->buffer); + + pipe_surface_reference(&srcSurf, NULL); + pipe_surface_reference(&dstSurf, NULL); + } +} + + +/** + * Generate all mipmap levels after texObj->BaseLevel, up to the texture's + * last level. Hardware rendering is tried first; if that is not possible + * the software fallback is used. Afterwards the Mesa gl_texture_image of + * every generated level is re-initialized to match the new level and made + * to reference the texture. + */ +void +st_generate_mipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj) +{ + struct st_context *st = ctx->st; + struct pipe_texture *pt = st_get_texobj_texture(texObj); + const uint baseLevel = texObj->BaseLevel; + const uint lastLevel = pt->last_level; + uint dstLevel; + + if (!st_render_mipmap(st, target, pt, baseLevel, lastLevel)) { + fallback_generate_mipmap(ctx, target, texObj); + } + + /* Fill in the Mesa gl_texture_image fields */ + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { + const uint srcLevel = dstLevel - 1; + const struct gl_texture_image *srcImage + = _mesa_get_tex_image(ctx, texObj, target, srcLevel); + struct gl_texture_image *dstImage; + struct st_texture_image *stImage; + uint dstWidth = pt->width[dstLevel]; + uint dstHeight = pt->height[dstLevel]; + uint dstDepth = pt->depth[dstLevel]; + /* NOTE(review): srcImage is dereferenced without a NULL check */ + uint border = srcImage->Border; + + dstImage = _mesa_get_tex_image(ctx, texObj, target, dstLevel); + if (!dstImage) { + _mesa_error(ctx, GL_OUT_OF_MEMORY, "generating mipmaps"); + return; + } + + if (dstImage->ImageOffsets) + _mesa_free(dstImage->ImageOffsets); + + /* Free old image data */ + if (dstImage->Data) + ctx->Driver.FreeTexImageData(ctx, dstImage); + + /* initialize new image */ + _mesa_init_teximage_fields(ctx, target, dstImage, dstWidth, dstHeight, + dstDepth, border, srcImage->InternalFormat); + + dstImage->TexFormat = srcImage->TexFormat; + + /* every level's image points at the one shared pipe texture */ + stImage = (struct st_texture_image *) dstImage; + pipe_texture_reference(&stImage->pt, pt); + } +} 
diff --git a/src/mesa/state_tracker/st_gen_mipmap.h b/src/mesa/state_tracker/st_gen_mipmap.h new file mode 100644 index 0000000000..00fbae9302 --- /dev/null +++ b/src/mesa/state_tracker/st_gen_mipmap.h @@ -0,0 +1,46 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef ST_GEN_MIPMAP_H +#define ST_GEN_MIPMAP_H + + +extern void +st_init_generate_mipmap(struct st_context *st); + + +extern void +st_destroy_generate_mipmap(struct st_context *st); + + +extern void +st_generate_mipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *texObj); + + +#endif /* ST_GEN_MIPMAP_H */ diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c new file mode 100644 index 0000000000..50e638df46 --- /dev/null +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -0,0 +1,1002 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ **************************************************************************/
+
+/*
+ * \author
+ *   Michal Krol
+ */
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_build.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_sanity.h"
+#include "st_mesa_to_tgsi.h"
+#include "shader/prog_instruction.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "pipe/p_debug.h"
+
+/*
+ * Translate a Mesa register file into the matching TGSI_FILE_x value.
+ *
+ * immediateMapping[index] == ~0 means "no immediate was emitted for this
+ * parameter".  With indirectAccess set, parameters are always reported
+ * as constants.
+ */
+static GLuint
+map_register_file(
+   enum register_file file,
+   GLuint index,
+   const GLuint immediateMapping[],
+   GLboolean indirectAccess )
+{
+   switch( file ) {
+   case PROGRAM_UNDEFINED:
+      return TGSI_FILE_NULL;
+
+   case PROGRAM_TEMPORARY:
+      return TGSI_FILE_TEMPORARY;
+
+   /*case PROGRAM_LOCAL_PARAM:*/
+   /*case PROGRAM_ENV_PARAM:*/
+
+   /* Mesa ARB shaders lump constants, immediates and state variables
+    * together as PROGRAM_STATE_VAR, so the register file alone cannot
+    * say whether this is a CONSTANT or an IMMEDIATE; the immediate
+    * mapping table decides.
+    */
+   case PROGRAM_STATE_VAR:
+   case PROGRAM_NAMED_PARAM:
+   case PROGRAM_UNIFORM:
+      if (indirectAccess || !immediateMapping || immediateMapping[index] == ~0)
+         return TGSI_FILE_CONSTANT;
+      return TGSI_FILE_IMMEDIATE;
+
+   case PROGRAM_CONSTANT:
+      if (!indirectAccess) {
+         assert(immediateMapping[index] != ~0);
+         return TGSI_FILE_IMMEDIATE;
+      }
+      return TGSI_FILE_CONSTANT;
+
+   case PROGRAM_INPUT:
+      return TGSI_FILE_INPUT;
+
+   case PROGRAM_OUTPUT:
+      return TGSI_FILE_OUTPUT;
+
+   case PROGRAM_ADDRESS:
+      return TGSI_FILE_ADDRESS;
+
+   default:
+      assert( 0 );
+      return TGSI_FILE_NULL;
+   }
+}
+
+/**
+ * Map mesa register file index to TGSI index.
+ * Take special care when processing input and output indices.
+ * \param file  one of TGSI_FILE_x
+ * \param index  the mesa register file index
+ * \param inputMapping  maps Mesa input indexes to TGSI input indexes
+ * \param outputMapping  maps Mesa output indexes to TGSI output indexes
+ * \param immediateMapping  maps Mesa parameter indexes to TGSI immediate
+ *                          indexes (consulted for TGSI_FILE_IMMEDIATE only)
+ * \param indirectAccess  if set, immediate indexes are passed through
+ */
+static GLuint
+map_register_file_index(
+   GLuint file,
+   GLuint index,
+   const GLuint inputMapping[],
+   const GLuint outputMapping[],
+   const GLuint immediateMapping[],
+   GLboolean indirectAccess )
+{
+   /* inputs and outputs go through the caller-supplied maps */
+   if (file == TGSI_FILE_INPUT)
+      return inputMapping[index];
+
+   if (file == TGSI_FILE_OUTPUT)
+      return outputMapping[index];
+
+   if (file == TGSI_FILE_IMMEDIATE && !indirectAccess) {
+      assert(immediateMapping[index] != ~0);
+      return immediateMapping[index];
+   }
+
+   /* everything else keeps its Mesa index */
+   return index;
+}
+
+/*
+ * Translate a Mesa TEXTURE_x_INDEX into the TGSI_TEXTURE_x target.
+ * Unknown values assert and yield TGSI_TEXTURE_1D.
+ */
+static GLuint
+map_texture_target(
+   GLuint textarget )
+{
+   GLuint tgsiTarget = TGSI_TEXTURE_1D;
+
+   switch( textarget ) {
+   case TEXTURE_1D_INDEX:
+      tgsiTarget = TGSI_TEXTURE_1D;
+      break;
+   case TEXTURE_2D_INDEX:
+      tgsiTarget = TGSI_TEXTURE_2D;
+      break;
+   case TEXTURE_3D_INDEX:
+      tgsiTarget = TGSI_TEXTURE_3D;
+      break;
+   case TEXTURE_CUBE_INDEX:
+      tgsiTarget = TGSI_TEXTURE_CUBE;
+      break;
+   case TEXTURE_RECT_INDEX:
+      tgsiTarget = TGSI_TEXTURE_RECT;
+      break;
+   default:
+      assert( 0 );
+      break;
+   }
+
+   return tgsiTarget;
+}
+
+/*
+ * Translate a Mesa SATURATE_x mode into the TGSI_SAT_x value.
+ * Unknown values assert and yield TGSI_SAT_NONE.
+ */
+static GLuint
+convert_sat(
+   GLuint sat )
+{
+   if (sat == SATURATE_OFF)
+      return TGSI_SAT_NONE;
+   if (sat == SATURATE_ZERO_ONE)
+      return TGSI_SAT_ZERO_ONE;
+   if (sat == SATURATE_PLUS_MINUS_ONE)
+      return TGSI_SAT_MINUS_PLUS_ONE;
+
+   assert( 0 );
+   return TGSI_SAT_NONE;
+}
+
+/*
+ * Mesa and TGSI use the same writemask bit layout (asserted below), so
+ * the mask is returned unchanged.
+ */
+static GLuint
+convert_writemask(
+   GLuint writemask )
+{
+   /* the two encodings must agree bit for bit */
+   assert( WRITEMASK_X == TGSI_WRITEMASK_X );
+   assert( WRITEMASK_Y == TGSI_WRITEMASK_Y );
+   assert( WRITEMASK_Z == TGSI_WRITEMASK_Z );
+   assert( WRITEMASK_W == TGSI_WRITEMASK_W );
+
+   /* no bits outside XYZW may be set */
+   assert( (writemask & ~TGSI_WRITEMASK_XYZW) == 0 );
+
+   return writemask;
+}
+
+static struct tgsi_full_immediate
+make_immediate(const float *value, uint size)
+{
+   struct tgsi_full_immediate
imm;
+
+   imm = tgsi_default_full_immediate();
+   imm.Immediate.Size += size;
+   imm.Immediate.DataType = TGSI_IMM_FLOAT32;
+   imm.u.Pointer = value;
+   return imm;
+}
+/**
+ * Translate one Mesa prog_instruction into a tgsi_full_instruction.
+ *
+ * *fullinst is reset to tgsi_default_full_instruction() and then filled
+ * in, in this order: saturate mode and register counts, the destination
+ * register, every source register (file, index, swizzle, negate/abs
+ * modifiers, relative addressing) and finally the opcode.
+ *
+ * \param inst  the Mesa instruction to translate
+ * \param fullinst  receives the TGSI instruction
+ * \param inputMapping  passed to map_register_file_index()
+ * \param outputMapping  passed to map_register_file_index()
+ * \param immediateMapping  passed to map_register_file() and
+ *                          map_register_file_index() for source registers
+ * \param indirectAccess  passed along with immediateMapping
+ * \param preamble_size  added to inst->BranchTarget to form the label of
+ *                       BGNLOOP, CAL, ELSE, ENDLOOP and IF
+ * \param processor  not referenced in the body
+ * \param insideSubroutine  set to GL_TRUE on BGNSUB, GL_FALSE on ENDSUB
+ */
+static void
+compile_instruction(
+   const struct prog_instruction *inst,
+   struct tgsi_full_instruction *fullinst,
+   const GLuint inputMapping[],
+   const GLuint outputMapping[],
+   const GLuint immediateMapping[],
+   GLboolean indirectAccess,
+   GLuint preamble_size,
+   GLuint processor,
+   GLboolean *insideSubroutine)
+{
+   GLuint i;
+   struct tgsi_full_dst_register *fulldst;
+   struct tgsi_full_src_register *fullsrc;
+
+   *fullinst = tgsi_default_full_instruction();
+
+   fullinst->Instruction.Saturate = convert_sat( inst->SaturateMode );
+   fullinst->Instruction.NumDstRegs = _mesa_num_inst_dst_regs( inst->Opcode );
+   fullinst->Instruction.NumSrcRegs = _mesa_num_inst_src_regs( inst->Opcode );
+
+   fulldst = &fullinst->FullDstRegisters[0]; /* filled in whatever NumDstRegs is */
+   fulldst->DstRegister.File = map_register_file( inst->DstReg.File, 0, NULL, GL_FALSE );
+   fulldst->DstRegister.Index = map_register_file_index(
+      fulldst->DstRegister.File,
+      inst->DstReg.Index,
+      inputMapping,
+      outputMapping,
+      NULL,
+      GL_FALSE );
+   fulldst->DstRegister.WriteMask = convert_writemask( inst->DstReg.WriteMask );
+
+   for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) {
+      GLuint j;
+
+      fullsrc = &fullinst->FullSrcRegisters[i];
+      fullsrc->SrcRegister.File = map_register_file(
+         inst->SrcReg[i].File,
+         inst->SrcReg[i].Index,
+         immediateMapping,
+         indirectAccess );
+      fullsrc->SrcRegister.Index = map_register_file_index(
+         fullsrc->SrcRegister.File,
+         inst->SrcReg[i].Index,
+         inputMapping,
+         outputMapping,
+         immediateMapping,
+         indirectAccess );
+
+
+      /* swizzle (ext swizzle also depends on negation) */
+      {
+         GLuint swz[4];
+         GLboolean extended = (inst->SrcReg[i].NegateBase != NEGATE_NONE &&
+                               inst->SrcReg[i].NegateBase != NEGATE_XYZW);
+         for( j = 0; j < 4; j++ ) {
+            swz[j] = GET_SWZ( inst->SrcReg[i].Swizzle, j );
+            if (swz[j] > SWIZZLE_W) /* a selector beyond W needs the extended form */
+               extended = GL_TRUE;
+         }
+         if (extended) {
+            for (j = 0; j < 4; j++) {
+               tgsi_util_set_src_register_extswizzle(&fullsrc->SrcRegisterExtSwz,
+                                                     swz[j], j);
+            }
+         }
+         else {
+            for (j = 0; j < 4; j++) {
+               tgsi_util_set_src_register_swizzle(&fullsrc->SrcRegister,
+                                                  swz[j], j);
+            }
+         }
+      }
+
+      if( inst->SrcReg[i].NegateBase == NEGATE_XYZW ) { /* all four negated: simple flag */
+         fullsrc->SrcRegister.Negate = 1;
+      }
+      else if( inst->SrcReg[i].NegateBase != NEGATE_NONE ) { /* partial: per-component flags */
+         if( inst->SrcReg[i].NegateBase & NEGATE_X ) {
+            fullsrc->SrcRegisterExtSwz.NegateX = 1;
+         }
+         if( inst->SrcReg[i].NegateBase & NEGATE_Y ) {
+            fullsrc->SrcRegisterExtSwz.NegateY = 1;
+         }
+         if( inst->SrcReg[i].NegateBase & NEGATE_Z ) {
+            fullsrc->SrcRegisterExtSwz.NegateZ = 1;
+         }
+         if( inst->SrcReg[i].NegateBase & NEGATE_W ) {
+            fullsrc->SrcRegisterExtSwz.NegateW = 1;
+         }
+      }
+
+      if( inst->SrcReg[i].Abs ) {
+         fullsrc->SrcRegisterExtMod.Absolute = 1;
+      }
+
+      if( inst->SrcReg[i].NegateAbs ) {
+         fullsrc->SrcRegisterExtMod.Negate = 1;
+      }
+
+      if( inst->SrcReg[i].RelAddr ) {
+         fullsrc->SrcRegister.Indirect = 1;
+
+         fullsrc->SrcRegisterInd.File = TGSI_FILE_ADDRESS;
+         fullsrc->SrcRegisterInd.Index = 0; /* always address register 0 */
+      }
+   }
+
+   switch( inst->Opcode ) {
+   case OPCODE_ARL:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_ARL;
+      break;
+   case OPCODE_ABS:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_ABS;
+      break;
+   case OPCODE_ADD:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_ADD;
+      break;
+   case OPCODE_BGNLOOP:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_BGNLOOP2;
+      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
+      break;
+   case OPCODE_BGNSUB:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_BGNSUB;
+      *insideSubroutine = GL_TRUE;
+      break;
+   case OPCODE_BRA:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_BRA;
+      break;
+   case OPCODE_BRK:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_BRK;
+      break;
+   case OPCODE_CAL:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_CAL;
+      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
+      break;
+   case OPCODE_CMP:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_CMP;
+      break;
+   case OPCODE_CONT:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_CONT;
+      break;
+   case OPCODE_COS:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_COS;
+      break;
+   case OPCODE_DDX:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_DDX;
+      break;
+   case OPCODE_DDY:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_DDY;
+      break;
+   case OPCODE_DP3:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_DP3;
+      break;
+   case OPCODE_DP4:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_DP4;
+      break;
+   case OPCODE_DPH:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_DPH;
+      break;
+   case OPCODE_DST:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_DST;
+      break;
+   case OPCODE_ELSE:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_ELSE;
+      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
+      break;
+   case OPCODE_ENDIF:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_ENDIF;
+      break;
+   case OPCODE_ENDLOOP:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_ENDLOOP2;
+      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
+      break;
+   case OPCODE_ENDSUB:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_ENDSUB;
+      *insideSubroutine = GL_FALSE;
+      break;
+   case OPCODE_EX2:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_EX2;
+      break;
+   case OPCODE_EXP:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_EXP;
+      break;
+   case OPCODE_FLR:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_FLR;
+      break;
+   case OPCODE_FRC:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_FRC;
+      break;
+   case OPCODE_IF:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_IF;
+      fullinst->InstructionExtLabel.Label = inst->BranchTarget + preamble_size;
+      break;
+   case OPCODE_INT:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_INT;
+      break;
+   case OPCODE_KIL:
+      /* conditional */
+      fullinst->Instruction.Opcode = TGSI_OPCODE_KIL;
+      break;
+   case OPCODE_KIL_NV:
+      /* predicated */
+      assert(inst->DstReg.CondMask == COND_TR);
+      fullinst->Instruction.Opcode = TGSI_OPCODE_KILP;
+      break;
+   case OPCODE_LG2:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_LG2;
+      break;
+   case OPCODE_LOG:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_LOG;
+      break;
+   case OPCODE_LIT:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_LIT;
+      break;
+   case OPCODE_LRP:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_LRP;
+      break;
+   case OPCODE_MAD:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_MAD;
+      break;
+   case OPCODE_MAX:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_MAX;
+      break;
+   case OPCODE_MIN:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_MIN;
+      break;
+   case OPCODE_MOV:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_MOV;
+      break;
+   case OPCODE_MUL:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_MUL;
+      break;
+   case OPCODE_NOISE1:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE1;
+      break;
+   case OPCODE_NOISE2:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE2;
+      break;
+   case OPCODE_NOISE3:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE3;
+      break;
+   case OPCODE_NOISE4:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_NOISE4;
+      break;
+   case OPCODE_NOP:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_NOP;
+      break;
+   case OPCODE_POW:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_POW;
+      break;
+   case OPCODE_RCP:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_RCP;
+      break;
+   case OPCODE_RET:
+      /* If RET is used inside main (not a real subroutine) we may want
+       * to execute END instead of RET.  TBD...
+       */
+      if (1 /* *insideSubroutine */) { /* condition is constant: RET is always emitted */
+         fullinst->Instruction.Opcode = TGSI_OPCODE_RET;
+      }
+      else {
+         /* inside main() pseudo-function */
+         fullinst->Instruction.Opcode = TGSI_OPCODE_END;
+      }
+      break;
+   case OPCODE_RSQ:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_RSQ;
+      tgsi_util_set_full_src_register_sign_mode( /* overrides the sign setup done in the source loop */
+         &fullinst->FullSrcRegisters[0],
+         TGSI_UTIL_SIGN_CLEAR );
+      break;
+   case OPCODE_SCS:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_SCS;
+      fulldst->DstRegister.WriteMask &= TGSI_WRITEMASK_XY; /* restrict writes to X and Y */
+      break;
+   case OPCODE_SEQ:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_SEQ;
+      break;
+   case OPCODE_SGE:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_SGE;
+      break;
+   case OPCODE_SGT:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_SGT;
+      break;
+   case OPCODE_SIN:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_SIN;
+      break;
+   case OPCODE_SLE:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_SLE;
+      break;
+   case OPCODE_SLT:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_SLT;
+      break;
+   case OPCODE_SNE:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_SNE;
+      break;
+   case OPCODE_SUB:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_SUB;
+      break;
+   case OPCODE_SWZ:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_SWZ;
+      break;
+   case OPCODE_TEX:
+      /* ordinary texture lookup */
+      fullinst->Instruction.Opcode = TGSI_OPCODE_TEX;
+      fullinst->Instruction.NumSrcRegs = 2;
+      fullinst->InstructionExtTexture.Texture = map_texture_target( inst->TexSrcTarget );
+      fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
+      fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
+      break;
+   case OPCODE_TXB:
+      /* texture lookup with LOD bias */
+      fullinst->Instruction.Opcode = TGSI_OPCODE_TXB;
+      fullinst->Instruction.NumSrcRegs = 2;
+      fullinst->InstructionExtTexture.Texture = map_texture_target( inst->TexSrcTarget );
+      fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
+      fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
+      break;
+   case OPCODE_TXD:
+      /* texture lookup with explicit partial derivatives */
+      fullinst->Instruction.Opcode = TGSI_OPCODE_TXD;
+      fullinst->Instruction.NumSrcRegs = 4;
+      fullinst->InstructionExtTexture.Texture = map_texture_target( inst->TexSrcTarget );
+      /* src[0] = coord, src[1] = d[strq]/dx, src[2] = d[strq]/dy */
+      fullinst->FullSrcRegisters[3].SrcRegister.File = TGSI_FILE_SAMPLER;
+      fullinst->FullSrcRegisters[3].SrcRegister.Index = inst->TexSrcUnit;
+      break;
+   case OPCODE_TXL:
+      /* texture lookup with explicit LOD */
+      fullinst->Instruction.Opcode = TGSI_OPCODE_TXL;
+      fullinst->Instruction.NumSrcRegs = 2;
+      fullinst->InstructionExtTexture.Texture = map_texture_target( inst->TexSrcTarget );
+      fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
+      fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
+      break;
+   case OPCODE_TXP:
+      /* texture lookup with divide by Q component */
+      /* convert to TEX w/ special flag for division */
+      fullinst->Instruction.Opcode = TGSI_OPCODE_TXP;
+      fullinst->Instruction.NumSrcRegs = 2;
+      fullinst->InstructionExtTexture.Texture = map_texture_target( inst->TexSrcTarget );
+      fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
+      fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
+      break;
+   case OPCODE_XPD:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_XPD;
+      fulldst->DstRegister.WriteMask &= TGSI_WRITEMASK_XYZ; /* restrict writes to X, Y and Z */
+      break;
+   case OPCODE_END:
+      fullinst->Instruction.Opcode = TGSI_OPCODE_END;
+      break;
+   default:
+      assert( 0 );
+   }
+}
+
+/**
+ * \param usage_mask  bitfield of TGSI_WRITEMASK_{XYZW} tokens
+ */
+static struct tgsi_full_declaration
+make_input_decl(
+   GLuint index,
+   GLboolean interpolate_info,
+   GLuint interpolate,
+   GLuint usage_mask,
+   GLboolean semantic_info,
+   GLuint semantic_name,
+   GLbitfield semantic_index )
+{
+   struct tgsi_full_declaration decl;
+
+   assert(semantic_name < TGSI_SEMANTIC_COUNT);
+
+   decl = tgsi_default_full_declaration();
+   decl.Declaration.File = TGSI_FILE_INPUT;
+   
decl.Declaration.UsageMask = usage_mask;
+   decl.Declaration.Semantic = semantic_info;
+   decl.DeclarationRange.First = index;
+   decl.DeclarationRange.Last = index;
+   if (semantic_info) {
+      decl.Semantic.SemanticName = semantic_name;
+      decl.Semantic.SemanticIndex = semantic_index;
+   }
+   if (interpolate_info) {
+      decl.Declaration.Interpolate = interpolate;
+   }
+
+   return decl;
+}
+
+/**
+ * Build the declaration of a single output register that carries
+ * semantic information.
+ * \param usage_mask  bitfield of TGSI_WRITEMASK_{XYZW} tokens
+ */
+static struct tgsi_full_declaration
+make_output_decl(
+   GLuint index,
+   GLuint semantic_name,
+   GLuint semantic_index,
+   GLbitfield usage_mask )
+{
+   struct tgsi_full_declaration out;
+
+   assert(semantic_name < TGSI_SEMANTIC_COUNT);
+
+   out = tgsi_default_full_declaration();
+
+   /* one register: first == last == index */
+   out.DeclarationRange.First = index;
+   out.DeclarationRange.Last = index;
+
+   out.Declaration.File = TGSI_FILE_OUTPUT;
+   out.Declaration.UsageMask = usage_mask;
+
+   /* outputs always carry a semantic */
+   out.Declaration.Semantic = 1;
+   out.Semantic.SemanticName = semantic_name;
+   out.Semantic.SemanticIndex = semantic_index;
+
+   return out;
+}
+
+
+/** Declare the temporary registers start_index..end_index (inclusive). */
+static struct tgsi_full_declaration
+make_temp_decl(
+   GLuint start_index,
+   GLuint end_index )
+{
+   struct tgsi_full_declaration temps;
+
+   temps = tgsi_default_full_declaration();
+   temps.DeclarationRange.First = start_index;
+   temps.DeclarationRange.Last = end_index;
+   temps.Declaration.File = TGSI_FILE_TEMPORARY;
+   return temps;
+}
+
+/** Declare the address registers start_index..end_index (inclusive). */
+static struct tgsi_full_declaration
+make_addr_decl(
+   GLuint start_index,
+   GLuint end_index )
+{
+   struct tgsi_full_declaration addr;
+
+   addr = tgsi_default_full_declaration();
+   addr.DeclarationRange.First = start_index;
+   addr.DeclarationRange.Last = end_index;
+   addr.Declaration.File = TGSI_FILE_ADDRESS;
+   return addr;
+}
+
+static struct tgsi_full_declaration
+make_sampler_decl(GLuint index)
+{
+   struct tgsi_full_declaration decl;
+   decl = tgsi_default_full_declaration();
+   decl.Declaration.File = TGSI_FILE_SAMPLER;
+   decl.DeclarationRange.First = index;
+   decl.DeclarationRange.Last = index;
+   return
decl;
+}
+
+/** Reference into a constant buffer */
+static struct tgsi_full_declaration
+make_constant_decl(GLuint first, GLuint last)
+{
+   struct tgsi_full_declaration decl;
+   decl = tgsi_default_full_declaration();
+   decl.Declaration.File = TGSI_FILE_CONSTANT;
+   decl.DeclarationRange.First = first;
+   decl.DeclarationRange.Last = last;
+   return decl;
+}
+
+
+
+/**
+ * Find the temporaries which are used in the given program.
+ *
+ * \param program  the program to scan
+ * \param tempsUsed  cleared, then tempsUsed[i] is set to GL_TRUE for
+ *                   every temporary i that appears as a source or
+ *                   destination register of any instruction
+ */
+static void
+find_temporaries(const struct gl_program *program,
+                 GLboolean tempsUsed[MAX_PROGRAM_TEMPS])
+{
+   GLuint i, j;
+
+   for (i = 0; i < MAX_PROGRAM_TEMPS; i++)
+      tempsUsed[i] = GL_FALSE;
+
+   for (i = 0; i < program->NumInstructions; i++) {
+      const struct prog_instruction *inst = program->Instructions + i;
+      const GLuint n = _mesa_num_inst_src_regs( inst->Opcode );
+
+      for (j = 0; j < n; j++) {
+         if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) {
+            assert((GLuint) inst->SrcReg[j].Index < MAX_PROGRAM_TEMPS);
+            tempsUsed[inst->SrcReg[j].Index] = GL_TRUE;
+         }
+      }
+
+      /* The destination does not depend on the source loop: test it once
+       * per instruction, including instructions with no source registers.
+       */
+      if (inst->DstReg.File == PROGRAM_TEMPORARY) {
+         assert((GLuint) inst->DstReg.Index < MAX_PROGRAM_TEMPS);
+         tempsUsed[inst->DstReg.Index] = GL_TRUE;
+      }
+   }
+}
+
+
+
+
+/**
+ * Translate Mesa program to TGSI format.
+ * \param procType  TGSI_PROCESSOR_FRAGMENT or TGSI_PROCESSOR_VERTEX
+ * \param program  the program to translate
+ * \param numInputs  number of input registers used
+ * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
+ *                      input indexes
+ * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
+ * \param inputSemanticIndex  the semantic index (ex: which texcoord) for each input
+ * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
+ *
+ * \param numOutputs  number of output registers used
+ * \param outputMapping  maps Mesa fragment program outputs to TGSI
+ *                       generic outputs
+ * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
+ * \param outputSemanticIndex  the semantic index (ex: which texcoord) for each output
+ * \param tokens  array to store translated tokens in
+ * \param maxTokens  size of the tokens array
+ *
+ * \return number of tokens placed in 'tokens' buffer, or zero if error
+ */
+GLuint
+st_translate_mesa_program(
+   uint procType,
+   const struct gl_program *program,
+   GLuint numInputs,
+   const GLuint inputMapping[],
+   const ubyte inputSemanticName[],
+   const ubyte inputSemanticIndex[],
+   const GLuint interpMode[],
+   GLuint numOutputs,
+   const GLuint outputMapping[],
+   const ubyte outputSemanticName[],
+   const ubyte outputSemanticIndex[],
+   struct tgsi_token *tokens,
+   GLuint maxTokens )
+{
+   GLuint i;
+   GLuint ti;  /* token index */
+   struct tgsi_header *header;
+   struct tgsi_processor *processor;
+   struct tgsi_full_instruction fullinst;
+   GLuint preamble_size = 0;
+   GLuint immediates[1000];  /* parameter index -> immediate index, ~0 if none */
+   GLuint numImmediates = 0;
+   GLboolean insideSubroutine = GL_FALSE;
+   GLboolean indirectAccess = GL_FALSE;
+
+   assert(procType == TGSI_PROCESSOR_FRAGMENT ||
+          procType == TGSI_PROCESSOR_VERTEX);
+
+   /* the first three tokens are version, header and processor */
+   *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
+
+   header = (struct tgsi_header *) &tokens[1];
+   *header = tgsi_build_header();
+
+   processor = (struct tgsi_processor *) &tokens[2];
+   *processor = tgsi_build_processor( procType, header );
+
+   ti = 3;
+
+   /*
+    * Declare input attributes.
+    */
+   if (procType == TGSI_PROCESSOR_FRAGMENT) {
+      for (i = 0; i < numInputs; i++) {
+         struct tgsi_full_declaration fulldecl;
+         fulldecl = make_input_decl(i,
+                                    GL_TRUE, interpMode[i],
+                                    TGSI_WRITEMASK_XYZW,
+                                    GL_TRUE, inputSemanticName[i],
+                                    inputSemanticIndex[i]);
+         ti += tgsi_build_full_declaration(&fulldecl,
+                                           &tokens[ti],
+                                           header,
+                                           maxTokens - ti );
+      }
+   }
+   else {
+      /* vertex prog */
+      /* XXX: this could probably be merged with the clause above.
+       * the only difference is the semantic tags.
+       */
+      for (i = 0; i < numInputs; i++) {
+         struct tgsi_full_declaration fulldecl;
+         fulldecl = make_input_decl(i,
+                                    GL_FALSE, 0,
+                                    TGSI_WRITEMASK_XYZW,
+                                    GL_FALSE, 0, 0);
+         ti += tgsi_build_full_declaration(&fulldecl,
+                                           &tokens[ti],
+                                           header,
+                                           maxTokens - ti );
+      }
+   }
+
+   /*
+    * Declare output attributes.
+    */
+   if (procType == TGSI_PROCESSOR_FRAGMENT) {
+      for (i = 0; i < numOutputs; i++) {
+         struct tgsi_full_declaration fulldecl;
+         switch (outputSemanticName[i]) {
+         case TGSI_SEMANTIC_POSITION:
+            fulldecl = make_output_decl(i,
+                                        TGSI_SEMANTIC_POSITION, /* Z / Depth */
+                                        outputSemanticIndex[i],
+                                        TGSI_WRITEMASK_Z );
+            break;
+         case TGSI_SEMANTIC_COLOR:
+            fulldecl = make_output_decl(i,
+                                        TGSI_SEMANTIC_COLOR,
+                                        outputSemanticIndex[i],
+                                        TGSI_WRITEMASK_XYZW );
+            break;
+         default:
+            assert(0);
+            return 0;
+         }
+         ti += tgsi_build_full_declaration(&fulldecl,
+                                           &tokens[ti],
+                                           header,
+                                           maxTokens - ti );
+      }
+   }
+   else {
+      /* vertex prog */
+      for (i = 0; i < numOutputs; i++) {
+         struct tgsi_full_declaration fulldecl;
+         fulldecl = make_output_decl(i,
+                                     outputSemanticName[i],
+                                     outputSemanticIndex[i],
+                                     TGSI_WRITEMASK_XYZW );
+         ti += tgsi_build_full_declaration(&fulldecl,
+                                           &tokens[ti],
+                                           header,
+                                           maxTokens - ti );
+      }
+   }
+
+   /* temporary decls */
+   {
+      /* one extra, always-false slot so a range that reaches the last
+       * temporary is still closed by the loop below
+       */
+      GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1];
+      GLboolean inside_range = GL_FALSE;
+      GLuint start_range = 0;
+
+      find_temporaries(program, tempsUsed);
+      tempsUsed[MAX_PROGRAM_TEMPS] = GL_FALSE;
+      for (i = 0; i < MAX_PROGRAM_TEMPS + 1; i++) {
+         if (tempsUsed[i] && !inside_range) {
+            inside_range = GL_TRUE;
+            start_range = i;
+         }
+         else if (!tempsUsed[i] && inside_range) {
+            struct tgsi_full_declaration fulldecl;
+
+            inside_range = GL_FALSE;
+            fulldecl = make_temp_decl( start_range, i - 1 );
+            ti += tgsi_build_full_declaration(
+               &fulldecl,
+               &tokens[ti],
+               header,
+               maxTokens - ti );
+         }
+      }
+   }
+
+   /* Declare address register.
+    */
+   if (program->NumAddressRegs > 0) {
+      struct tgsi_full_declaration fulldecl;
+
+      assert( program->NumAddressRegs == 1 );
+
+      fulldecl = make_addr_decl( 0, 0 );
+      ti += tgsi_build_full_declaration(
+         &fulldecl,
+         &tokens[ti],
+         header,
+         maxTokens - ti );
+
+      indirectAccess = GL_TRUE;
+   }
+
+   /* immediates/literals */
+   memset(immediates, ~0, sizeof(immediates));
+
+   /* Emit immediates only when there is no address register in use.
+    * FIXME: Be smarter and recognize param arrays -- indirect addressing is
+    * only valid within the referenced array.
+    */
+   if (program->Parameters && !indirectAccess) {
+      /* immediates[] is indexed by parameter number here and when the
+       * instructions are compiled; refuse programs whose parameter list
+       * does not fit rather than run past the end of the array.
+       */
+      if (program->Parameters->NumParameters >
+          sizeof(immediates) / sizeof(immediates[0])) {
+         assert(0);
+         return 0;
+      }
+
+      for (i = 0; i < program->Parameters->NumParameters; i++) {
+         if (program->Parameters->Parameters[i].Type == PROGRAM_CONSTANT) {
+            struct tgsi_full_immediate fullimm;
+
+            fullimm = make_immediate( program->Parameters->ParameterValues[i], 4 );
+            ti += tgsi_build_full_immediate(
+               &fullimm,
+               &tokens[ti],
+               header,
+               maxTokens - ti );
+            immediates[i] = numImmediates;
+            numImmediates++;
+         }
+      }
+   }
+
+   /* constant buffer refs: one declaration per run of consecutive
+    * parameters that live in the constant buffer
+    */
+   if (program->Parameters) {
+      GLint start = -1, end = -1;
+
+      for (i = 0; i < program->Parameters->NumParameters; i++) {
+         /* always flush a pending run at the last parameter */
+         GLboolean emit = (i == program->Parameters->NumParameters - 1);
+         GLboolean matches;
+
+         switch (program->Parameters->Parameters[i].Type) {
+         case PROGRAM_ENV_PARAM:
+         case PROGRAM_STATE_VAR:
+         case PROGRAM_NAMED_PARAM:
+         case PROGRAM_UNIFORM:
+            matches = GL_TRUE;
+            break;
+         case PROGRAM_CONSTANT:
+            /* constants were emitted as immediates unless indirect
+             * addressing is in use
+             */
+            matches = indirectAccess;
+            break;
+         default:
+            matches = GL_FALSE;
+         }
+
+         if (matches) {
+            if (start == -1) {
+               /* begin a sequence */
+               start = i;
+               end = i;
+            }
+            else {
+               /* continue sequence */
+               end = i;
+            }
+         }
+         else {
+            if (start != -1) {
+               /* end of sequence */
+               emit = GL_TRUE;
+            }
+         }
+
+         if (emit && start >= 0) {
+            struct tgsi_full_declaration fulldecl;
+
+            fulldecl = make_constant_decl( start, end );
+            ti += tgsi_build_full_declaration(
+               &fulldecl,
+               &tokens[ti],
+               header,
+               maxTokens - ti );
+            start = end = -1;
+         }
+      }
+   }
+
+   /* texture samplers (only the low 8 bits of SamplersUsed are examined) */
+   for (i = 0; i < 8; i++) {
+      if (program->SamplersUsed & (1 << i)) {
+         struct tgsi_full_declaration fulldecl;
+
+         fulldecl = make_sampler_decl( i );
+         ti += tgsi_build_full_declaration(
+            &fulldecl,
+            &tokens[ti],
+            header,
+            maxTokens - ti );
+      }
+   }
+
+   /* instructions */
+   for (i = 0; i < program->NumInstructions; i++) {
+      compile_instruction(
+         &program->Instructions[i],
+         &fullinst,
+         inputMapping,
+         outputMapping,
+         immediates,
+         indirectAccess,
+         preamble_size,
+         procType,
+         &insideSubroutine );
+
+      ti += tgsi_build_full_instruction(
+         &fullinst,
+         &tokens[ti],
+         header,
+         maxTokens - ti );
+   }
+
+#if DEBUG
+   if(!tgsi_sanity_check(tokens)) {
+      debug_printf("Due to sanity check failure(s) above the following shader program is invalid:\n");
+      debug_printf("\nOriginal program:\n%s", program->String);
+      debug_printf("\nMesa program:\n");
+      _mesa_print_program(program);
+      debug_printf("\nTGSI program:\n");
+      tgsi_dump(tokens, 0);
+      assert(0);
+   }
+#endif
+
+   return ti;
+}
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h
new file mode 100644
index 0000000000..77c74644b8
--- /dev/null
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h
@@ -0,0 +1,64 @@
+/**************************************************************************
+ *
+ * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef ST_MESA_TO_TGSI_H +#define ST_MESA_TO_TGSI_H + +#include "main/mtypes.h" + + +#if defined __cplusplus +extern "C" { +#endif + +struct tgsi_token; +struct gl_program; + +GLuint +st_translate_mesa_program( + uint procType, + const struct gl_program *program, + GLuint numInputs, + const GLuint inputMapping[], + const ubyte inputSemanticName[], + const ubyte inputSemanticIndex[], + const GLuint interpMode[], + GLuint numOutputs, + const GLuint outputMapping[], + const ubyte outputSemanticName[], + const ubyte outputSemanticIndex[], + struct tgsi_token *tokens, + GLuint maxTokens ); + + +#if defined __cplusplus +} /* extern "C" */ +#endif + +#endif /* ST_MESA_TO_TGSI_H */ + diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c new file mode 100644 index 0000000000..55b52c3745 --- /dev/null +++ b/src/mesa/state_tracker/st_program.c @@ -0,0 +1,514 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + + +#include "main/imports.h" +#include "main/mtypes.h" +#include "shader/prog_print.h" +#include "shader/programopt.h" + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "draw/draw_context.h" +#include "tgsi/tgsi_dump.h" + +#include "st_context.h" +#include "st_atom.h" +#include "st_program.h" +#include "st_mesa_to_tgsi.h" +#include "cso_cache/cso_context.h" + + +#define ST_MAX_SHADER_TOKENS 4096 + + +#define TGSI_DEBUG 0 + + +/** XXX we should use the version of this from u_memory.h but including + * that header causes symbol collisions. + */ +static INLINE void * +mem_dup(const void *src, uint size) +{ + void *dup = MALLOC(size); + if (dup) + memcpy(dup, src, size); + return dup; +} + + + +/** + * Translate a Mesa vertex shader into a TGSI shader. + * \param outputMapping to map vertex program output registers (VERT_RESULT_x) + * to TGSI output slots + * \param tokensOut destination for TGSI tokens + * \return pointer to cached pipe_shader object. 
+ */ +void +st_translate_vertex_program(struct st_context *st, + struct st_vertex_program *stvp, + const GLuint outputMapping[], + const ubyte *outputSemanticName, + const ubyte *outputSemanticIndex) +{ + struct pipe_context *pipe = st->pipe; + struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; + GLuint defaultOutputMapping[VERT_RESULT_MAX]; + struct pipe_shader_state vs; + GLuint attr, i; + GLuint num_generic = 0; + GLuint num_tokens; + + ubyte vs_input_semantic_name[PIPE_MAX_SHADER_INPUTS]; + ubyte vs_input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + uint vs_num_inputs = 0; + + ubyte vs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; + ubyte vs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; + uint vs_num_outputs = 0; + + memset(&vs, 0, sizeof(vs)); + + if (stvp->Base.IsPositionInvariant) + _mesa_insert_mvp_code(st->ctx, &stvp->Base); + + /* + * Determine number of inputs, the mappings between VERT_ATTRIB_x + * and TGSI generic input indexes, plus input attrib semantic info. + */ + for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) { + if (stvp->Base.Base.InputsRead & (1 << attr)) { + const GLuint slot = vs_num_inputs; + + vs_num_inputs++; + + stvp->input_to_index[attr] = slot; + stvp->index_to_input[slot] = attr; + + switch (attr) { + case VERT_ATTRIB_POS: + vs_input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; + vs_input_semantic_index[slot] = 0; + break; + case VERT_ATTRIB_WEIGHT: + /* fall-through */ + case VERT_ATTRIB_NORMAL: + /* just label as a generic */ + vs_input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + vs_input_semantic_index[slot] = 0; + break; + case VERT_ATTRIB_COLOR0: + vs_input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + vs_input_semantic_index[slot] = 0; + break; + case VERT_ATTRIB_COLOR1: + vs_input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + vs_input_semantic_index[slot] = 1; + break; + case VERT_ATTRIB_FOG: + vs_input_semantic_name[slot] = TGSI_SEMANTIC_FOG; + vs_input_semantic_index[slot] = 0; + break; + case VERT_ATTRIB_POINT_SIZE: + 
vs_input_semantic_name[slot] = TGSI_SEMANTIC_PSIZE; + vs_input_semantic_index[slot] = 0; + break; + case VERT_ATTRIB_TEX0: + case VERT_ATTRIB_TEX1: + case VERT_ATTRIB_TEX2: + case VERT_ATTRIB_TEX3: + case VERT_ATTRIB_TEX4: + case VERT_ATTRIB_TEX5: + case VERT_ATTRIB_TEX6: + case VERT_ATTRIB_TEX7: + vs_input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + vs_input_semantic_index[slot] = num_generic++; + break; + case VERT_ATTRIB_GENERIC0: + case VERT_ATTRIB_GENERIC1: + case VERT_ATTRIB_GENERIC2: + case VERT_ATTRIB_GENERIC3: + case VERT_ATTRIB_GENERIC4: + case VERT_ATTRIB_GENERIC5: + case VERT_ATTRIB_GENERIC6: + case VERT_ATTRIB_GENERIC7: + assert(attr < VERT_ATTRIB_MAX); + vs_input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + vs_input_semantic_index[slot] = num_generic++; + break; + default: + assert(0); + } + } + } + +#if 0 + if (outputMapping && outputSemanticName) { + printf("VERT_RESULT written out_slot semantic_name semantic_index\n"); + for (attr = 0; attr < VERT_RESULT_MAX; attr++) { + printf(" %-2d %c %3d %2d %2d\n", + attr, + ((stvp->Base.Base.OutputsWritten & (1 << attr)) ? 'Y' : ' '), + outputMapping[attr], + outputSemanticName[attr], + outputSemanticIndex[attr]); + } + } +#endif + + /* initialize output semantics to defaults */ + for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { + vs_output_semantic_name[i] = TGSI_SEMANTIC_GENERIC; + vs_output_semantic_index[i] = 0; + } + + num_generic = 0; + /* + * Determine number of outputs, the (default) output register + * mapping and the semantic information for each output. + */ + for (attr = 0; attr < VERT_RESULT_MAX; attr++) { + if (stvp->Base.Base.OutputsWritten & (1 << attr)) { + GLuint slot; + + /* XXX + * Pass in the fragment program's input's semantic info. + * Use the generic semantic indexes from there, instead of + * guessing below. 
+ */ + + if (outputMapping) { + slot = outputMapping[attr]; + assert(slot != ~0); + } + else { + slot = vs_num_outputs; + vs_num_outputs++; + defaultOutputMapping[attr] = slot; + } + + switch (attr) { + case VERT_RESULT_HPOS: + assert(slot == 0); + vs_output_semantic_name[slot] = TGSI_SEMANTIC_POSITION; + vs_output_semantic_index[slot] = 0; + break; + case VERT_RESULT_COL0: + vs_output_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + vs_output_semantic_index[slot] = 0; + break; + case VERT_RESULT_COL1: + vs_output_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + vs_output_semantic_index[slot] = 1; + break; + case VERT_RESULT_BFC0: + vs_output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR; + vs_output_semantic_index[slot] = 0; + break; + case VERT_RESULT_BFC1: + vs_output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR; + vs_output_semantic_index[slot] = 1; + break; + case VERT_RESULT_FOGC: + vs_output_semantic_name[slot] = TGSI_SEMANTIC_FOG; + vs_output_semantic_index[slot] = 0; + break; + case VERT_RESULT_PSIZ: + vs_output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE; + vs_output_semantic_index[slot] = 0; + break; + case VERT_RESULT_EDGE: + assert(0); + break; + case VERT_RESULT_TEX0: + case VERT_RESULT_TEX1: + case VERT_RESULT_TEX2: + case VERT_RESULT_TEX3: + case VERT_RESULT_TEX4: + case VERT_RESULT_TEX5: + case VERT_RESULT_TEX6: + case VERT_RESULT_TEX7: + /* fall-through */ + case VERT_RESULT_VAR0: + /* fall-through */ + default: + if (outputSemanticName) { + /* use provided semantic into */ + assert(outputSemanticName[attr] != TGSI_SEMANTIC_COUNT); + vs_output_semantic_name[slot] = outputSemanticName[attr]; + vs_output_semantic_index[slot] = outputSemanticIndex[attr]; + } + else { + /* use default semantic info */ + vs_output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + vs_output_semantic_index[slot] = num_generic++; + } + } + } + } + + assert(vs_output_semantic_name[0] == TGSI_SEMANTIC_POSITION); + + + if (outputMapping) { + /* find max output slot referenced to compute 
vs_num_outputs */ + GLuint maxSlot = 0; + for (attr = 0; attr < VERT_RESULT_MAX; attr++) { + if (outputMapping[attr] != ~0 && outputMapping[attr] > maxSlot) + maxSlot = outputMapping[attr]; + } + vs_num_outputs = maxSlot + 1; + } + else { + outputMapping = defaultOutputMapping; + } + + /* free old shader state, if any */ + if (stvp->state.tokens) { + FREE((void *) stvp->state.tokens); + stvp->state.tokens = NULL; + } + if (stvp->driver_shader) { + cso_delete_vertex_shader(st->cso_context, stvp->driver_shader); + stvp->driver_shader = NULL; + } + + /* XXX: fix static allocation of tokens: + */ + num_tokens = st_translate_mesa_program( TGSI_PROCESSOR_VERTEX, + &stvp->Base.Base, + /* inputs */ + vs_num_inputs, + stvp->input_to_index, + vs_input_semantic_name, + vs_input_semantic_index, + NULL, + /* outputs */ + vs_num_outputs, + outputMapping, + vs_output_semantic_name, + vs_output_semantic_index, + /* tokenized result */ + tokens, ST_MAX_SHADER_TOKENS); + + assert(num_tokens < ST_MAX_SHADER_TOKENS); + + vs.tokens = (struct tgsi_token *) + mem_dup(tokens, num_tokens * sizeof(tokens[0])); + + stvp->num_inputs = vs_num_inputs; + stvp->state = vs; /* struct copy */ + stvp->driver_shader = pipe->create_vs_state(pipe, &vs); + + if (0) + _mesa_print_program(&stvp->Base.Base); + + if (TGSI_DEBUG) + tgsi_dump( vs.tokens, 0 ); +} + + + +/** + * Translate a Mesa fragment shader into a TGSI shader. + * \param inputMapping to map fragment program input registers to TGSI + * input slots + * \param tokensOut destination for TGSI tokens + * \return pointer to cached pipe_shader object. + */ +void +st_translate_fragment_program(struct st_context *st, + struct st_fragment_program *stfp, + const GLuint inputMapping[]) +{ + struct pipe_context *pipe = st->pipe; + struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; + GLuint outputMapping[FRAG_RESULT_MAX]; + GLuint defaultInputMapping[FRAG_ATTRIB_MAX]; + struct pipe_shader_state fs; + GLuint interpMode[16]; /* XXX size? 
*/ + GLuint attr; + const GLbitfield inputsRead = stfp->Base.Base.InputsRead; + GLuint vslot = 0; + GLuint num_generic = 0; + GLuint num_tokens; + + uint fs_num_inputs = 0; + + ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; + ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; + uint fs_num_outputs = 0; + + memset(&fs, 0, sizeof(fs)); + + /* which vertex output goes to the first fragment input: */ + if (inputsRead & FRAG_BIT_WPOS) + vslot = 0; + else + vslot = 1; + + /* + * Convert Mesa program inputs to TGSI input register semantics. + */ + for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) { + if (inputsRead & (1 << attr)) { + const GLuint slot = fs_num_inputs; + + defaultInputMapping[attr] = slot; + + stfp->input_map[slot] = vslot++; + + fs_num_inputs++; + + switch (attr) { + case FRAG_ATTRIB_WPOS: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; + stfp->input_semantic_index[slot] = 0; + interpMode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + case FRAG_ATTRIB_COL0: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + stfp->input_semantic_index[slot] = 0; + interpMode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + case FRAG_ATTRIB_COL1: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + stfp->input_semantic_index[slot] = 1; + interpMode[slot] = TGSI_INTERPOLATE_LINEAR; + break; + case FRAG_ATTRIB_FOGC: + if (stfp->Base.UsesPointCoord) + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + else + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_FOG; + stfp->input_semantic_index[slot] = 0; + interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; + break; + case FRAG_ATTRIB_TEX0: + case FRAG_ATTRIB_TEX1: + case FRAG_ATTRIB_TEX2: + case FRAG_ATTRIB_TEX3: + case FRAG_ATTRIB_TEX4: + case FRAG_ATTRIB_TEX5: + case FRAG_ATTRIB_TEX6: + case FRAG_ATTRIB_TEX7: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + stfp->input_semantic_index[slot] = num_generic++; + interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; + break; + case 
FRAG_ATTRIB_VAR0: + /* fall-through */ + default: + stfp->input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + stfp->input_semantic_index[slot] = num_generic++; + interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; + } + } + } + + /* + * Semantics and mapping for outputs + */ + { + uint numColors = 0; + GLbitfield outputsWritten = stfp->Base.Base.OutputsWritten; + + /* if z is written, emit that first */ + if (outputsWritten & (1 << FRAG_RESULT_DEPR)) { + fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION; + fs_output_semantic_index[fs_num_outputs] = 0; + outputMapping[FRAG_RESULT_DEPR] = fs_num_outputs; + fs_num_outputs++; + outputsWritten &= ~(1 << FRAG_RESULT_DEPR); + } + + /* handle remaning outputs (color) */ + for (attr = 0; attr < FRAG_RESULT_MAX; attr++) { + if (outputsWritten & (1 << attr)) { + switch (attr) { + case FRAG_RESULT_DEPR: + /* handled above */ + assert(0); + break; + case FRAG_RESULT_COLR: + fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR; + fs_output_semantic_index[fs_num_outputs] = numColors; + outputMapping[attr] = fs_num_outputs; + numColors++; + break; + default: + assert(0); + } + fs_num_outputs++; + } + } + } + + if (!inputMapping) + inputMapping = defaultInputMapping; + + /* XXX: fix static allocation of tokens: + */ + num_tokens = st_translate_mesa_program( TGSI_PROCESSOR_FRAGMENT, + &stfp->Base.Base, + /* inputs */ + fs_num_inputs, + inputMapping, + stfp->input_semantic_name, + stfp->input_semantic_index, + interpMode, + /* outputs */ + fs_num_outputs, + outputMapping, + fs_output_semantic_name, + fs_output_semantic_index, + /* tokenized result */ + tokens, ST_MAX_SHADER_TOKENS); + + assert(num_tokens < ST_MAX_SHADER_TOKENS); + + fs.tokens = (struct tgsi_token *) + mem_dup(tokens, num_tokens * sizeof(tokens[0])); + + stfp->state = fs; /* struct copy */ + stfp->driver_shader = pipe->create_fs_state(pipe, &fs); + + if (0) + _mesa_print_program(&stfp->Base.Base); + + if (TGSI_DEBUG) + tgsi_dump( fs.tokens, 
0/*TGSI_DUMP_VERBOSE*/ ); +} + diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h new file mode 100644 index 0000000000..078e2c42a6 --- /dev/null +++ b/src/mesa/state_tracker/st_program.h @@ -0,0 +1,154 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + */ + + +#ifndef ST_PROGRAM_H +#define ST_PROGRAM_H + +#include "main/mtypes.h" +#include "shader/program.h" +#include "pipe/p_shader_tokens.h" + + +struct cso_fragment_shader; +struct cso_vertex_shader; +struct translated_vertex_program; + + +/** + * Derived from Mesa gl_fragment_program: + */ +struct st_fragment_program +{ + struct gl_fragment_program Base; + GLuint serialNo; + + GLuint input_to_slot[FRAG_ATTRIB_MAX]; /**< Maps FRAG_ATTRIB_x to slot */ + GLuint num_input_slots; + + /** map FP input back to VP output */ + GLuint input_map[PIPE_MAX_SHADER_INPUTS]; + + ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; + ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + + struct pipe_shader_state state; /**< TGSI tokens, set by st_translate_fragment_program() */ + void *driver_shader; /**< handle returned by pipe->create_fs_state() */ + + GLuint param_state; + + /** List of vertex programs which have been translated such that their + * outputs match this fragment program's inputs. 
+ */ + struct translated_vertex_program *vertex_programs; + + /** Program prefixed with glBitmap prologue */ + struct st_fragment_program *bitmap_program; + uint bitmap_sampler; +}; + + +/** + * Derived from Mesa gl_vertex_program: + */ +struct st_vertex_program +{ + struct gl_vertex_program Base; /**< The Mesa vertex program */ + GLuint serialNo; + + /** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */ + GLuint input_to_index[VERT_ATTRIB_MAX]; + /** maps a TGSI input index back to a Mesa VERT_ATTRIB_x */ + GLuint index_to_input[PIPE_MAX_SHADER_INPUTS]; + + GLuint num_inputs; + + struct pipe_shader_state state; /**< TGSI tokens, set by st_translate_vertex_program() */ + void *driver_shader; /**< handle returned by pipe->create_vs_state() */ + + /** For using our private draw module (glRasterPos) */ + struct draw_vertex_shader *draw_shader; + + GLuint param_state; +}; + + /** Cast a Mesa gl_fragment_program pointer to the derived st_fragment_program (Base is its first member). */ +static INLINE struct st_fragment_program * +st_fragment_program( struct gl_fragment_program *fp ) +{ + return (struct st_fragment_program *)fp; +} + + /** Cast a Mesa gl_vertex_program pointer to the derived st_vertex_program (Base is its first member). */ +static INLINE struct st_vertex_program * +st_vertex_program( struct gl_vertex_program *vp ) +{ + return (struct st_vertex_program *)vp; +} + + /** Point *ptr at prog by forwarding to _mesa_reference_program() with both pointers cast to gl_program. */ +static INLINE void +st_reference_vertprog(struct st_context *st, + struct st_vertex_program **ptr, + struct st_vertex_program *prog) +{ + _mesa_reference_program(st->ctx, + (struct gl_program **) ptr, + (struct gl_program *) prog); +} + /** Fragment-program counterpart of st_reference_vertprog(). 
*/ +static INLINE void +st_reference_fragprog(struct st_context *st, + struct st_fragment_program **ptr, + struct st_fragment_program *prog) +{ + _mesa_reference_program(st->ctx, + (struct gl_program **) ptr, + (struct gl_program *) prog); +} + + +extern void +st_translate_fragment_program(struct st_context *st, + struct st_fragment_program *fp, + const GLuint inputMapping[]); + + +extern void +st_translate_vertex_program(struct st_context *st, + struct st_vertex_program *vp, + const GLuint vert_output_to_slot[], + const ubyte *fs_input_semantic_name, + const ubyte *fs_input_semantic_index); + + +#endif diff --git a/src/mesa/state_tracker/st_public.h 
b/src/mesa/state_tracker/st_public.h new file mode 100644 index 0000000000..6ef30f4403 --- /dev/null +++ b/src/mesa/state_tracker/st_public.h @@ -0,0 +1,118 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef ST_PUBLIC_H +#define ST_PUBLIC_H + +#include "GL/gl.h" +#include "GL/internal/glcore.h" /* for __GLcontextModes */ + +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" + + +#define ST_SURFACE_FRONT_LEFT 0 +#define ST_SURFACE_BACK_LEFT 1 +#define ST_SURFACE_FRONT_RIGHT 2 +#define ST_SURFACE_BACK_RIGHT 3 +#define ST_SURFACE_DEPTH 8 + +#define ST_TEXTURE_2D 0x2 +#define ST_TEXTURE_RGB 0x1 +#define ST_TEXTURE_RGBA 0x2 + + +struct st_context; +struct st_framebuffer; +struct pipe_context; +struct pipe_fence_handle; +struct pipe_surface; + + +struct st_context *st_create_context(struct pipe_context *pipe, + const __GLcontextModes *visual, + struct st_context *share); + +void st_destroy_context( struct st_context *st ); + +void st_copy_context_state(struct st_context *dst, struct st_context *src, + uint mask); + +struct st_framebuffer *st_create_framebuffer( const __GLcontextModes *visual, + enum pipe_format colorFormat, + enum pipe_format depthFormat, + enum pipe_format stencilFormat, + uint width, uint height, + void *privateData); + +void st_resize_framebuffer( struct st_framebuffer *stfb, + uint width, uint height ); + +void st_set_framebuffer_surface(struct st_framebuffer *stfb, + uint surfIndex, struct pipe_surface *surf); + +void st_get_framebuffer_dimensions( struct st_framebuffer *stfb, + uint *width, uint *height); + +struct pipe_surface *st_get_framebuffer_surface(struct st_framebuffer *stfb, + uint surfIndex); + +struct pipe_texture *st_get_framebuffer_texture(struct st_framebuffer *stfb, + uint surfIndex); + +void *st_framebuffer_private( struct st_framebuffer *stfb ); + +void st_unreference_framebuffer( struct st_framebuffer *stfb ); + +void st_make_current(struct st_context *st, + struct st_framebuffer *draw, + struct st_framebuffer *read); + +void st_flush( struct st_context *st, uint pipeFlushFlags, + struct pipe_fence_handle **fence ); +void st_finish( 
struct st_context *st ); + +void st_notify_swapbuffers(struct st_framebuffer *stfb); +void st_notify_swapbuffers_complete(struct st_framebuffer *stfb); + + +/** Redirect rendering into stfb's surface to a texture image */ +int st_bind_teximage(struct st_framebuffer *stfb, uint surfIndex, + int target, int format, int level); + +/** Undo surface-to-texture binding */ +int st_release_teximage(struct st_framebuffer *stfb, uint surfIndex, + int target, int format, int level); + + +/** Generic function type */ +typedef void (*st_proc)(); + +st_proc st_get_proc_address(const char *procname); + + +#endif diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c new file mode 100644 index 0000000000..29b1634762 --- /dev/null +++ b/src/mesa/state_tracker/st_texture.c @@ -0,0 +1,441 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "st_context.h" +#include "st_format.h" +#include "st_public.h" +#include "st_texture.h" +#include "st_cb_fbo.h" +#include "main/enums.h" +#include "main/teximage.h" + +#undef Elements /* fix re-defined macro warning */ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_inlines.h" +#include "util/u_rect.h" + + +#define DBG if(0) printf + +#if 0 +static GLenum +target_to_target(GLenum target) +{ + switch (target) { + case GL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB: + return GL_TEXTURE_CUBE_MAP_ARB; + default: + return target; + } +} +#endif + + +/** + * Allocate a new pipe_texture object + * width0, height0, depth0 are the dimensions of the level 0 image + * (the highest resolution). last_level indicates how many mipmap levels + * to allocate storage for. For non-mipmapped textures, this will be zero. 
+ */ +struct pipe_texture * +st_texture_create(struct st_context *st, + enum pipe_texture_target target, + enum pipe_format format, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, + GLuint compress_byte, + GLuint usage ) +{ + struct pipe_texture pt, *newtex; + struct pipe_screen *screen = st->pipe->screen; + + assert(target <= PIPE_TEXTURE_CUBE); + + DBG("%s target %s format %s last_level %d\n", __FUNCTION__, + _mesa_lookup_enum_by_nr(target), + _mesa_lookup_enum_by_nr(format), last_level); + + assert(format); + assert(screen->is_format_supported(screen, format, target, + PIPE_TEXTURE_USAGE_SAMPLER, 0)); + + memset(&pt, 0, sizeof(pt)); + pt.target = target; + pt.format = format; + pt.last_level = last_level; + pt.width[0] = width0; + pt.height[0] = height0; + pt.depth[0] = depth0; + pt.compressed = compress_byte ? 1 : 0; + pf_get_block(format, &pt.block); + pt.tex_usage = usage; + + newtex = screen->texture_create(screen, &pt); + + assert(!newtex || newtex->refcount == 1); + + return newtex; +} + + +/** + * Check if a texture image be pulled into a unified mipmap texture. + * This mirrors the completeness test in a lot of ways. + * + * Not sure whether I want to pass gl_texture_image here. + */ +GLboolean +st_texture_match_image(const struct pipe_texture *pt, + const struct gl_texture_image *image, + GLuint face, GLuint level) +{ + /* Images with borders are never pulled into mipmap textures. + */ + if (image->Border) + return GL_FALSE; + + if (st_mesa_format_to_pipe_format(image->TexFormat->MesaFormat) != pt->format || + image->IsCompressed != pt->compressed) + return GL_FALSE; + + /* Test image dimensions against the base level image adjusted for + * minification. This will also catch images not present in the + * texture, changed targets, etc. 
+ */ + if (image->Width != pt->width[level] || + image->Height != pt->height[level] || + image->Depth != pt->depth[level]) + return GL_FALSE; + + return GL_TRUE; +} + + +#if 000 +/* Although we use the image_offset[] array to store relative offsets + * to cube faces, Mesa doesn't know anything about this and expects + * each cube face to be treated as a separate image. + * + * These functions present that view to mesa: + */ +const GLuint * +st_texture_depth_offsets(struct pipe_texture *pt, GLuint level) +{ + static const GLuint zero = 0; + + if (pt->target != PIPE_TEXTURE_3D || pt->level[level].nr_images == 1) + return &zero; + else + return pt->level[level].image_offset; +} + + +/** + * Return the offset to the given mipmap texture image within the + * texture memory buffer, in bytes. + */ +GLuint +st_texture_image_offset(const struct pipe_texture * pt, + GLuint face, GLuint level) +{ + if (pt->target == PIPE_TEXTURE_CUBE) + return (pt->level[level].level_offset + + pt->level[level].image_offset[face] * pt->cpp); + else + return pt->level[level].level_offset; +} +#endif + + +/** + * Map a teximage in a mipmap texture. + * \param row_stride returns row stride in bytes + * \param image_stride returns image stride in bytes (for 3D textures). 
+ * \return address of mapping + */ +GLubyte * +st_texture_image_map(struct st_context *st, struct st_texture_image *stImage, + GLuint zoffset, + GLuint flags ) +{ + struct pipe_screen *screen = st->pipe->screen; + struct pipe_texture *pt = stImage->pt; + DBG("%s \n", __FUNCTION__); + + stImage->surface = screen->get_tex_surface(screen, pt, stImage->face, + stImage->level, zoffset, + flags); + + if (stImage->surface) + return screen->surface_map(screen, stImage->surface, flags); + else + return NULL; +} + + +void +st_texture_image_unmap(struct st_context *st, + struct st_texture_image *stImage) +{ + struct pipe_screen *screen = st->pipe->screen; + + DBG("%s\n", __FUNCTION__); + + screen->surface_unmap(screen, stImage->surface); + + pipe_surface_reference(&stImage->surface, NULL); +} + + + +/** + * Upload data to a rectangular sub-region. Lots of choices how to do this: + * + * - memcpy by span to current destination + * - upload data as new buffer and blit + * + * Currently always memcpy. + */ +static void +st_surface_data(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + const void *src, unsigned src_stride, + unsigned srcx, unsigned srcy, unsigned width, unsigned height) +{ + struct pipe_screen *screen = pipe->screen; + void *map = screen->surface_map(screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE); + + pipe_copy_rect(map, + &dst->block, + dst->stride, + dstx, dsty, + width, height, + src, src_stride, + srcx, srcy); + + screen->surface_unmap(screen, dst); +} + + +/* Upload data for a particular image. 
+ */ +void +st_texture_image_data(struct pipe_context *pipe, + struct pipe_texture *dst, + GLuint face, + GLuint level, + void *src, + GLuint src_row_stride, GLuint src_image_stride) +{ + struct pipe_screen *screen = pipe->screen; + GLuint depth = dst->depth[level]; + GLuint i; + const GLubyte *srcUB = src; + struct pipe_surface *dst_surface; + + DBG("%s\n", __FUNCTION__); + for (i = 0; i < depth; i++) { + dst_surface = screen->get_tex_surface(screen, dst, face, level, i, + PIPE_BUFFER_USAGE_CPU_WRITE); + + st_surface_data(pipe, dst_surface, + 0, 0, /* dstx, dsty */ + srcUB, + src_row_stride, + 0, 0, /* source x, y */ + dst->width[level], dst->height[level]); /* width, height */ + + screen->tex_surface_release(screen, &dst_surface); + + srcUB += src_image_stride; + } +} + + +/* Copy mipmap image between textures + */ +void +st_texture_image_copy(struct pipe_context *pipe, + struct pipe_texture *dst, GLuint dstLevel, + struct pipe_texture *src, + GLuint face) +{ + struct pipe_screen *screen = pipe->screen; + GLuint width = dst->width[dstLevel]; + GLuint height = dst->height[dstLevel]; + GLuint depth = dst->depth[dstLevel]; + struct pipe_surface *src_surface; + struct pipe_surface *dst_surface; + GLuint i; + + for (i = 0; i < depth; i++) { + GLuint srcLevel; + + /* find src texture level of needed size */ + for (srcLevel = 0; srcLevel <= src->last_level; srcLevel++) { + if (src->width[srcLevel] == width && + src->height[srcLevel] == height) { + break; + } + } + assert(src->width[srcLevel] == width); + assert(src->height[srcLevel] == height); + +#if 0 + { + src_surface = screen->get_tex_surface(screen, src, face, srcLevel, i, + PIPE_BUFFER_USAGE_CPU_READ); + ubyte *map = screen->surface_map(screen, src_surface, PIPE_BUFFER_USAGE_CPU_READ); + map += src_surface->width * src_surface->height * 4 / 2; + printf("%s center pixel: %d %d %d %d (pt %p[%d] -> %p[%d])\n", + __FUNCTION__, + map[0], map[1], map[2], map[3], + src, srcLevel, dst, dstLevel); + + 
screen->surface_unmap(screen, src_surface); + pipe_surface_reference(&src_surface, NULL); + } +#endif + + dst_surface = screen->get_tex_surface(screen, dst, face, dstLevel, i, + PIPE_BUFFER_USAGE_GPU_WRITE); + + src_surface = screen->get_tex_surface(screen, src, face, srcLevel, i, + PIPE_BUFFER_USAGE_GPU_READ); + + pipe->surface_copy(pipe, + FALSE, + dst_surface, + 0, 0, /* destX, Y */ + src_surface, + 0, 0, /* srcX, Y */ + width, height); + + screen->tex_surface_release(screen, &src_surface); + screen->tex_surface_release(screen, &dst_surface); + } +} + + +/** Redirect rendering into stfb's surface to a texture image */ +int +st_bind_teximage(struct st_framebuffer *stfb, uint surfIndex, + int target, int format, int level) +{ + GET_CURRENT_CONTEXT(ctx); + struct st_context *st = ctx->st; + struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; + const GLuint unit = ctx->Texture.CurrentUnit; + struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; + struct gl_texture_object *texObj; + struct gl_texture_image *texImage; + struct st_texture_image *stImage; + struct st_renderbuffer *strb; + GLint face = 0, slice = 0; + + assert(surfIndex <= ST_SURFACE_DEPTH); + + strb = st_renderbuffer(stfb->Base.Attachment[surfIndex].Renderbuffer); + + if (strb->texture_save || strb->surface_save) { + /* Error! 
*/ + return 0; + } + + if (target == ST_TEXTURE_2D) { + texObj = texUnit->Current2D; + texImage = _mesa_get_tex_image(ctx, texObj, GL_TEXTURE_2D, level); + stImage = st_texture_image(texImage); + } + else { + /* unsupported target */ + return 0; + } + + st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); + + /* save the renderbuffer's surface/texture info */ + pipe_texture_reference(&strb->texture_save, strb->texture); + pipe_surface_reference(&strb->surface_save, strb->surface); + + /* plug in new surface/texture info */ + pipe_texture_reference(&strb->texture, stImage->pt); + strb->surface = screen->get_tex_surface(screen, strb->texture, + face, level, slice, + (PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE)); + + st->dirty.st |= ST_NEW_FRAMEBUFFER; + + return 1; +} + + +/** Undo surface-to-texture binding */ +int +st_release_teximage(struct st_framebuffer *stfb, uint surfIndex, + int target, int format, int level) +{ + GET_CURRENT_CONTEXT(ctx); + struct st_context *st = ctx->st; + struct st_renderbuffer *strb; + + assert(surfIndex <= ST_SURFACE_DEPTH); + + strb = st_renderbuffer(stfb->Base.Attachment[surfIndex].Renderbuffer); + + if (!strb->texture_save || !strb->surface_save) { + /* Error! */ + return 0; + } + + st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); + + /* free tex surface, restore original */ + pipe_surface_reference(&strb->surface, strb->surface_save); + pipe_texture_reference(&strb->texture, strb->texture_save); + + pipe_surface_reference(&strb->surface_save, NULL); + pipe_texture_reference(&strb->texture_save, NULL); + + st->dirty.st |= ST_NEW_FRAMEBUFFER; + + return 1; +} diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h new file mode 100644 index 0000000000..31f66ad52c --- /dev/null +++ b/src/mesa/state_tracker/st_texture.h @@ -0,0 +1,179 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 
+ * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef ST_TEXTURE_H +#define ST_TEXTURE_H + + +#include "main/mtypes.h" + +struct pipe_context; +struct pipe_texture; + + +struct st_texture_image +{ + struct gl_texture_image base; + + /* These aren't stored in gl_texture_image + */ + GLuint level; + GLuint face; + + /* If stImage->pt != NULL, image data is stored here. + * Else if stImage->base.Data != NULL, image is stored there. + * Else there is no image data. 
+ */ + struct pipe_texture *pt; + + struct pipe_surface *surface; +}; + + + +struct st_texture_object +{ + struct gl_texture_object base; /* The "parent" object */ + + /* The texture must include at levels [0..lastLevel] once validated: + */ + GLuint lastLevel; + + /* On validation any active images held in main memory or in other + * textures will be copied to this texture and the old storage freed. + */ + struct pipe_texture *pt; + + GLboolean teximage_realloc; +}; + + +static INLINE struct st_texture_image * +st_texture_image(struct gl_texture_image *img) +{ + return (struct st_texture_image *) img; +} + +static INLINE struct st_texture_object * +st_texture_object(struct gl_texture_object *obj) +{ + return (struct st_texture_object *) obj; +} + + +static INLINE struct pipe_texture * +st_get_texobj_texture(struct gl_texture_object *texObj) +{ + struct st_texture_object *stObj = st_texture_object(texObj); + return stObj ? stObj->pt : NULL; +} + + +static INLINE struct pipe_texture * +st_get_stobj_texture(struct st_texture_object *stObj) +{ + return stObj ? stObj->pt : NULL; +} + +static INLINE GLboolean pf_is_depth_stencil( enum pipe_format format ) +{ + return (pf_get_component_bits( format, PIPE_FORMAT_COMP_Z ) + + pf_get_component_bits( format, PIPE_FORMAT_COMP_S )) != 0; +} + + +extern struct pipe_texture * +st_texture_create(struct st_context *st, + enum pipe_texture_target target, + enum pipe_format format, + GLuint last_level, + GLuint width0, + GLuint height0, + GLuint depth0, + GLuint compress_byte, + GLuint tex_usage ); + + +/* Check if an image fits into an existing texture object. + */ +extern GLboolean +st_texture_match_image(const struct pipe_texture *pt, + const struct gl_texture_image *image, + GLuint face, GLuint level); + +/* Return a pointer to an image within a texture. Return image stride as + * well. 
+ */ +extern GLubyte * +st_texture_image_map(struct st_context *st, + struct st_texture_image *stImage, + GLuint zoffset, + GLuint flags); + +extern void +st_texture_image_unmap(struct st_context *st, + struct st_texture_image *stImage); + + +/* Return pointers to each 2d slice within an image. Indexed by depth + * value. + */ +extern const GLuint * +st_texture_depth_offsets(struct pipe_texture *pt, GLuint level); + + +/* Return the linear offset of an image relative to the start of its region. + */ +extern GLuint +st_texture_image_offset(const struct pipe_texture *pt, + GLuint face, GLuint level); + +extern GLuint +st_texture_texel_offset(const struct pipe_texture * pt, + GLuint face, GLuint level, + GLuint col, GLuint row, GLuint img); + + +/* Upload an image into a texture + */ +extern void +st_texture_image_data(struct pipe_context *pipe, + struct pipe_texture *dst, + GLuint face, GLuint level, void *src, + GLuint src_row_pitch, GLuint src_image_pitch); + + +/* Copy an image between two textures + */ +extern void +st_texture_image_copy(struct pipe_context *pipe, + struct pipe_texture *dst, GLuint dstLevel, + struct pipe_texture *src, + GLuint face); + + +#endif diff --git a/src/mesa/swrast/s_context.c b/src/mesa/swrast/s_context.c index 62857ddeb0..297940adbd 100644 --- a/src/mesa/swrast/s_context.c +++ b/src/mesa/swrast/s_context.c @@ -533,6 +533,9 @@ _swrast_update_texture_samplers(GLcontext *ctx) SWcontext *swrast = SWRAST_CONTEXT(ctx); GLuint u; + if (!swrast) + return; /* pipe hack */ + for (u = 0; u < ctx->Const.MaxTextureImageUnits; u++) { const struct gl_texture_object *tObj = ctx->Texture.Unit[u]._Current; /* Note: If tObj is NULL, the sample function will be a simple diff --git a/src/mesa/tnl/t_context.c b/src/mesa/tnl/t_context.c index 8977fadcca..ce37dc0428 100644 --- a/src/mesa/tnl/t_context.c +++ b/src/mesa/tnl/t_context.c @@ -78,6 +78,9 @@ _tnl_CreateContext( GLcontext *ctx ) tnl->nr_blocks = 0; + /* plug in the VBO drawing function */ + 
vbo_set_draw_func(ctx, _tnl_draw_prims); + return GL_TRUE; } diff --git a/src/mesa/tnl/t_vp_build.h b/src/mesa/tnl/t_vp_build.h index d1c147ff03..d6ebc66c04 100644 --- a/src/mesa/tnl/t_vp_build.h +++ b/src/mesa/tnl/t_vp_build.h @@ -29,6 +29,13 @@ #include "main/mtypes.h" +#define TNL_FIXED_FUNCTION_STATE_FLAGS (_NEW_PROGRAM | \ + _NEW_LIGHT | \ + _NEW_TEXTURE | \ + _NEW_TEXTURE_MATRIX | \ + _NEW_TRANSFORM | \ + _NEW_FOG | \ + _NEW_POINT) extern void _tnl_UpdateFixedFunctionProgram( GLcontext *ctx ); diff --git a/src/mesa/tnl/tnl.h b/src/mesa/tnl/tnl.h index 9a1717bf89..4d628aa9a6 100644 --- a/src/mesa/tnl/tnl.h +++ b/src/mesa/tnl/tnl.h @@ -81,6 +81,8 @@ _tnl_draw_prims( GLcontext *ctx, GLuint min_index, GLuint max_index); +extern void +_mesa_load_tracked_matrices(GLcontext *ctx); extern void _tnl_RasterPos(GLcontext *ctx, const GLfloat vObj[4]); diff --git a/src/mesa/vbo/vbo.h b/src/mesa/vbo/vbo.h index 4c51b44cda..5362226c2f 100644 --- a/src/mesa/vbo/vbo.h +++ b/src/mesa/vbo/vbo.h @@ -117,4 +117,22 @@ void vbo_rebase_prims( GLcontext *ctx, void vbo_use_buffer_objects(GLcontext *ctx); +void vbo_set_draw_func(GLcontext *ctx, vbo_draw_func func); + + +void GLAPIENTRY +_vbo_Color4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a); + +void GLAPIENTRY +_vbo_Normal3f(GLfloat x, GLfloat y, GLfloat z); + +void GLAPIENTRY +_vbo_MultiTexCoord4f(GLenum target, GLfloat s, GLfloat t, GLfloat r, GLfloat q); + +void GLAPIENTRY +_vbo_Materialfv(GLenum face, GLenum pname, const GLfloat *params); + +void GLAPIENTRY +_vbo_VertexAttrib4f(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); + #endif diff --git a/src/mesa/vbo/vbo_context.c b/src/mesa/vbo/vbo_context.c index a73a46b9c8..bbf745b0c6 100644 --- a/src/mesa/vbo/vbo_context.c +++ b/src/mesa/vbo/vbo_context.c @@ -31,6 +31,7 @@ #include "vbo.h" #include "vbo_context.h" +#if 0 /* Reach out and grab this to use as the default: */ extern void _tnl_draw_prims( GLcontext *ctx, @@ -40,6 +41,7 @@ extern void _tnl_draw_prims( GLcontext 
*ctx, const struct _mesa_index_buffer *ib, GLuint min_index, GLuint max_index ); +#endif @@ -212,7 +214,9 @@ GLboolean _vbo_CreateContext( GLcontext *ctx ) /* By default: */ +#if 0 /* dead - see vbo_set_draw_func() */ vbo->draw_prims = _tnl_draw_prims; +#endif /* Hook our functions into exec and compile dispatch tables. These * will pretty much be permanently installed, which means that the @@ -240,10 +244,20 @@ void _vbo_DestroyContext( GLcontext *ctx ) ctx->aelt_context = NULL; } - vbo_exec_destroy(ctx); + if (vbo_context(ctx)) { + vbo_exec_destroy(ctx); #if FEATURE_dlist - vbo_save_destroy(ctx); + vbo_save_destroy(ctx); #endif - FREE(vbo_context(ctx)); - ctx->swtnl_im = NULL; + FREE(vbo_context(ctx)); + ctx->swtnl_im = NULL; + } +} + + +void vbo_set_draw_func(GLcontext *ctx, vbo_draw_func func) +{ + struct vbo_context *vbo = vbo_context(ctx); + vbo->draw_prims = func; } + diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index d48f5230cb..a6ce26ffed 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -143,29 +143,37 @@ static void vbo_exec_copy_to_current( struct vbo_exec_context *exec ) for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) { if (exec->vtx.attrsz[i]) { - GLfloat *current = (GLfloat *)vbo->currval[i].Ptr; - /* Note: the exec->vtx.current[i] pointers point into the * ctx->Current.Attrib and ctx->Light.Material.Attrib arrays. */ - COPY_CLEAN_4V(current, - exec->vtx.attrsz[i], - exec->vtx.attrptr[i]); + GLfloat *current = (GLfloat *)vbo->currval[i].Ptr; + GLfloat tmp[4]; + + COPY_CLEAN_4V(tmp, + exec->vtx.attrsz[i], + exec->vtx.attrptr[i]); + + if (memcmp(current, tmp, sizeof(tmp)) != 0) + { + memcpy(current, tmp, sizeof(tmp)); - /* Given that we explicitly state size here, there is no need - * for the COPY_CLEAN above, could just copy 16 bytes and be - * done. The only problem is when Mesa accesses ctx->Current - * directly. 
- */ - vbo->currval[i].Size = exec->vtx.attrsz[i]; - - /* This triggers rather too much recalculation of Mesa state - * that doesn't get used (eg light positions). - */ - if (i >= VBO_ATTRIB_MAT_FRONT_AMBIENT && - i <= VBO_ATTRIB_MAT_BACK_INDEXES) - ctx->NewState |= _NEW_LIGHT; + /* Given that we explicitly state size here, there is no need + * for the COPY_CLEAN above, could just copy 16 bytes and be + * done. The only problem is when Mesa accesses ctx->Current + * directly. + */ + vbo->currval[i].Size = exec->vtx.attrsz[i]; + + /* This triggers rather too much recalculation of Mesa state + * that doesn't get used (eg light positions). + */ + if (i >= VBO_ATTRIB_MAT_FRONT_AMBIENT && + i <= VBO_ATTRIB_MAT_BACK_INDEXES) + ctx->NewState |= _NEW_LIGHT; + + ctx->NewState |= _NEW_CURRENT_ATTRIB; + } } } @@ -693,7 +701,8 @@ void vbo_exec_vtx_init( struct vbo_exec_context *exec ) GLuint i; /* Allocate a buffer object. Will just reuse this object - * continuously. + * continuously, unless vbo_use_buffer_objects() is called to enable + * use of real VBOs. 
*/ _mesa_reference_buffer_object(ctx, &exec->vtx.bufferobj, @@ -772,3 +781,36 @@ static void reset_attrfv( struct vbo_exec_context *exec ) exec->vtx.vertex_size = 0; } + +void GLAPIENTRY +_vbo_Color4f(GLfloat r, GLfloat g, GLfloat b, GLfloat a) +{ + vbo_Color4f(r, g, b, a); +} + + +void GLAPIENTRY +_vbo_Normal3f(GLfloat x, GLfloat y, GLfloat z) +{ + vbo_Normal3f(x, y, z); +} + + +void GLAPIENTRY +_vbo_MultiTexCoord4f(GLenum target, GLfloat s, GLfloat t, GLfloat r, GLfloat q) +{ + vbo_MultiTexCoord4f(target, s, t, r, q); +} + +void GLAPIENTRY +_vbo_Materialfv(GLenum face, GLenum pname, const GLfloat *params) +{ + vbo_Materialfv(face, pname, params); +} + + +void GLAPIENTRY +_vbo_VertexAttrib4f(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w) +{ + vbo_VertexAttrib4fARB(index, x, y, z, w); +} diff --git a/src/mesa/vbo/vbo_exec_array.c b/src/mesa/vbo/vbo_exec_array.c index 82f4db17d1..8871e10cf6 100644 --- a/src/mesa/vbo/vbo_exec_array.c +++ b/src/mesa/vbo/vbo_exec_array.c @@ -30,6 +30,8 @@ #include "main/state.h" #include "main/api_validate.h" #include "main/api_noop.h" +#include "main/varray.h" +#include "glapi/dispatch.h" #include "vbo_context.h" @@ -125,6 +127,7 @@ static void recalculate_input_bindings( GLcontext *ctx ) struct vbo_context *vbo = vbo_context(ctx); struct vbo_exec_context *exec = &vbo->exec; const struct gl_client_array **inputs = &exec->array.inputs[0]; + GLbitfield const_inputs = 0x0; GLuint i; exec->array.program_mode = get_program_mode(ctx); @@ -139,19 +142,24 @@ static void recalculate_input_bindings( GLcontext *ctx ) for (i = 0; i <= VERT_ATTRIB_TEX7; i++) { if (exec->array.legacy_array[i]->Enabled) inputs[i] = exec->array.legacy_array[i]; - else + else { inputs[i] = &vbo->legacy_currval[i]; + const_inputs |= 1 << i; + } } for (i = 0; i < MAT_ATTRIB_MAX; i++) { inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->mat_currval[i]; + const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); } /* Could use just about anything, just to fill in the empty * 
slots: */ - for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++) + for (i = MAT_ATTRIB_MAX; i < VERT_ATTRIB_MAX - VERT_ATTRIB_GENERIC0; i++) { inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i]; + const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); + } break; case VP_NV: @@ -164,15 +172,19 @@ static void recalculate_input_bindings( GLcontext *ctx ) inputs[i] = exec->array.generic_array[i]; else if (exec->array.legacy_array[i]->Enabled) inputs[i] = exec->array.legacy_array[i]; - else + else { inputs[i] = &vbo->legacy_currval[i]; + const_inputs |= 1 << i; + } } /* Could use just about anything, just to fill in the empty * slots: */ - for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) + for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) { inputs[i] = &vbo->generic_currval[i - VERT_ATTRIB_GENERIC0]; + const_inputs |= 1 << i; + } break; case VP_ARB: @@ -187,25 +199,34 @@ static void recalculate_input_bindings( GLcontext *ctx ) inputs[0] = exec->array.generic_array[0]; else if (exec->array.legacy_array[0]->Enabled) inputs[0] = exec->array.legacy_array[0]; - else + else { inputs[0] = &vbo->legacy_currval[0]; + const_inputs |= 1 << 0; + } for (i = 1; i <= VERT_ATTRIB_TEX7; i++) { if (exec->array.legacy_array[i]->Enabled) inputs[i] = exec->array.legacy_array[i]; - else + else { inputs[i] = &vbo->legacy_currval[i]; + const_inputs |= 1 << i; + } } for (i = 0; i < 16; i++) { if (exec->array.generic_array[i]->Enabled) inputs[VERT_ATTRIB_GENERIC0 + i] = exec->array.generic_array[i]; - else + else { inputs[VERT_ATTRIB_GENERIC0 + i] = &vbo->generic_currval[i]; + const_inputs |= 1 << (VERT_ATTRIB_GENERIC0 + i); + } + } break; } + + _mesa_set_varying_vp_inputs( ctx, ~const_inputs ); } static void bind_arrays( GLcontext *ctx ) @@ -255,6 +276,11 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count) bind_arrays( ctx ); + /* Again... 
+ */ + if (ctx->NewState) + _mesa_update_state( ctx ); + prim[0].begin = 1; prim[0].end = 1; prim[0].weak = 0; @@ -295,6 +321,9 @@ vbo_exec_DrawRangeElements(GLenum mode, bind_arrays( ctx ); + if (ctx->NewState) + _mesa_update_state( ctx ); + ib.count = count; ib.type = type; ib.obj = ctx->Array.ElementArrayBufferObj; @@ -403,3 +432,29 @@ void vbo_exec_array_destroy( struct vbo_exec_context *exec ) { /* nothing to do */ } + + +/* This API entrypoint is not ordinarily used */ +void GLAPIENTRY +_mesa_DrawArrays(GLenum mode, GLint first, GLsizei count) +{ + vbo_exec_DrawArrays(mode, first, count); +} + + +/* This API entrypoint is not ordinarily used */ +void GLAPIENTRY +_mesa_DrawElements(GLenum mode, GLsizei count, GLenum type, + const GLvoid *indices) +{ + vbo_exec_DrawElements(mode, count, type, indices); +} + + +/* This API entrypoint is not ordinarily used */ +void GLAPIENTRY +_mesa_DrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count, + GLenum type, const GLvoid *indices) +{ + vbo_exec_DrawRangeElements(mode, start, end, count, type, indices); +} diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index 92356ba977..5bf3d836db 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -150,6 +150,7 @@ static void vbo_exec_bind_arrays( GLcontext *ctx ) GLubyte *data = exec->vtx.buffer_map; const GLuint *map; GLuint attr; + GLbitfield varying_inputs = 0x0; /* Install the default (ie Current) attributes first, then overlay * all active ones. @@ -211,8 +212,11 @@ static void vbo_exec_bind_arrays( GLcontext *ctx ) arrays[attr]._MaxElement = count; /* ??? 
*/ data += exec->vtx.attrsz[src] * sizeof(GLfloat); + varying_inputs |= 1<<attr; } } + + _mesa_set_varying_vp_inputs( ctx, varying_inputs ); } @@ -242,6 +246,9 @@ void vbo_exec_vtx_flush( struct vbo_exec_context *exec ) */ vbo_exec_bind_arrays( ctx ); + if (ctx->NewState) + _mesa_update_state( ctx ); + /* if using a real VBO, unmap it before drawing */ if (exec->vtx.bufferobj->Name) { ctx->Driver.UnmapBuffer(ctx, target, exec->vtx.bufferobj); diff --git a/src/mesa/vbo/vbo_save_draw.c b/src/mesa/vbo/vbo_save_draw.c index ed82f09958..0488c5d718 100644 --- a/src/mesa/vbo/vbo_save_draw.c +++ b/src/mesa/vbo/vbo_save_draw.c @@ -64,18 +64,26 @@ static void _playback_copy_to_current( GLcontext *ctx, for (i = VBO_ATTRIB_POS+1 ; i < VBO_ATTRIB_MAX ; i++) { if (node->attrsz[i]) { GLfloat *current = (GLfloat *)vbo->currval[i].Ptr; + GLfloat tmp[4]; - COPY_CLEAN_4V(current, - node->attrsz[i], - data); + COPY_CLEAN_4V(tmp, + node->attrsz[i], + data); + + if (memcmp(current, tmp, 4 * sizeof(GLfloat)) != 0) + { + memcpy(current, tmp, 4 * sizeof(GLfloat)); - vbo->currval[i].Size = node->attrsz[i]; + vbo->currval[i].Size = node->attrsz[i]; - data += node->attrsz[i]; + if (i >= VBO_ATTRIB_FIRST_MATERIAL && + i <= VBO_ATTRIB_LAST_MATERIAL) + ctx->NewState |= _NEW_LIGHT; + + ctx->NewState |= _NEW_CURRENT_ATTRIB; + } - if (i >= VBO_ATTRIB_FIRST_MATERIAL && - i <= VBO_ATTRIB_LAST_MATERIAL) - ctx->NewState |= _NEW_LIGHT; + data += node->attrsz[i]; } } @@ -110,6 +118,7 @@ static void vbo_bind_vertex_list( GLcontext *ctx, GLuint data = node->buffer_offset; const GLuint *map; GLuint attr; + GLbitfield varying_inputs = 0x0; /* Install the default (ie Current) attributes first, then overlay * all active ones. 
@@ -159,8 +168,11 @@ static void vbo_bind_vertex_list( GLcontext *ctx, assert(arrays[attr].BufferObj->Name); data += node->attrsz[src] * sizeof(GLfloat); + varying_inputs |= 1<<attr; } } + + _mesa_set_varying_vp_inputs( ctx, varying_inputs ); } static void vbo_save_loopback_vertex_list( GLcontext *ctx, @@ -229,6 +241,11 @@ void vbo_save_playback_vertex_list( GLcontext *ctx, void *data ) vbo_bind_vertex_list( ctx, node ); + /* Again... + */ + if (ctx->NewState) + _mesa_update_state( ctx ); + vbo_context(ctx)->draw_prims( ctx, save->inputs, node->prim, diff --git a/src/mesa/vf/vf.c b/src/mesa/vf/vf.c new file mode 100644 index 0000000000..82f3d2b641 --- /dev/null +++ b/src/mesa/vf/vf.c @@ -0,0 +1,372 @@ +/* + * Copyright 2003 Tungsten Graphics, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Keith Whitwell <keithw@tungstengraphics.com> + */ + +#include "main/glheader.h" +#include "main/context.h" +#include "main/colormac.h" + +#include "vf.h" + +#define DBG 0 + + + +static GLboolean match_fastpath( struct vertex_fetch *vf, + const struct vf_fastpath *fp) +{ + GLuint j; + + if (vf->attr_count != fp->attr_count) + return GL_FALSE; + + for (j = 0; j < vf->attr_count; j++) + if (vf->attr[j].format != fp->attr[j].format || + vf->attr[j].inputsize != fp->attr[j].size || + vf->attr[j].vertoffset != fp->attr[j].offset) + return GL_FALSE; + + if (fp->match_strides) { + if (vf->vertex_stride != fp->vertex_stride) + return GL_FALSE; + + for (j = 0; j < vf->attr_count; j++) + if (vf->attr[j].inputstride != fp->attr[j].stride) + return GL_FALSE; + } + + return GL_TRUE; +} + +static GLboolean search_fastpath_emit( struct vertex_fetch *vf ) +{ + struct vf_fastpath *fp = vf->fastpath; + + for ( ; fp ; fp = fp->next) { + if (match_fastpath(vf, fp)) { + vf->emit = fp->func; + return GL_TRUE; + } + } + + return GL_FALSE; +} + +void vf_register_fastpath( struct vertex_fetch *vf, + GLboolean match_strides ) +{ + struct vf_fastpath *fastpath = CALLOC_STRUCT(vf_fastpath); + GLuint i; + + fastpath->vertex_stride = vf->vertex_stride; + fastpath->attr_count = vf->attr_count; + fastpath->match_strides = match_strides; + fastpath->func = vf->emit; + fastpath->attr = (struct vf_attr_type *) + _mesa_malloc(vf->attr_count * sizeof(fastpath->attr[0])); + + for (i = 0; i < vf->attr_count; i++) { + fastpath->attr[i].format = vf->attr[i].format; + fastpath->attr[i].stride = vf->attr[i].inputstride; + fastpath->attr[i].size = vf->attr[i].inputsize; + fastpath->attr[i].offset = vf->attr[i].vertoffset; + } + + fastpath->next = vf->fastpath; + vf->fastpath = fastpath; +} + + + + +/*********************************************************************** + * Build codegen functions or return generic ones: + */ +static void choose_emit_func( struct vertex_fetch *vf, + 
GLuint count, + GLubyte *dest) +{ + vf->emit = NULL; + + /* Does this match an existing (hardwired, codegen or known-bad) + * fastpath? + */ + if (search_fastpath_emit(vf)) { + /* Use this result. If it is null, then it is already known + * that the current state will fail for codegen and there is no + * point trying again. + */ + } + else if (vf->codegen_emit) { + vf->codegen_emit( vf ); + } + + if (!vf->emit) { + vf_generate_hardwired_emit(vf); + } + + /* Otherwise use the generic version: + */ + if (!vf->emit) + vf->emit = vf_generic_emit; + + vf->emit( vf, count, dest ); +} + + + + + +/*********************************************************************** + * Public entrypoints, mostly dispatch to the above: + */ + + + +GLuint vf_set_vertex_attributes( struct vertex_fetch *vf, + const struct vf_attr_map *map, + GLuint nr, + GLuint vertex_stride ) +{ + GLuint offset = 0; + GLuint i, j; + + assert(nr < VF_ATTRIB_MAX); + + memset(vf->lookup, 0, sizeof(vf->lookup)); + + for (j = 0, i = 0; i < nr; i++) { + const GLuint format = map[i].format; + if (format == EMIT_PAD) { + if (DBG) + _mesa_printf("%d: pad %d, offset %d\n", i, + map[i].offset, offset); + + offset += map[i].offset; + + } + else { + assert(vf->lookup[map[i].attrib] == 0); + vf->lookup[map[i].attrib] = &vf->attr[j]; + + vf->attr[j].attrib = map[i].attrib; + vf->attr[j].format = format; + vf->attr[j].insert = vf_format_info[format].insert; + vf->attr[j].extract = vf_format_info[format].extract; + vf->attr[j].vertattrsize = vf_format_info[format].attrsize; + vf->attr[j].vertoffset = offset; + + if (DBG) + _mesa_printf("%d: %s, offset %d\n", i, + vf_format_info[format].name, + vf->attr[j].vertoffset); + + offset += vf_format_info[format].attrsize; + j++; + } + } + + vf->attr_count = j; + vf->vertex_stride = vertex_stride ? 
vertex_stride : offset; + vf->emit = choose_emit_func; + + assert(vf->vertex_stride >= offset); + return vf->vertex_stride; +} + + + +void vf_set_vp_matrix( struct vertex_fetch *vf, + const GLfloat *viewport ) +{ + assert(vf->allow_viewport_emits); + + /* scale */ + vf->vp[0] = viewport[MAT_SX]; + vf->vp[1] = viewport[MAT_SY]; + vf->vp[2] = viewport[MAT_SZ]; + vf->vp[3] = 1.0; + + /* translate */ + vf->vp[4] = viewport[MAT_TX]; + vf->vp[5] = viewport[MAT_TY]; + vf->vp[6] = viewport[MAT_TZ]; + vf->vp[7] = 0.0; +} + +void vf_set_vp_scale_translate( struct vertex_fetch *vf, + const GLfloat *scale, + const GLfloat *translate ) +{ + assert(vf->allow_viewport_emits); + + vf->vp[0] = scale[0]; + vf->vp[1] = scale[1]; + vf->vp[2] = scale[2]; + vf->vp[3] = scale[3]; + + vf->vp[4] = translate[0]; + vf->vp[5] = translate[1]; + vf->vp[6] = translate[2]; + vf->vp[7] = translate[3]; +} + + +/* Set attribute pointers, adjusted for start position: + */ +void vf_set_sources( struct vertex_fetch *vf, + GLvector4f * const sources[], + GLuint start ) +{ + struct vf_attr *a = vf->attr; + GLuint j; + + for (j = 0; j < vf->attr_count; j++) { + const GLvector4f *vptr = sources[a[j].attrib]; + + if ((a[j].inputstride != vptr->stride) || + (a[j].inputsize != vptr->size)) + vf->emit = choose_emit_func; + + a[j].inputstride = vptr->stride; + a[j].inputsize = vptr->size; + a[j].do_insert = a[j].insert[vptr->size - 1]; + a[j].inputptr = ((GLubyte *)vptr->data) + start * vptr->stride; + } +} + + + +/* Emit count VB vertices to dest. + */ +void vf_emit_vertices( struct vertex_fetch *vf, + GLuint count, + void *dest ) +{ + vf->emit( vf, count, (GLubyte*) dest ); +} + + +/* Extract a named attribute from a hardware vertex. Will have to + * reverse any viewport transformation, swizzling or other conversions + * which may have been applied. + * + * This is mainly required for on-the-fly vertex translations to + * swrast format. 
+ */ +void vf_get_attr( struct vertex_fetch *vf, + const void *vertex, + GLenum attr, + const GLfloat *dflt, + GLfloat *dest ) +{ + const struct vf_attr *a = vf->attr; + const GLuint attr_count = vf->attr_count; + GLuint j; + + for (j = 0; j < attr_count; j++) { + if (a[j].attrib == attr) { + a[j].extract( &a[j], dest, (GLubyte *)vertex + a[j].vertoffset ); + return; + } + } + + /* Else return the value from ctx->Current. + */ + _mesa_memcpy( dest, dflt, 4*sizeof(GLfloat)); +} + + + + +struct vertex_fetch *vf_create( GLboolean allow_viewport_emits ) +{ + struct vertex_fetch *vf = CALLOC_STRUCT(vertex_fetch); + GLuint i; + + for (i = 0; i < VF_ATTRIB_MAX; i++) + vf->attr[i].vf = vf; + + vf->allow_viewport_emits = allow_viewport_emits; + + switch(CHAN_TYPE) { + case GL_UNSIGNED_BYTE: + vf->chan_scale[0] = 255.0; + vf->chan_scale[1] = 255.0; + vf->chan_scale[2] = 255.0; + vf->chan_scale[3] = 255.0; + break; + case GL_UNSIGNED_SHORT: + vf->chan_scale[0] = 65535.0; + vf->chan_scale[1] = 65535.0; + vf->chan_scale[2] = 65535.0; + vf->chan_scale[3] = 65535.0; + break; + default: + vf->chan_scale[0] = 1.0; + vf->chan_scale[1] = 1.0; + vf->chan_scale[2] = 1.0; + vf->chan_scale[3] = 1.0; + break; + } + + vf->identity[0] = 0.0; + vf->identity[1] = 0.0; + vf->identity[2] = 0.0; + vf->identity[3] = 1.0; + + vf->codegen_emit = NULL; + +#ifdef USE_SSE_ASM + if (!_mesa_getenv("MESA_NO_CODEGEN")) + vf->codegen_emit = vf_generate_sse_emit; +#endif + + return vf; +} + + +void vf_destroy( struct vertex_fetch *vf ) +{ + struct vf_fastpath *fp, *tmp; + + for (fp = vf->fastpath ; fp ; fp = tmp) { + tmp = fp->next; + FREE(fp->attr); + + /* KW: At the moment, fp->func is constrained to be allocated by + * _mesa_exec_alloc(), as the hardwired fastpaths in + * t_vertex_generic.c are handled specially. It would be nice + * to unify them, but this probably won't change until this + * module gets another overhaul. 
+ */ + _mesa_exec_free((void *) fp->func); + FREE(fp); + } + + vf->fastpath = NULL; + FREE(vf); +} diff --git a/src/mesa/vf/vf.h b/src/mesa/vf/vf.h new file mode 100644 index 0000000000..83d7547619 --- /dev/null +++ b/src/mesa/vf/vf.h @@ -0,0 +1,234 @@ +/* + * Copyright 2003 Tungsten Graphics, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ * Authors:
+ *    Keith Whitwell <keithw@tungstengraphics.com>
+ */
+
+#ifndef VF_VERTEX_H
+#define VF_VERTEX_H
+
+#include "main/mtypes.h"
+#include "math/m_vector.h"
+/* Vertex attribute slots; VF_ATTRIB_MAX sizes attr[] and lookup[] in struct vertex_fetch. */
+enum {
+   VF_ATTRIB_POS = 0,
+   VF_ATTRIB_WEIGHT = 1,
+   VF_ATTRIB_NORMAL = 2,
+   VF_ATTRIB_COLOR0 = 3,
+   VF_ATTRIB_COLOR1 = 4,
+   VF_ATTRIB_FOG = 5,
+   VF_ATTRIB_COLOR_INDEX = 6,
+   VF_ATTRIB_EDGEFLAG = 7,
+   VF_ATTRIB_TEX0 = 8,
+   VF_ATTRIB_TEX1 = 9,
+   VF_ATTRIB_TEX2 = 10,
+   VF_ATTRIB_TEX3 = 11,
+   VF_ATTRIB_TEX4 = 12,
+   VF_ATTRIB_TEX5 = 13,
+   VF_ATTRIB_TEX6 = 14,
+   VF_ATTRIB_TEX7 = 15,
+   VF_ATTRIB_VAR0 = 16,
+   VF_ATTRIB_VAR1 = 17,
+   VF_ATTRIB_VAR2 = 18,
+   VF_ATTRIB_VAR3 = 19,
+   VF_ATTRIB_VAR4 = 20,
+   VF_ATTRIB_VAR5 = 21,
+   VF_ATTRIB_VAR6 = 22,
+   VF_ATTRIB_VAR7 = 23,
+   VF_ATTRIB_POINTSIZE = 24,
+   VF_ATTRIB_BFC0 = 25,
+   VF_ATTRIB_BFC1 = 26,
+   VF_ATTRIB_CLIP_POS = 27,
+   VF_ATTRIB_VERTEX_HEADER = 28,
+   VF_ATTRIB_MAX = 29
+};
+
+/* Output encodings for one attribute; EMIT_MAX is the length of vf_format_info[]. */
+enum vf_attr_format {
+   EMIT_1F,
+   EMIT_2F,
+   EMIT_3F,
+   EMIT_4F,
+   EMIT_2F_VIEWPORT,		/* do viewport transform and emit */
+   EMIT_3F_VIEWPORT,		/* do viewport transform and emit */
+   EMIT_4F_VIEWPORT,		/* do viewport transform and emit */
+   EMIT_3F_XYW,			/* for projective texture */
+   EMIT_1UB_1F,			/* for fog coordinate */
+   EMIT_3UB_3F_RGB,		/* for specular color */
+   EMIT_3UB_3F_BGR,		/* for specular color */
+   EMIT_4UB_4F_RGBA,		/* for color */
+   EMIT_4UB_4F_BGRA,		/* for color */
+   EMIT_4UB_4F_ARGB,		/* for color */
+   EMIT_4UB_4F_ABGR,		/* for color */
+   EMIT_4CHAN_4F_RGBA,		/* for swrast color */
+   EMIT_PAD,			/* leave a hole of 'offset' bytes */
+   EMIT_MAX
+};
+/* One element of the layout description passed to vf_set_vertex_attributes(). */
+struct vf_attr_map {
+   GLuint attrib;
+   enum vf_attr_format format;
+   GLuint offset;
+};
+
+struct vertex_fetch;
+
+void vf_set_vp_matrix( struct vertex_fetch *vf,
+		       const GLfloat *viewport );
+
+void vf_set_vp_scale_translate( struct vertex_fetch *vf,
+				const GLfloat *scale,
+				const GLfloat *translate );
+
+GLuint vf_set_vertex_attributes( struct vertex_fetch *vf,
+				 const struct vf_attr_map *map,
+				 GLuint nr,
+				 GLuint vertex_stride );
+
+void vf_set_sources( struct vertex_fetch *vf,
+		     GLvector4f * const attrib[],
+		     GLuint start );
+
+void vf_emit_vertices( struct vertex_fetch *vf,
+		       GLuint count,
+		       void *dest );
+
+void vf_get_attr( struct vertex_fetch *vf,
+		  const void *vertex,
+		  GLenum attr,
+		  const GLfloat *dflt,
+		  GLfloat *dest );
+
+struct vertex_fetch *vf_create( GLboolean allow_viewport_emits );
+
+void vf_destroy( struct vertex_fetch *vf );
+
+
+
+/***********************************************************************
+ * Internal functions and structs:
+ */
+
+struct vf_attr;
+/* extract: vertex bytes 'v' -> floats 'out'.  insert: floats 'in' -> vertex bytes 'v'. */
+typedef void (*vf_extract_func)( const struct vf_attr *a,
+				 GLfloat *out,
+				 const GLubyte *v );
+
+typedef void (*vf_insert_func)( const struct vf_attr *a,
+				GLubyte *v,
+				const GLfloat *in );
+
+typedef void (*vf_emit_func)( struct vertex_fetch *vf,
+			      GLuint count,
+			      GLubyte *dest );
+
+
+
+/* Describes how to convert/move a vertex attribute from a vertex
+ * array to a vertex structure.
+ */
+struct vf_attr
+{
+   struct vertex_fetch *vf;
+
+   GLuint format;
+   GLuint inputsize;
+   GLuint inputstride;
+   GLuint vertoffset;      /* position of the attrib in the vertex struct */
+
+   GLuint attrib;          /* which vertex attrib (0=position, etc) */
+   GLuint vertattrsize;    /* size of the attribute in bytes */
+
+   GLubyte *inputptr;      /* advanced by inputstride after each emitted vertex */
+   const vf_insert_func *insert;
+   vf_insert_func do_insert;
+   vf_extract_func extract;
+};
+
+struct vertex_fetch
+{
+   struct vf_attr attr[VF_ATTRIB_MAX];
+   GLuint attr_count;
+   GLuint vertex_stride;
+
+   struct vf_attr *lookup[VF_ATTRIB_MAX];
+
+   vf_emit_func emit;
+
+   /* Parameters and constants for codegen:
+    */
+   GLboolean allow_viewport_emits;
+   GLfloat vp[8];          /* viewport scale in vp[0..3], translate in vp[4..7] */
+   GLfloat chan_scale[4];
+   GLfloat identity[4];
+
+   struct vf_fastpath *fastpath;
+
+   void (*codegen_emit)( struct vertex_fetch *vf );
+};
+
+
+struct vf_attr_type {
+   GLuint format;
+   GLuint size;
+   GLuint stride;
+   GLuint offset;
+};
+/* Singly linked list node ('next') pairing a vertex layout with its emit function. */
+struct vf_fastpath {
+   GLuint vertex_stride;
+   GLuint attr_count;
+   GLboolean match_strides;
+
+   struct vf_attr_type *attr;
+
+   vf_emit_func func;
+   struct vf_fastpath *next;
+};
+
+
+void vf_register_fastpath( struct vertex_fetch *vtx,
+			   GLboolean match_strides );
+
+void vf_generic_emit( struct vertex_fetch *vf,
+		      GLuint count,
+		      GLubyte *v );
+
+void vf_generate_hardwired_emit( struct vertex_fetch *vf );
+
+void vf_generate_sse_emit( struct vertex_fetch *vf );
+
+/* Per-format conversion routines; 'insert' holds four variants of the inserter. */
+struct vf_format_info {
+   const char *name;
+   vf_extract_func extract;
+   vf_insert_func insert[4];
+   const GLuint attrsize;
+};
+/* Declaration only (hence 'extern'): the table itself is defined in vf_generic.c. */
+extern const struct vf_format_info vf_format_info[EMIT_MAX];
+
+
+#endif
diff --git a/src/mesa/vf/vf_generic.c b/src/mesa/vf/vf_generic.c
new file mode 100644
index 0000000000..baa00af29a
--- /dev/null
+++ b/src/mesa/vf/vf_generic.c
@@ -0,0 +1,981 @@
+
+/*
+ * Copyright 2003 Tungsten Graphics, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keithw@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/colormac.h"
+#include "main/simple_list.h"
+
+#include "vf/vf.h"
+
+
+/* insert_<M>f_viewport_<N>: write M floats to 'v' from the first N floats of 'in'.
+ * These functions take the NDC coordinates pointed to by 'in', apply the
+ * NDC->Viewport mapping and store the results at 'v'.
+ */
+/* Scale is a->vf->vp[0..3], translate is a->vf->vp[4..7]; an absent w is written as 1. */
+static INLINE void insert_4f_viewport_4( const struct vf_attr *a, GLubyte *v,
+					 const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)v;
+   const GLfloat *scale = a->vf->vp;
+   const GLfloat *trans = a->vf->vp + 4;
+
+   out[0] = scale[0] * in[0] + trans[0];
+   out[1] = scale[1] * in[1] + trans[1];
+   out[2] = scale[2] * in[2] + trans[2];
+   out[3] = in[3];
+}
+
+static INLINE void insert_4f_viewport_3( const struct vf_attr *a, GLubyte *v,
+					 const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)v;
+   const GLfloat *scale = a->vf->vp;
+   const GLfloat *trans = a->vf->vp + 4;
+
+   out[0] = scale[0] * in[0] + trans[0];
+   out[1] = scale[1] * in[1] + trans[1];
+   out[2] = scale[2] * in[2] + trans[2];
+   out[3] = 1;
+}
+
+static INLINE void insert_4f_viewport_2( const struct vf_attr *a, GLubyte *v,
+					 const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)v;
+   const GLfloat *scale = a->vf->vp;
+   const GLfloat *trans = a->vf->vp + 4;
+
+   out[0] = scale[0] * in[0] + trans[0];
+   out[1] = scale[1] * in[1] + trans[1];
+   out[2] = trans[2];
+   out[3] = 1;
+}
+
+static INLINE void insert_4f_viewport_1( const struct vf_attr *a, GLubyte *v,
+					 const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)v;
+   const GLfloat *scale = a->vf->vp;
+   const GLfloat *trans = a->vf->vp + 4;
+
+   out[0] = scale[0] * in[0] + trans[0];
+   out[1] = trans[1];
+   out[2] = trans[2];
+   out[3] = 1;
+}
+
+static INLINE void insert_3f_viewport_3( const struct vf_attr *a, GLubyte *v,
+					 const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)v;
+   const GLfloat *scale = a->vf->vp;
+   const GLfloat *trans = a->vf->vp + 4;
+
+   out[0] = scale[0] * in[0] + trans[0];
+   out[1] = scale[1] * in[1] + trans[1];
+   out[2] = scale[2] * in[2] + trans[2];
+}
+/* Two-component input: z is absent, so it gets the translate term only (as in insert_4f_viewport_2). */
+static INLINE void insert_3f_viewport_2( const struct vf_attr *a, GLubyte *v,
+					 const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)v;
+   const GLfloat *scale = a->vf->vp;
+   const GLfloat *trans = a->vf->vp + 4;
+
+   out[0] = scale[0] * in[0] + trans[0];
+   out[1] = scale[1] * in[1] + trans[1];
+   out[2] = trans[2];   /* was scale[2] * in[2] + trans[2]: in[2] is past the 2 valid inputs */
+}
+
+static INLINE void insert_3f_viewport_1( const struct vf_attr *a, GLubyte *v,
+					 const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)v;
+   const GLfloat *scale = a->vf->vp;
+   const GLfloat *trans = a->vf->vp + 4;
+
+   out[0] = scale[0] * in[0] + trans[0];
+   out[1] = trans[1];
+   out[2] = trans[2];
+}
+
+static INLINE void insert_2f_viewport_2( const struct vf_attr *a, GLubyte *v,
+					 const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)v;
+   const GLfloat *scale = a->vf->vp;
+   const GLfloat *trans = a->vf->vp + 4;
+
+   out[0] = scale[0] * in[0] + trans[0];
+   out[1] = scale[1] * in[1] + trans[1];
+}
+
+static INLINE void insert_2f_viewport_1( const struct vf_attr *a, GLubyte *v,
+					 const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)v;
+   const GLfloat *scale = a->vf->vp;
+   const GLfloat *trans = a->vf->vp + 4;
+
+   out[0] = scale[0] * in[0] + trans[0];
+   out[1] = trans[1];
+}
+
+
+/*
+ * These functions do the same as above, except for the viewport mapping.
+ */
+
+static INLINE void insert_4f_4( const struct vf_attr *a, GLubyte *v, const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)(v);
+   (void) a;
+
+   out[0] = in[0];
+   out[1] = in[1];
+   out[2] = in[2];
+   out[3] = in[3];
+}
+
+static INLINE void insert_4f_3( const struct vf_attr *a, GLubyte *v, const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)(v);
+   (void) a;
+
+   out[0] = in[0];
+   out[1] = in[1];
+   out[2] = in[2];
+   out[3] = 1;
+}
+
+static INLINE void insert_4f_2( const struct vf_attr *a, GLubyte *v, const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)(v);
+   (void) a;
+
+   out[0] = in[0];
+   out[1] = in[1];
+   out[2] = 0;
+   out[3] = 1;
+}
+
+static INLINE void insert_4f_1( const struct vf_attr *a, GLubyte *v, const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)(v);
+   (void) a;
+
+   out[0] = in[0];
+   out[1] = 0;
+   out[2] = 0;
+   out[3] = 1;
+}
+/* Writes x, y and w of a 4-component input as three floats; z (in[2]) is not emitted. */
+static INLINE void insert_3f_xyw_4( const struct vf_attr *a, GLubyte *v, const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)(v);
+   (void) a;
+
+   out[0] = in[0];
+   out[1] = in[1];
+   out[2] = in[3];
+}
+/* Fills the first three insert[] slots of the "3f_xyw" table entry; calls _mesa_exit(1). */
+static INLINE void insert_3f_xyw_err( const struct vf_attr *a, GLubyte *v, const GLfloat *in )
+{
+   (void) a; (void) v; (void) in;
+   _mesa_exit(1);
+}
+
+static INLINE void insert_3f_3( const struct vf_attr *a, GLubyte *v, const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)(v);
+   (void) a;
+
+   out[0] = in[0];
+   out[1] = in[1];
+   out[2] = in[2];
+}
+
+static INLINE void insert_3f_2( const struct vf_attr *a, GLubyte *v, const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)(v);
+   (void) a;
+
+   out[0] = in[0];
+   out[1] = in[1];
+   out[2] = 0;
+}
+
+static INLINE void insert_3f_1( const struct vf_attr *a, GLubyte *v, const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)(v);
+   (void) a;
+
+   out[0] = in[0];
+   out[1] = 0;
+   out[2] = 0;
+}
+
+
+static INLINE void insert_2f_2( const struct vf_attr *a, GLubyte *v, const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)(v);
+   (void) a;
+
+   out[0] = in[0];
+   out[1] = in[1];
+}
+
+static INLINE void insert_2f_1( const struct vf_attr *a, GLubyte *v, const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)(v);
+   (void) a;
+
+   out[0] = in[0];
+   out[1] = 0;
+}
+
+static INLINE void insert_1f_1( const struct vf_attr *a, GLubyte *v, const GLfloat *in )
+{
+   GLfloat *out = (GLfloat *)(v);
+   (void) a;
+
+   out[0] = in[0];
+}
+/* No-op inserter: writes nothing.  Used as the filler argument of EMIT2/EMIT3/EMIT4 below. */
+static INLINE void insert_null( const struct vf_attr *a, GLubyte *v, const GLfloat *in )
+{
+   (void) a; (void) v; (void) in;
+}
+/* Float -> GLchan colour inserters: absent colour components become 0, absent alpha CHAN_MAX. */
+static INLINE void insert_4chan_4f_rgba_4( const struct vf_attr *a, GLubyte *v,
+				  const GLfloat *in )
+{
+   GLchan *c = (GLchan *)v;
+   (void) a;
+   UNCLAMPED_FLOAT_TO_CHAN(c[0], in[0]);
+   UNCLAMPED_FLOAT_TO_CHAN(c[1], in[1]);
+   UNCLAMPED_FLOAT_TO_CHAN(c[2], in[2]);
+   UNCLAMPED_FLOAT_TO_CHAN(c[3], in[3]);
+}
+
+static INLINE void insert_4chan_4f_rgba_3( const struct vf_attr *a, GLubyte *v,
+				  const GLfloat *in )
+{
+   GLchan *c = (GLchan *)v;
+   (void) a;
+   UNCLAMPED_FLOAT_TO_CHAN(c[0], in[0]);
+   UNCLAMPED_FLOAT_TO_CHAN(c[1], in[1]);
+   UNCLAMPED_FLOAT_TO_CHAN(c[2], in[2]);
+   c[3] = CHAN_MAX;
+}
+
+static INLINE void insert_4chan_4f_rgba_2( const struct vf_attr *a, GLubyte *v,
+				  const GLfloat *in )
+{
+   GLchan *c = (GLchan *)v;
+   (void) a;
+   UNCLAMPED_FLOAT_TO_CHAN(c[0], in[0]);
+   UNCLAMPED_FLOAT_TO_CHAN(c[1], in[1]);
+   c[2] = 0;
+   c[3] = CHAN_MAX;
+}
+
+static INLINE void insert_4chan_4f_rgba_1( const struct vf_attr *a, GLubyte *v,
+				  const GLfloat *in )
+{
+   GLchan *c = (GLchan *)v;
+   (void) a;
+   UNCLAMPED_FLOAT_TO_CHAN(c[0], in[0]);
+   c[1] = 0;
+   c[2] = 0;
+   c[3] = CHAN_MAX;
+}
+/* Float -> ubyte colour inserters.  The name gives the byte order written to 'v'; 'in' is always r,g,b,a. */
+static INLINE void insert_4ub_4f_rgba_4( const struct vf_attr *a, GLubyte *v,
+				const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]);
+}
+
+static INLINE void insert_4ub_4f_rgba_3( const struct vf_attr *a, GLubyte *v,
+				const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]);
+   v[3] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_rgba_2( const struct vf_attr *a, GLubyte *v,
+				const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+   v[2] = 0;
+   v[3] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_rgba_1( const struct vf_attr *a, GLubyte *v,
+				const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+   v[1] = 0;
+   v[2] = 0;
+   v[3] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_bgra_4( const struct vf_attr *a, GLubyte *v,
+				const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]);
+}
+
+static INLINE void insert_4ub_4f_bgra_3( const struct vf_attr *a, GLubyte *v,
+				const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]);
+   v[3] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_bgra_2( const struct vf_attr *a, GLubyte *v,
+				const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+   v[0] = 0;
+   v[3] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_bgra_1( const struct vf_attr *a, GLubyte *v,
+				const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+   v[1] = 0;
+   v[0] = 0;
+   v[3] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_argb_4( const struct vf_attr *a, GLubyte *v,
+				const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]);
+}
+
+static INLINE void insert_4ub_4f_argb_3( const struct vf_attr *a, GLubyte *v,
+				const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]);
+   v[0] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_argb_2( const struct vf_attr *a, GLubyte *v,
+				const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
+   v[3] = 0x00;
+   v[0] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_argb_1( const struct vf_attr *a, GLubyte *v,
+				const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]);
+   v[2] = 0x00;
+   v[3] = 0x00;
+   v[0] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_abgr_4( const struct vf_attr *a, GLubyte *v,
+				const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]);
+}
+
+static INLINE void insert_4ub_4f_abgr_3( const struct vf_attr *a, GLubyte *v,
+				const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]);
+   v[0] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_abgr_2( const struct vf_attr *a, GLubyte *v,
+				const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]);
+   v[1] = 0x00;
+   v[0] = 0xff;
+}
+
+static INLINE void insert_4ub_4f_abgr_1( const struct vf_attr *a, GLubyte *v,
+				const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]);
+   v[2] = 0x00;
+   v[1] = 0x00;
+   v[0] = 0xff;
+}
+/* Three-byte colour inserters (no alpha byte is written). */
+static INLINE void insert_3ub_3f_rgb_3( const struct vf_attr *a, GLubyte *v,
+			       const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]);
+}
+
+static INLINE void insert_3ub_3f_rgb_2( const struct vf_attr *a, GLubyte *v,
+			       const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+   v[2] = 0;
+}
+
+static INLINE void insert_3ub_3f_rgb_1( const struct vf_attr *a, GLubyte *v,
+			       const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+   v[1] = 0;
+   v[2] = 0;
+}
+
+static INLINE void insert_3ub_3f_bgr_3( const struct vf_attr *a, GLubyte *v,
+				 const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]);
+}
+
+static INLINE void insert_3ub_3f_bgr_2( const struct vf_attr *a, GLubyte *v,
+				 const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+   UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]);
+   v[0] = 0;
+}
+
+static INLINE void insert_3ub_3f_bgr_1( const struct vf_attr *a, GLubyte *v,
+				 const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]);
+   v[1] = 0;
+   v[0] = 0;
+}
+
+/* Single float -> single ubyte. */
+static INLINE void insert_1ub_1f_1( const struct vf_attr *a, GLubyte *v,
+			   const GLfloat *in )
+{
+   (void) a;
+   UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]);
+}
+
+
+/***********************************************************************
+ * Functions to perform the reverse operations to the above, for
+ * swrast translation and clip-interpolation.
+ *
+ * Currently always extracts a full 4 floats.
+ */
+
+static void extract_4f_viewport( const struct vf_attr *a, GLfloat *out,
+				 const GLubyte *v )
+{
+   const GLfloat *in = (const GLfloat *)v;
+   const GLfloat *scale = a->vf->vp;
+   const GLfloat *trans = a->vf->vp + 4;
+
+   /* Although included for completeness, the position coordinate is
+    * usually handled differently during clipping.
+    */
+   out[0] = (in[0] - trans[0]) / scale[0];
+   out[1] = (in[1] - trans[1]) / scale[1];
+   out[2] = (in[2] - trans[2]) / scale[2];
+   out[3] = in[3];
+}
+
+static void extract_3f_viewport( const struct vf_attr *a, GLfloat *out,
+				 const GLubyte *v )
+{
+   const GLfloat *in = (const GLfloat *)v;
+   const GLfloat *scale = a->vf->vp;
+   const GLfloat *trans = a->vf->vp + 4;
+
+   out[0] = (in[0] - trans[0]) / scale[0];
+   out[1] = (in[1] - trans[1]) / scale[1];
+   out[2] = (in[2] - trans[2]) / scale[2];
+   out[3] = 1;
+}
+
+
+static void extract_2f_viewport( const struct vf_attr *a, GLfloat *out,
+				 const GLubyte *v )
+{
+   const GLfloat *in = (const GLfloat *)v;
+   const GLfloat *scale = a->vf->vp;
+   const GLfloat *trans = a->vf->vp + 4;
+
+   out[0] = (in[0] - trans[0]) / scale[0];
+   out[1] = (in[1] - trans[1]) / scale[1];
+   out[2] = 0;
+   out[3] = 1;
+}
+
+
+static void extract_4f( const struct vf_attr *a, GLfloat *out, const GLubyte *v  )
+{
+   const GLfloat *in = (const GLfloat *)v;
+   (void) a;
+
+   out[0] = in[0];
+   out[1] = in[1];
+   out[2] = in[2];
+   out[3] = in[3];
+}
+/* Inverse of insert_3f_xyw_4: the third stored float is returned as w, z is reported as 0. */
+static void extract_3f_xyw( const struct vf_attr *a, GLfloat *out, const GLubyte *v )
+{
+   const GLfloat *in = (const GLfloat *)v;
+   (void) a;
+
+   out[0] = in[0];
+   out[1] = in[1];
+   out[2] = 0;
+   out[3] = in[2];
+}
+
+
+static void extract_3f( const struct vf_attr *a, GLfloat *out, const GLubyte *v )
+{
+   const GLfloat *in = (const GLfloat *)v;
+   (void) a;
+
+   out[0] = in[0];
+   out[1] = in[1];
+   out[2] = in[2];
+   out[3] = 1;
+}
+
+
+static void extract_2f( const struct vf_attr *a, GLfloat *out, const GLubyte *v )
+{
+   const GLfloat *in = (const GLfloat *)v;
+   (void) a;
+
+   out[0] = in[0];
+   out[1] = in[1];
+   out[2] = 0;
+   out[3] = 1;
+}
+
+static void extract_1f( const struct vf_attr *a, GLfloat *out, const GLubyte *v )
+{
+   const GLfloat *in = (const GLfloat *)v;
+   (void) a;
+
+   out[0] = in[0];
+   out[1] = 0;
+   out[2] = 0;
+   out[3] = 1;
+}
+
+static void extract_4chan_4f_rgba( const struct vf_attr *a, GLfloat *out,
+				 const GLubyte *v )
+{
+   GLchan *c = (GLchan *)v;
+   (void) a;
+
+   out[0] = CHAN_TO_FLOAT(c[0]);
+   out[1] = CHAN_TO_FLOAT(c[1]);
+   out[2] = CHAN_TO_FLOAT(c[2]);
+   out[3] = CHAN_TO_FLOAT(c[3]);
+}
+
+static void extract_4ub_4f_rgba( const struct vf_attr *a, GLfloat *out,
+				 const GLubyte *v )
+{
+   (void) a;
+   out[0] = UBYTE_TO_FLOAT(v[0]);
+   out[1] = UBYTE_TO_FLOAT(v[1]);
+   out[2] = UBYTE_TO_FLOAT(v[2]);
+   out[3] = UBYTE_TO_FLOAT(v[3]);
+}
+
+static void extract_4ub_4f_bgra( const struct vf_attr *a, GLfloat *out,
+				 const GLubyte *v )
+{
+   (void) a;
+   out[2] = UBYTE_TO_FLOAT(v[0]);
+   out[1] = UBYTE_TO_FLOAT(v[1]);
+   out[0] = UBYTE_TO_FLOAT(v[2]);
+   out[3] = UBYTE_TO_FLOAT(v[3]);
+}
+
+static void extract_4ub_4f_argb( const struct vf_attr *a, GLfloat *out,
+				 const GLubyte *v )
+{
+   (void) a;
+   out[3] = UBYTE_TO_FLOAT(v[0]);
+   out[0] = UBYTE_TO_FLOAT(v[1]);
+   out[1] = UBYTE_TO_FLOAT(v[2]);
+   out[2] = UBYTE_TO_FLOAT(v[3]);
+}
+
+static void extract_4ub_4f_abgr( const struct vf_attr *a, GLfloat *out,
+				 const GLubyte *v )
+{
+   (void) a;
+   out[3] = UBYTE_TO_FLOAT(v[0]);
+   out[2] = UBYTE_TO_FLOAT(v[1]);
+   out[1] = UBYTE_TO_FLOAT(v[2]);
+   out[0] = UBYTE_TO_FLOAT(v[3]);
+}
+
+static void extract_3ub_3f_rgb( const struct vf_attr *a, GLfloat *out,
+				const GLubyte *v )
+{
+   (void) a;
+   out[0] = UBYTE_TO_FLOAT(v[0]);
+   out[1] = UBYTE_TO_FLOAT(v[1]);
+   out[2] = UBYTE_TO_FLOAT(v[2]);
+   out[3] = 1;
+}
+
+static void extract_3ub_3f_bgr( const struct vf_attr *a, GLfloat *out,
+				const GLubyte *v )
+{
+   (void) a;
+   out[2] = UBYTE_TO_FLOAT(v[0]);
+   out[1] = UBYTE_TO_FLOAT(v[1]);
+   out[0] = UBYTE_TO_FLOAT(v[2]);
+   out[3] = 1;
+}
+
+static void extract_1ub_1f( const struct vf_attr *a, GLfloat *out, const GLubyte *v )
+{
+   (void) a;
+   out[0] = UBYTE_TO_FLOAT(v[0]);
+   out[1] = 0;
+   out[2] = 0;
+   out[3] = 1;
+}
+
+/* Entries are listed in the order of enum vf_attr_format (vf.h): name, extract, insert[4], byte size. */
+const struct vf_format_info vf_format_info[EMIT_MAX] =
+{
+   { "1f",
+     extract_1f,
+     { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 },
+     sizeof(GLfloat) },
+
+   { "2f",
+     extract_2f,
+     { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 },
+     2 * sizeof(GLfloat) },
+
+   { "3f",
+     extract_3f,
+     { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 },
+     3 * sizeof(GLfloat) },
+
+   { "4f",
+     extract_4f,
+     { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 },
+     4 * sizeof(GLfloat) },
+
+   { "2f_viewport",
+     extract_2f_viewport,
+     { insert_2f_viewport_1, insert_2f_viewport_2, insert_2f_viewport_2,
+       insert_2f_viewport_2 },
+     2 * sizeof(GLfloat) },
+
+   { "3f_viewport",
+     extract_3f_viewport,
+     { insert_3f_viewport_1, insert_3f_viewport_2, insert_3f_viewport_3,
+       insert_3f_viewport_3 },
+     3 * sizeof(GLfloat) },
+
+   { "4f_viewport",
+     extract_4f_viewport,
+     { insert_4f_viewport_1, insert_4f_viewport_2, insert_4f_viewport_3,
+       insert_4f_viewport_4 },
+     4 * sizeof(GLfloat) },
+
+   { "3f_xyw",
+     extract_3f_xyw,
+     { insert_3f_xyw_err, insert_3f_xyw_err, insert_3f_xyw_err,
+       insert_3f_xyw_4 },
+     3 * sizeof(GLfloat) },
+
+   { "1ub_1f",
+     extract_1ub_1f,
+     { insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1 },
+     sizeof(GLubyte) },
+
+   { "3ub_3f_rgb",
+     extract_3ub_3f_rgb,
+     { insert_3ub_3f_rgb_1, insert_3ub_3f_rgb_2, insert_3ub_3f_rgb_3,
+       insert_3ub_3f_rgb_3 },
+     3 * sizeof(GLubyte) },
+
+   { "3ub_3f_bgr",
+     extract_3ub_3f_bgr,
+     { insert_3ub_3f_bgr_1, insert_3ub_3f_bgr_2, insert_3ub_3f_bgr_3,
+       insert_3ub_3f_bgr_3 },
+     3 * sizeof(GLubyte) },
+
+   { "4ub_4f_rgba",
+     extract_4ub_4f_rgba,
+     { insert_4ub_4f_rgba_1, insert_4ub_4f_rgba_2, insert_4ub_4f_rgba_3,
+       insert_4ub_4f_rgba_4 },
+     4 * sizeof(GLubyte) },
+
+   { "4ub_4f_bgra",
+     extract_4ub_4f_bgra,
+     { insert_4ub_4f_bgra_1, insert_4ub_4f_bgra_2, insert_4ub_4f_bgra_3,
+       insert_4ub_4f_bgra_4 },
+     4 * sizeof(GLubyte) },
+
+   { "4ub_4f_argb",
+     extract_4ub_4f_argb,
+     { insert_4ub_4f_argb_1, insert_4ub_4f_argb_2, insert_4ub_4f_argb_3,
+       insert_4ub_4f_argb_4 },
+     4 * sizeof(GLubyte) },
+
+   { "4ub_4f_abgr",
+     extract_4ub_4f_abgr,
+     { insert_4ub_4f_abgr_1, insert_4ub_4f_abgr_2, insert_4ub_4f_abgr_3,
+       insert_4ub_4f_abgr_4 },
+     4 * sizeof(GLubyte) },
+
+   { "4chan_4f_rgba",
+     extract_4chan_4f_rgba,
+     { insert_4chan_4f_rgba_1, insert_4chan_4f_rgba_2, insert_4chan_4f_rgba_3,
+       insert_4chan_4f_rgba_4 },
+     4 * sizeof(GLchan) },
+
+   { "pad",
+     NULL,
+     { NULL, NULL, NULL, NULL },
+     0 }
+
+};
+
+
+
+/* EMIT5 defines NAME(): per vertex it runs the first NR inserters F0..F4 on attr[0..NR-1]. */
+/***********************************************************************
+ * Hardwired fastpaths for emitting whole vertices or groups of
+ * vertices
+ */
+#define EMIT5(NR, F0, F1, F2, F3, F4, NAME)				\
+static void NAME( struct vertex_fetch *vf,				\
+		  GLuint count,						\
+		  GLubyte *v )						\
+{									\
+   struct vf_attr *a = vf->attr;				\
+   GLuint i;								\
+									\
+   for (i = 0 ; i < count ; i++, v += vf->vertex_stride) {		\
+      if (NR > 0) {							\
+	 F0( &a[0], v + a[0].vertoffset, (GLfloat *)a[0].inputptr );	\
+	 a[0].inputptr += a[0].inputstride;				\
+      }									\
+      									\
+      if (NR > 1) {							\
+	 F1( &a[1], v + a[1].vertoffset, (GLfloat *)a[1].inputptr );	\
+	 a[1].inputptr += a[1].inputstride;				\
+      }									\
+      									\
+      if (NR > 2) {							\
+	 F2( &a[2], v + a[2].vertoffset, (GLfloat *)a[2].inputptr );	\
+	 a[2].inputptr += a[2].inputstride;				\
+      }									\
+      									\
+      if (NR > 3) {							\
+	 F3( &a[3], v + a[3].vertoffset, (GLfloat *)a[3].inputptr );	\
+	 a[3].inputptr += a[3].inputstride;				\
+      }									\
+									\
+      if (NR > 4) {							\
+	 F4( &a[4], v + a[4].vertoffset, (GLfloat *)a[4].inputptr );	\
+	 a[4].inputptr += a[4].inputstride;				\
+      }									\
+   }									\
+}
+
+/* EMIT2/3/4 pad the unused inserter slots of EMIT5 with insert_null. */
+#define EMIT2(F0, F1, NAME) EMIT5(2, F0, F1, insert_null, \
+				  insert_null, insert_null, NAME)
+
+#define EMIT3(F0, F1, F2, NAME) EMIT5(3, F0, F1, F2, insert_null, \
+				      insert_null, NAME)
+
+#define EMIT4(F0, F1, F2, F3, NAME) EMIT5(4, F0, F1, F2, F3, \
+					  insert_null, NAME)
+
+
+EMIT2(insert_3f_viewport_3, insert_4ub_4f_rgba_4, emit_viewport3_rgba4)
+EMIT2(insert_3f_viewport_3, insert_4ub_4f_bgra_4, emit_viewport3_bgra4)
+EMIT2(insert_3f_3, insert_4ub_4f_rgba_4, emit_xyz3_rgba4)
+
+EMIT3(insert_4f_viewport_4, insert_4ub_4f_rgba_4, insert_2f_2, emit_viewport4_rgba4_st2)
+EMIT3(insert_4f_viewport_4, insert_4ub_4f_bgra_4, insert_2f_2, emit_viewport4_bgra4_st2)
+EMIT3(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, emit_xyzw4_rgba4_st2)
+
+EMIT4(insert_4f_viewport_4, insert_4ub_4f_rgba_4, insert_2f_2, insert_2f_2, emit_viewport4_rgba4_st2_st2)
+EMIT4(insert_4f_viewport_4, insert_4ub_4f_bgra_4, insert_2f_2, insert_2f_2, emit_viewport4_bgra4_st2_st2)
+EMIT4(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, insert_2f_2, emit_xyzw4_rgba4_st2_st2)
+
+
+/* Use the codegen paths to select one of a number of hardwired
+ * fastpaths.
+ */
+void vf_generate_hardwired_emit( struct vertex_fetch *vf )
+{
+   vf_emit_func func = NULL;
+
+   /* Does it fit a hardwired fastpath?  Help! this is growing out of
+    * control!
+    */
+   switch (vf->attr_count) {
+   case 2:
+      if (vf->attr[0].do_insert == insert_3f_viewport_3) {
+	 if (vf->attr[1].do_insert == insert_4ub_4f_bgra_4)
+	    func = emit_viewport3_bgra4;
+	 else if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4)
+	    func = emit_viewport3_rgba4;
+      }
+      else if (vf->attr[0].do_insert == insert_3f_3 &&
+	       vf->attr[1].do_insert == insert_4ub_4f_rgba_4) {
+	 func = emit_xyz3_rgba4;
+      }
+      break;
+   case 3:
+      if (vf->attr[2].do_insert == insert_2f_2) {
+	 if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) {
+	    if (vf->attr[0].do_insert == insert_4f_viewport_4)
+	       func = emit_viewport4_rgba4_st2;
+	    else if (vf->attr[0].do_insert == insert_4f_4)
+	       func = emit_xyzw4_rgba4_st2;
+	 }
+	 else if (vf->attr[1].do_insert == insert_4ub_4f_bgra_4 &&
+		  vf->attr[0].do_insert == insert_4f_viewport_4)
+	    func = emit_viewport4_bgra4_st2;
+      }
+      break;
+   case 4:
+      if (vf->attr[2].do_insert == insert_2f_2 &&
+	  vf->attr[3].do_insert == insert_2f_2) {
+	 if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) {
+	    if (vf->attr[0].do_insert == insert_4f_viewport_4)
+	       func = emit_viewport4_rgba4_st2_st2;
+	    else if (vf->attr[0].do_insert == insert_4f_4)
+	       func = emit_xyzw4_rgba4_st2_st2;
+	 }
+	 else if (vf->attr[1].do_insert == insert_4ub_4f_bgra_4 &&
+		  vf->attr[0].do_insert == insert_4f_viewport_4)
+	    func = emit_viewport4_bgra4_st2_st2;
+      }
+      break;
+   }
+
+   vf->emit = func;   /* remains NULL when no hardwired combination matched */
+}
+
+/***********************************************************************
+ * Generic (non-codegen) functions for whole vertices or groups of
+ * vertices
+ */
+
+void vf_generic_emit( struct vertex_fetch *vf,
+		      GLuint count,
+		      GLubyte *v )
+{
+   struct vf_attr *a = vf->attr;
+   const GLuint attr_count = vf->attr_count;
+   const GLuint stride = vf->vertex_stride;
+   GLuint i, j;
+
+   for (i = 0 ; i < count ; i++, v += stride) {
+      for (j = 0; j < attr_count; j++) {
+	 GLfloat *in = (GLfloat *)a[j].inputptr;
+	 a[j].inputptr += a[j].inputstride;   /* advance the source before inserting the saved 'in' */
+	 a[j].do_insert( &a[j], v + a[j].vertoffset, in );
+      }
+   }
+}
+
+
diff --git a/src/mesa/vf/vf_sse.c b/src/mesa/vf/vf_sse.c
new file mode 100644
index 0000000000..4d70196ffe
--- /dev/null
+++ b/src/mesa/vf/vf_sse.c
@@ -0,0 +1,662 @@
+/*
+ * Copyright 2003 Tungsten Graphics, inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
IN NO EVENT SHALL
+ * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keithw@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/colormac.h"
+#include "main/simple_list.h"
+#include "main/enums.h"
+
+#include "vf/vf.h"
+
+#if defined(USE_SSE_ASM)
+
+#include "x86/rtasm/x86sse.h"
+#include "x86/common_x86_asm.h"
+
+/* Component indices passed to SHUF() below. */
+#define X    0
+#define Y    1
+#define Z    2
+#define W    3
+
+/* State for one code-generation run: the function being assembled plus the options the emitters consult. */
+struct x86_program {
+   struct x86_function func;
+
+   struct vertex_fetch *vf;
+   GLboolean inputs_safe;      /* when set, emit_load3f_3 may read a 4th dword past the input */
+   GLboolean outputs_safe;     /* when set, emit_store3f may write a 4th dword past the output */
+   GLboolean have_sse2;
+
+   struct x86_reg identity;
+   struct x86_reg chan0;
+};
+
+
+static struct x86_reg get_identity( struct x86_program *p )
+{
+   return p->identity;
+}
+/* emit_load<M>f_<N>: emit code loading N floats from arg0 into dest for an M-float output. */
+static void emit_load4f_4( struct x86_program *p,
+			   struct x86_reg dest,
+			   struct x86_reg arg0 )
+{
+   sse_movups(&p->func, dest, arg0);
+}
+
+static void emit_load4f_3( struct x86_program *p,
+			   struct x86_reg dest,
+			   struct x86_reg arg0 )
+{
+   /* Have to jump through some hoops:
+    *
+    * c 0 0 0
+    * c 0 0 1
+    * 0 0 c 1
+    * a b c 1
+    */
+   sse_movss(&p->func, dest, x86_make_disp(arg0, 8));
+   sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) );
+   sse_shufps(&p->func, dest, dest, SHUF(Y,Z,X,W) );
+   sse_movlps(&p->func, dest, arg0);
+}
+
+static void emit_load4f_2( struct x86_program *p,
+			   struct x86_reg dest,
+			   struct x86_reg arg0 )
+{
+   /* Initialize from identity, then pull in low two words:
+    */
+   sse_movups(&p->func, dest, get_identity(p));
+   sse_movlps(&p->func, dest, arg0);
+}
+
+static void emit_load4f_1( struct x86_program *p,
+			   struct x86_reg dest,
+			   struct x86_reg arg0 )
+{
+   /* Pull in low word, then swizzle in identity */
+   sse_movss(&p->func, dest, arg0);
+   sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) );
+}
+
+
+
+static void emit_load3f_3( struct x86_program *p,
+			   struct x86_reg dest,
+			   struct x86_reg arg0 )
+{
+   /* Over-reads by 1 dword - potential SEGV if input is a vertex
+    * array.
+    */
+   if (p->inputs_safe) {
+      sse_movups(&p->func, dest, arg0);
+   }
+   else {
+      /* c 0 0 0
+       * c c c c
+       * a b c c
+       */
+      sse_movss(&p->func, dest, x86_make_disp(arg0, 8));
+      sse_shufps(&p->func, dest, dest, SHUF(X,X,X,X));
+      sse_movlps(&p->func, dest, arg0);
+   }
+}
+
+static void emit_load3f_2( struct x86_program *p,
+			   struct x86_reg dest,
+			   struct x86_reg arg0 )
+{
+   emit_load4f_2(p, dest, arg0);
+}
+
+static void emit_load3f_1( struct x86_program *p,
+			   struct x86_reg dest,
+			   struct x86_reg arg0 )
+{
+   emit_load4f_1(p, dest, arg0);
+}
+
+static void emit_load2f_2( struct x86_program *p,
+			   struct x86_reg dest,
+			   struct x86_reg arg0 )
+{
+   sse_movlps(&p->func, dest, arg0);
+}
+
+static void emit_load2f_1( struct x86_program *p,
+			   struct x86_reg dest,
+			   struct x86_reg arg0 )
+{
+   emit_load4f_1(p, dest, arg0);
+}
+
+static void emit_load1f_1( struct x86_program *p,
+			   struct x86_reg dest,
+			   struct x86_reg arg0 )
+{
+   sse_movss(&p->func, dest, arg0);
+}
+/* Loader table indexed [output size - 1][input size - 1]; see emit_load(). */
+static void (*load[4][4])( struct x86_program *p,
+			   struct x86_reg dest,
+			   struct x86_reg arg0 ) = {
+   { emit_load1f_1,
+     emit_load1f_1,
+     emit_load1f_1,
+     emit_load1f_1 },
+
+   { emit_load2f_1,
+     emit_load2f_2,
+     emit_load2f_2,
+     emit_load2f_2 },
+
+   { emit_load3f_1,
+     emit_load3f_2,
+     emit_load3f_3,
+     emit_load3f_3 },
+
+   { emit_load4f_1,
+     emit_load4f_2,
+     emit_load4f_3,
+     emit_load4f_4 }
+};
+/* Dispatch through load[][]; sz and src_sz are used unchecked, so both must be 1..4. */
+static void emit_load( struct x86_program *p,
+		       struct x86_reg dest,
+		       GLuint sz,
+		       struct x86_reg src,
+		       GLuint src_sz)
+{
+   load[sz-1][src_sz-1](p, dest, src);
+}
+
+static void emit_store4f( struct x86_program *p,
+			  struct x86_reg dest,
+			  struct x86_reg arg0 )
+{
+   sse_movups(&p->func, dest, arg0);
+}
+
+static void emit_store3f( struct x86_program *p,
+			  struct x86_reg dest,
+			  struct x86_reg arg0 )
+{
+   if (p->outputs_safe) {
+      /* Emit the extra dword anyway.  This may hurt writecombining,
+       * may cause other problems.
+       */
+      sse_movups(&p->func, dest, arg0);
+   }
+   else {
+      /* Alternate strategy - emit two, shuffle, emit one.
+       */
+      sse_movlps(&p->func, dest, arg0);
+      sse_shufps(&p->func, arg0, arg0, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
+      sse_movss(&p->func, x86_make_disp(dest,8), arg0);
+   }
+}
+
+static void emit_store2f( struct x86_program *p,
+			   struct x86_reg dest,
+			   struct x86_reg arg0 )
+{
+   sse_movlps(&p->func, dest, arg0);
+}
+
+static void emit_store1f( struct x86_program *p,
+			  struct x86_reg dest,
+			  struct x86_reg arg0 )
+{
+   sse_movss(&p->func, dest, arg0);
+}
+
+/* Store table indexed [size - 1]; see emit_store(). */
+static void (*store[4])( struct x86_program *p,
+			 struct x86_reg dest,
+			 struct x86_reg arg0 ) =
+{
+   emit_store1f,
+   emit_store2f,
+   emit_store3f,
+   emit_store4f
+};
+/* Dispatch through store[]; sz is used unchecked, so it must be 1..4. */
+static void emit_store( struct x86_program *p,
+			struct x86_reg dest,
+			GLuint sz,
+			struct x86_reg temp )
+
+{
+   store[sz-1](p, dest, temp);
+}
+/* Emit code that scales 'temp' by p->chan0, converts to integers, packs to bytes and stores one dword at dest. */
+static void emit_pack_store_4ub( struct x86_program *p,
+				 struct x86_reg dest,
+				 struct x86_reg temp )
+{
+   /* Scale by 255.0
+    */
+   sse_mulps(&p->func, temp, p->chan0);
+
+   if (p->have_sse2) {
+      sse2_cvtps2dq(&p->func, temp, temp);
+      sse2_packssdw(&p->func, temp, temp);
+      sse2_packuswb(&p->func, temp, temp);
+      sse_movss(&p->func, dest, temp);
+   }
+   else {
+      struct x86_reg mmx0 = x86_make_reg(file_MMX, 0);   /* no SSE2: convert two floats at a time via MMX */
+      struct x86_reg mmx1 = x86_make_reg(file_MMX, 1);
+      sse_cvtps2pi(&p->func, mmx0, temp);
+      sse_movhlps(&p->func, temp, temp);
+      sse_cvtps2pi(&p->func, mmx1, temp);
+      mmx_packssdw(&p->func, mmx0, mmx1);
+      mmx_packuswb(&p->func, mmx0, mmx0);
+      mmx_movd(&p->func, dest, mmx0);
+   }
+}
+/* Byte distance from a to b (b - a). */
+static GLint get_offset( const void *a, const void *b )
+{
+   return (const char *)b - (const char *)a;
+}
+
+/* Not much happens here.  Eventually use this function to try and
+ * avoid saving/reloading the source pointers each vertex (if some of
+ * them can fit in registers.
+ */ +static void get_src_ptr( struct x86_program *p, + struct x86_reg srcREG, + struct x86_reg vfREG, + struct vf_attr *a ) +{ + struct vertex_fetch *vf = p->vf; + struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr)); + + /* Load current a[j].inputptr + */ + x86_mov(&p->func, srcREG, ptr_to_src); +} + +static void update_src_ptr( struct x86_program *p, + struct x86_reg srcREG, + struct x86_reg vfREG, + struct vf_attr *a ) +{ + if (a->inputstride) { + struct vertex_fetch *vf = p->vf; + struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr)); + + /* add a[j].inputstride (hardcoded value - could just as easily + * pull the stride value from memory each time). + */ + x86_lea(&p->func, srcREG, x86_make_disp(srcREG, a->inputstride)); + + /* save new value of a[j].inputptr + */ + x86_mov(&p->func, ptr_to_src, srcREG); + } +} + + +/* Lots of hardcoding + * + * EAX -- pointer to current output vertex + * ECX -- pointer to current attribute + * + */ +static GLboolean build_vertex_emit( struct x86_program *p ) +{ + struct vertex_fetch *vf = p->vf; + GLuint j = 0; + + struct x86_reg vertexEAX = x86_make_reg(file_REG32, reg_AX); + struct x86_reg srcECX = x86_make_reg(file_REG32, reg_CX); + struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP); + struct x86_reg vfESI = x86_make_reg(file_REG32, reg_SI); + struct x86_reg temp = x86_make_reg(file_XMM, 0); + struct x86_reg vp0 = x86_make_reg(file_XMM, 1); + struct x86_reg vp1 = x86_make_reg(file_XMM, 2); + GLubyte *fixup, *label; + + /* Push a few regs? + */ + x86_push(&p->func, countEBP); + x86_push(&p->func, vfESI); + + + /* Get vertex count, compare to zero + */ + x86_xor(&p->func, srcECX, srcECX); + x86_mov(&p->func, countEBP, x86_fn_arg(&p->func, 2)); + x86_cmp(&p->func, countEBP, srcECX); + fixup = x86_jcc_forward(&p->func, cc_E); + + /* Initialize destination register. 
+ */ + x86_mov(&p->func, vertexEAX, x86_fn_arg(&p->func, 3)); + + /* Move argument 1 (vf) into a reg: + */ + x86_mov(&p->func, vfESI, x86_fn_arg(&p->func, 1)); + + + /* Possibly load vp0, vp1 for viewport calcs: + */ + if (vf->allow_viewport_emits) { + sse_movups(&p->func, vp0, x86_make_disp(vfESI, get_offset(vf, &vf->vp[0]))); + sse_movups(&p->func, vp1, x86_make_disp(vfESI, get_offset(vf, &vf->vp[4]))); + } + + /* always load, needed or not: + */ + sse_movups(&p->func, p->chan0, x86_make_disp(vfESI, get_offset(vf, &vf->chan_scale[0]))); + sse_movups(&p->func, p->identity, x86_make_disp(vfESI, get_offset(vf, &vf->identity[0]))); + + /* Note address for loop jump */ + label = x86_get_label(&p->func); + + /* Emit code for each of the attributes. Currently routes + * everything through SSE registers, even when it might be more + * efficient to stick with regular old x86. No optimization or + * other tricks - enough new ground to cover here just getting + * things working. + */ + while (j < vf->attr_count) { + struct vf_attr *a = &vf->attr[j]; + struct x86_reg dest = x86_make_disp(vertexEAX, a->vertoffset); + + /* Now, load an XMM reg from src, perhaps transform, then save. 
+ * Could be shortcircuited in specific cases: + */ + switch (a->format) { + case EMIT_1F: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize); + emit_store(p, dest, 1, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case EMIT_2F: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize); + emit_store(p, dest, 2, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case EMIT_3F: + /* Potentially the worst case - hardcode 2+1 copying: + */ + if (0) { + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); + emit_store(p, dest, 3, temp); + update_src_ptr(p, srcECX, vfESI, a); + } + else { + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize); + emit_store(p, dest, 2, temp); + if (a->inputsize > 2) { + emit_load(p, temp, 1, x86_make_disp(srcECX, 8), 1); + emit_store(p, x86_make_disp(dest,8), 1, temp); + } + else { + sse_movss(&p->func, x86_make_disp(dest,8), get_identity(p)); + } + update_src_ptr(p, srcECX, vfESI, a); + } + break; + case EMIT_4F: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + emit_store(p, dest, 4, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case EMIT_2F_VIEWPORT: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize); + sse_mulps(&p->func, temp, vp0); + sse_addps(&p->func, temp, vp1); + emit_store(p, dest, 2, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case EMIT_3F_VIEWPORT: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); + sse_mulps(&p->func, temp, vp0); + sse_addps(&p->func, temp, vp1); + emit_store(p, dest, 3, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case EMIT_4F_VIEWPORT: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + sse_mulps(&p->func, temp, vp0); + 
sse_addps(&p->func, temp, vp1); + emit_store(p, dest, 4, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case EMIT_3F_XYW: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + sse_shufps(&p->func, temp, temp, SHUF(X,Y,W,Z)); + emit_store(p, dest, 3, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + + case EMIT_1UB_1F: + /* Test for PAD3 + 1UB: + */ + if (j > 0 && + a[-1].vertoffset + a[-1].vertattrsize <= a->vertoffset - 3) + { + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize); + sse_shufps(&p->func, temp, temp, SHUF(X,X,X,X)); + emit_pack_store_4ub(p, x86_make_disp(dest, -3), temp); /* overkill! */ + update_src_ptr(p, srcECX, vfESI, a); + } + else { + _mesa_printf("Can't emit 1ub %x %x %d\n", a->vertoffset, a[-1].vertoffset, a[-1].vertattrsize ); + return GL_FALSE; + } + break; + case EMIT_3UB_3F_RGB: + case EMIT_3UB_3F_BGR: + /* Test for 3UB + PAD1: + */ + if (j == vf->attr_count - 1 || + a[1].vertoffset >= a->vertoffset + 4) { + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); + if (a->format == EMIT_3UB_3F_BGR) + sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W)); + emit_pack_store_4ub(p, dest, temp); + update_src_ptr(p, srcECX, vfESI, a); + } + /* Test for 3UB + 1UB: + */ + else if (j < vf->attr_count - 1 && + a[1].format == EMIT_1UB_1F && + a[1].vertoffset == a->vertoffset + 3) { + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); + update_src_ptr(p, srcECX, vfESI, a); + + /* Make room for incoming value: + */ + sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z)); + + get_src_ptr(p, srcECX, vfESI, &a[1]); + emit_load(p, temp, 1, x86_deref(srcECX), a[1].inputsize); + update_src_ptr(p, srcECX, vfESI, &a[1]); + + /* Rearrange and possibly do BGR conversion: + */ + if (a->format == EMIT_3UB_3F_BGR) + sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X)); + else + sse_shufps(&p->func, temp, 
temp, SHUF(Y,Z,W,X)); + + emit_pack_store_4ub(p, dest, temp); + j++; /* NOTE: two attrs consumed */ + } + else { + _mesa_printf("Can't emit 3ub\n"); + } + return GL_FALSE; /* add this later */ + break; + + case EMIT_4UB_4F_RGBA: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + emit_pack_store_4ub(p, dest, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case EMIT_4UB_4F_BGRA: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W)); + emit_pack_store_4ub(p, dest, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case EMIT_4UB_4F_ARGB: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z)); + emit_pack_store_4ub(p, dest, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case EMIT_4UB_4F_ABGR: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X)); + emit_pack_store_4ub(p, dest, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case EMIT_4CHAN_4F_RGBA: + switch (CHAN_TYPE) { + case GL_UNSIGNED_BYTE: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + emit_pack_store_4ub(p, dest, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case GL_FLOAT: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + emit_store(p, dest, 4, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case GL_UNSIGNED_SHORT: + default: + _mesa_printf("unknown CHAN_TYPE %s\n", _mesa_lookup_enum_by_nr(CHAN_TYPE)); + return GL_FALSE; + } + break; + default: + _mesa_printf("unknown a[%d].format %d\n", j, a->format); + return GL_FALSE; /* catch any new opcodes */ + } + + /* Increment j by at least 1 - may have been incremented above also: + */ + j++; + } + + /* Next vertex: 
+ */ + x86_lea(&p->func, vertexEAX, x86_make_disp(vertexEAX, vf->vertex_stride)); + + /* decr count, loop if not zero + */ + x86_dec(&p->func, countEBP); + x86_test(&p->func, countEBP, countEBP); + x86_jcc(&p->func, cc_NZ, label); + + /* Exit mmx state? + */ + if (p->func.need_emms) + mmx_emms(&p->func); + + /* Land forward jump here: + */ + x86_fixup_fwd_jump(&p->func, fixup); + + /* Pop regs and return + */ + x86_pop(&p->func, x86_get_base_reg(vfESI)); + x86_pop(&p->func, countEBP); + x86_ret(&p->func); + + vf->emit = (vf_emit_func)x86_get_func(&p->func); + return GL_TRUE; +} + + + +void vf_generate_sse_emit( struct vertex_fetch *vf ) +{ + struct x86_program p; + + if (!cpu_has_xmm) { + vf->codegen_emit = NULL; + return; + } + + _mesa_memset(&p, 0, sizeof(p)); + + p.vf = vf; + p.inputs_safe = 0; /* for now */ + p.outputs_safe = 0; /* for now */ + p.have_sse2 = cpu_has_xmm2; + p.identity = x86_make_reg(file_XMM, 6); + p.chan0 = x86_make_reg(file_XMM, 7); + + x86_init_func(&p.func); + + if (build_vertex_emit(&p)) { + vf_register_fastpath( vf, GL_TRUE ); + } + else { + /* Note the failure so that we don't keep trying to codegen an + * impossible state: + */ + vf_register_fastpath( vf, GL_FALSE ); + x86_release_func(&p.func); + } +} + +#else + +void vf_generate_sse_emit( struct vertex_fetch *vf ) +{ + /* Dummy version for when USE_SSE_ASM not defined */ +} + +#endif diff --git a/src/mesa/x86-64/glapi_x86-64.S b/src/mesa/x86-64/glapi_x86-64.S index cb34061b36..f8337ff93e 100644 --- a/src/mesa/x86-64/glapi_x86-64.S +++ b/src/mesa/x86-64/glapi_x86-64.S @@ -73,7 +73,7 @@ _x86_64_get_dispatch: .p2align 4,,15 _x86_64_get_dispatch: - movq _gl_DispatchTSD(%rip), %rdi + movq _gl_DispatchTSD@GOTPCREL(%rip), %rdi jmp pthread_getspecific@PLT #elif defined(THREADS) diff --git a/src/mesa/x86/Makefile b/src/mesa/x86/Makefile index dc8c7f355e..aa49a9134a 100644 --- a/src/mesa/x86/Makefile +++ b/src/mesa/x86/Makefile @@ -21,7 +21,7 @@ clean: gen_matypes: gen_matypes.c - $(CC) 
$(INCLUDE_DIRS) $(CFLAGS) gen_matypes.c -o gen_matypes + $(HOST_CC) $(INCLUDE_DIRS) $(HOST_CFLAGS) gen_matypes.c -o gen_matypes # need some special rules here, unfortunately matypes.h: ../main/mtypes.h ../tnl/t_context.h gen_matypes diff --git a/src/mesa/x86/rtasm/x86sse.c b/src/mesa/x86/rtasm/x86sse.c index e68f8dfec1..5aedf5b04b 100644 --- a/src/mesa/x86/rtasm/x86sse.c +++ b/src/mesa/x86/rtasm/x86sse.c @@ -1,3 +1,4 @@ +#ifdef USE_X86_ASM #if defined(__i386__) || defined(__386__) #include "main/imports.h" @@ -290,7 +291,7 @@ void x86_call( struct x86_function *p, void (*label)()) void x86_call( struct x86_function *p, struct x86_reg reg) { emit_1ub(p, 0xff); - emit_modrm(p, reg, reg); + emit_modrm_noreg(p, 2, reg); } #endif @@ -1192,3 +1193,9 @@ void x86sse_dummy( void ) } #endif + +#else /* USE_X86_ASM */ + +int x86sse_c_dummy_var; /* silence warning */ + +#endif /* USE_X86_ASM */ |