From 3d4246e22e90ffef5fe0cd935bd54c7f862e82f7 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 20 Mar 2009 12:19:40 +0100 Subject: slang: Use _mesa_snprintf() wrapper. --- src/mesa/shader/slang/slang_codegen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c index 8263aae334..a7cfc45e6f 100644 --- a/src/mesa/shader/slang/slang_codegen.c +++ b/src/mesa/shader/slang/slang_codegen.c @@ -1441,7 +1441,7 @@ _slang_gen_function_call(slang_assemble_ctx *A, slang_function *fun, if (A->pragmas->Debug) { char s[1000]; - snprintf(s, sizeof(s), "Call/inline %s()", (char *) fun->header.a_name); + _mesa_snprintf(s, sizeof(s), "Call/inline %s()", (char *) fun->header.a_name); n->Comment = _slang_strdup(s); } -- cgit v1.2.3 From 4489f9efee58f2cba374298f2d43c96e5cd2ff41 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 20 Mar 2009 13:05:51 +0100 Subject: stw: Keep per-thread storage for current context and pixel format. --- src/gallium/state_trackers/wgl/SConscript | 1 + .../state_trackers/wgl/shared/stw_context.c | 12 +-- src/gallium/state_trackers/wgl/shared/stw_device.c | 23 +++++ .../state_trackers/wgl/shared/stw_pixelformat.c | 9 +- src/gallium/state_trackers/wgl/shared/stw_tls.c | 101 +++++++++++++++++++++ src/gallium/state_trackers/wgl/shared/stw_tls.h | 53 +++++++++++ src/gallium/state_trackers/wgl/shared/stw_winsys.h | 6 ++ 7 files changed, 193 insertions(+), 12 deletions(-) create mode 100644 src/gallium/state_trackers/wgl/shared/stw_tls.c create mode 100644 src/gallium/state_trackers/wgl/shared/stw_tls.h (limited to 'src') diff --git a/src/gallium/state_trackers/wgl/SConscript b/src/gallium/state_trackers/wgl/SConscript index 2141b02d68..038a7a31b3 100644 --- a/src/gallium/state_trackers/wgl/SConscript +++ b/src/gallium/state_trackers/wgl/SConscript @@ -30,6 +30,7 @@ if env['platform'] in ['windows']: 'shared/stw_arbextensionsstring.c', 'shared/stw_getprocaddress.c', 'shared/stw_arbpixelformat.c', + 'shared/stw_tls.c', ] wgl = env.ConvenienceLibrary( diff --git a/src/gallium/state_trackers/wgl/shared/stw_context.c b/src/gallium/state_trackers/wgl/shared/stw_context.c index d77daac39c..69f25d6187 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_context.c +++ b/src/gallium/state_trackers/wgl/shared/stw_context.c @@ -39,9 +39,7 @@ #include "shared/stw_pixelformat.h" #include "stw_public.h" #include "stw_context.h" - -static HDC current_hdc = NULL; -static UINT_PTR current_hglrc = 0; +#include "stw_tls.h" BOOL stw_copy_context( @@ -264,13 +262,13 @@ get_window_size( HDC hdc, GLuint *width, GLuint *height ) UINT_PTR stw_get_current_context( void ) { - return current_hglrc; + return stw_tls_get_data()->currentGLRC; } HDC stw_get_current_dc( void ) { - return current_hdc; + return stw_tls_get_data()->currentDC; } BOOL @@ -295,8 +293,8 @@ stw_make_current( if (ctx == NULL) return FALSE; - current_hdc = hdc; - current_hglrc = hglrc; + stw_tls_get_data()->currentDC = hdc; + stw_tls_get_data()->currentGLRC = hglrc; if (glcurctx != NULL) { curctx = (struct stw_context *) glcurctx->DriverCtx; diff --git a/src/gallium/state_trackers/wgl/shared/stw_device.c b/src/gallium/state_trackers/wgl/shared/stw_device.c index 0dca856d73..4bec036fe3 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_device.c +++ b/src/gallium/state_trackers/wgl/shared/stw_device.c @@ -35,6 +35,7 @@ #include "shared/stw_winsys.h" #include "shared/stw_pixelformat.h" #include "shared/stw_public.h" +#include "shared/stw_tls.h" #ifdef WIN32_THREADS extern _glthread_Mutex OneTimeLock; @@ -70,6 +71,8 @@ st_init(const struct stw_winsys *stw_winsys) assert(!stw_dev); + stw_tls_init(); + stw_dev = &stw_dev_storage; memset(stw_dev, 0, sizeof(*stw_dev)); @@ -101,6 +104,24 @@ error1: } +boolean +st_init_thread(void) +{ + if (!stw_tls_init_thread()) { + return FALSE; + } + + return TRUE; +} + + +void +st_cleanup_thread(void) +{ + stw_tls_cleanup_thread(); +} + + void st_cleanup(void) { @@ -133,6 +154,8 @@ st_cleanup(void) debug_memory_end(stw_dev->memdbg_no); #endif + stw_tls_cleanup(); + stw_dev = NULL; } diff --git a/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c b/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c index 2992a1ac0a..b216ca5c82 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c +++ b/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c @@ -28,6 +28,7 @@ #include "util/u_debug.h" #include "stw_pixelformat.h" #include "stw_public.h" +#include "stw_tls.h" #define MAX_PIXELFORMATS 16 @@ -35,8 +36,6 @@ static struct pixelformat_info pixelformats[MAX_PIXELFORMATS]; static uint pixelformat_count = 0; static uint pixelformat_extended_count = 0; -static uint currentpixelformat = 0; - static void add_standard_pixelformats( @@ -248,7 +247,7 @@ int stw_pixelformat_get( HDC hdc ) { - return currentpixelformat; + return stw_tls_get_data()->currentPixelFormat; } @@ -267,8 +266,8 @@ stw_pixelformat_set( if (index >= count) return FALSE; - currentpixelformat = iPixelFormat; - + stw_tls_get_data()->currentPixelFormat = iPixelFormat; + /* Some applications mistakenly use the undocumented wglSetPixelFormat * function instead of SetPixelFormat, so we call SetPixelFormat here to * avoid opengl32.dll's wglCreateContext to fail */ diff --git a/src/gallium/state_trackers/wgl/shared/stw_tls.c b/src/gallium/state_trackers/wgl/shared/stw_tls.c new file mode 100644 index 0000000000..e72bafb880 --- /dev/null +++ b/src/gallium/state_trackers/wgl/shared/stw_tls.c @@ -0,0 +1,101 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include + +#include "pipe/p_compiler.h" +#include "util/u_memory.h" +#include "stw_tls.h" + +static DWORD tlsIndex = TLS_OUT_OF_INDEXES; + +boolean +stw_tls_init(void) +{ + tlsIndex = TlsAlloc(); + if (tlsIndex == TLS_OUT_OF_INDEXES) { + return FALSE; + } + + return TRUE; +} + +boolean +stw_tls_init_thread(void) +{ + struct stw_tls_data *data; + + if (tlsIndex == TLS_OUT_OF_INDEXES) { + return FALSE; + } + + data = MALLOC(sizeof(*data)); + if (!data) { + return FALSE; + } + + data->currentPixelFormat = 0; + data->currentDC = NULL; + data->currentGLRC = 0; + + TlsSetValue(tlsIndex, data); + + return TRUE; +} + +void +stw_tls_cleanup_thread(void) +{ + struct stw_tls_data *data; + + if (tlsIndex == TLS_OUT_OF_INDEXES) { + return; + } + + data = (struct stw_tls_data *) TlsGetValue(tlsIndex); + TlsSetValue(tlsIndex, NULL); + FREE(data); +} + +void +stw_tls_cleanup(void) +{ + if (tlsIndex != TLS_OUT_OF_INDEXES) { + TlsFree(tlsIndex); + tlsIndex = TLS_OUT_OF_INDEXES; + } +} + +struct stw_tls_data * +stw_tls_get_data(void) +{ + if (tlsIndex == TLS_OUT_OF_INDEXES) { + return NULL; + } + + return (struct stw_tls_data *) TlsGetValue(tlsIndex); +} diff --git a/src/gallium/state_trackers/wgl/shared/stw_tls.h b/src/gallium/state_trackers/wgl/shared/stw_tls.h new file mode 100644 index 0000000000..23b61e68ff --- /dev/null +++ b/src/gallium/state_trackers/wgl/shared/stw_tls.h @@ -0,0 +1,53 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef STW_TLS_H +#define STW_TLS_H + +struct stw_tls_data +{ + uint currentPixelFormat; + HDC currentDC; + UINT_PTR currentGLRC; +}; + +boolean +stw_tls_init(void); + +boolean +stw_tls_init_thread(void); + +void +stw_tls_cleanup_thread(void); + +void +stw_tls_cleanup(void); + +struct stw_tls_data * +stw_tls_get_data(void); + +#endif /* STW_TLS_H */ diff --git a/src/gallium/state_trackers/wgl/shared/stw_winsys.h b/src/gallium/state_trackers/wgl/shared/stw_winsys.h index a85a9a2257..e4a1d4f979 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_winsys.h +++ b/src/gallium/state_trackers/wgl/shared/stw_winsys.h @@ -53,6 +53,12 @@ struct stw_winsys boolean st_init(const struct stw_winsys *stw_winsys); +boolean +st_init_thread(void); + +void +st_cleanup_thread(void); + void st_cleanup(void); -- cgit v1.2.3 From 36e985e96e6da817042ba1b2dfadf96f85e32afb Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 20 Mar 2009 13:06:28 +0100 Subject: winsys/gdi: Init state tracker's per-thread data. --- src/gallium/winsys/gdi/gdi_softpipe_winsys.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c index 440666d835..d5d9431865 100644 --- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c +++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c @@ -312,9 +312,20 @@ DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) { switch (fdwReason) { case DLL_PROCESS_ATTACH: - return st_init(&stw_winsys); + if (!st_init(&stw_winsys)) { + return FALSE; + } + return st_init_thread(); + + case DLL_THREAD_ATTACH: + return st_init_thread(); + + case DLL_THREAD_DETACH: + st_cleanup_thread(); + break; case DLL_PROCESS_DETACH: + st_cleanup_thread(); st_cleanup(); break; } -- cgit v1.2.3 From 5465f3adf93bd58b528bd6703b2367eb00c78c31 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 20 Mar 2009 15:45:00 +0100 Subject: stw: Use u_handle_table to maintain context list. --- .../state_trackers/wgl/shared/stw_context.c | 24 ++++++++-------------- src/gallium/state_trackers/wgl/shared/stw_device.c | 19 +++++++++++------ src/gallium/state_trackers/wgl/shared/stw_device.h | 8 ++------ 3 files changed, 23 insertions(+), 28 deletions(-) (limited to 'src') diff --git a/src/gallium/state_trackers/wgl/shared/stw_context.c b/src/gallium/state_trackers/wgl/shared/stw_context.c index 69f25d6187..31cb025ac5 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_context.c +++ b/src/gallium/state_trackers/wgl/shared/stw_context.c @@ -135,17 +135,7 @@ stw_create_layer_context( pipe_mutex_lock( stw_dev->mutex ); { - UINT_PTR i; - - for (i = 0; i < STW_CONTEXT_MAX; i++) { - if (stw_dev->ctx_array[i].ctx == NULL) { - /* success: - */ - stw_dev->ctx_array[i].ctx = ctx; - hglrc = i + 1; - break; - } - } + hglrc = handle_table_add(stw_dev->ctx_table, ctx); } pipe_mutex_unlock( stw_dev->mutex ); @@ -195,12 +185,14 @@ stw_delete_context( if (WindowFromDC( ctx->hdc ) != NULL) ReleaseDC( WindowFromDC( ctx->hdc ), ctx->hdc ); - st_destroy_context( ctx->st ); + pipe_mutex_lock(stw_dev->mutex); + { + st_destroy_context(ctx->st); + FREE(ctx); + handle_table_remove(stw_dev->ctx_table, hglrc); + } + pipe_mutex_unlock(stw_dev->mutex); - FREE( ctx ); - - stw_dev->ctx_array[hglrc - 1].ctx = NULL; - ret = TRUE; } diff --git a/src/gallium/state_trackers/wgl/shared/stw_device.c b/src/gallium/state_trackers/wgl/shared/stw_device.c index 4bec036fe3..3c1eb1ad39 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_device.c +++ b/src/gallium/state_trackers/wgl/shared/stw_device.c @@ -94,6 +94,11 @@ st_init(const struct stw_winsys *stw_winsys) pipe_mutex_init( stw_dev->mutex ); + stw_dev->ctx_table = handle_table_create(); + if (!stw_dev->ctx_table) { + goto error1; + } + pixelformat_init(); return TRUE; @@ -135,9 +140,12 @@ st_cleanup(void) pipe_mutex_lock( stw_dev->mutex ); { /* Ensure all contexts are destroyed */ - for (i = 0; i < STW_CONTEXT_MAX; i++) - if (stw_dev->ctx_array[i].ctx) - stw_delete_context( i + 1 ); + i = handle_table_get_first_handle(stw_dev->ctx_table); + while (i) { + stw_delete_context(i); + i = handle_table_get_next_handle(stw_dev->ctx_table, i); + } + handle_table_destroy(stw_dev->ctx_table); } pipe_mutex_unlock( stw_dev->mutex ); @@ -163,13 +171,12 @@ st_cleanup(void) struct stw_context * stw_lookup_context( UINT_PTR dhglrc ) { - if (dhglrc == 0 || - dhglrc >= STW_CONTEXT_MAX) + if (dhglrc == 0) return NULL; if (stw_dev == NULL) return NULL; - return stw_dev->ctx_array[dhglrc - 1].ctx; + return (struct stw_context *) handle_table_get(stw_dev->ctx_table, dhglrc); } diff --git a/src/gallium/state_trackers/wgl/shared/stw_device.h b/src/gallium/state_trackers/wgl/shared/stw_device.h index 80da14b84f..6a9cee0d02 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_device.h +++ b/src/gallium/state_trackers/wgl/shared/stw_device.h @@ -31,9 +31,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_thread.h" - - -#define STW_CONTEXT_MAX 32 +#include "util/u_handle_table.h" struct pipe_screen; @@ -45,9 +43,7 @@ struct stw_device pipe_mutex mutex; - struct { - struct stw_context *ctx; - } ctx_array[STW_CONTEXT_MAX]; + struct handle_table *ctx_table; #ifdef DEBUG unsigned long memdbg_no; -- cgit v1.2.3 From d6e877d0d716805907cbbaca3bcfcee84f98add6 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 20 Mar 2009 16:36:23 +0100 Subject: stw: Do not err on nil context handle in MakeCurrent(). --- src/gallium/state_trackers/wgl/shared/stw_context.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'src') diff --git a/src/gallium/state_trackers/wgl/shared/stw_context.c b/src/gallium/state_trackers/wgl/shared/stw_context.c index 31cb025ac5..89df8b0a2a 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_context.c +++ b/src/gallium/state_trackers/wgl/shared/stw_context.c @@ -281,9 +281,6 @@ stw_make_current( pipe_mutex_lock( stw_dev->mutex ); ctx = stw_lookup_context( hglrc ); pipe_mutex_unlock( stw_dev->mutex ); - - if (ctx == NULL) - return FALSE; stw_tls_get_data()->currentDC = hdc; stw_tls_get_data()->currentGLRC = hglrc; -- cgit v1.2.3 From 66175aac7609ad314f25fbdff0d3958af310dc24 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 18 Mar 2009 12:07:09 -0700 Subject: Fix DRI2 accelerated EXT_texture_from_pixmap with GL_RGB format. This requires upgrading the interface so that the argument to glXBindTexImageEXT isn't just dropped on the floor. Note that this only fixes the accelerated path on Intel, as Mesa's texture format support is missing x8r8g8b8 support (right now, GL_RGB textures get uploaded as a8r8gb8, but in this case we're not doing the upload so we can't really work around it that way). Fixes bugs with compositors trying to use shaders that use alpha channels, on windows without a valid alpha channel. Bug #19910 and likely others as well. Reviewed-by: Ian Romanick --- include/GL/internal/dri_interface.h | 16 ++++++++++++++-- include/GL/internal/glcore.h | 4 ++++ src/glx/x11/glx_pbuffer.c | 19 +++++++++++++++++++ src/glx/x11/glxclient.h | 1 + src/glx/x11/glxcmds.c | 18 +++++++++++++----- src/mesa/drivers/dri/i915/i830_texstate.c | 10 +++++++--- src/mesa/drivers/dri/i915/i915_texstate.c | 11 ++++++++--- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 24 +++++++++++++++++------- src/mesa/drivers/dri/intel/intel_screen.c | 1 + src/mesa/drivers/dri/intel/intel_tex.h | 2 ++ src/mesa/drivers/dri/intel/intel_tex_image.c | 18 ++++++++++++++++-- 11 files changed, 102 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h index a726b93234..a83602bfd8 100644 --- a/include/GL/internal/dri_interface.h +++ b/include/GL/internal/dri_interface.h @@ -231,7 +231,7 @@ struct __DRItexOffsetExtensionRec { #define __DRI_TEX_BUFFER "DRI_TexBuffer" -#define __DRI_TEX_BUFFER_VERSION 1 +#define __DRI_TEX_BUFFER_VERSION 2 struct __DRItexBufferExtensionRec { __DRIextension base; @@ -239,11 +239,23 @@ struct __DRItexBufferExtensionRec { * Method to override base texture image with the contents of a * __DRIdrawable. * - * For GLX_EXT_texture_from_pixmap with AIGLX. + * For GLX_EXT_texture_from_pixmap with AIGLX. Deprecated in favor of + * setTexBuffer2 in version 2 of this interface */ void (*setTexBuffer)(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *pDraw); + + /** + * Method to override base texture image with the contents of a + * __DRIdrawable, including the required texture format attribute. + * + * For GLX_EXT_texture_from_pixmap with AIGLX. + */ + void (*setTexBuffer2)(__DRIcontext *pDRICtx, + GLint target, + GLint format, + __DRIdrawable *pDraw); }; /** diff --git a/include/GL/internal/glcore.h b/include/GL/internal/glcore.h index 547b111370..18f657662a 100644 --- a/include/GL/internal/glcore.h +++ b/include/GL/internal/glcore.h @@ -178,4 +178,8 @@ typedef struct __GLcontextModesRec { #define GLX_TEXTURE_2D_BIT_EXT 0x00000002 #define GLX_TEXTURE_RECTANGLE_BIT_EXT 0x00000004 +#define GLX_TEXTURE_FORMAT_NONE_EXT 0x20D8 +#define GLX_TEXTURE_FORMAT_RGB_EXT 0x20D9 +#define GLX_TEXTURE_FORMAT_RGBA_EXT 0x20DA + #endif /* __gl_core_h_ */ diff --git a/src/glx/x11/glx_pbuffer.c b/src/glx/x11/glx_pbuffer.c index a602cd2881..6bcf965056 100644 --- a/src/glx/x11/glx_pbuffer.c +++ b/src/glx/x11/glx_pbuffer.c @@ -189,6 +189,21 @@ determineTextureTarget(const int *attribs, int numAttribs) return target; } + + +static GLenum +determineTextureFormat(const int *attribs, int numAttribs) +{ + GLenum target = 0; + int i; + + for (i = 0; i < numAttribs; i++) { + if (attribs[2 * i] == GLX_TEXTURE_FORMAT_EXT) + return attribs[2 * i + 1]; + } + + return 0; +} #endif /** @@ -294,6 +309,9 @@ GetDrawableAttribute(Display * dpy, GLXDrawable drawable, if (pdraw != NULL && !pdraw->textureTarget) pdraw->textureTarget = determineTextureTarget((const int *) data, num_attributes); + if (pdraw != NULL && !pdraw->textureFormat) + pdraw->textureFormat = + determineTextureFormat((const int *) data, num_attributes); } #endif @@ -374,6 +392,7 @@ CreateDrawable(Display * dpy, const __GLcontextModes * fbconfig, } pdraw->textureTarget = determineTextureTarget(attrib_list, i); + pdraw->textureFormat = determineTextureFormat(attrib_list, i); } while (0); #endif diff --git a/src/glx/x11/glxclient.h b/src/glx/x11/glxclient.h index caf58bbd44..c42e80a0e8 100644 --- a/src/glx/x11/glxclient.h +++ b/src/glx/x11/glxclient.h @@ -161,6 +161,7 @@ struct __GLXDRIdrawableRec { __GLXscreenConfigs *psc; GLenum textureTarget; __DRIdrawable *driDrawable; + GLenum textureFormat; /* EXT_texture_from_pixmap support */ }; /* diff --git a/src/glx/x11/glxcmds.c b/src/glx/x11/glxcmds.c index fc0e593cb3..e5c0db4c96 100644 --- a/src/glx/x11/glxcmds.c +++ b/src/glx/x11/glxcmds.c @@ -2631,11 +2631,19 @@ static void __glXBindTexImageEXT(Display *dpy, if (gc->driContext) { __GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, drawable, NULL); - if (pdraw != NULL) - (*pdraw->psc->texBuffer->setTexBuffer)(gc->__driContext, - pdraw->textureTarget, - pdraw->driDrawable); - + if (pdraw != NULL) { + if (pdraw->psc->texBuffer->base.version >= 2 && + pdraw->psc->texBuffer->setTexBuffer2 != NULL) { + (*pdraw->psc->texBuffer->setTexBuffer2)(gc->__driContext, + pdraw->textureTarget, + pdraw->textureFormat, + pdraw->driDrawable); + } else { + (*pdraw->psc->texBuffer->setTexBuffer)(gc->__driContext, + pdraw->textureTarget, + pdraw->driDrawable); + } + } return; } #endif diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c index c718bb0055..df43b779a7 100644 --- a/src/mesa/drivers/dri/i915/i830_texstate.c +++ b/src/mesa/drivers/dri/i915/i830_texstate.c @@ -38,7 +38,7 @@ static GLuint -translate_texture_format(GLuint mesa_format) +translate_texture_format(GLuint mesa_format, GLuint internal_format) { switch (mesa_format) { case MESA_FORMAT_L8: @@ -56,7 +56,10 @@ translate_texture_format(GLuint mesa_format) case MESA_FORMAT_ARGB4444: return MAPSURF_16BIT | MT_16BIT_ARGB4444; case MESA_FORMAT_ARGB8888: - return MAPSURF_32BIT | MT_32BIT_ARGB8888; + if (internal_format == GL_RGB) + return MAPSURF_32BIT | MT_32BIT_XRGB8888; + else + return MAPSURF_32BIT | MT_32BIT_ARGB8888; case MESA_FORMAT_YCBCR_REV: return (MAPSURF_422 | MT_422_YCRCB_NORMAL); case MESA_FORMAT_YCBCR: @@ -162,7 +165,8 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) 0, intelObj-> firstLevel); - format = translate_texture_format(firstImage->TexFormat->MesaFormat); + format = translate_texture_format(firstImage->TexFormat->MesaFormat, + firstImage->InternalFormat); pitch = intelObj->mt->pitch * intelObj->mt->cpp; } diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index adbb52a3a3..6d25f8dd8e 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -37,7 +37,8 @@ static GLuint -translate_texture_format(GLuint mesa_format, GLenum DepthMode) +translate_texture_format(GLuint mesa_format, GLuint internal_format, + GLenum DepthMode) { switch (mesa_format) { case MESA_FORMAT_L8: @@ -55,7 +56,10 @@ translate_texture_format(GLuint mesa_format, GLenum DepthMode) case MESA_FORMAT_ARGB4444: return MAPSURF_16BIT | MT_16BIT_ARGB4444; case MESA_FORMAT_ARGB8888: - return MAPSURF_32BIT | MT_32BIT_ARGB8888; + if (internal_format == GL_RGB) + return MAPSURF_32BIT | MT_32BIT_XRGB8888; + else + return MAPSURF_32BIT | MT_32BIT_ARGB8888; case MESA_FORMAT_YCBCR_REV: return (MAPSURF_422 | MT_422_YCRCB_NORMAL); case MESA_FORMAT_YCBCR: @@ -173,7 +177,8 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) firstLevel); format = translate_texture_format(firstImage->TexFormat->MesaFormat, - tObj->DepthMode); + firstImage->InternalFormat, + tObj->DepthMode); pitch = intelObj->mt->pitch * intelObj->mt->cpp; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 9b320480b6..e6113eff87 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -69,7 +69,8 @@ static GLuint translate_tex_target( GLenum target ) } -static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) +static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, + GLenum depth_mode ) { switch( mesa_format ) { case MESA_FORMAT_L8: @@ -89,10 +90,16 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) return BRW_SURFACEFORMAT_R8G8B8_UNORM; case MESA_FORMAT_ARGB8888: - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + if (internal_format == GL_RGB) + return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; + else + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; case MESA_FORMAT_RGBA8888_REV: - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + if (internal_format == GL_RGB) + return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; + else + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; case MESA_FORMAT_RGB565: return BRW_SURFACEFORMAT_B5G6R5_UNORM; @@ -161,7 +168,7 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) struct brw_wm_surface_key { GLenum target, depthmode; dri_bo *bo; - GLint format; + GLint format, internal_format; GLint first_level, last_level; GLint width, height, depth; GLint pitch, cpp; @@ -199,9 +206,11 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; surf.ss0.surface_type = translate_tex_target(key->target); - - if (key->bo) - surf.ss0.surface_format = translate_tex_format(key->format, key->depthmode); + if (key->bo) { + surf.ss0.surface_format = translate_tex_format(key->format, + key->internal_format, + key->depthmode); + } else { switch (key->depth) { case 32: @@ -278,6 +287,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.offset = intelObj->textureOffset; } else { key.format = firstImage->TexFormat->MesaFormat; + key.internal_format = firstImage->InternalFormat; key.pitch = intelObj->mt->pitch; key.depth = firstImage->Depth; key.bo = intelObj->mt->region->buffer; diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index e8c074712c..d20ea15187 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -211,6 +211,7 @@ static const __DRItexOffsetExtension intelTexOffsetExtension = { static const __DRItexBufferExtension intelTexBufferExtension = { { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION }, intelSetTexBuffer, + intelSetTexBuffer2, }; static const __DRIextension *intelScreenExtensions[] = { diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h index 742ccc043a..f5372d82fb 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.h +++ b/src/mesa/drivers/dri/intel/intel_tex.h @@ -149,6 +149,8 @@ void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch); void intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *pDraw); +void intelSetTexBuffer2(__DRIcontext *pDRICtx, + GLint target, GLint format, __DRIdrawable *pDraw); GLuint intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit); diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 943636c37b..e902187637 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -714,7 +714,9 @@ intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, } void -intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) +intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, + GLint glx_texture_format, + __DRIdrawable *dPriv) { struct intel_framebuffer *intel_fb = dPriv->driverPrivate; struct intel_context *intel = pDRICtx->driverPrivate; @@ -745,7 +747,10 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) type = GL_BGRA; format = GL_UNSIGNED_BYTE; - internalFormat = (rb->region->cpp == 3 ? 3 : 4); + if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) + internalFormat = GL_RGB; + else + internalFormat = GL_RGBA; mt = intel_miptree_create_for_region(intel, target, internalFormat, @@ -785,3 +790,12 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) _mesa_unlock_texture(&intel->ctx, texObj); } + +void +intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) +{ + /* The old interface didn't have the format argument, so copy our + * implementation's behavior at the time. + */ + intelSetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv); +} -- cgit v1.2.3 From 210b468722ae4d4a97ccd788ad9de58858a0c7fa Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 20 Mar 2009 15:08:59 +0000 Subject: gallium/util: add upload manager helper module Add a module that will manage uploading and coalescing multiple user-buffers, malloc-buffers and other random data that doesn't happen to be in a GPU buffer already. The module stuffs multiple little uploads into larger GPU buffers to reduce create/destroy overheads, etc. --- src/gallium/auxiliary/util/Makefile | 1 + src/gallium/auxiliary/util/SConscript | 1 + src/gallium/auxiliary/util/u_upload_mgr.c | 220 ++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_upload_mgr.h | 75 ++++++++++ 4 files changed, 297 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_upload_mgr.c create mode 100644 src/gallium/auxiliary/util/u_upload_mgr.h (limited to 'src') diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 160df8dfa7..d68bdeadcc 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -24,6 +24,7 @@ C_SOURCES = \ u_tile.c \ u_time.c \ u_timed_winsys.c \ + u_upload_mgr.c \ u_simple_screen.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 9d5dd006f0..0f15c632c3 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -26,6 +26,7 @@ util = env.ConvenienceLibrary( 'u_tile.c', 'u_time.c', 'u_timed_winsys.c', + 'u_upload_mgr.c', 'u_simple_screen.c', ]) diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c new file mode 100644 index 0000000000..d9c0d7afa8 --- /dev/null +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -0,0 +1,220 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Helper utility for uploading user buffers & other data, and + * coalescing small buffers into larger ones. + */ + +#include "pipe/p_error.h" +#include "pipe/p_inlines.h" +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "u_upload_mgr.h" + + +struct u_upload_mgr { + struct pipe_screen *screen; + + unsigned default_size; + unsigned alignment; + unsigned usage; + + /* The active buffer: + */ + struct pipe_buffer *buffer; + unsigned size; + unsigned offset; +}; + + +struct u_upload_mgr *u_upload_create( struct pipe_screen *screen, + unsigned default_size, + unsigned alignment, + unsigned usage ) +{ + struct u_upload_mgr *upload = CALLOC_STRUCT( u_upload_mgr ); + + upload->default_size = default_size; + upload->screen = screen; + upload->alignment = alignment; + upload->usage = usage; + upload->buffer = NULL; + + return upload; +} + + +static INLINE void +my_buffer_write(struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned offset, unsigned size, unsigned dirty_size, + const void *data) +{ + uint8_t *map; + + assert(offset < buf->size); + assert(offset + size <= buf->size); + assert(dirty_size >= size); + assert(size); + + map = pipe_buffer_map_range(screen, buf, offset, size, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(map); + if(map) { + memcpy(map + offset, data, size); + pipe_buffer_flush_mapped_range(screen, buf, offset, dirty_size); + pipe_buffer_unmap(screen, buf); + } +} + +/* Release old buffer. + * + * This must usually be called prior to firing the command stream + * which references the upload buffer, as many memory managers will + * cause subsequent maps of a fired buffer to wait. + * + * Can improve this with a change to pipe_buffer_write to use the + * DONT_WAIT bit, but for now, it's easiest just to grab a new buffer. + */ +void u_upload_flush( struct u_upload_mgr *upload ) +{ + pipe_buffer_reference( &upload->buffer, NULL ); + upload->size = 0; +} + + +void u_upload_destroy( struct u_upload_mgr *upload ) +{ + u_upload_flush( upload ); + FREE( upload ); +} + + +static enum pipe_error +u_upload_alloc_buffer( struct u_upload_mgr *upload, + unsigned min_size ) +{ + /* Release old buffer, if present: + */ + u_upload_flush( upload ); + + /* Allocate a new one: + */ + upload->size = align(MAX2(upload->default_size, min_size), 4096); + + upload->buffer = pipe_buffer_create( upload->screen, + upload->alignment, + upload->usage | PIPE_BUFFER_USAGE_CPU_WRITE, + upload->size ); + if (upload->buffer == NULL) + goto fail; + + upload->offset = 0; + return 0; + +fail: + if (upload->buffer) + pipe_buffer_reference( &upload->buffer, NULL ); + + return PIPE_ERROR_OUT_OF_MEMORY; +} + + +enum pipe_error u_upload_data( struct u_upload_mgr *upload, + unsigned size, + const void *data, + unsigned *out_offset, + struct pipe_buffer **outbuf ) +{ + unsigned alloc_size = align( size, upload->alignment ); + enum pipe_error ret = PIPE_OK; + + if (upload->offset + alloc_size > upload->size) { + ret = u_upload_alloc_buffer( upload, alloc_size ); + if (ret) + return ret; + } + + /* Copy the data, using map_range if available: + */ + my_buffer_write( upload->screen, + upload->buffer, + upload->offset, + size, + alloc_size, + data ); + + /* Emit the return values: + */ + pipe_buffer_reference( outbuf, upload->buffer ); + *out_offset = upload->offset; + upload->offset += alloc_size; + return PIPE_OK; +} + + +/* As above, but upload the full contents of a buffer. Useful for + * uploading user buffers, avoids generating an explosion of GPU + * buffers if you have an app that does lots of small vertex buffer + * renders or DrawElements calls. + */ +enum pipe_error u_upload_buffer( struct u_upload_mgr *upload, + unsigned offset, + unsigned size, + struct pipe_buffer *inbuf, + unsigned *out_offset, + struct pipe_buffer **outbuf ) +{ + enum pipe_error ret = PIPE_OK; + const char *map = NULL; + + map = (const char *)pipe_buffer_map( + upload->screen, inbuf, PIPE_BUFFER_USAGE_CPU_READ ); + + if (map == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto done; + } + + if (0) + debug_printf("upload ptr %p ofs %d sz %d\n", map, offset, size); + + ret = u_upload_data( upload, + size, + map + offset, + out_offset, + outbuf ); + if (ret) + goto done; + +done: + if (map) + pipe_buffer_unmap( upload->screen, inbuf ); + + return ret; +} diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h new file mode 100644 index 0000000000..745b5834af --- /dev/null +++ b/src/gallium/auxiliary/util/u_upload_mgr.h @@ -0,0 +1,75 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Helper utility for uploading user buffers & other data, and + * coalescing small buffers into larger ones. + */ + +#ifndef U_UPLOAD_MGR_H +#define U_UPLOAD_MGR_H + +struct pipe_screen; +struct pipe_buffer; +struct u_upload_mgr; + + +struct u_upload_mgr *u_upload_create( struct pipe_screen *screen, + unsigned default_size, + unsigned alignment, + unsigned usage ); + +void u_upload_destroy( struct u_upload_mgr *upload ); + +/* Unmap and release old buffer. + * + * This must usually be called prior to firing the command stream + * which references the upload buffer, as many memory managers either + * don't like firing a mapped buffer or cause subsequent maps of a + * fired buffer to wait. For now, it's easiest just to grab a new + * buffer. + */ +void u_upload_flush( struct u_upload_mgr *upload ); + + +enum pipe_error u_upload_data( struct u_upload_mgr *upload, + unsigned size, + const void *data, + unsigned *out_offset, + struct pipe_buffer **outbuf ); + + +enum pipe_error u_upload_buffer( struct u_upload_mgr *upload, + unsigned offset, + unsigned size, + struct pipe_buffer *inbuf, + unsigned *out_offset, + struct pipe_buffer **outbuf ); + + + +#endif + -- cgit v1.2.3 From 48f6e754898879898165dca013157dffe2babb12 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 18 Mar 2009 16:54:25 +0000 Subject: gallium: Explain what happens if buffer_flush_mapped_range isn't called. --- src/gallium/include/pipe/p_screen.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src') diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index ed3a026023..ceac755e71 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -223,6 +223,13 @@ struct pipe_screen { * specified to buffer_map_range. This is different from the * ARB_map_buffer_range semantics because we don't forbid multiple mappings * of the same buffer (yet). + * + * If the buffer was mapped for writing and no buffer_flush_mapped_range + * call was done until the buffer_unmap is called then the pipe driver will + * assumed that the whole buffer was written. This is for backward + * compatibility purposes and may affect performance -- the state tracker + * should always specify exactly what got written while the buffer was + * mapped. */ void (*buffer_flush_mapped_range)( struct pipe_screen *screen, struct pipe_buffer *buf, -- cgit v1.2.3 From 8852ac2b354522b194e32f8651e3511e69586bd1 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 19 Mar 2009 12:29:03 -0700 Subject: r300-gallium: A bit more invariant state. --- src/gallium/drivers/r300/r300_reg.h | 17 ++++++++++--- src/gallium/drivers/r300/r300_state_invariant.c | 34 +++++++++++++------------ 2 files changed, 31 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 6f3ad970ab..3fe45e1393 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -293,10 +293,19 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */ /* Programmable Stream Control Signed Normalize Control */ -#define R300_VAP_PSC_SGN_NORM_CNTL 0x21dc -# define SGN_NORM_ZERO 0 -# define SGN_NORM_ZERO_CLAMP_MINUS_ONE 1 -# define SGN_NORM_NO_ZERO 2 +#define R300_VAP_PSC_SGN_NORM_CNTL 0x21dc +# define SGN_NORM_ZERO 0 +# define SGN_NORM_ZERO_CLAMP_MINUS_ONE 1 +# define SGN_NORM_NO_ZERO 2 +# define R300_SGN_NORM_NO_ZERO (SGN_NORM_NO_ZERO | \ + (SGN_NORM_NO_ZERO << 2) | (SGN_NORM_NO_ZERO << 4) | \ + (SGN_NORM_NO_ZERO << 6) | (SGN_NORM_NO_ZERO << 8) | \ + (SGN_NORM_NO_ZERO << 10) | (SGN_NORM_NO_ZERO << 12) | \ + (SGN_NORM_NO_ZERO << 14) | (SGN_NORM_NO_ZERO << 16) | \ + (SGN_NORM_NO_ZERO << 18) | (SGN_NORM_NO_ZERO << 20) | \ + (SGN_NORM_NO_ZERO << 22) | (SGN_NORM_NO_ZERO << 24) | \ + (SGN_NORM_NO_ZERO << 26) | (SGN_NORM_NO_ZERO << 28) | \ + (SGN_NORM_NO_ZERO << 30)) /* gap */ diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 3d51a8e65d..e1837b6380 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -34,11 +34,11 @@ void r300_emit_invariant_state(struct r300_context* r300) struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); - BEGIN_CS(24 + (caps->has_tcl ? 2: 0)); + BEGIN_CS(30 + (caps->has_tcl ? 2: 0)); + /*** Graphics Backend (GB) ***/ /* Various GB enables */ - OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE | - R300_GB_LINE_STUFF_ENABLE | R300_GB_TRIANGLE_STUFF_ENABLE); + OUT_CS_REG(R300_GB_ENABLE, 0x0); /* Subpixel multisampling for AA */ OUT_CS_REG(R300_GB_MSPOS0, 0x66666666); OUT_CS_REG(R300_GB_MSPOS1, 0x66666666); @@ -49,6 +49,8 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W); /* AA enable */ OUT_CS_REG(R300_GB_AA_CONFIG, 0x0); + + /*** Geometry Assembly (GA) ***/ /* GA errata fixes. */ if (caps->is_r500) { OUT_CS_REG(R300_GA_ENHANCE, @@ -62,13 +64,19 @@ void r300_emit_invariant_state(struct r300_context* r300) R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE); } - /* Fog block. */ - OUT_CS_REG(R300_FG_FOG_BLEND, 0x00000000); - OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x00000000); - OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x00000000); - OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x00000000); - OUT_CS_REG(R300_FG_DEPTH_SRC, 0x00000000); + /*** Fog (FG) ***/ + OUT_CS_REG(R300_FG_FOG_BLEND, 0x0); + OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0); + OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0); + OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0); + OUT_CS_REG(R300_FG_DEPTH_SRC, 0x0); + /*** VAP ***/ + /* Max and min vertex index clamp. */ + OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0x0); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, 0xffffff); + /* Sign/normalize control */ + OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); /* TCL-only stuff */ if (caps->has_tcl) { /* Amount of time to wait for vertex fetches in PVS */ @@ -78,7 +86,7 @@ void r300_emit_invariant_state(struct r300_context* r300) END_CS; /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(99 + (caps->has_tcl ? 26 : 0)); + BEGIN_CS(91 + (caps->has_tcl ? 26 : 0)); /* Flush PVS. */ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); @@ -86,9 +94,6 @@ void r300_emit_invariant_state(struct r300_context* r300) R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT); - /* Max and min vertex index clamp. */ - OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, 0xFFFFFF); - OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0x0); /* XXX endian */ if (caps->has_tcl) { OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VC_NO_SWAP); @@ -103,8 +108,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VC_NO_SWAP | R300_VAP_TCL_BYPASS); } - /* XXX magic number not in r300_reg */ - OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0xAAAAAAAA); /* XXX point tex stuffing */ OUT_CS_REG_SEQ(R300_GA_POINT_S0, 1); OUT_CS_32F(0.0); @@ -157,7 +160,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F); /* Vertex size. */ OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8); - OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0xAAAAAAAA); OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_0, 0x00000003); OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x00000000); OUT_CS_REG(R300_TX_ENABLE, 0x0); -- cgit v1.2.3 From f3f5e04103d804a23cfbe8bd264c8e0db64bd31f Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 19 Mar 2009 20:32:08 -0700 Subject: r300-gallium: Clean up some emit, and some state handlers. --- src/gallium/drivers/r300/r300_emit.c | 17 +---------------- src/gallium/drivers/r300/r300_emit.h | 10 ++++++++++ src/gallium/drivers/r300/r300_state_inlines.h | 25 +++++++++++++++++++++---- 3 files changed, 32 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index a2e771bd1b..9bfb89626c 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -152,21 +152,6 @@ void r500_emit_fragment_shader(struct r300_context* r300, END_CS; } -/* Translate pipe_format into US_OUT_FMT. Note that formats are stored from - * C3 to C0. */ -uint32_t translate_out_fmt(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - return R300_US_OUT_FMT_C4_8 | - R300_C0_SEL_B | R300_C1_SEL_G | - R300_C2_SEL_R | R300_C3_SEL_A; - default: - return R300_US_OUT_FMT_UNUSED; - } - return 0; -} - /* XXX add pitch, stride, clean up */ void r300_emit_fb_state(struct r300_context* r300, struct pipe_framebuffer_state* fb) @@ -182,7 +167,7 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), - translate_out_fmt(fb->cbufs[i]->format)); + r300_translate_out_fmt(fb->cbufs[i]->format)); } if (fb->zsbuf) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 4aba1ee08c..0bc1f90e6a 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -28,6 +28,7 @@ #include "r300_context.h" #include "r300_cs.h" #include "r300_screen.h" +#include "r300_state_inlines.h" void r300_emit_blend_state(struct r300_context* r300, struct r300_blend_state* blend); @@ -52,11 +53,20 @@ void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); void r300_emit_rs_block_state(struct r300_context* r300, struct r300_rs_block* rs); +void r300_emit_sampler(struct r300_context* r300, + struct r300_sampler_state* sampler, unsigned offset); + void r300_emit_scissor_state(struct r300_context* r300, struct r300_scissor_state* scissor); +void r300_emit_texture(struct r300_context* r300, + struct r300_texture* tex, unsigned offset); + void r300_emit_vertex_format_state(struct r300_context* r300); +void r300_emit_viewport_state(struct r300_context* r300, + struct r300_viewport_state* viewport); + /* Emit all dirty state. */ void r300_emit_dirty_state(struct r300_context* r300); diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index fd92c71756..b80ff1c1ab 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -297,8 +297,7 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_A32R32G32B32: return R300_COLOR_FORMAT_ARGB32323232; case PIPE_FORMAT_A16R16G16B16: - return R300_COLOR_FORMAT_ARGB16161616; */ - /* XXX Not in pipe_format + return R300_COLOR_FORMAT_ARGB16161616; case PIPE_FORMAT_A10R10G10B10_UNORM: return R500_COLOR_FORMAT_ARGB10101010; case PIPE_FORMAT_A2R10G10B10_UNORM: @@ -306,7 +305,7 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_I10_UNORM: return R500_COLOR_FORMAT_I10; */ default: - debug_printf("r300: Implementation error: " \ + debug_printf("r300: Implementation error: " "Got unsupported color format %s in %s\n", pf_name(format), __FUNCTION__); break; @@ -324,7 +323,7 @@ static INLINE uint32_t r300_translate_zsformat(enum pipe_format format) case PIPE_FORMAT_Z24S8_UNORM: return R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; default: - debug_printf("r300: Implementation error: " \ + debug_printf("r300: Implementation error: " "Got unsupported ZS format %s in %s\n", pf_name(format), __FUNCTION__); break; @@ -332,6 +331,24 @@ static INLINE uint32_t r300_translate_zsformat(enum pipe_format format) return 0; } +/* Translate pipe_format into US_OUT_FMT. + * Note that formats are stored from C3 to C0. */ +static INLINE uint32_t r300_translate_out_fmt(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + return R300_US_OUT_FMT_C4_8 | + R300_C0_SEL_B | R300_C1_SEL_G | + R300_C2_SEL_R | R300_C3_SEL_A; + default: + debug_printf("r300: Implementation error: " + "Got unsupported output format %s in %s\n", + pf_name(format), __FUNCTION__); + return R300_US_OUT_FMT_UNUSED; + } + return 0; +} + /* Non-CSO state. (For now.) */ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) -- cgit v1.2.3 From adb40a94b0d8a023cfa900017dc17e26179c1cfd Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Thu, 19 Mar 2009 20:36:59 -0700 Subject: r300-gallium: Clean up r300_swtcl_emit. Some compile warnings, some statements without effect. --- src/gallium/drivers/r300/r300_swtcl_emit.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/r300/r300_swtcl_emit.c b/src/gallium/drivers/r300/r300_swtcl_emit.c index 3db09514c6..c82ee9c087 100644 --- a/src/gallium/drivers/r300/r300_swtcl_emit.c +++ b/src/gallium/drivers/r300/r300_swtcl_emit.c @@ -66,7 +66,7 @@ r300_swtcl_render_get_vertex_info(struct vbuf_render* render) r300_update_derived_state(r300); - return &r300->vertex_info; + return &r300->vertex_info.vinfo; } static boolean r300_swtcl_render_allocate_vertices(struct vbuf_render* render, @@ -177,7 +177,6 @@ static boolean r300_swtcl_render_set_primitive(struct vbuf_render* render, static void prepare_render(struct r300_swtcl_render* render, unsigned count) { struct r300_context* r300 = render->r300; - int i; CS_LOCALS(r300); @@ -210,7 +209,6 @@ static void r300_swtcl_render_draw_arrays(struct vbuf_render* render, { struct r300_swtcl_render* r300render = r300_swtcl_render(render); struct r300_context* r300 = r300render->r300; - struct pipe_screen* screen = r300->context.screen; CS_LOCALS(r300); @@ -239,24 +237,22 @@ static void r300_swtcl_render_draw(struct vbuf_render* render, CS_LOCALS(r300); - count /= 4; - prepare_render(r300render, count); /* Send our indices into an index buffer. */ index_buffer = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX, - count * 4); + count); if (!index_buffer) { return; } index_map = pipe_buffer_map(screen, index_buffer, PIPE_BUFFER_USAGE_CPU_WRITE); - memcpy(index_map, indices, count * 4); + memcpy(index_map, indices, count); pipe_buffer_unmap(screen, index_buffer); debug_printf("r300: Doing indexbuf render, count %d\n", count); -#if 0 + BEGIN_CS(5); OUT_CS(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0)); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | @@ -266,7 +262,6 @@ static void r300_swtcl_render_draw(struct vbuf_render* render, OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2)); OUT_CS_RELOC(index_buffer, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); END_CS; -#endif } static void r300_swtcl_render_destroy(struct vbuf_render* render) @@ -277,7 +272,6 @@ static void r300_swtcl_render_destroy(struct vbuf_render* render) static struct vbuf_render* r300_swtcl_render_create(struct r300_context* r300) { struct r300_swtcl_render* r300render = CALLOC_STRUCT(r300_swtcl_render); - struct pipe_screen* screen = r300->context.screen; r300render->r300 = r300; @@ -295,19 +289,6 @@ static struct vbuf_render* r300_swtcl_render_create(struct r300_context* r300) r300render->base.release_vertices = r300_swtcl_render_release_vertices; r300render->base.destroy = r300_swtcl_render_destroy; - /* XXX bonghits ahead - r300render->vbo_alloc_size = 128 * 4096; - r300render->vbo_size = r300render->vbo_alloc_size; - r300render->vbo_offset = 0; - r300render->vbo = pipe_buffer_create(screen, - 64, - PIPE_BUFFER_USAGE_VERTEX, - r300render->vbo_size); - r300render->vbo_map = pipe_buffer_map(screen, - r300render->vbo, - PIPE_BUFFER_USAGE_CPU_WRITE); - pipe_buffer_unmap(screen, r300render->vbo); */ - return &r300render->base; } -- cgit v1.2.3 From 04fe31cd5efc5703b9cd975391a992866432f59d Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 20 Mar 2009 00:15:03 -0700 Subject: r300-gallium: Properly offset scissors. As per r300_reg, classic Mesa, and xf86-video-ati. --- src/gallium/drivers/r300/r300_state.c | 22 ++++++++++++++++------ src/gallium/drivers/r300/r300_surface.c | 12 ++++++++++-- 2 files changed, 26 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 58bce22fc8..2a026e7fca 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -515,12 +515,22 @@ static void r300_set_scissor_state(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); draw_flush(r300->draw); - r300->scissor_state->scissor_top_left = - (state->minx << R300_SCISSORS_X_SHIFT) | - (state->miny << R300_SCISSORS_Y_SHIFT); - r300->scissor_state->scissor_bottom_right = - (state->maxx << R300_SCISSORS_X_SHIFT) | - (state->maxy << R300_SCISSORS_Y_SHIFT); + if (r300_screen(r300->context.screen)->caps->is_r500) { + r300->scissor_state->scissor_top_left = + (state->minx << R300_SCISSORS_X_SHIFT) | + (state->miny << R300_SCISSORS_Y_SHIFT); + r300->scissor_state->scissor_bottom_right = + (state->maxx << R300_SCISSORS_X_SHIFT) | + (state->maxy << R300_SCISSORS_Y_SHIFT); + } else { + /* Offset of 1440 in non-R500 chipsets. */ + r300->scissor_state->scissor_top_left = + ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) | + ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT); + r300->scissor_state->scissor_bottom_right = + ((state->maxx + 1440) << R300_SCISSORS_X_SHIFT) | + ((state->maxy + 1440) << R300_SCISSORS_Y_SHIFT); + } r300->dirty_state |= R300_NEW_SCISSOR; } diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index 2cc0677e52..3672f60b1b 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -80,8 +80,16 @@ static void r300_surface_fill(struct pipe_context* pipe, /* Pixel scissors */ OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - OUT_CS((x << R300_SCISSORS_X_SHIFT) | (y << R300_SCISSORS_Y_SHIFT)); - OUT_CS((w << R300_SCISSORS_X_SHIFT) | (h << R300_SCISSORS_Y_SHIFT)); + if (caps->is_r500) { + OUT_CS((x << R300_SCISSORS_X_SHIFT) | (y << R300_SCISSORS_Y_SHIFT)); + OUT_CS((w << R300_SCISSORS_X_SHIFT) | (h << R300_SCISSORS_Y_SHIFT)); + } else { + /* Non-R500 chipsets have an offset of 1440 in their scissors. */ + OUT_CS(((x + 1440) << R300_SCISSORS_X_SHIFT) | + ((y + 1440) << R300_SCISSORS_Y_SHIFT)); + OUT_CS(((w + 1440) << R300_SCISSORS_X_SHIFT) | + ((h + 1440) << R300_SCISSORS_Y_SHIFT)); + } /* The size of the point we're about to draw, in sixths of pixels */ OUT_CS_REG(R300_GA_POINT_SIZE, -- cgit v1.2.3 From f1429580848b471c487e55a9a81b904452f50df5 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 20 Mar 2009 00:35:38 -0700 Subject: r300-gallium: Clean up surface_fill, prep for surface_copy code. --- src/gallium/drivers/r300/r300_surface.c | 121 ++++++++++++++------------------ 1 file changed, 54 insertions(+), 67 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index 3672f60b1b..86fe3fc4f9 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -23,6 +23,55 @@ #include "r300_surface.h" +static void r300_surface_setup(struct pipe_context* pipe, + struct pipe_surface* dest, + unsigned x, unsigned y, + unsigned w, unsigned h) +{ + struct r300_context* r300 = r300_context(pipe); + CS_LOCALS(r300); + struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; + struct r300_texture* tex = (struct r300_texture*)dest->texture; + unsigned pixpitch = tex->stride / tex->tex.block.size; + + r300_emit_blend_state(r300, &blend_clear_state); + r300_emit_blend_color_state(r300, &blend_color_clear_state); + r300_emit_dsa_state(r300, &dsa_clear_state); + r300_emit_rs_state(r300, &rs_clear_state); + + BEGIN_CS(15); + + /* Pixel scissors. */ + OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); + if (caps->is_r500) { + OUT_CS((x << R300_SCISSORS_X_SHIFT) | (y << R300_SCISSORS_Y_SHIFT)); + OUT_CS((w << R300_SCISSORS_X_SHIFT) | (h << R300_SCISSORS_Y_SHIFT)); + } else { + /* Non-R500 chipsets have an offset of 1440 in their scissors. */ + OUT_CS(((x + 1440) << R300_SCISSORS_X_SHIFT) | + ((y + 1440) << R300_SCISSORS_Y_SHIFT)); + OUT_CS(((w + 1440) << R300_SCISSORS_X_SHIFT) | + ((h + 1440) << R300_SCISSORS_Y_SHIFT)); + } + + /* Flush colorbuffer and blend caches. */ + OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL); + OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + + /* Setup colorbuffer. */ + OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1); + OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_REG(R300_RB3D_COLORPITCH0, pixpitch | + r300_translate_colorformat(tex->tex.format)); + OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0xf); + + END_CS; +} + /* Provides pipe_context's "surface_fill". Commonly used for clearing * buffers. */ static void r300_surface_fill(struct pipe_context* pipe, @@ -53,10 +102,7 @@ static void r300_surface_fill(struct pipe_context* pipe, return; } - r300_emit_blend_state(r300, &blend_clear_state); - r300_emit_blend_color_state(r300, &blend_color_clear_state); - r300_emit_dsa_state(r300, &dsa_clear_state); - r300_emit_rs_state(r300, &rs_clear_state); + r300_surface_setup(r300, dest, x, y, w, h); /* Fragment shader setup */ if (caps->is_r500) { @@ -67,7 +113,7 @@ static void r300_surface_fill(struct pipe_context* pipe, r300_emit_rs_block_state(r300, &r300_rs_block_clear_state); } - BEGIN_CS(36); + BEGIN_CS(21); /* Viewport setup */ OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); @@ -78,37 +124,11 @@ static void r300_surface_fill(struct pipe_context* pipe, OUT_CS_32F(1.0); OUT_CS_32F(0.0); - /* Pixel scissors */ - OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - if (caps->is_r500) { - OUT_CS((x << R300_SCISSORS_X_SHIFT) | (y << R300_SCISSORS_Y_SHIFT)); - OUT_CS((w << R300_SCISSORS_X_SHIFT) | (h << R300_SCISSORS_Y_SHIFT)); - } else { - /* Non-R500 chipsets have an offset of 1440 in their scissors. */ - OUT_CS(((x + 1440) << R300_SCISSORS_X_SHIFT) | - ((y + 1440) << R300_SCISSORS_Y_SHIFT)); - OUT_CS(((w + 1440) << R300_SCISSORS_X_SHIFT) | - ((h + 1440) << R300_SCISSORS_Y_SHIFT)); - } - /* The size of the point we're about to draw, in sixths of pixels */ OUT_CS_REG(R300_GA_POINT_SIZE, ((h * 6) & R300_POINTSIZE_Y_MASK) | ((w * 6) << R300_POINTSIZE_X_SHIFT)); - /* Flush colorbuffer and blend caches. */ - OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, - R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D | - R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL); - OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, - R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); - - OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1); - OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_CS_REG(R300_RB3D_COLORPITCH0, pixpitch | - r300_translate_colorformat(tex->tex.format)); - OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0x0000000F); /* XXX Packet3 */ OUT_CS(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8)); OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | @@ -151,47 +171,14 @@ static void r300_surface_copy(struct pipe_context* pipe, " dimensions %dx%d (pixel pitch %d)\n", src, srcx, srcy, dest, destx, desty, w, h, pixpitch); + /* if ((srctex == desttex) && + ((destx < srcx + w) || (srcx < destx + w)) && + ((desty < srcy + h) || (srcy < destx + h))) { */ if (TRUE) { debug_printf("r300: Falling back on surface_copy\n"); return util_surface_copy(pipe, FALSE, dest, destx, desty, src, srcx, srcy, w, h); } -#if 0 - BEGIN_CS(); - OUT_CS_REG(RADEON_DEFAULT_SC_BOTTOM_RIGHT,(RADEON_DEFAULT_SC_RIGHT_MAX | - RADEON_DEFAULT_SC_BOTTOM_MAX)); - OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, (RADEON_GMC_DST_PITCH_OFFSET_CNTL | - RADEON_GMC_SRC_PITCH_OFFSET_CNTL | - RADEON_GMC_BRUSH_NONE | - (datatype << 8) | - RADEON_GMC_SRC_DATATYPE_COLOR | - RADEON_ROP[rop].rop | - RADEON_DP_SRC_SOURCE_MEMORY | - RADEON_GMC_CLR_CMP_CNTL_DIS)); - OUT_CS_REG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff); - OUT_CS_REG(RADEON_DP_BRUSH_BKGD_CLR, 0x0); - OUT_CS_REG(RADEON_DP_SRC_FRGD_CLR, 0xffffffff); - OUT_CS_REG(RADEON_DP_SRC_BKGD_CLR, 0x0); - OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, planemask); - OUT_ACCEL_REG(RADEON_DP_CNTL, ((info->accel_state->xdir >= 0 ? RADEON_DST_X_LEFT_TO_RIGHT : 0) | - (info->accel_state->ydir >= 0 ? RADEON_DST_Y_TOP_TO_BOTTOM : 0)); -); - - OUT_CS_REG_SEQ(RADEON_DST_PITCH_OFFSET, 1); - OUT_CS_RELOC(desttex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - - OUT_CS_REG_SEQ(RADEON_SRC_PITCH_OFFSET, 1); - OUT_CS_RELOC(srctex->buffer, 0, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); - - OUT_CS_REG(RADEON_SRC_Y_X, (srcy << 16) | srcx); - OUT_CS_REG(RADEON_DST_Y_X, (desty << 16) | destx); - OUT_CS_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16) | w); - OUT_CS_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL); - OUT_CS_REG(RADEON_WAIT_UNTIL, - RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE); - END_CS; -#endif } void r300_init_surface_functions(struct r300_context* r300) -- cgit v1.2.3 From 8066edb2a254d15ed92c2d350a7799adf3cca0d7 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 20 Mar 2009 00:43:29 -0700 Subject: r300-gallium: Simplify/neaten up packet3. Deck chairs on the Hindenburg. :3 --- src/gallium/drivers/r300/r300_cs.h | 3 --- src/gallium/drivers/r300/r300_cs_inlines.h | 9 +++++++++ src/gallium/drivers/r300/r300_surface.c | 4 ++-- src/gallium/drivers/r300/r300_swtcl_emit.c | 8 ++++---- 4 files changed, 15 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index d8038ff1e1..443dfc0233 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -47,9 +47,6 @@ #define CP_PACKET0(register, count) \ (RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2)) -#define CP_PACKET3(op, count) \ - (RADEON_CP_PACKET3 | (op) | ((count) << 16)) - #define CS_LOCALS(context) \ struct r300_winsys* cs_winsys = context->winsys; \ struct radeon_cs* cs = cs_winsys->cs; \ diff --git a/src/gallium/drivers/r300/r300_cs_inlines.h b/src/gallium/drivers/r300/r300_cs_inlines.h index 03bb608eb9..64bd58193a 100644 --- a/src/gallium/drivers/r300/r300_cs_inlines.h +++ b/src/gallium/drivers/r300/r300_cs_inlines.h @@ -36,15 +36,24 @@ OUT_CS(CP_PACKET0(register, ((count) - 1)) | RADEON_ONE_REG_WR); \ } while (0) +/* XXX might no longer be needed */ #define R300_PACIFY do { \ OUT_CS_REG(RADEON_WAIT_UNTIL, (1 << 14) | (1 << 15) | (1 << 16) | (1 << 17) | \ (1 << 18)); \ } while (0) +/* XXX do we still use this? */ #define R300_SCREENDOOR do { \ OUT_CS_REG(R300_SC_SCREENDOOR, 0x0); \ R300_PACIFY; \ OUT_CS_REG(R300_SC_SCREENDOOR, 0xffffff); \ } while (0) +#define CP_PACKET3(op, count) \ + (RADEON_CP_PACKET3 | (op) | ((count) << 16)) + +#define R300_CS_PKT3(op, count) do { \ + OUT_CS(CP_PACKET3(op, count)); \ +} while (0) + #endif /* R300_CS_H */ diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index 86fe3fc4f9..db18975a10 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -129,8 +129,8 @@ static void r300_surface_fill(struct pipe_context* pipe, ((h * 6) & R300_POINTSIZE_Y_MASK) | ((w * 6) << R300_POINTSIZE_X_SHIFT)); - /* XXX Packet3 */ - OUT_CS(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8)); + /* Packet3 with our point vertex */ + OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 8); OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | (1 << R300_PRIM_NUM_VERTICES_SHIFT)); OUT_CS_32F(w / 2.0); diff --git a/src/gallium/drivers/r300/r300_swtcl_emit.c b/src/gallium/drivers/r300/r300_swtcl_emit.c index c82ee9c087..83c25f496b 100644 --- a/src/gallium/drivers/r300/r300_swtcl_emit.c +++ b/src/gallium/drivers/r300/r300_swtcl_emit.c @@ -194,7 +194,7 @@ static void prepare_render(struct r300_swtcl_render* render, unsigned count) * VBPNTR [relocated BO] */ BEGIN_CS(7); - OUT_CS(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 3)); + OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); OUT_CS(1); OUT_CS(r300->vertex_info.vinfo.size | (r300->vertex_info.vinfo.size << 8)); @@ -219,7 +219,7 @@ static void r300_swtcl_render_draw_arrays(struct vbuf_render* render, debug_printf("r300: Doing vbuf render, count %d\n", count); BEGIN_CS(2); - OUT_CS(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0)); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | r300render->hwprim); END_CS; @@ -254,11 +254,11 @@ static void r300_swtcl_render_draw(struct vbuf_render* render, debug_printf("r300: Doing indexbuf render, count %d\n", count); BEGIN_CS(5); - OUT_CS(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0)); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300render->hwprim | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - OUT_CS(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2)); + OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2)); OUT_CS_RELOC(index_buffer, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); END_CS; -- cgit v1.2.3 From edfaa686091a4f6238b8f315a475d90ff2c2f5f5 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 20 Mar 2009 00:48:53 -0700 Subject: r300-gallium: Put r300_cs_inlines to bed. Guess it was a mistake in the first place. Oops. --- src/gallium/drivers/r300/r300_cs.h | 17 ++++++++- src/gallium/drivers/r300/r300_cs_inlines.h | 59 ------------------------------ 2 files changed, 16 insertions(+), 60 deletions(-) delete mode 100644 src/gallium/drivers/r300/r300_cs_inlines.h (limited to 'src') diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 443dfc0233..2b9a441147 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -115,6 +115,21 @@ cs_winsys->flush_cs(cs); \ } while (0) -#include "r300_cs_inlines.h" +#define RADEON_ONE_REG_WR (1 << 15) + +#define OUT_CS_ONE_REG(register, count) do { \ + if (VERY_VERBOSE_REGISTERS) \ + debug_printf("r300: writing data sequence of %d to 0x%04X\n", \ + count, register); \ + assert(register); \ + OUT_CS(CP_PACKET0(register, ((count) - 1)) | RADEON_ONE_REG_WR); \ +} while (0) + +#define CP_PACKET3(op, count) \ + (RADEON_CP_PACKET3 | (op) | ((count) << 16)) + +#define R300_CS_PKT3(op, count) do { \ + OUT_CS(CP_PACKET3(op, count)); \ +} while (0) #endif /* R300_CS_H */ diff --git a/src/gallium/drivers/r300/r300_cs_inlines.h b/src/gallium/drivers/r300/r300_cs_inlines.h deleted file mode 100644 index 64bd58193a..0000000000 --- a/src/gallium/drivers/r300/r300_cs_inlines.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -/* r300_cs_inlines: This is just a handful of useful inlines for sending - * (very) common instructions to the CS buffer. Should only be included from - * r300_cs.h, probably. */ - -#ifdef R300_CS_H - -#define RADEON_ONE_REG_WR (1 << 15) - -#define OUT_CS_ONE_REG(register, count) do { \ - if (VERY_VERBOSE_REGISTERS) \ - debug_printf("r300: writing data sequence of %d to 0x%04X\n", \ - count, register); \ - assert(register); \ - OUT_CS(CP_PACKET0(register, ((count) - 1)) | RADEON_ONE_REG_WR); \ -} while (0) - -/* XXX might no longer be needed */ -#define R300_PACIFY do { \ - OUT_CS_REG(RADEON_WAIT_UNTIL, (1 << 14) | (1 << 15) | (1 << 16) | (1 << 17) | \ - (1 << 18)); \ -} while (0) - -/* XXX do we still use this? */ -#define R300_SCREENDOOR do { \ - OUT_CS_REG(R300_SC_SCREENDOOR, 0x0); \ - R300_PACIFY; \ - OUT_CS_REG(R300_SC_SCREENDOOR, 0xffffff); \ -} while (0) - -#define CP_PACKET3(op, count) \ - (RADEON_CP_PACKET3 | (op) | ((count) << 16)) - -#define R300_CS_PKT3(op, count) do { \ - OUT_CS(CP_PACKET3(op, count)); \ -} while (0) - -#endif /* R300_CS_H */ -- cgit v1.2.3 From f411a66c0679c1aa7a9ee3d1eb633a8cbf3ef5f2 Mon Sep 17 00:00:00 2001 From: Corbin Simpson Date: Fri, 20 Mar 2009 14:47:49 -0700 Subject: r300-gallium: Misspelled macro name. *pulls paper bag down over head* --- src/gallium/drivers/r300/r300_cs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index 2b9a441147..9913678d27 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -128,7 +128,7 @@ #define CP_PACKET3(op, count) \ (RADEON_CP_PACKET3 | (op) | ((count) << 16)) -#define R300_CS_PKT3(op, count) do { \ +#define OUT_CS_PKT3(op, count) do { \ OUT_CS(CP_PACKET3(op, count)); \ } while (0) -- cgit v1.2.3 From 12256fc2b2e0a54db24210a4b86f6fb5919d0fe8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 20 Mar 2009 17:08:30 -0600 Subject: mesa: linear scan register allocation for shader programs This is a check-point commit; not turned on yet. Use the linear scan register allocation algorithm to re-allocate temporary registers. This is done by computing the live intervals for registers and reallocating temps with that information. For some shaders this dramatically reduces the number of temp registers needed. For the time being we give up on a few cases such as relative-indexed temps and subroutine calls (but we inline most GLSL functions anyway). --- src/mesa/shader/prog_optimize.c | 428 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 407 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/mesa/shader/prog_optimize.c b/src/mesa/shader/prog_optimize.c index ec06da141d..458a69f70b 100644 --- a/src/mesa/shader/prog_optimize.c +++ b/src/mesa/shader/prog_optimize.c @@ -33,6 +33,9 @@ #include "prog_print.h" +#define MAX_LOOP_NESTING 50 + + static GLboolean dbg = GL_FALSE; @@ -75,6 +78,37 @@ remove_instructions(struct gl_program *prog, const GLboolean *removeFlags) } +/** + * Remap register indexes according to map. + * \param prog the program to search/replace + * \param file the type of register file to search/replace + * \param map maps old register indexes to new indexes + */ +static void +replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[]) +{ + GLuint i; + + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *inst = prog->Instructions + i; + const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == file) { + GLuint index = inst->SrcReg[j].Index; + ASSERT(map[index] >= 0); + inst->SrcReg[j].Index = map[index]; + } + } + if (inst->DstReg.File == file) { + const GLuint index = inst->DstReg.Index; + ASSERT(map[index] >= 0); + inst->DstReg.Index = map[index]; + } + } +} + + /** * Consolidate temporary registers to use low numbers. For example, if the * shader only uses temps 4, 5, 8, replace them with 0, 1, 2. @@ -83,7 +117,7 @@ static void _mesa_consolidate_registers(struct gl_program *prog) { GLboolean tempUsed[MAX_PROGRAM_TEMPS]; - GLuint tempMap[MAX_PROGRAM_TEMPS]; + GLint tempMap[MAX_PROGRAM_TEMPS]; GLuint tempMax = 0, i; if (dbg) { @@ -92,6 +126,10 @@ _mesa_consolidate_registers(struct gl_program *prog) memset(tempUsed, 0, sizeof(tempUsed)); + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + tempMap[i] = -1; + } + /* set tempUsed[i] if temporary [i] is referenced */ for (i = 0; i < prog->NumInstructions; i++) { const struct prog_instruction *inst = prog->Instructions + i; @@ -132,26 +170,8 @@ _mesa_consolidate_registers(struct gl_program *prog) } } - /* now replace occurances of old temp indexes with new indexes */ - for (i = 0; i < prog->NumInstructions; i++) { - struct prog_instruction *inst = prog->Instructions + i; - const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); - GLuint j; - for (j = 0; j < numSrc; j++) { - if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { - GLuint index = inst->SrcReg[j].Index; - assert(index <= tempMax); - assert(tempUsed[index]); - inst->SrcReg[j].Index = tempMap[index]; - } - } - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - const GLuint index = inst->DstReg.Index; - assert(tempUsed[index]); - assert(index <= tempMax); - inst->DstReg.Index = tempMap[index]; - } - } + replace_regs(prog, PROGRAM_TEMPORARY, tempMap); + if (dbg) { _mesa_printf("Optimize: End register consolidation\n"); } @@ -409,6 +429,370 @@ _mesa_remove_extra_moves(struct gl_program *prog) } +/** A live register interval */ +struct interval +{ + GLuint Reg; /** The temporary register index */ + GLuint Start, End; /** Start/end instruction numbers */ +}; + + +/** A list of register intervals */ +struct interval_list +{ + GLuint Num; + struct interval Intervals[MAX_PROGRAM_TEMPS]; +}; + + +static void +append_interval(struct interval_list *list, const struct interval *inv) +{ + list->Intervals[list->Num++] = *inv; +} + + +/** Insert interval inv into list, sorted by interval end */ +static void +insert_interval_by_end(struct interval_list *list, const struct interval *inv) +{ + /* XXX we could do a binary search insertion here since list is sorted */ + GLint i = list->Num - 1; + while (i >= 0 && list->Intervals[i].End > inv->End) { + list->Intervals[i + 1] = list->Intervals[i]; + i--; + } + list->Intervals[i + 1] = *inv; + list->Num++; + +#ifdef DEBUG + { + GLuint i; + for (i = 0; i + 1 < list->Num; i++) { + ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End); + } + } +#endif +} + + +/** Remove the given interval from the interval list */ +static void +remove_interval(struct interval_list *list, const struct interval *inv) +{ + /* XXX we could binary search since list is sorted */ + GLuint k; + for (k = 0; k < list->Num; k++) { + if (list->Intervals[k].Reg == inv->Reg) { + /* found, remove it */ + ASSERT(list->Intervals[k].Start == inv->Start); + ASSERT(list->Intervals[k].End == inv->End); + while (k < list->Num - 1) { + list->Intervals[k] = list->Intervals[k + 1]; + k++; + } + list->Num--; + return; + } + } +} + + +/** called by qsort() */ +static int +compare_start(const void *a, const void *b) +{ + const struct interval *ia = (const struct interval *) a; + const struct interval *ib = (const struct interval *) b; + if (ia->Start < ib->Start) + return -1; + else if (ia->Start > ib->Start) + return +1; + else + return 0; +} + +/** sort the interval list according to interval starts */ +static void +sort_interval_list_by_start(struct interval_list *list) +{ + qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start); +#ifdef DEBUG + { + GLuint i; + for (i = 0; i + 1 < list->Num; i++) { + ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start); + } + } +#endif +} + + +/** + * Update the intermediate interval info for register 'index' and + * instruction 'ic'. + */ +static void +update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic) +{ + ASSERT(index < MAX_PROGRAM_TEMPS); + if (intBegin[index] == -1) { + ASSERT(intEnd[index] == -1); + intBegin[index] = intEnd[index] = ic; + } + else { + intEnd[index] = ic; + } +} + + +/** + * Find the live intervals for each temporary register in the program. + * For register R, the interval [A,B] indicates that R is referenced + * from instruction A through instruction B. + * Special consideration is needed for loops and subroutines. + * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason + */ +static GLboolean +find_live_intervals(struct gl_program *prog, + struct interval_list *liveIntervals) +{ + struct loop_info + { + GLuint Start, End; /**< Start, end instructions of loop */ + }; + struct loop_info loopStack[MAX_LOOP_NESTING]; + GLuint loopStackDepth = 0; + GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS]; + GLuint i; + + /* + * Note: we'll return GL_FALSE below if we find relative indexing + * into the TEMP register file. We can't handle that yet. + * We also give up on subroutines for now. + */ + + if (dbg) { + _mesa_printf("Optimize: Begin find intervals\n"); + } + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ + intBegin[i] = intEnd[i] = -1; + } + + /* Scan instructions looking for temporary registers */ + for (i = 0; i < prog->NumInstructions; i++) { + const struct prog_instruction *inst = prog->Instructions + i; + if (inst->Opcode == OPCODE_BGNLOOP) { + loopStack[loopStackDepth].Start = i; + loopStack[loopStackDepth].End = inst->BranchTarget; + loopStackDepth++; + } + else if (inst->Opcode == OPCODE_ENDLOOP) { + loopStackDepth--; + } + else if (inst->Opcode == OPCODE_CAL) { + return GL_FALSE; + } + else { + const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + const GLuint index = inst->SrcReg[j].Index; + if (inst->SrcReg[j].RelAddr) + return GL_FALSE; + update_interval(intBegin, intEnd, index, i); + if (loopStackDepth > 0) { + /* extend temp register's interval to end of loop */ + GLuint loopEnd = loopStack[loopStackDepth - 1].End; + update_interval(intBegin, intEnd, index, loopEnd); + } + } + } + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + const GLuint index = inst->DstReg.Index; + if (inst->DstReg.RelAddr) + return GL_FALSE; + update_interval(intBegin, intEnd, index, i); + if (loopStackDepth > 0) { + /* extend temp register's interval to end of loop */ + GLuint loopEnd = loopStack[loopStackDepth - 1].End; + update_interval(intBegin, intEnd, index, loopEnd); + } + } + } + } + + /* Build live intervals list from intermediate arrays */ + liveIntervals->Num = 0; + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + if (intBegin[i] >= 0) { + struct interval inv; + inv.Reg = i; + inv.Start = intBegin[i]; + inv.End = intEnd[i]; + append_interval(liveIntervals, &inv); + } + } + + /* Sort the list according to interval starts */ + sort_interval_list_by_start(liveIntervals); + + if (dbg) { + /* print interval info */ + for (i = 0; i < liveIntervals->Num; i++) { + const struct interval *inv = liveIntervals->Intervals + i; + _mesa_printf("Reg[%d] live [%d, %d]:", + inv->Reg, inv->Start, inv->End); + if (1) { + int j; + for (j = 0; j < inv->Start; j++) + _mesa_printf(" "); + for (j = inv->Start; j <= inv->End; j++) + _mesa_printf("x"); + } + _mesa_printf("\n"); + } + } + + return GL_TRUE; +} + + +static GLuint +alloc_register(GLboolean usedRegs[MAX_PROGRAM_TEMPS]) +{ + GLuint k; + for (k = 0; k < MAX_PROGRAM_TEMPS; k++) { + if (!usedRegs[k]) { + usedRegs[k] = GL_TRUE; + return k; + } + } + return MAX_PROGRAM_TEMPS; +} + + +/** + * This function implements "Linear Scan Register Allocation" to reduce + * the number of temporary registers used by the program. + * + * We compute the "live interval" for all temporary registers then + * examine the overlap of the intervals to allocate new registers. + * Basically, if two intervals do not overlap, they can use the same register. + */ +static void +_mesa_reallocate_registers(struct gl_program *prog) +{ + struct interval_list liveIntervals; + GLint registerMap[MAX_PROGRAM_TEMPS]; + GLboolean usedRegs[MAX_PROGRAM_TEMPS]; + GLuint i; + GLuint maxTemp = 0; + + if (dbg) { + _mesa_printf("Optimize: Begin live-interval register reallocation\n"); + _mesa_print_program(prog); + } + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ + registerMap[i] = -1; + usedRegs[i] = GL_FALSE; + } + + if (!find_live_intervals(prog, &liveIntervals)) { + if (dbg) + _mesa_printf("Aborting register reallocation\n"); + return; + } + + { + struct interval_list activeIntervals; + activeIntervals.Num = 0; + + /* loop over live intervals, allocating a new register for each */ + for (i = 0; i < liveIntervals.Num; i++) { + const struct interval *live = liveIntervals.Intervals + i; + + if (dbg) + _mesa_printf("Consider register %u\n", live->Reg); + + /* Expire old intervals. Intervals which have ended with respect + * to the live interval can have their remapped registers freed. + */ + { + GLint j; + for (j = 0; j < activeIntervals.Num; j++) { + const struct interval *inv = activeIntervals.Intervals + j; + if (inv->End >= live->Start) { + /* Stop now. Since the activeInterval list is sorted + * we know we don't have to go further. + */ + break; + } + else { + /* Interval 'inv' has expired */ + const GLint regNew = registerMap[inv->Reg]; + ASSERT(regNew >= 0); + + if (dbg) + _mesa_printf(" expire interval for reg %u\n", inv->Reg); + + /* remove interval j from active list */ + remove_interval(&activeIntervals, inv); + j--; /* counter-act j++ in for-loop above */ + + /* return register regNew to the free pool */ + if (dbg) + _mesa_printf(" free reg %d\n", regNew); + ASSERT(usedRegs[regNew] == GL_TRUE); + usedRegs[regNew] = GL_FALSE; + } + } + } + + /* find a free register for this live interval */ + { + const GLuint k = alloc_register(usedRegs); + if (k == MAX_PROGRAM_TEMPS) { + /* out of registers, give up */ + return; + } + registerMap[live->Reg] = k; + maxTemp = MAX2(maxTemp, k); + if (dbg) + _mesa_printf(" remap register %d -> %d\n", live->Reg, k); + } + + /* Insert this live interval into the active list which is sorted + * by increasing end points. + */ + insert_interval_by_end(&activeIntervals, live); + } + } + + if (maxTemp + 1 < liveIntervals.Num) { + /* OK, we've reduced the number of registers needed. + * Scan the program and replace all the old temporary register + * indexes with the new indexes. + */ + replace_regs(prog, PROGRAM_TEMPORARY, registerMap); + + prog->NumTemporaries = maxTemp + 1; + } + + if (dbg) { + _mesa_printf("Optimize: End live-interval register reallocation\n"); + _mesa_printf("Num temp regs before: %u after: %u\n", + liveIntervals.Num, maxTemp + 1); + _mesa_print_program(prog); + } +} + + + + /** * Apply optimizations to the given program to eliminate unnecessary * instructions, temp regs, etc. @@ -424,4 +808,6 @@ _mesa_optimize_program(GLcontext *ctx, struct gl_program *program) if (1) _mesa_consolidate_registers(program); + else /*NEW*/ + _mesa_reallocate_registers(program); } -- cgit v1.2.3 From 1f45ae0813f72fa92f52e0ebc440922362dc7cce Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 20 Mar 2009 20:15:58 -0600 Subject: mesa: add new internal state var for window size Actually, window width - 1, height - 1 --- src/mesa/shader/prog_statevars.c | 13 +++++++++++++ src/mesa/shader/prog_statevars.h | 1 + 2 files changed, 14 insertions(+) (limited to 'src') diff --git a/src/mesa/shader/prog_statevars.c b/src/mesa/shader/prog_statevars.c index f51d9e2651..aeb7cf6de2 100644 --- a/src/mesa/shader/prog_statevars.c +++ b/src/mesa/shader/prog_statevars.c @@ -506,6 +506,13 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], } } return; + case STATE_FB_SIZE: + value[0] = (GLfloat) (ctx->DrawBuffer->Width - 1); + value[1] = (GLfloat) (ctx->DrawBuffer->Height - 1); + value[2] = 0.0F; + value[3] = 0.0F; + return; + case STATE_ROT_MATRIX_0: { const int unit = (int) state[2]; @@ -628,6 +635,9 @@ _mesa_program_state_flags(const gl_state_index state[STATE_LENGTH]) case STATE_PCM_BIAS: return _NEW_PIXEL; + case STATE_FB_SIZE: + return _NEW_BUFFERS; + default: /* unknown state indexes are silently ignored and * no flag set, since it is handled by the driver. @@ -828,6 +838,9 @@ append_token(char *dst, gl_state_index k) case STATE_SHADOW_AMBIENT: append(dst, "CompareFailValue"); break; + case STATE_FB_SIZE: + append(dst, "FbSize"); + break; case STATE_ROT_MATRIX_0: append(dst, "rotMatrixRow0"); break; diff --git a/src/mesa/shader/prog_statevars.h b/src/mesa/shader/prog_statevars.h index d563080db1..1180d9eaa4 100644 --- a/src/mesa/shader/prog_statevars.h +++ b/src/mesa/shader/prog_statevars.h @@ -117,6 +117,7 @@ typedef enum gl_state_index_ { STATE_PCM_SCALE, /**< Post color matrix RGBA scale */ STATE_PCM_BIAS, /**< Post color matrix RGBA bias */ STATE_SHADOW_AMBIENT, /**< ARB_shadow_ambient fail value; token[2] is texture unit index */ + STATE_FB_SIZE, /**< (width-1, height-1, 0, 0) */ STATE_ROT_MATRIX_0, /**< ATI_envmap_bumpmap, rot matrix row 0 */ STATE_ROT_MATRIX_1, /**< ATI_envmap_bumpmap, rot matrix row 1 */ STATE_INTERNAL_DRIVER /* first available state index for drivers (must be last) */ -- cgit v1.2.3 From 401cbd0d2365e5b2d371a2a01edf1cecca4a99dd Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 20 Mar 2009 20:25:34 -0600 Subject: gallium: remove use of origin_lower_left This was used to indicate OpenGL's lower-left origin for fragment window coordinates for polygon stipple and gl_FragCoord. Now: - fragment coordinate origin is always upper-left corner - GL polygon stipple is inverted and shifted before given to gallium - GL fragment programs that use INPUT[WPOS] are modified to use an inverted window coord which is placed in a temp register. Note: the origin_lower_left field still exists in pipe_rasterizer_state. Remove it when all the drivers, etc. no longer reference it. --- src/gallium/drivers/softpipe/sp_quad_stipple.c | 18 +-- src/gallium/drivers/softpipe/sp_setup.c | 13 +- src/mesa/state_tracker/st_atom_rasterizer.c | 1 + src/mesa/state_tracker/st_atom_stipple.c | 38 +++++- src/mesa/state_tracker/st_context.h | 3 +- src/mesa/state_tracker/st_mesa_to_tgsi.c | 179 ++++++++++++++++++++++--- 6 files changed, 203 insertions(+), 49 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c index 05e862f097..07162db7b6 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stipple.c +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -22,21 +22,11 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad) if (quad->input.prim == QUAD_PRIM_TRI) { struct softpipe_context *softpipe = qs->softpipe; /* need to invert Y to index into OpenGL's stipple pattern */ - int y0, y1; - uint stipple0, stipple1; const int col0 = quad->input.x0 % 32; - - if (softpipe->rasterizer->origin_lower_left) { - y0 = softpipe->framebuffer.height - 1 - quad->input.y0; - y1 = y0 - 1; - } - else { - y0 = quad->input.y0; - y1 = y0 + 1; - } - - stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; - stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; + const int y0 = quad->input.y0; + const int y1 = y0 + 1; + const uint stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; + const uint stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; /* turn off quad mask bits that fail the stipple test */ if ((stipple0 & (bit31 >> col0)) == 0) diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 0925653b5d..96cb09b905 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -732,18 +732,9 @@ setup_fragcoord_coeff(struct setup_context *setup, uint slot) setup->coef[slot].dadx[0] = 1.0; setup->coef[slot].dady[0] = 0.0; /*Y*/ - if (setup->softpipe->rasterizer->origin_lower_left) { - /* y=0=bottom */ - const int winHeight = setup->softpipe->framebuffer.height; - setup->coef[slot].a0[1] = (float) (winHeight - 1); - setup->coef[slot].dady[1] = -1.0; - } - else { - /* y=0=top */ - setup->coef[slot].a0[1] = 0.0; - setup->coef[slot].dady[1] = 1.0; - } + setup->coef[slot].a0[1] = 0.0; setup->coef[slot].dadx[1] = 0.0; + setup->coef[slot].dady[1] = 1.0; /*Z*/ setup->coef[slot].a0[2] = setup->posCoef.a0[2]; setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c index ea76487bcf..5bdcaa4a01 100644 --- a/src/mesa/state_tracker/st_atom_rasterizer.c +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -79,6 +79,7 @@ static void update_raster_state( struct st_context *st ) memset(raster, 0, sizeof(*raster)); + /* XXX obsolete field, remove someday */ raster->origin_lower_left = 1; /* Always true for OpenGL */ /* _NEW_POLYGON, _NEW_BUFFERS diff --git a/src/mesa/state_tracker/st_atom_stipple.c b/src/mesa/state_tracker/st_atom_stipple.c index f395930ab4..31e124b329 100644 --- a/src/mesa/state_tracker/st_atom_stipple.c +++ b/src/mesa/state_tracker/st_atom_stipple.c @@ -39,24 +39,52 @@ #include "pipe/p_defines.h" +/** + * OpenGL's polygon stipple is indexed with window coordinates in which + * the origin (0,0) is the lower-left corner of the window. + * With Gallium, the origin is the upper-left corner of the window. + * To convert GL's polygon stipple to what gallium expects we need to + * invert the pattern vertically and rotate the stipple rows according + * to the window height. + */ +static void +invert_stipple(GLuint dest[32], const GLuint src[32], GLuint winHeight) +{ + GLuint i; + + for (i = 0; i < 32; i++) { + dest[i] = src[(winHeight - 1 - i) & 0x1f]; + } +} + + + static void update_stipple( struct st_context *st ) { - const GLuint sz = sizeof(st->state.poly_stipple.stipple); + const GLuint sz = sizeof(st->state.poly_stipple); assert(sz == sizeof(st->ctx->PolygonStipple)); - if (memcmp(&st->state.poly_stipple.stipple, st->ctx->PolygonStipple, sz)) { + if (memcmp(st->state.poly_stipple, st->ctx->PolygonStipple, sz)) { /* state has changed */ - memcpy(st->state.poly_stipple.stipple, st->ctx->PolygonStipple, sz); - st->pipe->set_polygon_stipple(st->pipe, &st->state.poly_stipple); + struct pipe_poly_stipple newStipple; + + memcpy(st->state.poly_stipple, st->ctx->PolygonStipple, sz); + + invert_stipple(newStipple.stipple, st->ctx->PolygonStipple, + st->ctx->DrawBuffer->Height); + + st->pipe->set_polygon_stipple(st->pipe, &newStipple); } } +/** Update the stipple when the pattern or window height changes */ const struct st_tracked_state st_update_polygon_stipple = { "st_update_polygon_stipple", /* name */ { /* dirty */ - (_NEW_POLYGONSTIPPLE), /* mesa */ + (_NEW_POLYGONSTIPPLE | + _NEW_BUFFERS), /* mesa */ 0, /* st */ }, update_stipple /* update */ diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index d7518ab689..ae8c2978bf 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -93,12 +93,13 @@ struct st_context struct pipe_constant_buffer constants[2]; struct pipe_framebuffer_state framebuffer; struct pipe_texture *sampler_texture[PIPE_MAX_SAMPLERS]; - struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct pipe_viewport_state viewport; GLuint num_samplers; GLuint num_textures; + + GLuint poly_stipple[32]; /**< In OpenGL's bottom-to-top order */ } state; struct { diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index cbf3f334c0..ffa607dd87 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -219,8 +219,9 @@ compile_instruction( const GLuint immediateMapping[], GLboolean indirectAccess, GLuint preamble_size, - GLuint processor, - GLboolean *insideSubroutine) + GLuint procType, + GLboolean *insideSubroutine, + GLint wposTemp) { GLuint i; struct tgsi_full_dst_register *fulldst; @@ -247,19 +248,29 @@ compile_instruction( GLuint j; fullsrc = &fullinst->FullSrcRegisters[i]; - fullsrc->SrcRegister.File = map_register_file( - inst->SrcReg[i].File, - inst->SrcReg[i].Index, - immediateMapping, - indirectAccess ); - fullsrc->SrcRegister.Index = map_register_file_index( - fullsrc->SrcRegister.File, - inst->SrcReg[i].Index, - inputMapping, - outputMapping, - immediateMapping, - indirectAccess ); + if (procType == TGSI_PROCESSOR_FRAGMENT && + inst->SrcReg[i].File == PROGRAM_INPUT && + inst->SrcReg[i].Index == FRAG_ATTRIB_WPOS) { + /* special case of INPUT[WPOS] */ + fullsrc->SrcRegister.File = TGSI_FILE_TEMPORARY; + fullsrc->SrcRegister.Index = wposTemp; + } + else { + /* any other src register */ + fullsrc->SrcRegister.File = map_register_file( + inst->SrcReg[i].File, + inst->SrcReg[i].Index, + immediateMapping, + indirectAccess ); + fullsrc->SrcRegister.Index = map_register_file_index( + fullsrc->SrcRegister.File, + inst->SrcReg[i].Index, + inputMapping, + outputMapping, + immediateMapping, + indirectAccess ); + } /* swizzle (ext swizzle also depends on negation) */ { @@ -733,6 +744,111 @@ find_temporaries(const struct gl_program *program, } +/** + * Find an unused temporary in the tempsUsed array. + */ +static int +find_free_temporary(GLboolean tempsUsed[MAX_PROGRAM_TEMPS]) +{ + int i; + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + if (!tempsUsed[i]) { + tempsUsed[i] = GL_TRUE; + return i; + } + } + return -1; +} + + +/** helper for building simple TGSI instruction, one src register */ +static void +build_tgsi_instruction1(struct tgsi_full_instruction *inst, + int opcode, + int dstFile, int dstIndex, int writemask, + int srcFile1, int srcIndex1) +{ + *inst = tgsi_default_full_instruction(); + + inst->Instruction.Opcode = opcode; + + inst->Instruction.NumDstRegs = 1; + inst->FullDstRegisters[0].DstRegister.File = dstFile; + inst->FullDstRegisters[0].DstRegister.Index = dstIndex; + inst->FullDstRegisters[0].DstRegister.WriteMask = writemask; + + inst->Instruction.NumSrcRegs = 1; + inst->FullSrcRegisters[0].SrcRegister.File = srcFile1; + inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1; +} + + +/** helper for building simple TGSI instruction, two src registers */ +static void +build_tgsi_instruction2(struct tgsi_full_instruction *inst, + int opcode, + int dstFile, int dstIndex, int writemask, + int srcFile1, int srcIndex1, + int srcFile2, int srcIndex2) +{ + *inst = tgsi_default_full_instruction(); + + inst->Instruction.Opcode = opcode; + + inst->Instruction.NumDstRegs = 1; + inst->FullDstRegisters[0].DstRegister.File = dstFile; + inst->FullDstRegisters[0].DstRegister.Index = dstIndex; + inst->FullDstRegisters[0].DstRegister.WriteMask = writemask; + + inst->Instruction.NumSrcRegs = 2; + inst->FullSrcRegisters[0].SrcRegister.File = srcFile1; + inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1; + inst->FullSrcRegisters[1].SrcRegister.File = srcFile2; + inst->FullSrcRegisters[1].SrcRegister.Index = srcIndex2; +} + + + +/** + * Emit the TGSI instructions for inverting the WPOS y coordinate. + */ +static int +emit_inverted_wpos(struct tgsi_token *tokens, + int wpos_temp, + int winsize_const, + int wpos_input, + struct tgsi_header *header, int maxTokens) +{ + struct tgsi_full_instruction fullinst; + int ti = 0; + + /* MOV wpos_temp.xzw, input[wpos]; */ + build_tgsi_instruction1(&fullinst, + TGSI_OPCODE_MOV, + TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_XZW, + TGSI_FILE_INPUT, 0); + + ti += tgsi_build_full_instruction(&fullinst, + &tokens[ti], + header, + maxTokens - ti); + + /* SUB wpos_temp.y, const[winsize_const] - input[wpos_input]; */ + build_tgsi_instruction2(&fullinst, + TGSI_OPCODE_SUB, + TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_Y, + TGSI_FILE_CONSTANT, winsize_const, + TGSI_FILE_INPUT, wpos_input); + + ti += tgsi_build_full_instruction(&fullinst, + &tokens[ti], + header, + maxTokens - ti); + + return ti; +} + + /** @@ -778,16 +894,34 @@ st_translate_mesa_program( GLuint ti; /* token index */ struct tgsi_header *header; struct tgsi_processor *processor; - struct tgsi_full_instruction fullinst; GLuint preamble_size = 0; GLuint immediates[1000]; GLuint numImmediates = 0; GLboolean insideSubroutine = GL_FALSE; GLboolean indirectAccess = GL_FALSE; + GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1]; + GLint wposTemp = -1, winHeightConst = -1; assert(procType == TGSI_PROCESSOR_FRAGMENT || procType == TGSI_PROCESSOR_VERTEX); + find_temporaries(program, tempsUsed); + + if (procType == TGSI_PROCESSOR_FRAGMENT) { + if (program->InputsRead & FRAG_BIT_WPOS) { + /* Fragment program uses fragment position input. + * Need to replace instances of INPUT[WPOS] with temp T + * where T = INPUT[WPOS] by y is inverted. + */ + static const gl_state_index winSizeState[STATE_LENGTH] + = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 }; + winHeightConst = _mesa_add_state_reference(program->Parameters, + winSizeState); + wposTemp = find_free_temporary(tempsUsed); + } + } + + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); header = (struct tgsi_header *) &tokens[1]; @@ -884,11 +1018,9 @@ st_translate_mesa_program( /* temporary decls */ { - GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1]; GLboolean inside_range = GL_FALSE; GLuint start_range = 0; - find_temporaries(program, tempsUsed); tempsUsed[MAX_PROGRAM_TEMPS] = GL_FALSE; for (i = 0; i < MAX_PROGRAM_TEMPS + 1; i++) { if (tempsUsed[i] && !inside_range) { @@ -1018,7 +1150,17 @@ st_translate_mesa_program( } } + /* invert WPOS fragment input */ + if (wposTemp >= 0) { + ti += emit_inverted_wpos(&tokens[ti], wposTemp, winHeightConst, + inputMapping[FRAG_ATTRIB_WPOS], + header, maxTokens - ti); + preamble_size = 2; /* two instructions added */ + } + for (i = 0; i < program->NumInstructions; i++) { + struct tgsi_full_instruction fullinst; + compile_instruction( &program->Instructions[i], &fullinst, @@ -1028,7 +1170,8 @@ st_translate_mesa_program( indirectAccess, preamble_size, procType, - &insideSubroutine ); + &insideSubroutine, + wposTemp); ti += tgsi_build_full_instruction( &fullinst, -- cgit v1.2.3 From d7b7b63bd7cca80e99ad9701f8b56ee365053647 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sat, 21 Mar 2009 11:46:54 +0100 Subject: st: Silence compiler warnings. --- src/mesa/state_tracker/st_cb_texture.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index edfa8854d8..311d812ccf 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -467,7 +467,7 @@ st_TexImage(GLcontext * ctx, */ if (stObj->pt) { if (stObj->teximage_realloc || - level > stObj->pt->last_level || + level > (GLint) stObj->pt->last_level || (stObj->pt->last_level == level && stObj->pt->target != PIPE_TEXTURE_CUBE && !st_texture_match_image(stObj->pt, &stImage->base, @@ -803,7 +803,6 @@ st_TexSubimage(GLcontext * ctx, PIPE_TRANSFER_WRITE, xoffset, yoffset, width, height); - dstRowStride = stImage->transfer->stride; } if (!texImage->Data) { @@ -812,6 +811,7 @@ st_TexSubimage(GLcontext * ctx, } src = (const GLubyte *) pixels; + dstRowStride = stImage->transfer->stride; for (i = 0; i++ < depth;) { if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat, -- cgit v1.2.3 From 1bb60d25e09d71861bdb4485378880140b65b062 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 21 Mar 2009 10:37:33 +0000 Subject: gallium: remove remaining references to origin_lower_left --- src/gallium/drivers/trace/tr_state.c | 1 - src/gallium/include/pipe/p_state.h | 1 - src/gallium/state_trackers/g3dvl/vl_context.c | 1 - src/mesa/state_tracker/st_atom_rasterizer.c | 3 --- 4 files changed, 6 deletions(-) (limited to 'src') diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index b6a1ce0d62..f9fbe9aee7 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -123,7 +123,6 @@ void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state) trace_dump_member(uint, state, line_stipple_pattern); trace_dump_member(bool, state, line_last_pixel); trace_dump_member(bool, state, bypass_vs_clip_and_viewport); - trace_dump_member(bool, state, origin_lower_left); trace_dump_member(bool, state, flatshade_first); trace_dump_member(bool, state, gl_rasterization_rules); diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index aad41fab11..9c7baa3d92 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -117,7 +117,6 @@ struct pipe_rasterizer_state */ unsigned bypass_vs_clip_and_viewport:1; - unsigned origin_lower_left:1; /**< Is (0,0) the lower-left corner? */ unsigned flatshade_first:1; /**< take color attribute from the first vertex of a primitive */ unsigned gl_rasterization_rules:1; /**< enable tweaks for GL rasterization? */ diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 1d8ad0b046..5cfd233c4c 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -42,7 +42,6 @@ static int vlInitCommon(struct vlContext *context) rast.line_stipple_pattern = 0; rast.line_last_pixel = 0; rast.bypass_vs_clip_and_viewport = 0; - rast.origin_lower_left = 0; rast.line_width = 1; rast.point_smooth = 0; rast.point_size = 1; diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c index 5bdcaa4a01..61687fbc3e 100644 --- a/src/mesa/state_tracker/st_atom_rasterizer.c +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -79,9 +79,6 @@ static void update_raster_state( struct st_context *st ) memset(raster, 0, sizeof(*raster)); - /* XXX obsolete field, remove someday */ - raster->origin_lower_left = 1; /* Always true for OpenGL */ - /* _NEW_POLYGON, _NEW_BUFFERS */ { -- cgit v1.2.3 From 699897e81c623e53be51fba0488f535b0a8d7761 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sat, 21 Mar 2009 12:18:09 +0100 Subject: tgsi: Document KIL, KILP instructions. --- src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index b83abd4093..5b21a2be0b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -382,7 +382,9 @@ TGSI Instruction Specification 1.5.7 KILP - Predicated Discard - TBD + if (cc.x || cc.y || cc.z || cc.w) + discard + endif 1.5.8 LG2 - Logarithm Base 2 @@ -599,7 +601,9 @@ TGSI Instruction Specification 1.8.2 KIL - Conditional Discard - TBD + if (src.x < 0.0 || src.y < 0.0 || src.z < 0.0 || src.w < 0.0) + discard + endif 1.8.3 SCS - Sine Cosine -- cgit v1.2.3