diff options
Diffstat (limited to 'src/mesa/drivers')
223 files changed, 5212 insertions, 3774 deletions
diff --git a/src/mesa/drivers/Android.mk b/src/mesa/drivers/Android.mk index 00de3c1a82..7fb540af11 100644 --- a/src/mesa/drivers/Android.mk +++ b/src/mesa/drivers/Android.mk @@ -1,15 +1,6 @@ -LOCAL_PATH := $(call my-dir) +# Android.mk for DRI drivers -# from dri/Makefile.template -COMMON_SOURCES = \ - dri/common/utils.c \ - dri/common/vblank.c \ - dri/common/dri_util.c \ - dri/common/xmlconfig.c \ - dri/common/texmem.c \ - dri/common/drirenderbuffer.c \ - dri/common/dri_metaops.c \ - common/driverfuncs.c +LOCAL_PATH := $(call my-dir) # from dri/i915/Makefile i915_DRIVER_SOURCES = \ @@ -59,6 +50,7 @@ i915_DRIVER_SOURCES = \ intel_fbo.c i915_DRIVER_SOURCES := $(addprefix dri/i915/, $(i915_DRIVER_SOURCES)) +# from dri/i965/Makefile i965_DRIVER_SOURCES = \ intel_batchbuffer.c \ intel_blit.c \ @@ -158,11 +150,22 @@ i965_CXX_SOURCES = \ brw_fs.cpp \ brw_fs_channel_expressions.cpp \ brw_fs_reg_allocate.cpp \ + brw_fs_schedule_instructions.cpp \ brw_fs_vector_splitting.cpp i965_DRIVER_SOURCES := \ $(addprefix dri/i965/, $(i965_DRIVER_SOURCES)) \ $(addprefix dri/i965/, $(i965_CXX_SOURCES)) +# from dri/Makefile.template +common_SOURCES = \ + dri/common/utils.c \ + dri/common/vblank.c \ + dri/common/dri_util.c \ + dri/common/xmlconfig.c \ + dri/common/texmem.c \ + dri/common/drirenderbuffer.c \ + common/driverfuncs.c + common_CFLAGS := \ -DPTHREADS \ -DFEATURE_GL=1 \ @@ -175,7 +178,6 @@ common_C_INCLUDES := \ external/mesa/include \ external/mesa/src/mapi \ external/mesa/src/glsl \ - external/mesa/src/talloc \ external/mesa/src/mesa \ external/mesa/src/mesa/drivers/dri/common \ external/drm/ \ @@ -185,27 +187,23 @@ common_C_INCLUDES := \ common_STATIC_LIBRARIES := \ libmesa_classic_mesa \ libmesa_glsl \ - libmesa_classic_mesa \ - libmesa_talloc \ - libmesa_classic_egl - -common_WHOLE_STATIC_LIBRARIES := \ - libmesa_egl \ - libmesa_glapi + libmesa_classic_mesa common_SHARED_LIBRARIES := \ + libglapi \ libdl \ libdrm \ libexpat \ - libhardware \ liblog \ libcutils +common_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/dri + ifeq ($(strip $(MESA_BUILD_I915C)),true) include $(CLEAR_VARS) LOCAL_SRC_FILES := \ - $(COMMON_SOURCES) \ + $(common_SOURCES) \ $(i915_DRIVER_SOURCES) LOCAL_CFLAGS := \ @@ -220,15 +218,12 @@ LOCAL_C_INCLUDES := \ LOCAL_STATIC_LIBRARIES := \ $(common_STATIC_LIBRARIES) -LOCAL_WHOLE_STATIC_LIBRARIES := \ - $(common_WHOLE_STATIC_LIBRARIES) - LOCAL_SHARED_LIBRARIES := \ $(common_SHARED_LIBRARIES) \ libdrm_intel -LOCAL_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/egl -LOCAL_MODULE := libGLES_i915c +LOCAL_MODULE := i915_dri +LOCAL_MODULE_PATH := $(common_MODULE_PATH) include $(BUILD_SHARED_LIBRARY) endif # MESA_BUILD_I915C @@ -237,7 +232,7 @@ ifeq ($(strip $(MESA_BUILD_I965C)),true) include $(CLEAR_VARS) LOCAL_SRC_FILES := \ - $(COMMON_SOURCES) \ + $(common_SOURCES) \ $(i965_DRIVER_SOURCES) LOCAL_CFLAGS := \ @@ -252,15 +247,12 @@ LOCAL_C_INCLUDES := \ LOCAL_STATIC_LIBRARIES := \ $(common_STATIC_LIBRARIES) -LOCAL_WHOLE_STATIC_LIBRARIES := \ - $(common_WHOLE_STATIC_LIBRARIES) - LOCAL_SHARED_LIBRARIES := \ $(common_SHARED_LIBRARIES) \ libdrm_intel -LOCAL_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/egl -LOCAL_MODULE := libGLES_i965c +LOCAL_MODULE := i965_dri +LOCAL_MODULE_PATH := $(common_MODULE_PATH) include $(BUILD_SHARED_LIBRARY) endif # MESA_BUILD_I965C diff --git a/src/mesa/drivers/beos/GLView.cpp b/src/mesa/drivers/beos/GLView.cpp index ee3415b3d1..57c4614f32 100644 --- a/src/mesa/drivers/beos/GLView.cpp +++ b/src/mesa/drivers/beos/GLView.cpp @@ -50,7 +50,6 @@ extern "C" { #include "swrast/s_depth.h" #include "swrast/s_lines.h" #include "swrast/s_triangle.h" -#include "swrast/s_trispan.h" #include "tnl/tnl.h" #include "tnl/t_context.h" #include "tnl/t_pipeline.h" @@ -319,7 +318,8 @@ BGLView::BGLView(BRect rect, char *name, functions.Viewport = md->Viewport; // create core context - struct gl_context *ctx = _mesa_create_context(visual, NULL, &functions, md); + struct gl_context *ctx = _mesa_create_context(API_OPENGL, visual, + NULL, &functions, md); if (! ctx) { _mesa_destroy_visual(visual); delete md; diff --git a/src/mesa/drivers/common/driverfuncs.c b/src/mesa/drivers/common/driverfuncs.c index fc67bee98c..854dea9450 100644 --- a/src/mesa/drivers/common/driverfuncs.c +++ b/src/mesa/drivers/common/driverfuncs.c @@ -42,6 +42,7 @@ #include "main/fbobject.h" #include "main/texrender.h" #include "main/syncobj.h" +#include "main/texturebarrier.h" #include "main/transformfeedback.h" #include "program/program.h" @@ -184,9 +185,12 @@ _mesa_init_driver_functions(struct dd_function_table *driver) driver->RenderTexture = _mesa_render_texture; driver->FinishRenderTexture = _mesa_finish_render_texture; driver->FramebufferRenderbuffer = _mesa_framebuffer_renderbuffer; + driver->ValidateFramebuffer = _mesa_validate_framebuffer; driver->BlitFramebuffer = _swrast_BlitFramebuffer; + _mesa_init_texture_barrier_functions(driver); + /* APPLE_vertex_array_object */ driver->NewArrayObject = _mesa_new_array_object; driver->DeleteArrayObject = _mesa_delete_array_object; @@ -231,13 +235,14 @@ _mesa_init_driver_state(struct gl_context *ctx) ctx->Driver.BlendColor(ctx, ctx->Color.BlendColor); ctx->Driver.BlendEquationSeparate(ctx, - ctx->Color.BlendEquationRGB, - ctx->Color.BlendEquationA); + ctx->Color.Blend[0].EquationRGB, + ctx->Color.Blend[0].EquationA); ctx->Driver.BlendFuncSeparate(ctx, - ctx->Color.BlendSrcRGB, - ctx->Color.BlendDstRGB, - ctx->Color.BlendSrcA, ctx->Color.BlendDstA); + ctx->Color.Blend[0].SrcRGB, + ctx->Color.Blend[0].DstRGB, + ctx->Color.Blend[0].SrcA, + ctx->Color.Blend[0].DstA); if (ctx->Driver.ColorMaskIndexed) { GLuint i; diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 5e0e9a12df..f71aac5e54 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -48,6 +48,7 @@ #include "main/macros.h" #include "main/matrix.h" #include "main/mipmap.h" +#include "main/pbo.h" #include "main/polygon.h" #include "main/readpix.h" #include "main/scissor.h" @@ -266,7 +267,7 @@ struct gen_mipmap_state GLuint FBO; }; - +#define MAX_META_OPS_DEPTH 2 /** * State for glDrawTex() */ @@ -282,7 +283,10 @@ struct drawtex_state */ struct gl_meta_state { - struct save_state Save; /**< state saved during meta-ops */ + /** Stack of state saved during meta-ops */ + struct save_state Save[MAX_META_OPS_DEPTH]; + /** Save stack depth */ + GLuint SaveStackDepth; struct temp_texture TempTex; @@ -335,8 +339,13 @@ _mesa_meta_free(struct gl_context *ctx) static void _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) { - struct save_state *save = &ctx->Meta->Save; + struct save_state *save; + + /* hope MAX_META_OPS_DEPTH is large enough */ + assert(ctx->Meta->SaveStackDepth < MAX_META_OPS_DEPTH); + save = &ctx->Meta->Save[ctx->Meta->SaveStackDepth++]; + memset(save, 0, sizeof(*save)); save->SavedState = state; if (state & META_ALPHA_TEST) { @@ -586,7 +595,7 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state) static void _mesa_meta_end(struct gl_context *ctx) { - struct save_state *save = &ctx->Meta->Save; + struct save_state *save = &ctx->Meta->Save[--ctx->Meta->SaveStackDepth]; const GLbitfield state = save->SavedState; if (state & META_ALPHA_TEST) { @@ -2263,7 +2272,6 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, const GLuint maxLevel = texObj->MaxLevel; const GLenum minFilterSave = texObj->MinFilter; const GLenum magFilterSave = texObj->MagFilter; - const GLint baseLevelSave = texObj->BaseLevel; const GLint maxLevelSave = texObj->MaxLevel; const GLboolean genMipmapSave = texObj->GenerateMipmap; const GLenum wrapSSave = texObj->WrapS; @@ -2323,7 +2331,7 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, } _mesa_BindFramebufferEXT(GL_FRAMEBUFFER_EXT, mipmap->FBO); - _mesa_TexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + _mesa_TexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); _mesa_TexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); _mesa_TexParameteri(target, GL_GENERATE_MIPMAP, GL_FALSE); _mesa_TexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); @@ -2499,8 +2507,7 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, } } - /* limit sampling to src level */ - _mesa_TexParameteri(target, GL_TEXTURE_BASE_LEVEL, srcLevel); + /* limit minification to src level */ _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, srcLevel); /* Set to draw into the current dstLevel */ @@ -2552,7 +2559,6 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target, _mesa_TexParameteri(target, GL_TEXTURE_MIN_FILTER, minFilterSave); _mesa_TexParameteri(target, GL_TEXTURE_MAG_FILTER, magFilterSave); - _mesa_TexParameteri(target, GL_TEXTURE_BASE_LEVEL, baseLevelSave); _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, maxLevelSave); _mesa_TexParameteri(target, GL_GENERATE_MIPMAP, genMipmapSave); _mesa_TexParameteri(target, GL_TEXTURE_WRAP_S, wrapSSave); diff --git a/src/mesa/drivers/dri/Makefile.template b/src/mesa/drivers/dri/Makefile.template index 4ecddbc048..d1a119379e 100644 --- a/src/mesa/drivers/dri/Makefile.template +++ b/src/mesa/drivers/dri/Makefile.template @@ -1,7 +1,5 @@ # -*-makefile-*- -MESA_MODULES = $(TOP)/src/mesa/libmesa.a - COMMON_GALLIUM_SOURCES = \ ../common/utils.c \ ../common/vblank.c \ @@ -11,8 +9,7 @@ COMMON_GALLIUM_SOURCES = \ COMMON_SOURCES = $(COMMON_GALLIUM_SOURCES) \ ../../common/driverfuncs.c \ ../common/texmem.c \ - ../common/drirenderbuffer.c \ - ../common/dri_metaops.c + ../common/drirenderbuffer.c INCLUDES = $(SHARED_INCLUDES) $(EXPAT_INCLUDES) @@ -39,13 +36,13 @@ CXXFLAGS += $(API_DEFINES) ##### RULES ##### .c.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + $(CC) -c $(INCLUDES) $(DRI_CFLAGS) $(DRIVER_DEFINES) $< -o $@ .cpp.o: - $(CC) -c $(INCLUDES) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@ + $(CC) -c $(INCLUDES) $(DRI_CXXFLAGS) $(DRIVER_DEFINES) $< -o $@ .S.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + $(CC) -c $(INCLUDES) $(DRI_CFLAGS) $(DRIVER_DEFINES) $< -o $@ ##### TARGETS ##### @@ -57,10 +54,10 @@ default: subdirs lib lib: symlinks subdirs depend @$(MAKE) $(LIBNAME) $(TOP)/$(LIB_DIR)/$(LIBNAME) -$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(EXTRA_MODULES) Makefile \ +$(LIBNAME): $(OBJECTS) $(EXTRA_MODULES) $(MESA_MODULES) Makefile \ $(TOP)/src/mesa/drivers/dri/Makefile.template $(TOP)/src/mesa/drivers/dri/common/dri_test.o $(MKLIB) -o $@.tmp -noprefix -linker '$(CXX)' -ldflags '$(LDFLAGS)' \ - $(OBJECTS) $(MESA_MODULES) $(EXTRA_MODULES) $(DRI_LIB_DEPS) + $(OBJECTS) $(EXTRA_MODULES) $(DRI_LIB_DEPS) $(CXX) $(CFLAGS) -o $@.test $(TOP)/src/mesa/drivers/dri/common/dri_test.o $@.tmp $(DRI_LIB_DEPS) @rm -f $@.test mv -f $@.tmp $@ diff --git a/src/mesa/drivers/dri/common/dri_metaops.c b/src/mesa/drivers/dri/common/dri_metaops.c deleted file mode 100644 index e0bc3b88ec..0000000000 --- a/src/mesa/drivers/dri/common/dri_metaops.c +++ /dev/null @@ -1,291 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * Copyright 2009 Intel Corporation. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "main/arbprogram.h" -#include "main/arrayobj.h" -#include "main/bufferobj.h" -#include "main/context.h" -#include "main/enable.h" -#include "main/matrix.h" -#include "main/texstate.h" -#include "main/varray.h" -#include "main/viewport.h" -#include "program/program.h" -#include "dri_metaops.h" - -void -meta_set_passthrough_transform(struct dri_metaops *meta) -{ - struct gl_context *ctx = meta->ctx; - - meta->saved_vp_x = ctx->Viewport.X; - meta->saved_vp_y = ctx->Viewport.Y; - meta->saved_vp_width = ctx->Viewport.Width; - meta->saved_vp_height = ctx->Viewport.Height; - meta->saved_matrix_mode = ctx->Transform.MatrixMode; - - meta->internal_viewport_call = GL_TRUE; - _mesa_Viewport(0, 0, ctx->DrawBuffer->Width, ctx->DrawBuffer->Height); - meta->internal_viewport_call = GL_FALSE; - - _mesa_MatrixMode(GL_PROJECTION); - _mesa_PushMatrix(); - _mesa_LoadIdentity(); - _mesa_Ortho(0, ctx->DrawBuffer->Width, 0, ctx->DrawBuffer->Height, 1, -1); - - _mesa_MatrixMode(GL_MODELVIEW); - _mesa_PushMatrix(); - _mesa_LoadIdentity(); -} - -void -meta_restore_transform(struct dri_metaops *meta) -{ - _mesa_MatrixMode(GL_PROJECTION); - _mesa_PopMatrix(); - _mesa_MatrixMode(GL_MODELVIEW); - _mesa_PopMatrix(); - - _mesa_MatrixMode(meta->saved_matrix_mode); - - meta->internal_viewport_call = GL_TRUE; - _mesa_Viewport(meta->saved_vp_x, meta->saved_vp_y, - meta->saved_vp_width, meta->saved_vp_height); - meta->internal_viewport_call = GL_FALSE; -} - - -/** - * Set up a vertex program to pass through the position and first texcoord - * for pixel path. - */ -void -meta_set_passthrough_vertex_program(struct dri_metaops *meta) -{ - struct gl_context *ctx = meta->ctx; - static const char *vp = - "!!ARBvp1.0\n" - "TEMP vertexClip;\n" - "DP4 vertexClip.x, state.matrix.mvp.row[0], vertex.position;\n" - "DP4 vertexClip.y, state.matrix.mvp.row[1], vertex.position;\n" - "DP4 vertexClip.z, state.matrix.mvp.row[2], vertex.position;\n" - "DP4 vertexClip.w, state.matrix.mvp.row[3], vertex.position;\n" - "MOV result.position, vertexClip;\n" - "MOV result.texcoord[0], vertex.texcoord[0];\n" - "MOV result.color, vertex.color;\n" - "END\n"; - - assert(meta->saved_vp == NULL); - - _mesa_reference_vertprog(ctx, &meta->saved_vp, - ctx->VertexProgram.Current); - if (meta->passthrough_vp == NULL) { - GLuint prog_name; - _mesa_GenPrograms(1, &prog_name); - _mesa_BindProgram(GL_VERTEX_PROGRAM_ARB, prog_name); - _mesa_ProgramStringARB(GL_VERTEX_PROGRAM_ARB, - GL_PROGRAM_FORMAT_ASCII_ARB, - strlen(vp), (const GLubyte *)vp); - _mesa_reference_vertprog(ctx, &meta->passthrough_vp, - ctx->VertexProgram.Current); - _mesa_DeletePrograms(1, &prog_name); - } - - FLUSH_VERTICES(ctx, _NEW_PROGRAM); - _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current, - meta->passthrough_vp); - ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB, - &meta->passthrough_vp->Base); - - meta->saved_vp_enable = ctx->VertexProgram.Enabled; - _mesa_Enable(GL_VERTEX_PROGRAM_ARB); -} - -/** - * Restores the previous vertex program after - * meta_set_passthrough_vertex_program() - */ -void -meta_restore_vertex_program(struct dri_metaops *meta) -{ - struct gl_context *ctx = meta->ctx; - - FLUSH_VERTICES(ctx, _NEW_PROGRAM); - _mesa_reference_vertprog(ctx, &ctx->VertexProgram.Current, - meta->saved_vp); - _mesa_reference_vertprog(ctx, &meta->saved_vp, NULL); - ctx->Driver.BindProgram(ctx, GL_VERTEX_PROGRAM_ARB, - &ctx->VertexProgram.Current->Base); - - if (!meta->saved_vp_enable) - _mesa_Disable(GL_VERTEX_PROGRAM_ARB); -} - -/** - * Binds the given program string to GL_FRAGMENT_PROGRAM_ARB, caching the - * program object. - */ -void -meta_set_fragment_program(struct dri_metaops *meta, - struct gl_fragment_program **prog, - const char *prog_string) -{ - struct gl_context *ctx = meta->ctx; - assert(meta->saved_fp == NULL); - - _mesa_reference_fragprog(ctx, &meta->saved_fp, - ctx->FragmentProgram.Current); - if (*prog == NULL) { - GLuint prog_name; - _mesa_GenPrograms(1, &prog_name); - _mesa_BindProgram(GL_FRAGMENT_PROGRAM_ARB, prog_name); - _mesa_ProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, - GL_PROGRAM_FORMAT_ASCII_ARB, - strlen(prog_string), (const GLubyte *)prog_string); - _mesa_reference_fragprog(ctx, prog, ctx->FragmentProgram.Current); - /* Note that DeletePrograms unbinds the program on us */ - _mesa_DeletePrograms(1, &prog_name); - } - - FLUSH_VERTICES(ctx, _NEW_PROGRAM); - _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, *prog); - ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, &((*prog)->Base)); - - meta->saved_fp_enable = ctx->FragmentProgram.Enabled; - _mesa_Enable(GL_FRAGMENT_PROGRAM_ARB); -} - -/** - * Restores the previous fragment program after - * meta_set_fragment_program() - */ -void -meta_restore_fragment_program(struct dri_metaops *meta) -{ - struct gl_context *ctx = meta->ctx; - - FLUSH_VERTICES(ctx, _NEW_PROGRAM); - _mesa_reference_fragprog(ctx, &ctx->FragmentProgram.Current, - meta->saved_fp); - _mesa_reference_fragprog(ctx, &meta->saved_fp, NULL); - ctx->Driver.BindProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, - &ctx->FragmentProgram.Current->Base); - - if (!meta->saved_fp_enable) - _mesa_Disable(GL_FRAGMENT_PROGRAM_ARB); -} - -static const float default_texcoords[4][2] = { { 0.0, 0.0 }, - { 1.0, 0.0 }, - { 1.0, 1.0 }, - { 0.0, 1.0 } }; - -void -meta_set_default_texrect(struct dri_metaops *meta) -{ - struct gl_context *ctx = meta->ctx; - struct gl_client_array *old_texcoord_array; - - meta->saved_active_texture = ctx->Texture.CurrentUnit; - if (meta->saved_array_vbo == NULL) { - _mesa_reference_buffer_object(ctx, &meta->saved_array_vbo, - ctx->Array.ArrayBufferObj); - } - - old_texcoord_array = &ctx->Array.ArrayObj->TexCoord[0]; - meta->saved_texcoord_type = old_texcoord_array->Type; - meta->saved_texcoord_size = old_texcoord_array->Size; - meta->saved_texcoord_stride = old_texcoord_array->Stride; - meta->saved_texcoord_enable = old_texcoord_array->Enabled; - meta->saved_texcoord_ptr = old_texcoord_array->Ptr; - _mesa_reference_buffer_object(ctx, &meta->saved_texcoord_vbo, - old_texcoord_array->BufferObj); - - _mesa_ClientActiveTextureARB(GL_TEXTURE0); - - if (meta->texcoord_vbo == NULL) { - GLuint vbo_name; - - _mesa_GenBuffersARB(1, &vbo_name); - _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, vbo_name); - _mesa_BufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(default_texcoords), - default_texcoords, GL_STATIC_DRAW_ARB); - _mesa_reference_buffer_object(ctx, &meta->texcoord_vbo, - ctx->Array.ArrayBufferObj); - } else { - _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, - meta->texcoord_vbo->Name); - } - _mesa_TexCoordPointer(2, GL_FLOAT, 2 * sizeof(GLfloat), NULL); - - _mesa_Enable(GL_TEXTURE_COORD_ARRAY); -} - -void -meta_restore_texcoords(struct dri_metaops *meta) -{ - struct gl_context *ctx = meta->ctx; - - /* Restore the old TexCoordPointer */ - if (meta->saved_texcoord_vbo) { - _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, - meta->saved_texcoord_vbo->Name); - _mesa_reference_buffer_object(ctx, &meta->saved_texcoord_vbo, NULL); - } else { - _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0); - } - - _mesa_TexCoordPointer(meta->saved_texcoord_size, - meta->saved_texcoord_type, - meta->saved_texcoord_stride, - meta->saved_texcoord_ptr); - if (!meta->saved_texcoord_enable) - _mesa_Disable(GL_TEXTURE_COORD_ARRAY); - - _mesa_ClientActiveTextureARB(GL_TEXTURE0 + - meta->saved_active_texture); - - if (meta->saved_array_vbo) { - _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, - meta->saved_array_vbo->Name); - _mesa_reference_buffer_object(ctx, &meta->saved_array_vbo, NULL); - } else { - _mesa_BindBufferARB(GL_ARRAY_BUFFER_ARB, 0); - } -} - - -void meta_init_metaops(struct gl_context *ctx, struct dri_metaops *meta) -{ - meta->ctx = ctx; -} - -void meta_destroy_metaops(struct dri_metaops *meta) -{ - -} diff --git a/src/mesa/drivers/dri/common/dri_metaops.h b/src/mesa/drivers/dri/common/dri_metaops.h deleted file mode 100644 index aa7d4baa6e..0000000000 --- a/src/mesa/drivers/dri/common/dri_metaops.h +++ /dev/null @@ -1,81 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * Copyright 2009 Intel Corporation. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef DRI_METAOPS_H -#define DRI_METAOPS_H - - -struct dri_metaops { - struct gl_context *ctx; - GLboolean internal_viewport_call; - struct gl_fragment_program *bitmap_fp; - struct gl_vertex_program *passthrough_vp; - struct gl_buffer_object *texcoord_vbo; - - struct gl_fragment_program *saved_fp; - GLboolean saved_fp_enable; - struct gl_vertex_program *saved_vp; - GLboolean saved_vp_enable; - - struct gl_fragment_program *tex2d_fp; - - GLboolean saved_texcoord_enable; - struct gl_buffer_object *saved_array_vbo, *saved_texcoord_vbo; - GLenum saved_texcoord_type; - GLsizei saved_texcoord_size, saved_texcoord_stride; - const void *saved_texcoord_ptr; - int saved_active_texture; - - GLint saved_vp_x, saved_vp_y; - GLsizei saved_vp_width, saved_vp_height; - GLenum saved_matrix_mode; -}; - - -void meta_set_passthrough_transform(struct dri_metaops *meta); - -void meta_restore_transform(struct dri_metaops *meta); - -void meta_set_passthrough_vertex_program(struct dri_metaops *meta); - -void meta_restore_vertex_program(struct dri_metaops *meta); - -void meta_set_fragment_program(struct dri_metaops *meta, - struct gl_fragment_program **prog, - const char *prog_string); - -void meta_restore_fragment_program(struct dri_metaops *meta); - -void meta_set_default_texrect(struct dri_metaops *meta); - -void meta_restore_texcoords(struct dri_metaops *meta); - -void meta_init_metaops(struct gl_context *ctx, struct dri_metaops *meta); -void meta_destroy_metaops(struct dri_metaops *meta); - -#endif diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index a5b71bd40a..82638fa720 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -481,6 +481,22 @@ dri2CreateNewDrawable(__DRIscreen *screen, return pdraw; } +static __DRIbuffer * +dri2AllocateBuffer(__DRIscreen *screen, + unsigned int attachment, unsigned int format, + int width, int height) +{ + return (*screen->DriverAPI.AllocateBuffer)(screen, attachment, format, + width, height); +} + +static void +dri2ReleaseBuffer(__DRIscreen *screen, __DRIbuffer *buffer) +{ + (*screen->DriverAPI.ReleaseBuffer)(screen, buffer); +} + + static int dri2ConfigQueryb(__DRIscreen *screen, const char *var, GLboolean *val) { @@ -790,6 +806,9 @@ driCreateNewScreen(int scrn, static const __DRIextension *emptyExtensionList[] = { NULL }; __DRIscreen *psp; + if (driDriverAPI.InitScreen == NULL) + return NULL; + psp = calloc(1, sizeof *psp); if (!psp) return NULL; @@ -927,7 +946,9 @@ const __DRIdri2Extension driDRI2Extension = { dri2CreateNewDrawable, dri2CreateNewContext, dri2GetAPIMask, - dri2CreateNewContextForAPI + dri2CreateNewContextForAPI, + dri2AllocateBuffer, + dri2ReleaseBuffer }; const __DRI2configQueryExtension dri2ConfigQueryExtension = { diff --git a/src/mesa/drivers/dri/common/dri_util.h b/src/mesa/drivers/dri/common/dri_util.h index ffffb99b30..3d3d5c9cd2 100644 --- a/src/mesa/drivers/dri/common/dri_util.h +++ b/src/mesa/drivers/dri/common/dri_util.h @@ -223,6 +223,12 @@ struct __DriverAPIRec { /* DRI2 Entry point */ const __DRIconfig **(*InitScreen2) (__DRIscreen * priv); + + __DRIbuffer *(*AllocateBuffer) (__DRIscreen *screenPrivate, + unsigned int attachment, + unsigned int format, + int width, int height); + void (*ReleaseBuffer) (__DRIscreen *screenPrivate, __DRIbuffer *buffer); }; extern const struct __DriverAPIRec driDriverAPI; diff --git a/src/mesa/drivers/dri/common/drisw_util.c b/src/mesa/drivers/dri/common/drisw_util.c index 1529c23b16..1bdb6d8939 100644 --- a/src/mesa/drivers/dri/common/drisw_util.c +++ b/src/mesa/drivers/dri/common/drisw_util.c @@ -121,6 +121,48 @@ driCreateNewContext(__DRIscreen *psp, const __DRIconfig *config, return pcp; } +static __DRIcontext * +driCreateNewContextForAPI(__DRIscreen *psp, int api, + const __DRIconfig *config, + __DRIcontext *shared, void *data) +{ + __DRIcontext *pcp; + void * const shareCtx = (shared != NULL) ? shared->driverPrivate : NULL; + gl_api mesa_api; + + switch (api) { + case __DRI_API_OPENGL: + mesa_api = API_OPENGL; + break; + case __DRI_API_GLES: + mesa_api = API_OPENGLES; + break; + case __DRI_API_GLES2: + mesa_api = API_OPENGLES2; + break; + default: + return NULL; + } + + pcp = CALLOC_STRUCT(__DRIcontextRec); + if (!pcp) + return NULL; + + pcp->loaderPrivate = data; + + pcp->driScreenPriv = psp; + pcp->driDrawablePriv = NULL; + pcp->driReadablePriv = NULL; + + if (!driDriverAPI.CreateContext(mesa_api, + &config->modes, pcp, shareCtx)) { + FREE(pcp); + return NULL; + } + + return pcp; +} + static void driDestroyContext(__DRIcontext *pcp) { @@ -269,5 +311,6 @@ const __DRIcoreExtension driCoreExtension = { const __DRIswrastExtension driSWRastExtension = { { __DRI_SWRAST, __DRI_SWRAST_VERSION }, driCreateNewScreen, - driCreateNewDrawable + driCreateNewDrawable, + driCreateNewContextForAPI }; diff --git a/src/mesa/drivers/dri/common/spantmp2.h b/src/mesa/drivers/dri/common/spantmp2.h index abd79562f9..f436d1398c 100644 --- a/src/mesa/drivers/dri/common/spantmp2.h +++ b/src/mesa/drivers/dri/common/spantmp2.h @@ -48,6 +48,15 @@ #define HW_WRITE_CLIPLOOP() HW_CLIPLOOP() #endif +#ifdef SPANTMP_MESA_FMT +#define SPANTMP_PIXEL_FMT GL_NONE +#define SPANTMP_PIXEL_TYPE GL_NONE +#endif + +#ifndef SPANTMP_MESA_FMT +#define SPANTMP_MESA_FMT MESA_FORMAT_COUNT +#endif + #if (SPANTMP_PIXEL_FMT == GL_RGB) && (SPANTMP_PIXEL_TYPE == GL_UNSIGNED_SHORT_5_6_5) /** @@ -445,6 +454,118 @@ rgba[3] = p; \ } while (0) +#elif (SPANTMP_MESA_FMT == MESA_FORMAT_R8) + +#ifndef GET_VALUE +#ifndef GET_PTR +#define GET_PTR(_x, _y) ( buf + (_x) + (_y) * pitch) +#endif + +#define GET_VALUE(_x, _y) *(volatile GLubyte *)(GET_PTR(_x, _y)) +#define PUT_VALUE(_x, _y, _v) *(volatile GLubyte *)(GET_PTR(_x, _y)) = (_v) +#endif /* GET_VALUE */ + +# define INIT_MONO_PIXEL(p, color) \ + p = color[0] + +# define WRITE_RGBA(_x, _y, r, g, b, a) \ + PUT_VALUE(_x, _y, r) + +#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p) + +#define READ_RGBA( rgba, _x, _y ) \ + do { \ + GLubyte p = GET_VALUE(_x, _y); \ + rgba[0] = p; \ + rgba[1] = 0; \ + rgba[2] = 0; \ + rgba[3] = 0; \ + } while (0) + +#elif (SPANTMP_MESA_FMT == MESA_FORMAT_RG88) + +#ifndef GET_VALUE +#ifndef GET_PTR +#define GET_PTR(_x, _y) ( buf + (_x) * 2 + (_y) * pitch) +#endif + +#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y)) +#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v) +#endif /* GET_VALUE */ + +# define INIT_MONO_PIXEL(p, color) \ + PACK_COLOR_8888(color[0], color[1], 0, 0) + +# define WRITE_RGBA(_x, _y, r, g, b, a) \ + PUT_VALUE(_x, _y, r) + +#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p) + +#define READ_RGBA( rgba, _x, _y ) \ + do { \ + GLushort p = GET_VALUE(_x, _y); \ + rgba[0] = p & 0xff; \ + rgba[1] = (p >> 8) & 0xff; \ + rgba[2] = 0; \ + rgba[3] = 0; \ + } while (0) + +#elif (SPANTMP_MESA_FMT == MESA_FORMAT_R16) + +#ifndef GET_VALUE +#ifndef GET_PTR +#define GET_PTR(_x, _y) ( buf + (_x) * 2 + (_y) * pitch) +#endif + +#define GET_VALUE(_x, _y) *(volatile GLushort *)(GET_PTR(_x, _y)) +#define PUT_VALUE(_x, _y, _v) *(volatile GLushort *)(GET_PTR(_x, _y)) = (_v) +#endif /* GET_VALUE */ + +# define INIT_MONO_PIXEL(p, color) \ + p = color[0] + +# define WRITE_RGBA(_x, _y, r, g, b, a) \ + PUT_VALUE(_x, _y, r) + +#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p) + +#define READ_RGBA( rgba, _x, _y ) \ + do { \ + GLushort p = GET_VALUE(_x, _y); \ + rgba[0] = p; \ + rgba[1] = 0; \ + rgba[2] = 0; \ + rgba[3] = 0; \ + } while (0) + +#elif (SPANTMP_MESA_FMT == MESA_FORMAT_RG1616) + +#ifndef GET_VALUE +#ifndef GET_PTR +#define GET_PTR(_x, _y) ( buf + (_x) * 4 + (_y) * pitch) +#endif + +#define GET_VALUE(_x, _y) *(volatile GLuint *)(GET_PTR(_x, _y)) +#define PUT_VALUE(_x, _y, _v) *(volatile GLuint *)(GET_PTR(_x, _y)) = (_v) +#endif /* GET_VALUE */ + +# define INIT_MONO_PIXEL(p, color) \ + ((color[1] << 16) | (color[0])) + +# define WRITE_RGBA(_x, _y, r, g, b, a) \ + PUT_VALUE(_x, _y, r) + +#define WRITE_PIXEL(_x, _y, p) PUT_VALUE(_x, _y, p) + +#define READ_RGBA( rgba, _x, _y ) \ + do { \ + GLuint p = GET_VALUE(_x, _y); \ + rgba[0] = p & 0xffff; \ + rgba[1] = (p >> 16) & 0xffff; \ + rgba[2] = 0; \ + rgba[3] = 0; \ + } while (0) + #else #error SPANTMP_PIXEL_FMT must be set to a valid value! #endif @@ -914,3 +1035,4 @@ static void TAG(InitPointers)(struct gl_renderbuffer *rb) #undef GET_PTR #undef SPANTMP_PIXEL_FMT #undef SPANTMP_PIXEL_TYPE +#undef SPANTMP_MESA_FMT diff --git a/src/mesa/drivers/dri/common/utils.c b/src/mesa/drivers/dri/common/utils.c index 42be77fd7c..083edfaa9b 100644 --- a/src/mesa/drivers/dri/common/utils.c +++ b/src/mesa/drivers/dri/common/utils.c @@ -648,6 +648,8 @@ driCreateConfigs(GLenum fb_format, GLenum fb_type, __DRI_ATTRIB_TEXTURE_1D_BIT | __DRI_ATTRIB_TEXTURE_2D_BIT | __DRI_ATTRIB_TEXTURE_RECTANGLE_BIT; + + modes->sRGBCapable = GL_FALSE; } } } @@ -727,6 +729,7 @@ static const struct { unsigned int attrib, offset; } attribMap[] = { __ATTRIB(__DRI_ATTRIB_BIND_TO_MIPMAP_TEXTURE, bindToMipmapTexture), __ATTRIB(__DRI_ATTRIB_BIND_TO_TEXTURE_TARGETS, bindToTextureTargets), __ATTRIB(__DRI_ATTRIB_YINVERTED, yInverted), + __ATTRIB(__DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE, sRGBCapable), /* The struct field doesn't matter here, these are handled by the * switch in driGetConfigAttribIndex. We need them in the array diff --git a/src/mesa/drivers/dri/i810/i810context.c b/src/mesa/drivers/dri/i810/i810context.c index dc58e91e8c..90dbb6bbe8 100644 --- a/src/mesa/drivers/dri/i810/i810context.c +++ b/src/mesa/drivers/dri/i810/i810context.c @@ -204,7 +204,7 @@ i810CreateContext( gl_api api, shareCtx = ((i810ContextPtr) sharedContextPrivate)->glCtx; else shareCtx = NULL; - imesa->glCtx = _mesa_create_context(mesaVis, shareCtx, + imesa->glCtx = _mesa_create_context(API_OPENGL, mesaVis, shareCtx, &functions, (void*) imesa); if (!imesa->glCtx) { FREE(imesa); diff --git a/src/mesa/drivers/dri/i810/i810state.c b/src/mesa/drivers/dri/i810/i810state.c index 7c3fbb1424..6040abf7fa 100644 --- a/src/mesa/drivers/dri/i810/i810state.c +++ b/src/mesa/drivers/dri/i810/i810state.c @@ -95,7 +95,7 @@ static void i810BlendFuncSeparate( struct gl_context *ctx, GLenum sfactorRGB, GLuint a = SDM_UPDATE_SRC_BLEND | SDM_UPDATE_DST_BLEND; GLboolean fallback = GL_FALSE; - switch (ctx->Color.BlendSrcRGB) { + switch (ctx->Color.Blend[0].SrcRGB) { case GL_ZERO: a |= SDM_SRC_ZERO; break; case GL_ONE: a |= SDM_SRC_ONE; break; case GL_SRC_COLOR: a |= SDM_SRC_SRC_COLOR; break; @@ -124,7 +124,7 @@ static void i810BlendFuncSeparate( struct gl_context *ctx, GLenum sfactorRGB, return; } - switch (ctx->Color.BlendDstRGB) { + switch (ctx->Color.Blend[0].DstRGB) { case GL_ZERO: a |= SDM_DST_ZERO; break; case GL_ONE: a |= SDM_DST_ONE; break; case GL_SRC_COLOR: a |= SDM_DST_SRC_COLOR; break; diff --git a/src/mesa/drivers/dri/i915/i830_reg.h b/src/mesa/drivers/dri/i915/i830_reg.h index ae1317029a..99ee1bb4e9 100644 --- a/src/mesa/drivers/dri/i915/i830_reg.h +++ b/src/mesa/drivers/dri/i915/i830_reg.h @@ -585,6 +585,8 @@ #define TM0S2_VERITCAL_LINE_STRIDE_OFF (1<<12) #define TM0S2_OUTPUT_CHAN_SHIFT 10 #define TM0S2_OUTPUT_CHAN_MASK (3<<10) +#define TM0S2_BASE_MIP_LEVEL_SHIFT 1 +#define TM0S2_LOD_PRECLAMP (1 << 0) #define TM0S3_MIP_FILTER_MASK (0x3<<30) #define TM0S3_MIP_FILTER_SHIFT 30 @@ -605,6 +607,8 @@ #define TM0S3_MAX_MIP_MASK (0xff<<9) #define TM0S3_MIN_MIP_SHIFT 3 #define TM0S3_MIN_MIP_MASK (0x3f<<3) +#define TM0S3_MIN_MIP_SHIFT_830 5 +#define TM0S3_MIN_MIP_MASK_830 (0x3f<<5) #define TM0S3_KILL_PIXEL (1<<2) #define TM0S3_KEYED_FILTER (1<<1) #define TM0S3_CHROMA_KEY (1<<0) diff --git a/src/mesa/drivers/dri/i915/i830_state.c b/src/mesa/drivers/dri/i915/i830_state.c index 147192adc7..9fecab10db 100644 --- a/src/mesa/drivers/dri/i915/i830_state.c +++ b/src/mesa/drivers/dri/i915/i830_state.c @@ -31,6 +31,7 @@ #include "main/macros.h" #include "main/enums.h" #include "main/dd.h" +#include "main/state.h" #include "texmem.h" @@ -234,7 +235,7 @@ i830EvalLogicOpBlendState(struct gl_context * ctx) I830_STATECHANGE(i830, I830_UPLOAD_CTX); - if (RGBA_LOGICOP_ENABLED(ctx)) { + if (_mesa_rgba_logicop_enabled(ctx)) { i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~(ENABLE_COLOR_BLEND | ENABLE_LOGIC_OP_MASK); i830->state.Ctx[I830_CTXREG_ENABLES_1] |= (DISABLE_COLOR_BLEND | @@ -291,10 +292,10 @@ i830_set_blend_state(struct gl_context * ctx) funcRGB = - SRC_BLND_FACT(intel_translate_blend_factor(ctx->Color.BlendSrcRGB)) - | DST_BLND_FACT(intel_translate_blend_factor(ctx->Color.BlendDstRGB)); + SRC_BLND_FACT(intel_translate_blend_factor(ctx->Color.Blend[0].SrcRGB)) + | DST_BLND_FACT(intel_translate_blend_factor(ctx->Color.Blend[0].DstRGB)); - switch (ctx->Color.BlendEquationRGB) { + switch (ctx->Color.Blend[0].EquationRGB) { case GL_FUNC_ADD: eqnRGB = BLENDFUNC_ADD; break; @@ -314,15 +315,15 @@ i830_set_blend_state(struct gl_context * ctx) break; default: fprintf(stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n", - __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB); + __FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationRGB); return; } - funcA = SRC_ABLEND_FACT(intel_translate_blend_factor(ctx->Color.BlendSrcA)) - | DST_ABLEND_FACT(intel_translate_blend_factor(ctx->Color.BlendDstA)); + funcA = SRC_ABLEND_FACT(intel_translate_blend_factor(ctx->Color.Blend[0].SrcA)) + | DST_ABLEND_FACT(intel_translate_blend_factor(ctx->Color.Blend[0].DstA)); - switch (ctx->Color.BlendEquationA) { + switch (ctx->Color.Blend[0].EquationA) { case GL_FUNC_ADD: eqnA = BLENDFUNC_ADD; break; @@ -342,7 +343,7 @@ i830_set_blend_state(struct gl_context * ctx) break; default: fprintf(stderr, "[%s:%u] Invalid alpha blend equation (0x%04x).\n", - __FUNCTION__, __LINE__, ctx->Color.BlendEquationA); + __FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationA); return; } @@ -679,7 +680,7 @@ update_specular(struct gl_context * ctx) I830_STATECHANGE(i830, I830_UPLOAD_CTX); i830->state.Ctx[I830_CTXREG_ENABLES_1] &= ~ENABLE_SPEC_ADD_MASK; - if (NEED_SECONDARY_COLOR(ctx)) + if (_mesa_need_secondary_color(ctx)) i830->state.Ctx[I830_CTXREG_ENABLES_1] |= ENABLE_SPEC_ADD; else i830->state.Ctx[I830_CTXREG_ENABLES_1] |= DISABLE_SPEC_ADD; diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c index b3bb8837cc..c35b4b5ed0 100644 --- a/src/mesa/drivers/dri/i915/i830_texstate.c +++ b/src/mesa/drivers/dri/i915/i830_texstate.c @@ -28,13 +28,14 @@ #include "main/mtypes.h" #include "main/enums.h" #include "main/colormac.h" +#include "main/macros.h" #include "intel_mipmap_tree.h" #include "intel_tex.h" #include "i830_context.h" #include "i830_reg.h" - +#include "intel_chipset.h" static GLuint @@ -139,9 +140,9 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) /* Get first image here, since intelObj->firstLevel will get set in * the intel_finalize_mipmap_tree() call above. */ - firstImage = tObj->Image[0][intelObj->firstLevel]; + firstImage = tObj->Image[0][tObj->BaseLevel]; - intel_miptree_get_image_offset(intelObj->mt, intelObj->firstLevel, 0, 0, + intel_miptree_get_image_offset(intelObj->mt, tObj->BaseLevel, 0, 0, &dst_x, &dst_y); drm_intel_bo_reference(intelObj->mt->region->buffer); @@ -189,6 +190,8 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) { GLuint minFilt, mipFilt, magFilt; + float maxlod; + uint32_t minlod_fixed, maxlod_fixed; switch (tObj->MinFilter) { case GL_NEAREST: @@ -252,10 +255,24 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) state[I830_TEXREG_TM0S3] |= SS2_COLORSPACE_CONVERSION; #endif - state[I830_TEXREG_TM0S3] |= ((intelObj->lastLevel - - intelObj->firstLevel) * - 4) << TM0S3_MIN_MIP_SHIFT; - + /* We get one field with fraction bits for the maximum + * addressable (smallest resolution) LOD. Use it to cover both + * MAX_LEVEL and MAX_LOD. + */ + minlod_fixed = U_FIXED(CLAMP(tObj->MinLod, 0.0, 11), 4); + maxlod = MIN2(tObj->MaxLod, tObj->_MaxLevel - tObj->BaseLevel); + if (intel->intelScreen->deviceID == PCI_CHIP_I855_GM || + intel->intelScreen->deviceID == PCI_CHIP_I865_G) { + maxlod_fixed = U_FIXED(CLAMP(maxlod, 0.0, 11.75), 2); + maxlod_fixed = MAX2(maxlod_fixed, (minlod_fixed + 3) >> 2); + state[I830_TEXREG_TM0S3] |= maxlod_fixed << TM0S3_MIN_MIP_SHIFT; + state[I830_TEXREG_TM0S2] |= TM0S2_LOD_PRECLAMP; + } else { + maxlod_fixed = U_FIXED(CLAMP(maxlod, 0.0, 11), 0); + maxlod_fixed = MAX2(maxlod_fixed, (minlod_fixed + 15) >> 4); + state[I830_TEXREG_TM0S3] |= maxlod_fixed << TM0S3_MIN_MIP_SHIFT_830; + } + state[I830_TEXREG_TM0S3] |= minlod_fixed << TM0S3_MAX_MIP_SHIFT; state[I830_TEXREG_TM0S3] |= ((minFilt << TM0S3_MIN_FILTER_SHIFT) | (mipFilt << TM0S3_MIP_FILTER_SHIFT) | (magFilt << TM0S3_MAG_FILTER_SHIFT)); diff --git a/src/mesa/drivers/dri/i915/i830_vtbl.c b/src/mesa/drivers/dri/i915/i830_vtbl.c index f7fdb78d05..19f0807759 100644 --- a/src/mesa/drivers/dri/i915/i830_vtbl.c +++ b/src/mesa/drivers/dri/i915/i830_vtbl.c @@ -364,7 +364,7 @@ i830_emit_invarient_state(struct intel_context *intel) #define emit( intel, state, size ) \ - intel_batchbuffer_data(intel->batch, state, size ) + intel_batchbuffer_data(intel, state, size, false) static GLuint get_dirty(struct i830_hw_state *state) @@ -428,14 +428,15 @@ i830_emit_state(struct intel_context *intel) * scheduling is allowed, rather than assume that it is whenever a * batchbuffer fills up. */ - intel_batchbuffer_require_space(intel->batch, - get_state_size(state) + INTEL_PRIM_EMIT_SIZE); + intel_batchbuffer_require_space(intel, + get_state_size(state) + INTEL_PRIM_EMIT_SIZE, + false); count = 0; again: aper_count = 0; dirty = get_dirty(state); - aper_array[aper_count++] = intel->batch->buf; + aper_array[aper_count++] = intel->batch.bo; if (dirty & I830_UPLOAD_BUFFERS) { aper_array[aper_count++] = state->draw_region->buffer; if (state->depth_region) @@ -452,7 +453,7 @@ i830_emit_state(struct intel_context *intel) if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) { if (count == 0) { count++; - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_flush(intel); goto again; } else { _mesa_error(ctx, GL_OUT_OF_MEMORY, "i830 emit state"); @@ -534,14 +535,9 @@ i830_emit_state(struct intel_context *intel) BEGIN_BATCH(I830_TEX_SETUP_SIZE + 1); OUT_BATCH(state->Tex[i][I830_TEXREG_TM0LI]); - if (state->tex_buffer[i]) { - OUT_RELOC(state->tex_buffer[i], - I915_GEM_DOMAIN_SAMPLER, 0, - state->tex_offset[i]); - } - else { - OUT_BATCH(state->tex_offset[i]); - } + OUT_RELOC(state->tex_buffer[i], + I915_GEM_DOMAIN_SAMPLER, 0, + state->tex_offset[i]); OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S1]); OUT_BATCH(state->Tex[i][I830_TEXREG_TM0S2]); @@ -560,9 +556,7 @@ i830_emit_state(struct intel_context *intel) } } - intel->batch->dirty_state &= ~dirty; assert(get_dirty(state) == 0); - assert((intel->batch->dirty_state & (1<<1)) == 0); } static void @@ -584,6 +578,27 @@ i830_destroy_context(struct intel_context *intel) _tnl_free_vertices(&intel->ctx); } +static uint32_t i830_render_target_format_for_mesa_format[MESA_FORMAT_COUNT] = +{ + [MESA_FORMAT_ARGB8888] = DV_PF_8888, + [MESA_FORMAT_XRGB8888] = DV_PF_8888, + [MESA_FORMAT_RGB565] = DV_PF_565, + [MESA_FORMAT_ARGB1555] = DV_PF_1555, + [MESA_FORMAT_ARGB4444] = DV_PF_4444, +}; + +static bool +i830_render_target_supported(gl_format format) +{ + if (format == MESA_FORMAT_S8_Z24 || + format == MESA_FORMAT_X8_Z24 || + format == MESA_FORMAT_Z16) { + return true; + } + + return i830_render_target_format_for_mesa_format[format] != 0; +} + static void i830_set_draw_region(struct intel_context *intel, struct intel_region *color_regions[], @@ -623,24 +638,7 @@ i830_set_draw_region(struct intel_context *intel, DSTORG_VERT_BIAS(0x8) | DEPTH_IS_Z); /* .5 */ if (irb != NULL) { - switch (irb->Base.Format) { - case MESA_FORMAT_ARGB8888: - case MESA_FORMAT_XRGB8888: - value |= DV_PF_8888; - break; - case MESA_FORMAT_RGB565: - value |= DV_PF_565; - break; - case MESA_FORMAT_ARGB1555: - value |= DV_PF_1555; - break; - case MESA_FORMAT_ARGB4444: - value |= DV_PF_4444; - break; - default: - _mesa_problem(ctx, "Bad renderbuffer format: %d\n", - irb->Base.Format); - } + value |= i830_render_target_format_for_mesa_format[irb->Base.Format]; } if (depth_region && depth_region->cpp == 4) { @@ -728,4 +726,5 @@ i830InitVtbl(struct i830_context *i830) i830->intel.vtbl.assert_not_dirty = i830_assert_not_dirty; i830->intel.vtbl.finish_batch = intel_finish_vb; i830->intel.vtbl.invalidate_state = i830_invalidate_state; + i830->intel.vtbl.render_target_supported = i830_render_target_supported; } diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c index f32f3cf602..7389a1d57a 100644 --- a/src/mesa/drivers/dri/i915/i915_context.c +++ b/src/mesa/drivers/dri/i915/i915_context.c @@ -168,15 +168,37 @@ i915CreateContext(int api, MIN2(ctx->Const.FragmentProgram.MaxNativeParameters, ctx->Const.FragmentProgram.MaxEnvParams); + /* i915 stores all values in single-precision floats. Values aren't set + * for other program targets because software is used for those targets. + */ + ctx->Const.FragmentProgram.MediumFloat.RangeMin = 127; + ctx->Const.FragmentProgram.MediumFloat.RangeMax = 127; + ctx->Const.FragmentProgram.MediumFloat.Precision = 23; + ctx->Const.FragmentProgram.LowFloat = ctx->Const.FragmentProgram.HighFloat = + ctx->Const.FragmentProgram.MediumFloat; + ctx->Const.FragmentProgram.MediumInt.RangeMin = 24; + ctx->Const.FragmentProgram.MediumInt.RangeMax = 24; + ctx->Const.FragmentProgram.MediumInt.Precision = 0; + ctx->Const.FragmentProgram.LowInt = ctx->Const.FragmentProgram.HighInt = + ctx->Const.FragmentProgram.MediumInt; + ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE; /* FINISHME: Are there other options that should be enabled for software * FINISHME: vertex shaders? */ ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].EmitCondCodes = GL_TRUE; - ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoIfs = GL_TRUE; - ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoNoise = GL_TRUE; - ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT].EmitNoPow = GL_TRUE; + + struct gl_shader_compiler_options *const fs_options = + & ctx->ShaderCompilerOptions[MESA_SHADER_FRAGMENT]; + fs_options->EmitNoIfs = GL_TRUE; + fs_options->EmitNoNoise = GL_TRUE; + fs_options->EmitNoPow = GL_TRUE; + fs_options->EmitNoMainReturn = GL_TRUE; + fs_options->EmitNoIndirectInput = GL_TRUE; + fs_options->EmitNoIndirectOutput = GL_TRUE; + fs_options->EmitNoIndirectUniform = GL_TRUE; + fs_options->EmitNoIndirectTemp = GL_TRUE; ctx->Const.MaxDrawBuffers = 1; diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h index 2c80ded075..601620275f 100644 --- a/src/mesa/drivers/dri/i915/i915_context.h +++ b/src/mesa/drivers/dri/i915/i915_context.h @@ -29,7 +29,6 @@ #define I915CONTEXT_INC #include "intel_context.h" -#include "i915_reg.h" #define I915_FALLBACK_TEXTURE 0x1000 #define I915_FALLBACK_COLORMASK 0x2000 @@ -51,6 +50,7 @@ #define I915_UPLOAD_INVARIENT 0x40 #define I915_UPLOAD_DEFAULTS 0x80 #define I915_UPLOAD_RASTER_RULES 0x100 +#define I915_UPLOAD_BLEND 0x200 #define I915_UPLOAD_TEX(i) (0x00010000<<(i)) #define I915_UPLOAD_TEX_ALL (0x00ff0000) #define I915_UPLOAD_TEX_0_SHIFT 16 @@ -77,17 +77,19 @@ #define I915_DEST_SETUP_SIZE 18 #define I915_CTXREG_STATE4 0 -#define I915_CTXREG_LI 1 -#define I915_CTXREG_LIS2 2 -#define I915_CTXREG_LIS4 3 -#define I915_CTXREG_LIS5 4 -#define I915_CTXREG_LIS6 5 -#define I915_CTXREG_IAB 6 -#define I915_CTXREG_BLENDCOLOR0 7 -#define I915_CTXREG_BLENDCOLOR1 8 -#define I915_CTXREG_BF_STENCIL_OPS 9 -#define I915_CTXREG_BF_STENCIL_MASKS 10 -#define I915_CTX_SETUP_SIZE 11 +#define I915_CTXREG_LI 1 +#define I915_CTXREG_LIS2 2 +#define I915_CTXREG_LIS4 3 +#define I915_CTXREG_LIS5 4 +#define I915_CTXREG_LIS6 5 +#define I915_CTXREG_BF_STENCIL_OPS 6 +#define I915_CTXREG_BF_STENCIL_MASKS 7 +#define I915_CTX_SETUP_SIZE 8 + +#define I915_BLENDREG_IAB 0 +#define I915_BLENDREG_BLENDCOLOR0 1 +#define I915_BLENDREG_BLENDCOLOR1 2 +#define I915_BLEND_SETUP_SIZE 3 #define I915_FOGREG_COLOR 0 #define I915_FOGREG_MODE0 1 @@ -123,6 +125,12 @@ enum { #define I915_MAX_CONSTANT 32 #define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT)) +#define I915_MAX_TEX_INDIRECT 4 +#define I915_MAX_TEX_INSN 32 +#define I915_MAX_ALU_INSN 64 +#define I915_MAX_DECL_INSN 27 +#define I915_MAX_TEMPORARY 16 + #define I915_MAX_INSN (I915_MAX_DECL_INSN + \ I915_MAX_TEX_INSN + \ I915_MAX_ALU_INSN) @@ -216,6 +224,7 @@ struct i915_fragment_program struct i915_hw_state { GLuint Ctx[I915_CTX_SETUP_SIZE]; + GLuint Blend[I915_BLEND_SETUP_SIZE]; GLuint Buffer[I915_DEST_SETUP_SIZE]; GLuint Stipple[I915_STP_SETUP_SIZE]; GLuint Fog[I915_FOG_SETUP_SIZE]; @@ -260,8 +269,12 @@ struct i915_context struct i915_fragment_program *current_program; + drm_intel_bo *current_vb_bo; + unsigned int current_vertex_size; + struct i915_hw_state state; uint32_t last_draw_offset; + GLuint last_sampler; }; diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 7a9fb7f088..2bfe665cb6 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -269,7 +269,7 @@ translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit) #define EMIT_TEX( OP ) \ do { \ GLuint dim = translate_tex_src_target( p, inst->TexSrcTarget ); \ - const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current; \ + const struct gl_fragment_program *program = &p->FragProg; \ GLuint unit = program->Base.SamplerUnits[inst->TexSrcUnit]; \ GLuint sampler = i915_emit_decl(p, REG_TYPE_S, \ unit, dim); \ @@ -304,8 +304,9 @@ do { \ */ static void calc_live_regs( struct i915_fragment_program *p ) { - const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current; + const struct gl_fragment_program *program = &p->FragProg; GLuint regsUsed = 0xffff0000; + uint8_t live_components[16] = { 0, }; GLint i; for (i = program->Base.NumInstructions - 1; i >= 0; i--) { @@ -314,13 +315,26 @@ static void calc_live_regs( struct i915_fragment_program *p ) int a; /* Register is written to: unmark as live for this and preceeding ops */ - if (inst->DstReg.File == PROGRAM_TEMPORARY) - regsUsed &= ~(1 << inst->DstReg.Index); + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask; + if (live_components[inst->DstReg.Index] == 0) + regsUsed &= ~(1 << inst->DstReg.Index); + } for (a = 0; a < opArgs; a++) { /* Register is read from: mark as live for this and preceeding ops */ - if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) + if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) { + unsigned c; + regsUsed |= 1 << inst->SrcReg[a].Index; + + for (c = 0; c < 4; c++) { + const unsigned field = GET_SWZ(inst->SrcReg[a].Swizzle, c); + + if (field <= SWIZZLE_W) + live_components[inst->SrcReg[a].Index] |= (1U << field); + } + } } p->usedRegs[i] = regsUsed; @@ -330,7 +344,7 @@ static void calc_live_regs( struct i915_fragment_program *p ) static GLuint get_live_regs( struct i915_fragment_program *p, const struct prog_instruction *inst ) { - const struct gl_fragment_program *program = p->ctx->FragmentProgram._Current; + const struct gl_fragment_program *program = &p->FragProg; GLuint nr = inst - program->Base.Instructions; return p->usedRegs[nr]; @@ -351,8 +365,7 @@ static GLuint get_live_regs( struct i915_fragment_program *p, static void upload_program(struct i915_fragment_program *p) { - const struct gl_fragment_program *program = - p->ctx->FragmentProgram._Current; + const struct gl_fragment_program *program = &p->FragProg; const struct prog_instruction *inst = program->Base.Instructions; if (INTEL_DEBUG & DEBUG_WM) @@ -795,18 +808,18 @@ upload_program(struct i915_fragment_program *p) flags = get_result_flags(inst); dst = get_result_vector(p, inst); - /* dst = src1 >= src2 */ + /* tmp = src1 >= src2 */ i915_emit_arith(p, A0_SGE, - dst, + tmp, flags, 0, src_vector(p, &inst->SrcReg[0], program), src_vector(p, &inst->SrcReg[1], program), 0); - /* tmp = src1 <= src2 */ + /* dst = src1 <= src2 */ i915_emit_arith(p, A0_SGE, - tmp, + dst, flags, 0, negate(src_vector(p, &inst->SrcReg[0], program), 1, 1, 1, 1), @@ -944,18 +957,18 @@ upload_program(struct i915_fragment_program *p) flags = get_result_flags(inst); dst = get_result_vector(p, inst); - /* dst = src1 < src2 */ + /* tmp = src1 < src2 */ i915_emit_arith(p, A0_SLT, - dst, + tmp, flags, 0, src_vector(p, &inst->SrcReg[0], program), src_vector(p, &inst->SrcReg[1], program), 0); - /* tmp = src1 > src2 */ + /* dst = src1 > src2 */ i915_emit_arith(p, A0_SLT, - tmp, + dst, flags, 0, negate(src_vector(p, &inst->SrcReg[0], program), 1, 1, 1, 1), @@ -1152,7 +1165,7 @@ translate_program(struct i915_fragment_program *p) if (INTEL_DEBUG & DEBUG_WM) { printf("fp:\n"); - _mesa_print_program(&p->ctx->FragmentProgram._Current->Base); + _mesa_print_program(&p->FragProg.Base); printf("\n"); } @@ -1162,11 +1175,6 @@ translate_program(struct i915_fragment_program *p) fixup_depth_write(p); i915_fini_program(p); - if (INTEL_DEBUG & DEBUG_WM) { - printf("i915:\n"); - i915_disassemble_program(i915->state.Program, i915->state.ProgramSize); - } - p->translated = 1; } @@ -1413,6 +1421,10 @@ i915ValidateFragmentProgram(struct i915_context *i915) intel->vertex_attr_count, intel->ViewportMatrix.m, 0); + assert(intel->prim.current_offset == intel->prim.start_offset); + intel->prim.start_offset = (intel->prim.current_offset + intel->vertex_size-1) / intel->vertex_size * intel->vertex_size; + intel->prim.current_offset = intel->prim.start_offset; + intel->vertex_size >>= 2; i915->state.Ctx[I915_CTXREG_LIS2] = s2; @@ -1427,6 +1439,11 @@ i915ValidateFragmentProgram(struct i915_context *i915) if (!p->on_hardware) i915_upload_program(i915, p); + + if (INTEL_DEBUG & DEBUG_WM) { + printf("i915:\n"); + i915_disassemble_program(i915->state.Program, i915->state.ProgramSize); + } } void diff --git a/src/mesa/drivers/dri/i915/i915_program.c b/src/mesa/drivers/dri/i915/i915_program.c index ca1949b223..507adf1d3d 100644 --- a/src/mesa/drivers/dri/i915/i915_program.c +++ b/src/mesa/drivers/dri/i915/i915_program.c @@ -538,6 +538,7 @@ i915_upload_program(struct i915_context *i915, { GLuint program_size = p->csr - p->program; GLuint decl_size = p->decl - p->declarations; + GLuint nr; if (p->error) return; @@ -554,32 +555,32 @@ i915_upload_program(struct i915_context *i915, i915->state.ProgramSize = decl_size + program_size; } - /* Always seemed to get a failure if I used memcmp() to - * shortcircuit this state upload. Needs further investigation? - */ - if (p->nr_constants) { - GLuint nr = p->nr_constants; - - I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 1); - I915_STATECHANGE(i915, I915_UPLOAD_CONSTANTS); - - i915->state.Constant[0] = _3DSTATE_PIXEL_SHADER_CONSTANTS | ((nr) * 4); - i915->state.Constant[1] = (1 << (nr - 1)) | ((1 << (nr - 1)) - 1); - - memcpy(&i915->state.Constant[2], p->constant, 4 * sizeof(int) * (nr)); - i915->state.ConstantSize = 2 + (nr) * 4; - - if (0) { - GLuint i; - for (i = 0; i < nr; i++) { - fprintf(stderr, "const[%d]: %f %f %f %f\n", i, - p->constant[i][0], - p->constant[i][1], p->constant[i][2], p->constant[i][3]); - } + nr = p->nr_constants; + if (i915->state.ConstantSize != 2 + nr*4 || + memcmp(i915->state.Constant + 2, + p->constant, 4*sizeof(int)*nr)) { + if (nr) { + I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 1); + I915_STATECHANGE(i915, I915_UPLOAD_CONSTANTS); + + i915->state.Constant[0] = _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4); + i915->state.Constant[1] = (1 << nr) -1; + + memcpy(&i915->state.Constant[2], p->constant, 4 * sizeof(int) * nr); + i915->state.ConstantSize = 2 + nr * 4; + + if (0) { + GLuint i; + for (i = 0; i < nr; i++) { + fprintf(stderr, "const[%d]: %f %f %f %f\n", i, + p->constant[i][0], + p->constant[i][1], p->constant[i][2], p->constant[i][3]); + } + } + } + else { + I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 0); } - } - else { - I915_ACTIVESTATE(i915, I915_UPLOAD_CONSTANTS, 0); } p->on_hardware = 1; diff --git a/src/mesa/drivers/dri/i915/i915_reg.h b/src/mesa/drivers/dri/i915/i915_reg.h index 7f31ff674f..766547a4c6 100644 --- a/src/mesa/drivers/dri/i915/i915_reg.h +++ b/src/mesa/drivers/dri/i915/i915_reg.h @@ -361,13 +361,6 @@ /* p222 */ -#define I915_MAX_TEX_INDIRECT 4 -#define I915_MAX_TEX_INSN 32 -#define I915_MAX_ALU_INSN 64 -#define I915_MAX_DECL_INSN 27 -#define I915_MAX_TEMPORARY 16 - - /* Each instruction is 3 dwords long, though most don't require all * this space. Maximum of 123 instructions. Smaller maxes per insn * type. diff --git a/src/mesa/drivers/dri/i915/i915_state.c b/src/mesa/drivers/dri/i915/i915_state.c index 9508fbaf94..3b1af4c455 100644 --- a/src/mesa/drivers/dri/i915/i915_state.c +++ b/src/mesa/drivers/dri/i915/i915_state.c @@ -31,6 +31,7 @@ #include "main/macros.h" #include "main/enums.h" #include "main/dd.h" +#include "main/state.h" #include "tnl/tnl.h" #include "tnl/t_context.h" @@ -56,8 +57,7 @@ i915_update_stencil(struct gl_context * ctx) GLenum front_func, front_fail, front_pass_z_fail, front_pass_z_pass; GLuint back_ref, back_writemask, back_mask; GLenum back_func, back_fail, back_pass_z_fail, back_pass_z_pass; - - I915_STATECHANGE(i915, I915_UPLOAD_CTX); + GLuint dirty = 0; /* The 915 considers CW to be "front" for two-sided stencil, so choose * appropriately. @@ -94,56 +94,68 @@ i915_update_stencil(struct gl_context * ctx) back_pass_z_fail = ctx->Stencil.ZFailFunc[0]; back_pass_z_pass = ctx->Stencil.ZPassFunc[0]; } +#define set_ctx_bits(reg, mask, set) do{ \ + GLuint dw = i915->state.Ctx[reg]; \ + dw &= ~(mask); \ + dw |= (set); \ + dirty |= dw != i915->state.Ctx[reg]; \ + i915->state.Ctx[reg] = dw; \ +} while(0) /* Set front state. */ - i915->state.Ctx[I915_CTXREG_STATE4] &= ~(MODE4_ENABLE_STENCIL_TEST_MASK | - MODE4_ENABLE_STENCIL_WRITE_MASK); - i915->state.Ctx[I915_CTXREG_STATE4] |= (ENABLE_STENCIL_TEST_MASK | - ENABLE_STENCIL_WRITE_MASK | - STENCIL_TEST_MASK(front_mask) | - STENCIL_WRITE_MASK(front_writemask)); - - i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_REF_MASK | - S5_STENCIL_TEST_FUNC_MASK | - S5_STENCIL_FAIL_MASK | - S5_STENCIL_PASS_Z_FAIL_MASK | - S5_STENCIL_PASS_Z_PASS_MASK); - - i915->state.Ctx[I915_CTXREG_LIS5] |= - (front_ref << S5_STENCIL_REF_SHIFT) | - (intel_translate_compare_func(front_func) << S5_STENCIL_TEST_FUNC_SHIFT) | - (intel_translate_stencil_op(front_fail) << S5_STENCIL_FAIL_SHIFT) | - (intel_translate_stencil_op(front_pass_z_fail) << - S5_STENCIL_PASS_Z_FAIL_SHIFT) | - (intel_translate_stencil_op(front_pass_z_pass) << - S5_STENCIL_PASS_Z_PASS_SHIFT); + set_ctx_bits(I915_CTXREG_STATE4, + MODE4_ENABLE_STENCIL_TEST_MASK | + MODE4_ENABLE_STENCIL_WRITE_MASK, + ENABLE_STENCIL_TEST_MASK | + ENABLE_STENCIL_WRITE_MASK | + STENCIL_TEST_MASK(front_mask) | + STENCIL_WRITE_MASK(front_writemask)); + + set_ctx_bits(I915_CTXREG_LIS5, + S5_STENCIL_REF_MASK | + S5_STENCIL_TEST_FUNC_MASK | + S5_STENCIL_FAIL_MASK | + S5_STENCIL_PASS_Z_FAIL_MASK | + S5_STENCIL_PASS_Z_PASS_MASK, + (front_ref << S5_STENCIL_REF_SHIFT) | + (intel_translate_compare_func(front_func) << S5_STENCIL_TEST_FUNC_SHIFT) | + (intel_translate_stencil_op(front_fail) << S5_STENCIL_FAIL_SHIFT) | + (intel_translate_stencil_op(front_pass_z_fail) << + S5_STENCIL_PASS_Z_FAIL_SHIFT) | + (intel_translate_stencil_op(front_pass_z_pass) << + S5_STENCIL_PASS_Z_PASS_SHIFT)); /* Set back state if different from front. */ if (ctx->Stencil._TestTwoSide) { - i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] &= - ~(BFO_STENCIL_REF_MASK | - BFO_STENCIL_TEST_MASK | - BFO_STENCIL_FAIL_MASK | - BFO_STENCIL_PASS_Z_FAIL_MASK | - BFO_STENCIL_PASS_Z_PASS_MASK); - i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] |= BFO_STENCIL_TWO_SIDE | - (back_ref << BFO_STENCIL_REF_SHIFT) | - (intel_translate_compare_func(back_func) << BFO_STENCIL_TEST_SHIFT) | - (intel_translate_stencil_op(back_fail) << BFO_STENCIL_FAIL_SHIFT) | - (intel_translate_stencil_op(back_pass_z_fail) << - BFO_STENCIL_PASS_Z_FAIL_SHIFT) | - (intel_translate_stencil_op(back_pass_z_pass) << - BFO_STENCIL_PASS_Z_PASS_SHIFT); - - i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] &= - ~(BFM_STENCIL_TEST_MASK_MASK | - BFM_STENCIL_WRITE_MASK_MASK); - i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] |= - BFM_STENCIL_TEST_MASK(back_mask) | - BFM_STENCIL_WRITE_MASK(back_writemask); + set_ctx_bits(I915_CTXREG_BF_STENCIL_OPS, + BFO_STENCIL_REF_MASK | + BFO_STENCIL_TEST_MASK | + BFO_STENCIL_FAIL_MASK | + BFO_STENCIL_PASS_Z_FAIL_MASK | + BFO_STENCIL_PASS_Z_PASS_MASK, + BFO_STENCIL_TWO_SIDE | + (back_ref << BFO_STENCIL_REF_SHIFT) | + (intel_translate_compare_func(back_func) << BFO_STENCIL_TEST_SHIFT) | + (intel_translate_stencil_op(back_fail) << BFO_STENCIL_FAIL_SHIFT) | + (intel_translate_stencil_op(back_pass_z_fail) << + BFO_STENCIL_PASS_Z_FAIL_SHIFT) | + (intel_translate_stencil_op(back_pass_z_pass) << + BFO_STENCIL_PASS_Z_PASS_SHIFT)); + + set_ctx_bits(I915_CTXREG_BF_STENCIL_MASKS, + BFM_STENCIL_TEST_MASK_MASK | + BFM_STENCIL_WRITE_MASK_MASK, + BFM_STENCIL_TEST_MASK(back_mask) | + BFM_STENCIL_WRITE_MASK(back_writemask)); } else { - i915->state.Ctx[I915_CTXREG_BF_STENCIL_OPS] &= ~BFO_STENCIL_TWO_SIDE; + set_ctx_bits(I915_CTXREG_BF_STENCIL_OPS, + BFO_STENCIL_TWO_SIDE, 0); } + +#undef set_ctx_bits + + if (dirty) + I915_STATECHANGE(i915, I915_UPLOAD_CTX); } static void @@ -169,15 +181,18 @@ i915AlphaFunc(struct gl_context * ctx, GLenum func, GLfloat ref) struct i915_context *i915 = I915_CONTEXT(ctx); int test = intel_translate_compare_func(func); GLubyte refByte; + GLuint dw; UNCLAMPED_FLOAT_TO_UBYTE(refByte, ref); - I915_STATECHANGE(i915, I915_UPLOAD_CTX); - i915->state.Ctx[I915_CTXREG_LIS6] &= ~(S6_ALPHA_TEST_FUNC_MASK | - S6_ALPHA_REF_MASK); - i915->state.Ctx[I915_CTXREG_LIS6] |= ((test << S6_ALPHA_TEST_FUNC_SHIFT) | - (((GLuint) refByte) << - S6_ALPHA_REF_SHIFT)); + dw = i915->state.Ctx[I915_CTXREG_LIS6]; + dw &= ~(S6_ALPHA_TEST_FUNC_MASK | S6_ALPHA_REF_MASK); + dw |= ((test << S6_ALPHA_TEST_FUNC_SHIFT) | + (((GLuint) refByte) << S6_ALPHA_REF_SHIFT)); + if (dw != i915->state.Ctx[I915_CTXREG_LIS6]) { + i915->state.Ctx[I915_CTXREG_LIS6] = dw; + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + } } /* This function makes sure that the proper enables are @@ -190,23 +205,32 @@ static void i915EvalLogicOpBlendState(struct gl_context * ctx) { struct i915_context *i915 = I915_CONTEXT(ctx); + GLuint dw0, dw1; - I915_STATECHANGE(i915, I915_UPLOAD_CTX); + dw0 = i915->state.Ctx[I915_CTXREG_LIS5]; + dw1 = i915->state.Ctx[I915_CTXREG_LIS6]; - if (RGBA_LOGICOP_ENABLED(ctx)) { - i915->state.Ctx[I915_CTXREG_LIS5] |= S5_LOGICOP_ENABLE; - i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_CBUF_BLEND_ENABLE; + if (_mesa_rgba_logicop_enabled(ctx)) { + dw0 |= S5_LOGICOP_ENABLE; + dw1 &= ~S6_CBUF_BLEND_ENABLE; } else { - i915->state.Ctx[I915_CTXREG_LIS5] &= ~S5_LOGICOP_ENABLE; + dw0 &= ~S5_LOGICOP_ENABLE; if (ctx->Color.BlendEnabled) { - i915->state.Ctx[I915_CTXREG_LIS6] |= S6_CBUF_BLEND_ENABLE; + dw1 |= S6_CBUF_BLEND_ENABLE; } else { - i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_CBUF_BLEND_ENABLE; + dw1 &= ~S6_CBUF_BLEND_ENABLE; } } + if (dw0 != i915->state.Ctx[I915_CTXREG_LIS5] || + dw1 != i915->state.Ctx[I915_CTXREG_LIS6]) { + i915->state.Ctx[I915_CTXREG_LIS5] = dw0; + i915->state.Ctx[I915_CTXREG_LIS6] = dw1; + + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + } } static void @@ -214,6 +238,7 @@ i915BlendColor(struct gl_context * ctx, const GLfloat color[4]) { struct i915_context *i915 = I915_CONTEXT(ctx); GLubyte r, g, b, a; + GLuint dw; DBG("%s\n", __FUNCTION__); @@ -222,9 +247,11 @@ i915BlendColor(struct gl_context * ctx, const GLfloat color[4]) UNCLAMPED_FLOAT_TO_UBYTE(b, color[BCOMP]); UNCLAMPED_FLOAT_TO_UBYTE(a, color[ACOMP]); - I915_STATECHANGE(i915, I915_UPLOAD_CTX); - i915->state.Ctx[I915_CTXREG_BLENDCOLOR1] = - (a << 24) | (r << 16) | (g << 8) | b; + dw = (a << 24) | (r << 16) | (g << 8) | b; + if (dw != i915->state.Blend[I915_BLENDREG_BLENDCOLOR1]) { + i915->state.Blend[I915_BLENDREG_BLENDCOLOR1] = dw; + I915_STATECHANGE(i915, I915_UPLOAD_BLEND); + } } @@ -258,7 +285,7 @@ static void i915UpdateBlendState(struct gl_context * ctx) { struct i915_context *i915 = I915_CONTEXT(ctx); - GLuint iab = (i915->state.Ctx[I915_CTXREG_IAB] & + GLuint iab = (i915->state.Blend[I915_BLENDREG_IAB] & ~(IAB_SRC_FACTOR_MASK | IAB_DST_FACTOR_MASK | (BLENDFUNC_MASK << IAB_FUNC_SHIFT) | IAB_ENABLE)); @@ -267,12 +294,12 @@ i915UpdateBlendState(struct gl_context * ctx) ~(S6_CBUF_SRC_BLEND_FACT_MASK | S6_CBUF_DST_BLEND_FACT_MASK | S6_CBUF_BLEND_FUNC_MASK)); - GLuint eqRGB = ctx->Color.BlendEquationRGB; - GLuint eqA = ctx->Color.BlendEquationA; - GLuint srcRGB = ctx->Color.BlendSrcRGB; - GLuint dstRGB = ctx->Color.BlendDstRGB; - GLuint srcA = ctx->Color.BlendSrcA; - GLuint dstA = ctx->Color.BlendDstA; + GLuint eqRGB = ctx->Color.Blend[0].EquationRGB; + GLuint eqA = ctx->Color.Blend[0].EquationA; + GLuint srcRGB = ctx->Color.Blend[0].SrcRGB; + GLuint dstRGB = ctx->Color.Blend[0].DstRGB; + GLuint srcA = ctx->Color.Blend[0].SrcA; + GLuint dstA = ctx->Color.Blend[0].DstA; if (eqRGB == GL_MIN || eqRGB == GL_MAX) { srcRGB = dstRGB = GL_ONE; @@ -293,11 +320,13 @@ i915UpdateBlendState(struct gl_context * ctx) if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) iab |= IAB_ENABLE; - if (iab != i915->state.Ctx[I915_CTXREG_IAB] || - lis6 != i915->state.Ctx[I915_CTXREG_LIS6]) { - I915_STATECHANGE(i915, I915_UPLOAD_CTX); - i915->state.Ctx[I915_CTXREG_IAB] = iab; + if (iab != i915->state.Blend[I915_BLENDREG_IAB]) { + i915->state.Blend[I915_BLENDREG_IAB] = iab; + I915_STATECHANGE(i915, I915_UPLOAD_BLEND); + } + if (lis6 != i915->state.Ctx[I915_CTXREG_LIS6]) { i915->state.Ctx[I915_CTXREG_LIS6] = lis6; + I915_STATECHANGE(i915, I915_UPLOAD_CTX); } /* This will catch a logicop blend equation */ @@ -325,27 +354,36 @@ i915DepthFunc(struct gl_context * ctx, GLenum func) { struct i915_context *i915 = I915_CONTEXT(ctx); int test = intel_translate_compare_func(func); + GLuint dw; DBG("%s\n", __FUNCTION__); - I915_STATECHANGE(i915, I915_UPLOAD_CTX); - i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_TEST_FUNC_MASK; - i915->state.Ctx[I915_CTXREG_LIS6] |= test << S6_DEPTH_TEST_FUNC_SHIFT; + dw = i915->state.Ctx[I915_CTXREG_LIS6]; + dw &= ~S6_DEPTH_TEST_FUNC_MASK; + dw |= test << S6_DEPTH_TEST_FUNC_SHIFT; + if (dw != i915->state.Ctx[I915_CTXREG_LIS6]) { + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + i915->state.Ctx[I915_CTXREG_LIS6] = dw; + } } static void i915DepthMask(struct gl_context * ctx, GLboolean flag) { struct i915_context *i915 = I915_CONTEXT(ctx); + GLuint dw; DBG("%s flag (%d)\n", __FUNCTION__, flag); - - I915_STATECHANGE(i915, I915_UPLOAD_CTX); + dw = i915->state.Ctx[I915_CTXREG_LIS6]; if (flag && ctx->Depth.Test) - i915->state.Ctx[I915_CTXREG_LIS6] |= S6_DEPTH_WRITE_ENABLE; + dw |= S6_DEPTH_WRITE_ENABLE; else - i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_WRITE_ENABLE; + dw &= ~S6_DEPTH_WRITE_ENABLE; + if (dw != i915->state.Ctx[I915_CTXREG_LIS6]) { + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + i915->state.Ctx[I915_CTXREG_LIS6] = dw; + } } @@ -532,7 +570,7 @@ static void i915CullFaceFrontFace(struct gl_context * ctx, GLenum unused) { struct i915_context *i915 = I915_CONTEXT(ctx); - GLuint mode; + GLuint mode, dw; DBG("%s %d\n", __FUNCTION__, ctx->DrawBuffer ? ctx->DrawBuffer->Name : 0); @@ -554,9 +592,13 @@ i915CullFaceFrontFace(struct gl_context * ctx, GLenum unused) mode = S4_CULLMODE_BOTH; } - I915_STATECHANGE(i915, I915_UPLOAD_CTX); - i915->state.Ctx[I915_CTXREG_LIS4] &= ~S4_CULLMODE_MASK; - i915->state.Ctx[I915_CTXREG_LIS4] |= mode; + dw = i915->state.Ctx[I915_CTXREG_LIS4]; + dw &= ~S4_CULLMODE_MASK; + dw |= mode; + if (dw != i915->state.Ctx[I915_CTXREG_LIS4]) { + i915->state.Ctx[I915_CTXREG_LIS4] = dw; + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + } } static void @@ -690,6 +732,7 @@ i915_update_fog(struct gl_context * ctx) GLenum mode; GLboolean enabled; GLboolean try_pixel_fog; + GLuint dw; if (ctx->FragmentProgram._Current) { /* Pull in static fog state from program */ @@ -765,12 +808,16 @@ i915_update_fog(struct gl_context * ctx) i915->vertex_fog = I915_FOG_VERTEX; } - I915_STATECHANGE(i915, I915_UPLOAD_CTX); I915_ACTIVESTATE(i915, I915_UPLOAD_FOG, enabled); + dw = i915->state.Ctx[I915_CTXREG_LIS5]; if (enabled) - i915->state.Ctx[I915_CTXREG_LIS5] |= S5_FOG_ENABLE; + dw |= S5_FOG_ENABLE; else - i915->state.Ctx[I915_CTXREG_LIS5] &= ~S5_FOG_ENABLE; + dw &= ~S5_FOG_ENABLE; + if (dw != i915->state.Ctx[I915_CTXREG_LIS5]) { + i915->state.Ctx[I915_CTXREG_LIS5] = dw; + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + } /* Always enable pixel fog. Vertex fog using fog coord will conflict * with fog code appended onto fragment program. @@ -837,6 +884,7 @@ static void i915Enable(struct gl_context * ctx, GLenum cap, GLboolean state) { struct i915_context *i915 = I915_CONTEXT(ctx); + GLuint dw; switch (cap) { case GL_TEXTURE_2D: @@ -848,11 +896,15 @@ i915Enable(struct gl_context * ctx, GLenum cap, GLboolean state) break; case GL_ALPHA_TEST: - I915_STATECHANGE(i915, I915_UPLOAD_CTX); + dw = i915->state.Ctx[I915_CTXREG_LIS6]; if (state) - i915->state.Ctx[I915_CTXREG_LIS6] |= S6_ALPHA_TEST_ENABLE; + dw |= S6_ALPHA_TEST_ENABLE; else - i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_ALPHA_TEST_ENABLE; + dw &= ~S6_ALPHA_TEST_ENABLE; + if (dw != i915->state.Ctx[I915_CTXREG_LIS6]) { + i915->state.Ctx[I915_CTXREG_LIS6] = dw; + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + } break; case GL_BLEND: @@ -872,19 +924,27 @@ i915Enable(struct gl_context * ctx, GLenum cap, GLboolean state) break; case GL_DITHER: - I915_STATECHANGE(i915, I915_UPLOAD_CTX); + dw = i915->state.Ctx[I915_CTXREG_LIS5]; if (state) - i915->state.Ctx[I915_CTXREG_LIS5] |= S5_COLOR_DITHER_ENABLE; + dw |= S5_COLOR_DITHER_ENABLE; else - i915->state.Ctx[I915_CTXREG_LIS5] &= ~S5_COLOR_DITHER_ENABLE; + dw &= ~S5_COLOR_DITHER_ENABLE; + if (dw != i915->state.Ctx[I915_CTXREG_LIS5]) { + i915->state.Ctx[I915_CTXREG_LIS5] = dw; + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + } break; case GL_DEPTH_TEST: - I915_STATECHANGE(i915, I915_UPLOAD_CTX); + dw = i915->state.Ctx[I915_CTXREG_LIS6]; if (state) - i915->state.Ctx[I915_CTXREG_LIS6] |= S6_DEPTH_TEST_ENABLE; + dw |= S6_DEPTH_TEST_ENABLE; else - i915->state.Ctx[I915_CTXREG_LIS6] &= ~S6_DEPTH_TEST_ENABLE; + dw &= ~S6_DEPTH_TEST_ENABLE; + if (dw != i915->state.Ctx[I915_CTXREG_LIS6]) { + i915->state.Ctx[I915_CTXREG_LIS6] = dw; + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + } i915DepthMask(ctx, ctx->Depth.Mask); break; @@ -900,11 +960,15 @@ i915Enable(struct gl_context * ctx, GLenum cap, GLboolean state) break; case GL_LINE_SMOOTH: - I915_STATECHANGE(i915, I915_UPLOAD_CTX); + dw = i915->state.Ctx[I915_CTXREG_LIS4]; if (state) - i915->state.Ctx[I915_CTXREG_LIS4] |= S4_LINE_ANTIALIAS_ENABLE; + dw |= S4_LINE_ANTIALIAS_ENABLE; else - i915->state.Ctx[I915_CTXREG_LIS4] &= ~S4_LINE_ANTIALIAS_ENABLE; + dw &= ~S4_LINE_ANTIALIAS_ENABLE; + if (dw != i915->state.Ctx[I915_CTXREG_LIS4]) { + i915->state.Ctx[I915_CTXREG_LIS4] = dw; + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + } break; case GL_FOG: @@ -923,13 +987,15 @@ i915Enable(struct gl_context * ctx, GLenum cap, GLboolean state) hw_stencil = (irbStencil && irbStencil->region); } if (hw_stencil) { - I915_STATECHANGE(i915, I915_UPLOAD_CTX); + dw = i915->state.Ctx[I915_CTXREG_LIS5]; if (state) - i915->state.Ctx[I915_CTXREG_LIS5] |= (S5_STENCIL_TEST_ENABLE | - S5_STENCIL_WRITE_ENABLE); + dw |= (S5_STENCIL_TEST_ENABLE | S5_STENCIL_WRITE_ENABLE); else - i915->state.Ctx[I915_CTXREG_LIS5] &= ~(S5_STENCIL_TEST_ENABLE | - S5_STENCIL_WRITE_ENABLE); + dw &= ~(S5_STENCIL_TEST_ENABLE | S5_STENCIL_WRITE_ENABLE); + if (dw != i915->state.Ctx[I915_CTXREG_LIS5]) { + i915->state.Ctx[I915_CTXREG_LIS5] = dw; + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + } } else { FALLBACK(&i915->intel, I915_FALLBACK_STENCIL, state); @@ -959,11 +1025,15 @@ i915Enable(struct gl_context * ctx, GLenum cap, GLboolean state) /* This state change is handled in i915_reduced_primitive_state because * the hardware bit should only be set when rendering points. */ - I915_STATECHANGE(i915, I915_UPLOAD_CTX); + dw = i915->state.Ctx[I915_CTXREG_LIS4]; if (state) - i915->state.Ctx[I915_CTXREG_LIS4] |= S4_SPRITE_POINT_ENABLE; + dw |= S4_SPRITE_POINT_ENABLE; else - i915->state.Ctx[I915_CTXREG_LIS4] &= ~S4_SPRITE_POINT_ENABLE; + dw &= ~S4_SPRITE_POINT_ENABLE; + if (dw != i915->state.Ctx[I915_CTXREG_LIS4]) { + i915->state.Ctx[I915_CTXREG_LIS4] = dw; + I915_STATECHANGE(i915, I915_UPLOAD_CTX); + } break; case GL_POINT_SMOOTH: @@ -984,6 +1054,7 @@ i915_init_packets(struct i915_context *i915) { I915_STATECHANGE(i915, I915_UPLOAD_CTX); + I915_STATECHANGE(i915, I915_UPLOAD_BLEND); /* Probably don't want to upload all this stuff every time one * piece changes. */ @@ -1010,13 +1081,13 @@ i915_init_packets(struct i915_context *i915) ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff)); - i915->state.Ctx[I915_CTXREG_IAB] = + i915->state.Blend[I915_BLENDREG_IAB] = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE | IAB_MODIFY_FUNC | IAB_MODIFY_SRC_FACTOR | IAB_MODIFY_DST_FACTOR); - i915->state.Ctx[I915_CTXREG_BLENDCOLOR0] = + i915->state.Blend[I915_BLENDREG_BLENDCOLOR0] = _3DSTATE_CONST_BLEND_COLOR_CMD; - i915->state.Ctx[I915_CTXREG_BLENDCOLOR1] = 0; + i915->state.Blend[I915_BLENDREG_BLENDCOLOR1] = 0; i915->state.Ctx[I915_CTXREG_BF_STENCIL_MASKS] = _3DSTATE_BACKFACE_STENCIL_MASKS | @@ -1087,6 +1158,7 @@ i915_init_packets(struct i915_context *i915) i915->state.active = (I915_UPLOAD_PROGRAM | I915_UPLOAD_STIPPLE | I915_UPLOAD_CTX | + I915_UPLOAD_BLEND | I915_UPLOAD_BUFFERS | I915_UPLOAD_INVARIENT | I915_UPLOAD_RASTER_RULES); diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index bc05a5687c..442adbb63c 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -158,7 +158,7 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) /* Get first image here, since intelObj->firstLevel will get set in * the intel_finalize_mipmap_tree() call above. */ - firstImage = tObj->Image[0][intelObj->firstLevel]; + firstImage = tObj->Image[0][tObj->BaseLevel]; drm_intel_bo_reference(intelObj->mt->region->buffer); i915->state.tex_buffer[unit] = intelObj->mt->region->buffer; diff --git a/src/mesa/drivers/dri/i915/i915_vtbl.c b/src/mesa/drivers/dri/i915/i915_vtbl.c index 59dfe08563..921183b81d 100644 --- a/src/mesa/drivers/dri/i915/i915_vtbl.c +++ b/src/mesa/drivers/dri/i915/i915_vtbl.c @@ -217,7 +217,7 @@ i915_emit_invarient_state(struct intel_context *intel) #define emit(intel, state, size ) \ - intel_batchbuffer_data(intel->batch, state, size) + intel_batchbuffer_data(intel, state, size, false) static GLuint get_dirty(struct i915_hw_state *state) @@ -251,6 +251,9 @@ get_state_size(struct i915_hw_state *state) if (dirty & I915_UPLOAD_CTX) sz += sizeof(state->Ctx); + if (dirty & I915_UPLOAD_BLEND) + sz += sizeof(state->Blend); + if (dirty & I915_UPLOAD_BUFFERS) sz += sizeof(state->Buffer); @@ -299,14 +302,15 @@ i915_emit_state(struct intel_context *intel) * scheduling is allowed, rather than assume that it is whenever a * batchbuffer fills up. */ - intel_batchbuffer_require_space(intel->batch, - get_state_size(state) + INTEL_PRIM_EMIT_SIZE); + intel_batchbuffer_require_space(intel, + get_state_size(state) + INTEL_PRIM_EMIT_SIZE, + false); count = 0; again: aper_count = 0; dirty = get_dirty(state); - aper_array[aper_count++] = intel->batch->buf; + aper_array[aper_count++] = intel->batch.bo; if (dirty & I915_UPLOAD_BUFFERS) { aper_array[aper_count++] = state->draw_region->buffer; if (state->depth_region) @@ -326,7 +330,7 @@ i915_emit_state(struct intel_context *intel) if (dri_bufmgr_check_aperture_space(aper_array, aper_count)) { if (count == 0) { count++; - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_flush(intel); goto again; } else { _mesa_error(ctx, GL_OUT_OF_MEMORY, "i915 emit state"); @@ -365,6 +369,13 @@ i915_emit_state(struct intel_context *intel) emit(intel, state->Ctx, sizeof(state->Ctx)); } + if (dirty & I915_UPLOAD_BLEND) { + if (INTEL_DEBUG & DEBUG_STATE) + fprintf(stderr, "I915_UPLOAD_BLEND:\n"); + + emit(intel, state->Blend, sizeof(state->Blend)); + } + if (dirty & I915_UPLOAD_BUFFERS) { GLuint count; @@ -425,6 +436,7 @@ i915_emit_state(struct intel_context *intel) */ if (dirty & I915_UPLOAD_TEX_ALL) { int nr = 0; + GLuint unwind; for (i = 0; i < I915_TEX_UNITS; i++) if (dirty & I915_UPLOAD_TEX(i)) @@ -435,21 +447,16 @@ i915_emit_state(struct intel_context *intel) OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT); for (i = 0; i < I915_TEX_UNITS; i++) if (dirty & I915_UPLOAD_TEX(i)) { - - if (state->tex_buffer[i]) { - OUT_RELOC(state->tex_buffer[i], - I915_GEM_DOMAIN_SAMPLER, 0, - state->tex_offset[i]); - } - else { - OUT_BATCH(state->tex_offset[i]); - } + OUT_RELOC(state->tex_buffer[i], + I915_GEM_DOMAIN_SAMPLER, 0, + state->tex_offset[i]); OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]); OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]); } ADVANCE_BATCH(); + unwind = intel->batch.used; BEGIN_BATCH(2 + nr * 3); OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * nr)); OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT); @@ -460,6 +467,13 @@ i915_emit_state(struct intel_context *intel) OUT_BATCH(state->Tex[i][I915_TEXREG_SS4]); } ADVANCE_BATCH(); + if (i915->last_sampler && + memcmp(intel->batch.map + i915->last_sampler, + intel->batch.map + unwind, + (2 + nr*3)*sizeof(int)) == 0) + intel->batch.used = unwind; + else + i915->last_sampler = unwind; } if (dirty & I915_UPLOAD_CONSTANTS) { @@ -481,9 +495,7 @@ i915_emit_state(struct intel_context *intel) } } - intel->batch->dirty_state &= ~dirty; assert(get_dirty(state) == 0); - assert((intel->batch->dirty_state & (1<<1)) == 0); } static void @@ -523,6 +535,27 @@ i915_set_buf_info_for_region(uint32_t *state, struct intel_region *region, } } +static uint32_t i915_render_target_format_for_mesa_format[MESA_FORMAT_COUNT] = +{ + [MESA_FORMAT_ARGB8888] = DV_PF_8888, + [MESA_FORMAT_XRGB8888] = DV_PF_8888, + [MESA_FORMAT_RGB565] = DV_PF_565 | DITHER_FULL_ALWAYS, + [MESA_FORMAT_ARGB1555] = DV_PF_1555 | DITHER_FULL_ALWAYS, + [MESA_FORMAT_ARGB4444] = DV_PF_4444 | DITHER_FULL_ALWAYS, +}; + +static bool +i915_render_target_supported(gl_format format) +{ + if (format == MESA_FORMAT_S8_Z24 || + format == MESA_FORMAT_X8_Z24 || + format == MESA_FORMAT_Z16) { + return true; + } + + return i915_render_target_format_for_mesa_format[format] != 0; +} + static void i915_set_draw_region(struct intel_context *intel, struct intel_region *color_regions[], @@ -562,24 +595,7 @@ i915_set_draw_region(struct intel_context *intel, DSTORG_VERT_BIAS(0x8) | /* .5 */ LOD_PRECLAMP_OGL | TEX_DEFAULT_COLOR_OGL); if (irb != NULL) { - switch (irb->Base.Format) { - case MESA_FORMAT_ARGB8888: - case MESA_FORMAT_XRGB8888: - value |= DV_PF_8888; - break; - case MESA_FORMAT_RGB565: - value |= DV_PF_565 | DITHER_FULL_ALWAYS; - break; - case MESA_FORMAT_ARGB1555: - value |= DV_PF_1555 | DITHER_FULL_ALWAYS; - break; - case MESA_FORMAT_ARGB4444: - value |= DV_PF_4444 | DITHER_FULL_ALWAYS; - break; - default: - _mesa_problem(ctx, "Bad renderbuffer format: %d\n", - irb->Base.Format); - } + value |= i915_render_target_format_for_mesa_format[irb->Base.Format]; } /* This isn't quite safe, thus being hidden behind an option. When changing @@ -661,6 +677,10 @@ i915_new_batch(struct intel_context *intel) */ i915->state.emitted = 0; i915->last_draw_offset = 0; + i915->last_sampler = 0; + + i915->current_vb_bo = NULL; + i915->current_vertex_size = 0; } static void @@ -686,4 +706,5 @@ i915InitVtbl(struct i915_context *i915) i915->intel.vtbl.update_texture_state = i915UpdateTextureState; i915->intel.vtbl.assert_not_dirty = i915_assert_not_dirty; i915->intel.vtbl.finish_batch = intel_finish_vb; + i915->intel.vtbl.render_target_supported = i915_render_target_supported; } diff --git a/src/mesa/drivers/dri/i915/intel_render.c b/src/mesa/drivers/dri/i915/intel_render.c index 0d8ab4b507..2d361ca0a9 100644 --- a/src/mesa/drivers/dri/i915/intel_render.c +++ b/src/mesa/drivers/dri/i915/intel_render.c @@ -124,7 +124,7 @@ static INLINE GLuint intel_get_vb_max(struct intel_context *intel) GLuint ret; if (intel->intelScreen->no_vbo) - ret = intel->batch->size - 1500; + ret = sizeof(intel->batch.map) - 1500; else ret = INTEL_VB_SIZE; ret /= (intel->vertex_size * 4); diff --git a/src/mesa/drivers/dri/i915/intel_tris.c b/src/mesa/drivers/dri/i915/intel_tris.c index b9a8aeb12f..7bcb72f42d 100644 --- a/src/mesa/drivers/dri/i915/intel_tris.c +++ b/src/mesa/drivers/dri/i915/intel_tris.c @@ -54,6 +54,7 @@ #include "intel_span.h" #include "i830_context.h" #include "i830_reg.h" +#include "i915_context.h" static void intelRenderPrimitive(struct gl_context * ctx, GLenum prim); static void intelRasterPrimitive(struct gl_context * ctx, GLenum rprim, @@ -62,22 +63,22 @@ static void intelRasterPrimitive(struct gl_context * ctx, GLenum rprim, static void intel_flush_inline_primitive(struct intel_context *intel) { - GLuint used = intel->batch->ptr - intel->prim.start_ptr; + GLuint used = intel->batch.used - intel->prim.start_ptr; assert(intel->prim.primitive != ~0); /* printf("/\n"); */ - if (used < 8) + if (used < 2) goto do_discard; - *(int *) intel->prim.start_ptr = (_3DPRIMITIVE | - intel->prim.primitive | (used / 4 - 2)); + intel->batch.map[intel->prim.start_ptr] = + _3DPRIMITIVE | intel->prim.primitive | (used - 2); goto finished; do_discard: - intel->batch->ptr -= used; + intel->batch.used = intel->prim.start_ptr; finished: intel->prim.primitive = ~0; @@ -100,9 +101,7 @@ static void intel_start_inline(struct intel_context *intel, uint32_t prim) */ BEGIN_BATCH(1); - assert((intel->batch->dirty_state & (1<<1)) == 0); - - intel->prim.start_ptr = intel->batch->ptr; + intel->prim.start_ptr = intel->batch.used; intel->prim.primitive = prim; intel->prim.flush = intel_flush_inline_primitive; @@ -118,26 +117,25 @@ static void intel_wrap_inline(struct intel_context *intel) GLuint prim = intel->prim.primitive; intel_flush_inline_primitive(intel); - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_flush(intel); intel_start_inline(intel, prim); /* ??? */ } static GLuint *intel_extend_inline(struct intel_context *intel, GLuint dwords) { - GLuint sz = dwords * sizeof(GLuint); GLuint *ptr; assert(intel->prim.flush == intel_flush_inline_primitive); - if (intel_batchbuffer_space(intel->batch) < sz) + if (intel_batchbuffer_space(intel) < dwords * sizeof(GLuint)) intel_wrap_inline(intel); /* printf("."); */ intel->vtbl.assert_not_dirty(intel); - ptr = (GLuint *) intel->batch->ptr; - intel->batch->ptr += sz; + ptr = intel->batch.map + intel->batch.used; + intel->batch.used += dwords; return ptr; } @@ -218,15 +216,15 @@ void intel_flush_prim(struct intel_context *intel) offset = intel->prim.start_offset; intel->prim.start_offset = intel->prim.current_offset; if (intel->gen < 3) - intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128); + intel->prim.current_offset = intel->prim.start_offset = ALIGN(intel->prim.start_offset, 128); intel->prim.flush = NULL; intel->vtbl.emit_state(intel); - aper_array[0] = intel->batch->buf; + aper_array[0] = intel->batch.bo; aper_array[1] = vb_bo; if (dri_bufmgr_check_aperture_space(aper_array, 2)) { - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_flush(intel); intel->vtbl.emit_state(intel); } @@ -236,11 +234,6 @@ void intel_flush_prim(struct intel_context *intel) */ intel->no_batch_wrap = GL_TRUE; - /* Check that we actually emitted the state into this batch, using the - * UPLOAD_CTX bit as the signal. - */ - assert((intel->batch->dirty_state & (1<<1)) == 0); - #if 0 printf("emitting %d..%d=%d vertices size %d\n", offset, intel->prim.current_offset, count, @@ -248,20 +241,39 @@ void intel_flush_prim(struct intel_context *intel) #endif if (intel->gen >= 3) { - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(0) | I1_LOAD_S(1) | 1); - assert((offset & ~S0_VB_OFFSET_MASK) == 0); - OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, offset); - OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) | - (intel->vertex_size << S1_VERTEX_PITCH_SHIFT)); + struct i915_context *i915 = i915_context(&intel->ctx); + unsigned int cmd = 0, len = 0; + + if (vb_bo != i915->current_vb_bo) { + cmd |= I1_LOAD_S(0); + len++; + } + if (intel->vertex_size != i915->current_vertex_size) { + cmd |= I1_LOAD_S(1); + len++; + } + if (len) + len++; + + BEGIN_BATCH(2+len); + if (cmd) + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | cmd | (len - 2)); + if (vb_bo != i915->current_vb_bo) { + OUT_RELOC(vb_bo, I915_GEM_DOMAIN_VERTEX, 0, 0); + i915->current_vb_bo = vb_bo; + } + if (intel->vertex_size != i915->current_vertex_size) { + OUT_BATCH((intel->vertex_size << S1_VERTEX_WIDTH_SHIFT) | + (intel->vertex_size << S1_VERTEX_PITCH_SHIFT)); + i915->current_vertex_size = intel->vertex_size; + } OUT_BATCH(_3DPRIMITIVE | PRIM_INDIRECT | PRIM_INDIRECT_SEQUENTIAL | intel->prim.primitive | count); - OUT_BATCH(0); /* Beginning vertex index */ + OUT_BATCH(offset / (intel->vertex_size * 4)); ADVANCE_BATCH(); } else { struct i830_context *i830 = i830_context(&intel->ctx); @@ -478,6 +490,7 @@ static void intel_wpos_triangle(struct intel_context *intel, intelVertexPtr v0, intelVertexPtr v1, intelVertexPtr v2) { + const struct gl_framebuffer *fb = intel->ctx.DrawBuffer; GLuint offset = intel->wpos_offset; GLuint size = intel->wpos_size; GLfloat *v0_wpos = (GLfloat *)((char *)v0 + offset); @@ -488,10 +501,11 @@ intel_wpos_triangle(struct intel_context *intel, __memcpy(v1_wpos, v1, size); __memcpy(v2_wpos, v2, size); - v0_wpos[1] = -v0_wpos[1] + intel->ctx.DrawBuffer->Height; - v1_wpos[1] = -v1_wpos[1] + intel->ctx.DrawBuffer->Height; - v2_wpos[1] = -v2_wpos[1] + intel->ctx.DrawBuffer->Height; - + if (!fb->Name) { + v0_wpos[1] = -v0_wpos[1] + fb->Height; + v1_wpos[1] = -v1_wpos[1] + fb->Height; + v2_wpos[1] = -v2_wpos[1] + fb->Height; + } intel_draw_triangle(intel, v0, v1, v2); } @@ -501,6 +515,7 @@ static void intel_wpos_line(struct intel_context *intel, intelVertexPtr v0, intelVertexPtr v1) { + const struct gl_framebuffer *fb = intel->ctx.DrawBuffer; GLuint offset = intel->wpos_offset; GLuint size = intel->wpos_size; GLfloat *v0_wpos = (GLfloat *)((char *)v0 + offset); @@ -509,8 +524,10 @@ intel_wpos_line(struct intel_context *intel, __memcpy(v0_wpos, v0, size); __memcpy(v1_wpos, v1, size); - v0_wpos[1] = -v0_wpos[1] + intel->ctx.DrawBuffer->Height; - v1_wpos[1] = -v1_wpos[1] + intel->ctx.DrawBuffer->Height; + if (!fb->Name) { + v0_wpos[1] = -v0_wpos[1] + fb->Height; + v1_wpos[1] = -v1_wpos[1] + fb->Height; + } intel_draw_line(intel, v0, v1); } @@ -519,12 +536,15 @@ intel_wpos_line(struct intel_context *intel, static void intel_wpos_point(struct intel_context *intel, intelVertexPtr v0) { + const struct gl_framebuffer *fb = intel->ctx.DrawBuffer; GLuint offset = intel->wpos_offset; GLuint size = intel->wpos_size; GLfloat *v0_wpos = (GLfloat *)((char *)v0 + offset); __memcpy(v0_wpos, v0, size); - v0_wpos[1] = -v0_wpos[1] + intel->ctx.DrawBuffer->Height; + + if (!fb->Name) + v0_wpos[1] = -v0_wpos[1] + fb->Height; intel_draw_point(intel, v0); } diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 7c3ac0c14e..b05ba35d65 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -108,6 +108,7 @@ CXX_SOURCES = \ brw_fs.cpp \ brw_fs_channel_expressions.cpp \ brw_fs_reg_allocate.cpp \ + brw_fs_schedule_instructions.cpp \ brw_fs_vector_splitting.cpp ASM_SOURCES = diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index d3a1233aac..412d82ab3c 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -35,6 +35,7 @@ #include "brw_defines.h" #include "brw_util.h" #include "main/macros.h" +#include "intel_batchbuffer.h" void brw_update_cc_vp(struct brw_context *brw) @@ -92,61 +93,61 @@ static void upload_cc_unit(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &brw->intel.ctx; - struct brw_cc_unit_state cc; - void *map; + struct brw_cc_unit_state *cc; - memset(&cc, 0, sizeof(cc)); + cc = brw_state_batch(brw, sizeof(*cc), 64, &brw->cc.state_offset); + memset(cc, 0, sizeof(*cc)); /* _NEW_STENCIL */ if (ctx->Stencil._Enabled) { const unsigned back = ctx->Stencil._BackFace; - cc.cc0.stencil_enable = 1; - cc.cc0.stencil_func = + cc->cc0.stencil_enable = 1; + cc->cc0.stencil_func = intel_translate_compare_func(ctx->Stencil.Function[0]); - cc.cc0.stencil_fail_op = + cc->cc0.stencil_fail_op = intel_translate_stencil_op(ctx->Stencil.FailFunc[0]); - cc.cc0.stencil_pass_depth_fail_op = + cc->cc0.stencil_pass_depth_fail_op = intel_translate_stencil_op(ctx->Stencil.ZFailFunc[0]); - cc.cc0.stencil_pass_depth_pass_op = + cc->cc0.stencil_pass_depth_pass_op = intel_translate_stencil_op(ctx->Stencil.ZPassFunc[0]); - cc.cc1.stencil_ref = ctx->Stencil.Ref[0]; - cc.cc1.stencil_write_mask = ctx->Stencil.WriteMask[0]; - cc.cc1.stencil_test_mask = ctx->Stencil.ValueMask[0]; + cc->cc1.stencil_ref = ctx->Stencil.Ref[0]; + cc->cc1.stencil_write_mask = ctx->Stencil.WriteMask[0]; + cc->cc1.stencil_test_mask = ctx->Stencil.ValueMask[0]; if (ctx->Stencil._TestTwoSide) { - cc.cc0.bf_stencil_enable = 1; - cc.cc0.bf_stencil_func = + cc->cc0.bf_stencil_enable = 1; + cc->cc0.bf_stencil_func = intel_translate_compare_func(ctx->Stencil.Function[back]); - cc.cc0.bf_stencil_fail_op = + cc->cc0.bf_stencil_fail_op = intel_translate_stencil_op(ctx->Stencil.FailFunc[back]); - cc.cc0.bf_stencil_pass_depth_fail_op = + cc->cc0.bf_stencil_pass_depth_fail_op = intel_translate_stencil_op(ctx->Stencil.ZFailFunc[back]); - cc.cc0.bf_stencil_pass_depth_pass_op = + cc->cc0.bf_stencil_pass_depth_pass_op = intel_translate_stencil_op(ctx->Stencil.ZPassFunc[back]); - cc.cc1.bf_stencil_ref = ctx->Stencil.Ref[back]; - cc.cc2.bf_stencil_write_mask = ctx->Stencil.WriteMask[back]; - cc.cc2.bf_stencil_test_mask = ctx->Stencil.ValueMask[back]; + cc->cc1.bf_stencil_ref = ctx->Stencil.Ref[back]; + cc->cc2.bf_stencil_write_mask = ctx->Stencil.WriteMask[back]; + cc->cc2.bf_stencil_test_mask = ctx->Stencil.ValueMask[back]; } /* Not really sure about this: */ if (ctx->Stencil.WriteMask[0] || (ctx->Stencil._TestTwoSide && ctx->Stencil.WriteMask[back])) - cc.cc0.stencil_write_enable = 1; + cc->cc0.stencil_write_enable = 1; } /* _NEW_COLOR */ if (ctx->Color._LogicOpEnabled && ctx->Color.LogicOp != GL_COPY) { - cc.cc2.logicop_enable = 1; - cc.cc5.logicop_func = intel_translate_logic_op(ctx->Color.LogicOp); + cc->cc2.logicop_enable = 1; + cc->cc5.logicop_func = intel_translate_logic_op(ctx->Color.LogicOp); } else if (ctx->Color.BlendEnabled) { - GLenum eqRGB = ctx->Color.BlendEquationRGB; - GLenum eqA = ctx->Color.BlendEquationA; - GLenum srcRGB = ctx->Color.BlendSrcRGB; - GLenum dstRGB = ctx->Color.BlendDstRGB; - GLenum srcA = ctx->Color.BlendSrcA; - GLenum dstA = ctx->Color.BlendDstA; + GLenum eqRGB = ctx->Color.Blend[0].EquationRGB; + GLenum eqA = ctx->Color.Blend[0].EquationA; + GLenum srcRGB = ctx->Color.Blend[0].SrcRGB; + GLenum dstRGB = ctx->Color.Blend[0].DstRGB; + GLenum srcA = ctx->Color.Blend[0].SrcA; + GLenum dstA = ctx->Color.Blend[0].DstA; /* If the renderbuffer is XRGB, we have to frob the blend function to * force the destination alpha to 1.0. This means replacing GL_DST_ALPHA @@ -167,58 +168,55 @@ static void upload_cc_unit(struct brw_context *brw) srcA = dstA = GL_ONE; } - cc.cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); - cc.cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); - cc.cc6.blend_function = brw_translate_blend_equation(eqRGB); + cc->cc6.dest_blend_factor = brw_translate_blend_factor(dstRGB); + cc->cc6.src_blend_factor = brw_translate_blend_factor(srcRGB); + cc->cc6.blend_function = brw_translate_blend_equation(eqRGB); - cc.cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); - cc.cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); - cc.cc5.ia_blend_function = brw_translate_blend_equation(eqA); + cc->cc5.ia_dest_blend_factor = brw_translate_blend_factor(dstA); + cc->cc5.ia_src_blend_factor = brw_translate_blend_factor(srcA); + cc->cc5.ia_blend_function = brw_translate_blend_equation(eqA); - cc.cc3.blend_enable = 1; - cc.cc3.ia_blend_enable = (srcA != srcRGB || + cc->cc3.blend_enable = 1; + cc->cc3.ia_blend_enable = (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB); } if (ctx->Color.AlphaEnabled) { - cc.cc3.alpha_test = 1; - cc.cc3.alpha_test_func = + cc->cc3.alpha_test = 1; + cc->cc3.alpha_test_func = intel_translate_compare_func(ctx->Color.AlphaFunc); - cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; + cc->cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; - UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], ctx->Color.AlphaRef); + UNCLAMPED_FLOAT_TO_UBYTE(cc->cc7.alpha_ref.ub[0], ctx->Color.AlphaRef); } if (ctx->Color.DitherFlag) { - cc.cc5.dither_enable = 1; - cc.cc6.y_dither_offset = 0; - cc.cc6.x_dither_offset = 0; + cc->cc5.dither_enable = 1; + cc->cc6.y_dither_offset = 0; + cc->cc6.x_dither_offset = 0; } /* _NEW_DEPTH */ if (ctx->Depth.Test) { - cc.cc2.depth_test = 1; - cc.cc2.depth_test_function = + cc->cc2.depth_test = 1; + cc->cc2.depth_test_function = intel_translate_compare_func(ctx->Depth.Func); - cc.cc2.depth_write_enable = ctx->Depth.Mask; + cc->cc2.depth_write_enable = ctx->Depth.Mask; } if (intel->stats_wm || unlikely(INTEL_DEBUG & DEBUG_STATS)) - cc.cc5.statistics_enable = 1; + cc->cc5.statistics_enable = 1; /* CACHE_NEW_CC_VP */ - cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */ + cc->cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */ - map = brw_state_batch(brw, sizeof(cc), 64, - &brw->cc.state_bo, &brw->cc.state_offset); - memcpy(map, &cc, sizeof(cc)); brw->state.dirty.cache |= CACHE_NEW_CC_UNIT; /* Emit CC viewport relocation */ - drm_intel_bo_emit_reloc(brw->cc.state_bo, (brw->cc.state_offset + - offsetof(struct brw_cc_unit_state, - cc4)), + drm_intel_bo_emit_reloc(brw->intel.batch.bo, + (brw->cc.state_offset + + offsetof(struct brw_cc_unit_state, cc4)), brw->cc.vp_bo, 0, I915_GEM_DOMAIN_INSTRUCTION, 0); } @@ -235,18 +233,16 @@ const struct brw_tracked_state brw_cc_unit = { static void upload_blend_constant_color(struct brw_context *brw) { - struct gl_context *ctx = &brw->intel.ctx; - struct brw_blend_constant_color bcc; - - memset(&bcc, 0, sizeof(bcc)); - bcc.header.opcode = CMD_BLEND_CONSTANT_COLOR; - bcc.header.length = sizeof(bcc)/4-2; - bcc.blend_constant_color[0] = ctx->Color.BlendColor[0]; - bcc.blend_constant_color[1] = ctx->Color.BlendColor[1]; - bcc.blend_constant_color[2] = ctx->Color.BlendColor[2]; - bcc.blend_constant_color[3] = ctx->Color.BlendColor[3]; - - BRW_CACHED_BATCH_STRUCT(brw, &bcc); + struct intel_context *intel = &brw->intel; + struct gl_context *ctx = &intel->ctx; + + BEGIN_BATCH(5); + OUT_BATCH(_3DSTATE_BLEND_CONSTANT_COLOR << 16 | (5-2)); + OUT_BATCH_F(ctx->Color.BlendColor[0]); + OUT_BATCH_F(ctx->Color.BlendColor[1]); + OUT_BATCH_F(ctx->Color.BlendColor[2]); + OUT_BATCH_F(ctx->Color.BlendColor[3]); + CACHED_BATCH(); } const struct brw_tracked_state brw_blend_constant_color = { diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 28549f2574..9483ec69d9 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -151,6 +151,22 @@ GLboolean brwCreateContext( int api, MIN2(ctx->Const.FragmentProgram.MaxNativeParameters, ctx->Const.FragmentProgram.MaxEnvParams); + /* Fragment shaders use real, 32-bit twos-complement integers for all + * integer types. + */ + ctx->Const.FragmentProgram.LowInt.RangeMin = 31; + ctx->Const.FragmentProgram.LowInt.RangeMax = 30; + ctx->Const.FragmentProgram.LowInt.Precision = 0; + ctx->Const.FragmentProgram.HighInt = ctx->Const.FragmentProgram.MediumInt + = ctx->Const.FragmentProgram.LowInt; + + /* Gen6 converts quads to polygon in beginning of 3D pipeline, + but we're not sure how it's actually done for vertex order, + that affect provoking vertex decision. Always use last vertex + convention for quad primitive which works as expected for now. */ + if (intel->gen == 6) + ctx->Const.QuadsFollowProvokingVertexConvention = GL_FALSE; + if (intel->is_g4x || intel->gen >= 5) { brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_GM45; brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 7069724466..7b0551a92b 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -145,7 +145,7 @@ struct brw_context; #define BRW_NEW_NR_VS_SURFACES 0x80000 #define BRW_NEW_INDEX_BUFFER 0x100000 #define BRW_NEW_VS_CONSTBUF 0x200000 -#define BRW_NEW_WM_CONSTBUF 0x200000 +#define BRW_NEW_WM_CONSTBUF 0x400000 struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -408,21 +408,24 @@ struct brw_cached_batch_item { */ #define ATTRIB_BIT_DWORDS ((VERT_ATTRIB_MAX+31)/32) +struct brw_vertex_buffer { + /** Buffer object containing the uploaded vertex data */ + drm_intel_bo *bo; + uint32_t offset; + /** Byte stride between elements in the uploaded array */ + GLuint stride; +}; struct brw_vertex_element { const struct gl_client_array *glarray; + int buffer; + /** The corresponding Mesa vertex attribute */ gl_vert_attrib attrib; /** Size of a complete element */ GLuint element_size; - /** Number of uploaded elements for this input. */ - GLuint count; - /** Byte stride between elements in the uploaded array */ - GLuint stride; /** Offset of the first element within the buffer object */ unsigned int offset; - /** Buffer object containing the uploaded vertex data */ - drm_intel_bo *bo; }; @@ -457,12 +460,10 @@ struct brw_context GLboolean has_negative_rhw_bug; GLboolean has_aa_line_parameters; GLboolean has_pln; -; + struct { struct brw_state_flags dirty; - GLuint nr_color_regions; - struct intel_region *color_regions[MAX_DRAW_BUFFERS]; struct intel_region *depth_region; /** @@ -485,23 +486,27 @@ struct brw_context struct { struct brw_vertex_element inputs[VERT_ATTRIB_MAX]; + struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX]; + struct { + uint32_t handle; + uint32_t offset; + uint32_t stride; + } current_buffers[VERT_ATTRIB_MAX]; struct brw_vertex_element *enabled[VERT_ATTRIB_MAX]; GLuint nr_enabled; - -#define BRW_NR_UPLOAD_BUFS 17 -#define BRW_UPLOAD_INIT_SIZE (128*1024) - - struct { - drm_intel_bo *bo; - GLuint offset; - } upload; + GLuint nr_buffers, nr_current_buffers; /* Summary of size and varying of active arrays, so we can check * for changes to this state: */ struct brw_vertex_info info; unsigned int min_index, max_index; + + /* Offset from start of vertex buffer so we can avoid redefining + * the same VB packed over and over again. + */ + unsigned int start_vertex_bias; } vb; struct { @@ -512,10 +517,10 @@ struct brw_context */ const struct _mesa_index_buffer *ib; - /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */ + /* Updates are signaled by BRW_NEW_INDEX_BUFFER. */ drm_intel_bo *bo; - unsigned int offset; - unsigned int size; + GLuint type; + /* Offset to index buffer index to use in CMD_3D_PRIM so that we can * avoid re-uploading the IB packet over and over if we're actually * referencing the same index buffer. @@ -528,11 +533,6 @@ struct brw_context const struct gl_vertex_program *vertex_program; const struct gl_fragment_program *fragment_program; - - /* For populating the gtt: - */ - GLuint next_free_page; - /* hw-dependent 3DSTATE_VF_STATISTICS opcode */ uint32_t CMD_VF_STATISTICS; /* hw-dependent 3DSTATE_PIPELINE_SELECT opcode */ @@ -612,9 +612,7 @@ struct brw_context drm_intel_bo *const_bo; /** Binding table of pointers to surf_bo entries */ - drm_intel_bo *bind_bo; uint32_t bind_bo_offset; - drm_intel_bo *surf_bo[BRW_VS_MAX_SURF]; uint32_t surf_offset[BRW_VS_MAX_SURF]; GLuint nr_surfaces; } vs; @@ -666,9 +664,7 @@ struct brw_context drm_intel_bo *sampler_bo; /** Binding table of pointers to surf_bo entries */ - drm_intel_bo *bind_bo; uint32_t bind_bo_offset; - drm_intel_bo *surf_bo[BRW_WM_MAX_SURF]; uint32_t surf_offset[BRW_WM_MAX_SURF]; drm_intel_bo *prog_bo; @@ -693,7 +689,6 @@ struct brw_context drm_intel_bo *depth_stencil_state_bo; drm_intel_bo *color_calc_state_bo; - drm_intel_bo *state_bo; uint32_t state_offset; } cc; @@ -706,6 +701,9 @@ struct brw_context /* Used to give every program string a unique id */ GLuint program_id; + + int num_prepare_atoms, num_emit_atoms; + struct brw_tracked_state prepare_atoms[64], emit_atoms[64]; }; @@ -841,4 +839,3 @@ float convert_param(enum param_conversion conversion, float param) GLboolean brw_do_cubemap_normalize(struct exec_list *instructions); #endif - diff --git a/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp b/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp index 35bea68121..8574169e47 100644 --- a/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp +++ b/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp @@ -51,7 +51,7 @@ brw_cubemap_normalize_visitor::visit_leave(ir_texture *ir) if (ir->sampler->type->sampler_dimensionality != GLSL_SAMPLER_DIM_CUBE) return visit_continue; - void *mem_ctx = talloc_parent(ir); + void *mem_ctx = ralloc_parent(ir); ir_variable *var = new(mem_ctx) ir_variable(ir->coordinate->type, "coordinate", ir_var_auto); diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 877b22fec1..ae11c487a2 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -146,22 +146,24 @@ const struct brw_tracked_state brw_curbe_offsets = { */ void brw_upload_cs_urb_state(struct brw_context *brw) { - struct brw_cs_urb_state cs_urb; - memset(&cs_urb, 0, sizeof(cs_urb)); + struct intel_context *intel = &brw->intel; + BEGIN_BATCH(2); /* It appears that this is the state packet for the CS unit, ie. the * urb entries detailed here are housed in the CS range from the * URB_FENCE command. */ - cs_urb.header.opcode = CMD_CS_URB_STATE; - cs_urb.header.length = sizeof(cs_urb)/4 - 2; + OUT_BATCH(CMD_CS_URB_STATE << 16 | (2-2)); /* BRW_NEW_URB_FENCE */ - cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries; - cs_urb.bits0.urb_entry_size = brw->urb.csize - 1; - - assert(brw->urb.nr_cs_entries); - BRW_CACHED_BATCH_STRUCT(brw, &cs_urb); + if (brw->urb.csize == 0) { + OUT_BATCH(0); + } else { + /* BRW_NEW_URB_FENCE */ + assert(brw->urb.nr_cs_entries); + OUT_BATCH((brw->urb.csize - 1) << 4 | brw->urb.nr_cs_entries); + } + CACHED_BATCH(); } static GLfloat fixed_plane[6][4] = { diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 5c5b8259e1..5496b4fdd3 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -35,28 +35,6 @@ /* 3D state: */ -#define _3DOP_3DSTATE_PIPELINED 0x0 -#define _3DOP_3DSTATE_NONPIPELINED 0x1 -#define _3DOP_3DCONTROL 0x2 -#define _3DOP_3DPRIMITIVE 0x3 - -#define _3DSTATE_PIPELINED_POINTERS 0x00 -#define _3DSTATE_BINDING_TABLE_POINTERS 0x01 -#define _3DSTATE_VERTEX_BUFFERS 0x08 -#define _3DSTATE_VERTEX_ELEMENTS 0x09 -#define _3DSTATE_INDEX_BUFFER 0x0A -#define _3DSTATE_VF_STATISTICS 0x0B -#define _3DSTATE_DRAWING_RECTANGLE 0x00 -#define _3DSTATE_CONSTANT_COLOR 0x01 -#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02 -#define _3DSTATE_CHROMA_KEY 0x04 -#define _3DSTATE_DEPTH_BUFFER 0x05 -#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06 -#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07 -#define _3DSTATE_LINE_STIPPLE 0x08 -#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 -#define _3DCONTROL 0x00 - #define PIPE_CONTROL_NOWRITE 0x00 #define PIPE_CONTROL_WRITEIMMEDIATE 0x01 #define PIPE_CONTROL_WRITEDEPTH 0x02 @@ -389,6 +367,7 @@ #define BRW_SURFACEFORMAT_R8_SSCALED 0x149 #define BRW_SURFACEFORMAT_R8_USCALED 0x14A #define BRW_SURFACEFORMAT_L8_UNORM_SRGB 0x14C +#define BRW_SURFACEFORMAT_DXT1_RGB_SRGB 0x180 #define BRW_SURFACEFORMAT_R1_UINT 0x181 #define BRW_SURFACEFORMAT_YCRCB_NORMAL 0x182 #define BRW_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 @@ -700,6 +679,8 @@ #define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 #define BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 #define BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE 0 +#define BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1 #define BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 #define BRW_SAMPLER_MESSAGE_SIMD8_RESINFO 2 #define BRW_SAMPLER_MESSAGE_SIMD16_RESINFO 2 @@ -838,13 +819,13 @@ #define CMD_PIPELINE_SELECT_965 0x6104 #define CMD_PIPELINE_SELECT_GM45 0x6904 -#define CMD_PIPELINED_STATE_POINTERS 0x7800 -#define CMD_BINDING_TABLE_PTRS 0x7801 +#define _3DSTATE_PIPELINED_POINTERS 0x7800 +#define _3DSTATE_BINDING_TABLE_POINTERS 0x7801 # define GEN6_BINDING_TABLE_MODIFY_VS (1 << 8) # define GEN6_BINDING_TABLE_MODIFY_GS (1 << 9) # define GEN6_BINDING_TABLE_MODIFY_PS (1 << 12) -#define CMD_3D_SAMPLER_STATE_POINTERS 0x7802 /* SNB+ */ +#define _3DSTATE_SAMPLER_STATE_POINTERS 0x7802 /* GEN6+ */ # define PS_SAMPLER_STATE_CHANGE (1 << 12) # define GS_SAMPLER_STATE_CHANGE (1 << 9) # define VS_SAMPLER_STATE_CHANGE (1 << 8) @@ -885,27 +866,29 @@ #define CMD_INDEX_BUFFER 0x780a #define CMD_VF_STATISTICS_965 0x780b #define CMD_VF_STATISTICS_GM45 0x680b -#define CMD_3D_CC_STATE_POINTERS 0x780e /* GEN6+ */ +#define _3DSTATE_CC_STATE_POINTERS 0x780e /* GEN6+ */ -#define CMD_URB 0x7805 /* GEN6+ */ +#define _3DSTATE_URB 0x7805 /* GEN6+ */ # define GEN6_URB_VS_SIZE_SHIFT 16 # define GEN6_URB_VS_ENTRIES_SHIFT 0 # define GEN6_URB_GS_ENTRIES_SHIFT 8 # define GEN6_URB_GS_SIZE_SHIFT 0 -#define CMD_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */ +#define _3DSTATE_VIEWPORT_STATE_POINTERS 0x780d /* GEN6+ */ # define GEN6_CC_VIEWPORT_MODIFY (1 << 12) # define GEN6_SF_VIEWPORT_MODIFY (1 << 11) # define GEN6_CLIP_VIEWPORT_MODIFY (1 << 10) -#define CMD_3D_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */ +#define _3DSTATE_SCISSOR_STATE_POINTERS 0x780f /* GEN6+ */ -#define CMD_3D_VS_STATE 0x7810 /* GEN6+ */ +#define _3DSTATE_VS 0x7810 /* GEN6+ */ /* DW2 */ # define GEN6_VS_SPF_MODE (1 << 31) # define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30) # define GEN6_VS_SAMPLER_COUNT_SHIFT 27 # define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16) /* DW4 */ # define GEN6_VS_DISPATCH_START_GRF_SHIFT 20 # define GEN6_VS_URB_READ_LENGTH_SHIFT 11 @@ -916,7 +899,7 @@ # define GEN6_VS_CACHE_DISABLE (1 << 1) # define GEN6_VS_ENABLE (1 << 0) -#define CMD_3D_GS_STATE 0x7811 /* GEN6+ */ +#define _3DSTATE_GS 0x7811 /* GEN6+ */ /* DW2 */ # define GEN6_GS_SPF_MODE (1 << 31) # define GEN6_GS_VECTOR_MASK_ENABLE (1 << 30) @@ -934,7 +917,7 @@ /* DW6 */ # define GEN6_GS_ENABLE (1 << 15) -#define CMD_3D_CLIP_STATE 0x7812 /* GEN6+ */ +#define _3DSTATE_CLIP 0x7812 /* GEN6+ */ /* DW1 */ # define GEN6_CLIP_STATISTICS_ENABLE (1 << 10) /** @@ -964,7 +947,7 @@ # define GEN6_CLIP_MAX_POINT_WIDTH_SHIFT 6 # define GEN6_CLIP_FORCE_ZERO_RTAINDEX (1 << 5) -#define CMD_3D_SF_STATE 0x7813 /* GEN6+ */ +#define _3DSTATE_SF 0x7813 /* GEN6+ */ /* DW1 */ # define GEN6_SF_NUM_OUTPUTS_SHIFT 22 # define GEN6_SF_SWIZZLE_ENABLE (1 << 21) @@ -1029,18 +1012,27 @@ # define ATTRIBUTE_0_CONST_SOURCE_SHIFT 9 # define ATTRIBUTE_0_SWIZZLE_SHIFT 6 # define ATTRIBUTE_0_SOURCE_SHIFT 0 + +# define ATTRIBUTE_SWIZZLE_INPUTATTR 0 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING 1 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_W 2 +# define ATTRIBUTE_SWIZZLE_INPUTATTR_FACING_W 3 +# define ATTRIBUTE_SWIZZLE_SHIFT 6 + /* DW16: Point sprite texture coordinate enables */ /* DW17: Constant interpolation enables */ /* DW18: attr 0-7 wrap shortest enables */ /* DW19: attr 8-16 wrap shortest enables */ -#define CMD_3D_WM_STATE 0x7814 /* GEN6+ */ +#define _3DSTATE_WM 0x7814 /* GEN6+ */ /* DW1: kernel pointer */ /* DW2 */ # define GEN6_WM_SPF_MODE (1 << 31) # define GEN6_WM_VECTOR_MASK_ENABLE (1 << 30) # define GEN6_WM_SAMPLER_COUNT_SHIFT 27 # define GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +# define GEN6_WM_FLOATING_POINT_MODE_IEEE_754 (0 << 16) +# define GEN6_WM_FLOATING_POINT_MODE_ALT (1 << 16) /* DW3: scratch space */ /* DW4 */ # define GEN6_WM_STATISTICS_ENABLE (1 << 31) @@ -1095,34 +1087,34 @@ /* DW7: kernel 1 pointer */ /* DW8: kernel 2 pointer */ -#define CMD_3D_CONSTANT_VS_STATE 0x7815 /* GEN6+ */ -#define CMD_3D_CONSTANT_GS_STATE 0x7816 /* GEN6+ */ -#define CMD_3D_CONSTANT_PS_STATE 0x7817 /* GEN6+ */ +#define _3DSTATE_CONSTANT_VS 0x7815 /* GEN6+ */ +#define _3DSTATE_CONSTANT_GS 0x7816 /* GEN6+ */ +#define _3DSTATE_CONSTANT_PS 0x7817 /* GEN6+ */ # define GEN6_CONSTANT_BUFFER_3_ENABLE (1 << 15) # define GEN6_CONSTANT_BUFFER_2_ENABLE (1 << 14) # define GEN6_CONSTANT_BUFFER_1_ENABLE (1 << 13) # define GEN6_CONSTANT_BUFFER_0_ENABLE (1 << 12) -#define CMD_3D_SAMPLE_MASK 0x7818 /* GEN6+ */ +#define _3DSTATE_SAMPLE_MASK 0x7818 /* GEN6+ */ -#define CMD_DRAW_RECT 0x7900 -#define CMD_BLEND_CONSTANT_COLOR 0x7901 -#define CMD_CHROMA_KEY 0x7904 -#define CMD_DEPTH_BUFFER 0x7905 -#define CMD_POLY_STIPPLE_OFFSET 0x7906 -#define CMD_POLY_STIPPLE_PATTERN 0x7907 -#define CMD_LINE_STIPPLE_PATTERN 0x7908 -#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 -#define CMD_AA_LINE_PARAMETERS 0x790a +#define _3DSTATE_DRAWING_RECTANGLE 0x7900 +#define _3DSTATE_BLEND_CONSTANT_COLOR 0x7901 +#define _3DSTATE_CHROMA_KEY 0x7904 +#define _3DSTATE_DEPTH_BUFFER 0x7905 +#define _3DSTATE_POLY_STIPPLE_OFFSET 0x7906 +#define _3DSTATE_POLY_STIPPLE_PATTERN 0x7907 +#define _3DSTATE_LINE_STIPPLE_PATTERN 0x7908 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x7909 +#define _3DSTATE_AA_LINE_PARAMETERS 0x790a /* G45+ */ -#define CMD_GS_SVB_INDEX 0x790b /* CTG+ */ +#define _3DSTATE_GS_SVB_INDEX 0x790b /* CTG+ */ /* DW1 */ # define SVB_INDEX_SHIFT 29 # define SVB_LOAD_INTERNAL_VERTEX_COUNT (1 << 0) /* SNB+ */ /* DW2: SVB index */ /* DW3: SVB maximum index */ -#define CMD_3D_MULTISAMPLE 0x790d /* SNB+ */ +#define _3DSTATE_MULTISAMPLE 0x790d /* GEN6+ */ /* DW1 */ # define MS_PIXEL_LOCATION_CENTER (0 << 4) # define MS_PIXEL_LOCATION_UPPER_LEFT (1 << 4) @@ -1130,7 +1122,10 @@ # define MS_NUMSAMPLES_4 (2 << 1) # define MS_NUMSAMPLES_8 (3 << 1) -#define CMD_3D_CLEAR_PARAMS 0x7910 /* ILK+ */ +#define _3DSTATE_STENCIL_BUFFER 0x790e /* ILK, SNB */ +#define _3DSTATE_HIER_DEPTH_BUFFER 0x790f /* ILK, SNB */ + +#define _3DSTATE_CLEAR_PARAMS 0x7910 /* ILK+ */ # define DEPTH_CLEAR_VALID (1 << 15) /* DW1: depth clear value */ diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index 6b61f7af15..111cb9974e 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -973,7 +973,7 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen) inst->bits3.dp_render_cache.send_commit_msg, inst->bits3.dp_render_cache.msg_length, inst->bits3.dp_render_cache.response_length); - } else if (gen >= 5) { + } else if (gen >= 5 /* FINISHME: || is_g4x */) { format (file, " (%d, %d, %d)", inst->bits3.dp_read_gen5.binding_table_index, inst->bits3.dp_read_gen5.msg_control, diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index a1f403ca4e..f5abe021c4 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -145,9 +145,14 @@ static void brw_emit_prim(struct brw_context *brw, prim_packet.start_vert_location = prim->start; if (prim->indexed) prim_packet.start_vert_location += brw->ib.start_vertex_offset; + else + prim_packet.start_vert_location += brw->vb.start_vertex_bias; prim_packet.instance_count = 1; prim_packet.start_instance_location = 0; prim_packet.base_vert_location = prim->basevertex; + if (prim->indexed) + prim_packet.base_vert_location += brw->vb.start_vertex_bias; + /* If we're set to always flush, do it before and after the primitive emit. * We want to catch both missed flushes that hurt instruction/state cache @@ -155,14 +160,14 @@ static void brw_emit_prim(struct brw_context *brw, * the besides the draw code. */ if (intel->always_flush_cache) { - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel); } if (prim_packet.verts_per_instance) { - intel_batchbuffer_data( brw->intel.batch, &prim_packet, - sizeof(prim_packet)); + intel_batchbuffer_data(&brw->intel, &prim_packet, + sizeof(prim_packet), false); } if (intel->always_flush_cache) { - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel); } } @@ -172,13 +177,16 @@ static void brw_merge_inputs( struct brw_context *brw, struct brw_vertex_info old = brw->vb.info; GLuint i; - for (i = 0; i < VERT_ATTRIB_MAX; i++) - drm_intel_bo_unreference(brw->vb.inputs[i].bo); + for (i = 0; i < brw->vb.nr_buffers; i++) { + drm_intel_bo_unreference(brw->vb.buffers[i].bo); + brw->vb.buffers[i].bo = NULL; + } + brw->vb.nr_buffers = 0; - memset(&brw->vb.inputs, 0, sizeof(brw->vb.inputs)); memset(&brw->vb.info, 0, sizeof(brw->vb.info)); for (i = 0; i < VERT_ATTRIB_MAX; i++) { + brw->vb.inputs[i].buffer = -1; brw->vb.inputs[i].glarray = arrays[i]; brw->vb.inputs[i].attrib = (gl_vert_attrib) i; @@ -303,7 +311,6 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx, struct brw_context *brw = brw_context(ctx); GLboolean retval = GL_FALSE; GLboolean warn = GL_FALSE; - GLboolean first_time = GL_TRUE; GLuint i; if (ctx->NewState) @@ -351,13 +358,10 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx, * an upper bound of how much we might emit in a single * brw_try_draw_prims(). */ - intel_batchbuffer_require_space(intel->batch, intel->batch->size / 4); + intel_batchbuffer_require_space(intel, 1024, false); hw_prim = brw_set_prim(brw, &prim[i]); - - if (first_time || (brw->state.dirty.brw & BRW_NEW_PRIMITIVE)) { - first_time = GL_FALSE; - + if (brw->state.dirty.brw) { brw_validate_state(brw); /* Various fallback checks: */ @@ -370,7 +374,7 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx, if (dri_bufmgr_check_aperture_space(brw->state.validated_bos, brw->state.validated_bo_count)) { static GLboolean warned; - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_flush(intel); /* Validate the state after we flushed the batch (which would have * changed the set of dirty state). If we still fail to @@ -399,7 +403,7 @@ static GLboolean brw_try_draw_prims( struct gl_context *ctx, } if (intel->always_flush_batch) - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_flush(intel); out: brw_state_cache_check_size(brw); @@ -460,25 +464,32 @@ void brw_draw_init( struct brw_context *brw ) { struct gl_context *ctx = &brw->intel.ctx; struct vbo_context *vbo = vbo_context(ctx); + int i; /* Register our drawing function: */ vbo->draw_prims = brw_draw_prims; + + for (i = 0; i < VERT_ATTRIB_MAX; i++) + brw->vb.inputs[i].buffer = -1; + brw->vb.nr_buffers = 0; + brw->vb.nr_enabled = 0; } void brw_draw_destroy( struct brw_context *brw ) { int i; - if (brw->vb.upload.bo != NULL) { - drm_intel_bo_unreference(brw->vb.upload.bo); - brw->vb.upload.bo = NULL; + for (i = 0; i < brw->vb.nr_buffers; i++) { + drm_intel_bo_unreference(brw->vb.buffers[i].bo); + brw->vb.buffers[i].bo = NULL; } + brw->vb.nr_buffers = 0; - for (i = 0; i < VERT_ATTRIB_MAX; i++) { - drm_intel_bo_unreference(brw->vb.inputs[i].bo); - brw->vb.inputs[i].bo = NULL; + for (i = 0; i < brw->vb.nr_enabled; i++) { + brw->vb.enabled[i]->buffer = -1; } + brw->vb.nr_enabled = 0; drm_intel_bo_unreference(brw->ib.bo); brw->ib.bo = NULL; diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c index 405e161bdb..78885b58a8 100644 --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c @@ -25,6 +25,7 @@ * **************************************************************************/ +#undef NDEBUG #include "main/glheader.h" #include "main/bufferobj.h" @@ -209,7 +210,7 @@ static GLuint get_surface_type( GLenum type, GLuint size, case GL_UNSIGNED_BYTE: return ubyte_types_scale[size]; case GL_FIXED: return float_types[size]; /* was uploaded as floats */ default: assert(0); return 0; - } + } } } @@ -227,11 +228,11 @@ static GLuint get_size( GLenum type ) case GL_UNSIGNED_SHORT: return sizeof(GLushort); case GL_UNSIGNED_BYTE: return sizeof(GLubyte); case GL_FIXED: return sizeof(GLfloat); /* will be uploaded as floats */ - default: return 0; - } + default: assert(0); return 0; + } } -static GLuint get_index_type(GLenum type) +static GLuint get_index_type(GLenum type) { switch (type) { case GL_UNSIGNED_BYTE: return BRW_INDEX_BYTE; @@ -241,60 +242,23 @@ static GLuint get_index_type(GLenum type) } } -static void wrap_buffers( struct brw_context *brw, - GLuint size ) -{ - if (size < BRW_UPLOAD_INIT_SIZE) - size = BRW_UPLOAD_INIT_SIZE; - - brw->vb.upload.offset = 0; - - if (brw->vb.upload.bo != NULL) - drm_intel_bo_unreference(brw->vb.upload.bo); - brw->vb.upload.bo = drm_intel_bo_alloc(brw->intel.bufmgr, "temporary VBO", - size, 1); -} - -static void get_space( struct brw_context *brw, - GLuint size, - drm_intel_bo **bo_return, - GLuint *offset_return ) -{ - size = ALIGN(size, 64); - - if (brw->vb.upload.bo == NULL || - brw->vb.upload.offset + size > brw->vb.upload.bo->size) { - wrap_buffers(brw, size); - } - - assert(*bo_return == NULL); - drm_intel_bo_reference(brw->vb.upload.bo); - *bo_return = brw->vb.upload.bo; - *offset_return = brw->vb.upload.offset; - brw->vb.upload.offset += size; -} - static void -copy_array_to_vbo_array( struct brw_context *brw, - struct brw_vertex_element *element, - GLuint dst_stride) +copy_array_to_vbo_array(struct brw_context *brw, + struct brw_vertex_element *element, + int min, int max, + struct brw_vertex_buffer *buffer, + GLuint dst_stride) { - GLuint size = element->count * dst_stride; - - get_space(brw, size, &element->bo, &element->offset); - - if (element->glarray->StrideB == 0) { - assert(element->count == 1); - element->stride = 0; - } else { - element->stride = dst_stride; - } + int src_stride = element->glarray->StrideB; + const unsigned char *src = element->glarray->Ptr + min * src_stride; + int count = max - min + 1; + GLuint size = count * dst_stride; /* upload as floats */ if (element->glarray->Type == GL_FIXED) { + char * const map = intel_upload_map(&brw->intel, size, dst_stride); + char *dst = map; drm_intel_bo *src_bo = NULL; - const char *src; - char *dst; GLint i, j; /* map source bo */ @@ -305,68 +269,58 @@ copy_array_to_vbo_array( struct brw_context *brw, src_bo = intel_bufferobj_buffer(&brw->intel, intel_buffer, INTEL_READ); drm_intel_gem_bo_map_gtt(src_bo); - src = (const char *) element->bo->virtual + - (unsigned long) element->glarray->Ptr; - } - else { - src = (const char *) element->glarray->Ptr; + src = (const unsigned char *) src_bo->virtual + (unsigned long) src; } - drm_intel_gem_bo_map_gtt(element->bo); - dst = (char *) element->bo->virtual + element->offset; - - for (i = 0; i < element->count; i++) { + while (count--) { const GLint *s = (GLint *) src; GLfloat *d = (GLfloat *) dst; + int i; - for (j = 0; j < element->glarray->Size; j++) - d[j] = s[j] / 65536.0f; + for (i = 0; i < element->glarray->Size; i++) + d[i] = s[i] / 65536.0f; - src += element->glarray->StrideB; - dst += dst_stride; + src += src_stride; + dst += dst_stride; } - drm_intel_gem_bo_unmap_gtt(element->bo); + intel_upload_unmap(&brw->intel, map, size, dst_stride, + &buffer->bo, &buffer->offset); + if (src_bo) drm_intel_gem_bo_unmap_gtt(src_bo); return; } - if (dst_stride == element->glarray->StrideB) { - drm_intel_gem_bo_map_gtt(element->bo); - memcpy((char *)element->bo->virtual + element->offset, - element->glarray->Ptr, size); - drm_intel_gem_bo_unmap_gtt(element->bo); + if (dst_stride == src_stride) { + intel_upload_data(&brw->intel, src, size, dst_stride, + &buffer->bo, &buffer->offset); } else { - char *dest; - const unsigned char *src = element->glarray->Ptr; - int i; - - drm_intel_gem_bo_map_gtt(element->bo); - dest = element->bo->virtual; - dest += element->offset; - - for (i = 0; i < element->count; i++) { - memcpy(dest, src, dst_stride); - src += element->glarray->StrideB; - dest += dst_stride; - } + char * const map = intel_upload_map(&brw->intel, size, dst_stride); + char *dst = map; - drm_intel_gem_bo_unmap_gtt(element->bo); + while (count--) { + memcpy(dst, src, dst_stride); + src += src_stride; + dst += dst_stride; + } + intel_upload_unmap(&brw->intel, map, size, dst_stride, + &buffer->bo, &buffer->offset); } + buffer->stride = dst_stride; } static void brw_prepare_vertices(struct brw_context *brw) { struct gl_context *ctx = &brw->intel.ctx; struct intel_context *intel = intel_context(ctx); - GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; - GLuint i; + GLbitfield vs_inputs = brw->vs.prog_data->inputs_read; const unsigned char *ptr = NULL; - GLuint interleave = 0; + GLuint interleaved = 0, total_size = 0; unsigned int min_index = brw->vb.min_index; unsigned int max_index = brw->vb.max_index; + int delta, i, j; struct brw_vertex_element *upload[VERT_ATTRIB_MAX]; GLuint nr_uploads = 0; @@ -379,13 +333,20 @@ static void brw_prepare_vertices(struct brw_context *brw) /* Accumulate the list of enabled arrays. */ brw->vb.nr_enabled = 0; while (vs_inputs) { - GLuint i = _mesa_ffsll(vs_inputs) - 1; + GLuint i = ffs(vs_inputs) - 1; struct brw_vertex_element *input = &brw->vb.inputs[i]; vs_inputs &= ~(1 << i); - brw->vb.enabled[brw->vb.nr_enabled++] = input; + if (input->glarray->Size && get_size(input->glarray->Type)) + brw->vb.enabled[brw->vb.nr_enabled++] = input; } + if (brw->vb.nr_enabled == 0) + return; + + if (brw->vb.nr_buffers) + goto validate; + /* XXX: In the rare cases where this happens we fallback all * the way to software rasterization, although a tnl fallback * would be sufficient. I don't know of *any* real world @@ -397,24 +358,44 @@ static void brw_prepare_vertices(struct brw_context *brw) return; } - for (i = 0; i < brw->vb.nr_enabled; i++) { + for (i = j = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; + const struct gl_client_array *glarray = input->glarray; + int type_size = get_size(glarray->Type); - input->element_size = get_size(input->glarray->Type) * input->glarray->Size; + input->element_size = type_size * glarray->Size; - if (_mesa_is_bufferobj(input->glarray->BufferObj) && - input->glarray->Type != GL_FIXED) { + if (_mesa_is_bufferobj(glarray->BufferObj) && + glarray->Type != GL_FIXED) { struct intel_buffer_object *intel_buffer = - intel_buffer_object(input->glarray->BufferObj); - - /* Named buffer object: Just reference its contents directly. */ - drm_intel_bo_unreference(input->bo); - input->bo = intel_bufferobj_buffer(intel, intel_buffer, - INTEL_READ); - drm_intel_bo_reference(input->bo); - input->offset = (unsigned long)input->glarray->Ptr; - input->stride = input->glarray->StrideB; - input->count = input->glarray->_MaxElement; + intel_buffer_object(glarray->BufferObj); + int k; + + for (k = 0; k < i; k++) { + const struct gl_client_array *other = brw->vb.enabled[k]->glarray; + if (glarray->BufferObj == other->BufferObj && + glarray->StrideB == other->StrideB && + (uintptr_t)(glarray->Ptr - other->Ptr) < glarray->StrideB) + { + input->buffer = brw->vb.enabled[k]->buffer; + input->offset = glarray->Ptr - other->Ptr; + break; + } + } + if (k == i) { + struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; + + /* Named buffer object: Just reference its contents directly. */ + buffer->bo = intel_bufferobj_source(intel, + intel_buffer, type_size, + &buffer->offset); + drm_intel_bo_reference(buffer->bo); + buffer->offset += (uintptr_t)glarray->Ptr; + buffer->stride = glarray->StrideB; + + input->buffer = j++; + input->offset = 0; + } /* This is a common place to reach if the user mistakenly supplies * a pointer in place of a VBO offset. If we just let it go through, @@ -428,71 +409,170 @@ static void brw_prepare_vertices(struct brw_context *brw) * probably a service to the poor programmer to do so rather than * trying to just not render. */ - assert(input->offset < input->bo->size); + assert(input->offset < brw->vb.buffers[input->buffer].bo->size); } else { - input->count = input->glarray->StrideB ? max_index + 1 : 1; - if (input->bo != NULL) { - /* Already-uploaded vertex data is present from a previous - * prepare_vertices, but we had to re-validate state due to - * check_aperture failing and a new batch being produced. - */ - continue; - } - /* Queue the buffer object up to be uploaded in the next pass, * when we've decided if we're doing interleaved or not. */ - if (input->attrib == VERT_ATTRIB_POS) { + if (nr_uploads == 0) { /* Position array not properly enabled: */ - if (input->glarray->StrideB == 0) { + if (input->attrib == VERT_ATTRIB_POS && glarray->StrideB == 0) { intel->Fallback = GL_TRUE; /* boolean, not bitfield */ return; } - interleave = input->glarray->StrideB; - ptr = input->glarray->Ptr; + interleaved = glarray->StrideB; + ptr = glarray->Ptr; + } + else if (interleaved != glarray->StrideB || + (uintptr_t)(glarray->Ptr - ptr) > interleaved) + { + interleaved = 0; } - else if (interleave != input->glarray->StrideB || - (const unsigned char *)input->glarray->Ptr - ptr < 0 || - (const unsigned char *)input->glarray->Ptr - ptr > interleave) + else if ((uintptr_t)(glarray->Ptr - ptr) & (type_size -1)) { - interleave = 0; + /* enforce natural alignment (for doubles) */ + interleaved = 0; } upload[nr_uploads++] = input; + total_size = ALIGN(total_size, type_size); + total_size += input->element_size; } } + /* If we need to upload all the arrays, then we can trim those arrays to + * only the used elements [min_index, max_index] so long as we adjust all + * the values used in the 3DPRIMITIVE i.e. by setting the vertex bias. + */ + brw->vb.start_vertex_bias = 0; + delta = min_index; + if (nr_uploads == brw->vb.nr_enabled) { + brw->vb.start_vertex_bias = -delta; + delta = 0; + } + if (delta && !brw->intel.intelScreen->relaxed_relocations) + min_index = delta = 0; + /* Handle any arrays to be uploaded. */ - if (nr_uploads > 1 && interleave && interleave <= 256) { - /* All uploads are interleaved, so upload the arrays together as - * interleaved. First, upload the contents and set up upload[0]. - */ - copy_array_to_vbo_array(brw, upload[0], interleave); - - for (i = 1; i < nr_uploads; i++) { - /* Then, just point upload[i] at upload[0]'s buffer. */ - upload[i]->stride = interleave; - upload[i]->offset = upload[0]->offset + - ((const unsigned char *)upload[i]->glarray->Ptr - ptr); - upload[i]->bo = upload[0]->bo; - drm_intel_bo_reference(upload[i]->bo); + if (nr_uploads > 1) { + if (interleaved && interleaved <= 2*total_size) { + struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; + /* All uploads are interleaved, so upload the arrays together as + * interleaved. First, upload the contents and set up upload[0]. + */ + copy_array_to_vbo_array(brw, upload[0], min_index, max_index, + buffer, interleaved); + buffer->offset -= delta * interleaved; + + for (i = 0; i < nr_uploads; i++) { + /* Then, just point upload[i] at upload[0]'s buffer. */ + upload[i]->offset = + ((const unsigned char *)upload[i]->glarray->Ptr - ptr); + upload[i]->buffer = j; + } + j++; + + nr_uploads = 0; } - } - else { - /* Upload non-interleaved arrays */ - for (i = 0; i < nr_uploads; i++) { - copy_array_to_vbo_array(brw, upload[i], upload[i]->element_size); + else if (total_size < 2048) { + /* Upload non-interleaved arrays into a single interleaved array */ + struct brw_vertex_buffer *buffer; + int count = max_index - min_index + 1; + int offset; + char *map; + + map = intel_upload_map(&brw->intel, total_size * count, total_size); + for (i = offset = 0; i < nr_uploads; i++) { + const unsigned char *src = upload[i]->glarray->Ptr; + int size = upload[i]->element_size; + int stride = upload[i]->glarray->StrideB; + char *dst; + int n; + + offset = ALIGN(offset, get_size(upload[i]->glarray->Type)); + dst = map + offset; + src += min_index * stride; + + if (upload[i]->glarray->Type == GL_FIXED) { + for (n = 0; n < count; n++) { + const GLint *s = (GLint *) src; + GLfloat *d = (GLfloat *) dst; + int k; + + for (k = 0; k < upload[i]->glarray->Size; k++) { + d[k] = s[k] / 65536.0f; + } + + src += stride; + dst += total_size; + } + } + else { + for (n = 0; n < count; n++) { + memcpy(dst, src, size); + src += stride; + dst += total_size; + } + } + + upload[i]->offset = offset; + upload[i]->buffer = j; + + offset += size; + } + assert(offset == total_size); + buffer = &brw->vb.buffers[j++]; + intel_upload_unmap(&brw->intel, map, offset * count, offset, + &buffer->bo, &buffer->offset); + buffer->stride = offset; + buffer->offset -= delta * offset; + + nr_uploads = 0; } } + /* Upload non-interleaved arrays */ + for (i = 0; i < nr_uploads; i++) { + struct brw_vertex_buffer *buffer = &brw->vb.buffers[j]; + copy_array_to_vbo_array(brw, upload[i], min_index, max_index, + buffer, upload[i]->element_size); + buffer->offset -= delta * buffer->stride; + upload[i]->buffer = j++; + upload[i]->offset = 0; + } - brw_prepare_query_begin(brw); + /* can we simply extend the current vb? */ + if (j == brw->vb.nr_current_buffers) { + int delta = 0; + for (i = 0; i < j; i++) { + int d; + + if (brw->vb.current_buffers[i].handle != brw->vb.buffers[i].bo->handle || + brw->vb.current_buffers[i].stride != brw->vb.buffers[i].stride) + break; + + d = brw->vb.buffers[i].offset - brw->vb.current_buffers[i].offset; + if (i == 0) + delta = d / brw->vb.current_buffers[i].stride; + if (delta * brw->vb.current_buffers[i].stride != d) + break; + } - for (i = 0; i < brw->vb.nr_enabled; i++) { - struct brw_vertex_element *input = brw->vb.enabled[i]; + if (i == j) { + brw->vb.start_vertex_bias += delta; + while (--j >= 0) + drm_intel_bo_unreference(brw->vb.buffers[j].bo); + j = 0; + } + } - brw_add_validated_bo(brw, input->bo); + brw->vb.nr_buffers = j; + +validate: + brw_prepare_query_begin(brw); + for (i = 0; i < brw->vb.nr_buffers; i++) { + brw_add_validated_bo(brw, brw->vb.buffers[i].bo); } } @@ -529,49 +609,44 @@ static void brw_emit_vertices(struct brw_context *brw) (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) | (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) | (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT)); - ADVANCE_BATCH(); + CACHED_BATCH(); return; } /* Now emit VB and VEP state packets. - * - * This still defines a hardware VB for each input, even if they - * are interleaved or from the same VBO. TBD if this makes a - * performance difference. */ - BEGIN_BATCH(1 + brw->vb.nr_enabled * 4); - OUT_BATCH((CMD_VERTEX_BUFFER << 16) | - ((1 + brw->vb.nr_enabled * 4) - 2)); - for (i = 0; i < brw->vb.nr_enabled; i++) { - struct brw_vertex_element *input = brw->vb.enabled[i]; - uint32_t dw0; + if (brw->vb.nr_buffers) { + BEGIN_BATCH(1 + 4*brw->vb.nr_buffers); + OUT_BATCH((CMD_VERTEX_BUFFER << 16) | (4*brw->vb.nr_buffers - 1)); + for (i = 0; i < brw->vb.nr_buffers; i++) { + struct brw_vertex_buffer *buffer = &brw->vb.buffers[i]; + uint32_t dw0; + + if (intel->gen >= 6) { + dw0 = GEN6_VB0_ACCESS_VERTEXDATA | (i << GEN6_VB0_INDEX_SHIFT); + } else { + dw0 = BRW_VB0_ACCESS_VERTEXDATA | (i << BRW_VB0_INDEX_SHIFT); + } - if (intel->gen >= 6) { - dw0 = GEN6_VB0_ACCESS_VERTEXDATA | - (i << GEN6_VB0_INDEX_SHIFT); - } else { - dw0 = BRW_VB0_ACCESS_VERTEXDATA | - (i << BRW_VB0_INDEX_SHIFT); + OUT_BATCH(dw0 | (buffer->stride << BRW_VB0_PITCH_SHIFT)); + OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset); + if (intel->gen >= 5) { + OUT_RELOC(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->bo->size - 1); + } else + OUT_BATCH(buffer->bo->size / buffer->stride); + OUT_BATCH(0); /* Instance data step rate */ + + brw->vb.current_buffers[i].handle = buffer->bo->handle; + brw->vb.current_buffers[i].offset = buffer->offset; + brw->vb.current_buffers[i].stride = buffer->stride; } - - OUT_BATCH(dw0 | - (input->stride << BRW_VB0_PITCH_SHIFT)); - OUT_RELOC(input->bo, - I915_GEM_DOMAIN_VERTEX, 0, - input->offset); - if (intel->gen >= 5) { - OUT_RELOC(input->bo, - I915_GEM_DOMAIN_VERTEX, 0, - input->bo->size - 1); - } else - OUT_BATCH(input->stride ? input->count : 0); - OUT_BATCH(0); /* Instance data step rate */ + brw->vb.nr_current_buffers = i; + ADVANCE_BATCH(); } - ADVANCE_BATCH(); BEGIN_BATCH(1 + brw->vb.nr_enabled * 2); - OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | ((1 + brw->vb.nr_enabled * 2) - 2)); + OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | (2*brw->vb.nr_enabled - 1)); for (i = 0; i < brw->vb.nr_enabled; i++) { struct brw_vertex_element *input = brw->vb.enabled[i]; uint32_t format = get_surface_type(input->glarray->Type, @@ -592,15 +667,15 @@ static void brw_emit_vertices(struct brw_context *brw) } if (intel->gen >= 6) { - OUT_BATCH((i << GEN6_VE0_INDEX_SHIFT) | + OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) | GEN6_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); } else { - OUT_BATCH((i << BRW_VE0_INDEX_SHIFT) | + OUT_BATCH((input->buffer << BRW_VE0_INDEX_SHIFT) | BRW_VE0_VALID | (format << BRW_VE0_FORMAT_SHIFT) | - (0 << BRW_VE0_SRC_OFFSET_SHIFT)); + (input->offset << BRW_VE0_SRC_OFFSET_SHIFT)); } if (intel->gen >= 5) @@ -615,7 +690,7 @@ static void brw_emit_vertices(struct brw_context *brw) (comp3 << BRW_VE1_COMPONENT_3_SHIFT) | ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT)); } - ADVANCE_BATCH(); + CACHED_BATCH(); } const struct brw_tracked_state brw_vertices = { @@ -644,25 +719,19 @@ static void brw_prepare_indices(struct brw_context *brw) ib_type_size = get_size(index_buffer->type); ib_size = ib_type_size * index_buffer->count; - bufferobj = index_buffer->obj;; + bufferobj = index_buffer->obj; /* Turn into a proper VBO: */ if (!_mesa_is_bufferobj(bufferobj)) { - brw->ib.start_vertex_offset = 0; /* Get new bufferobj, offset: */ - get_space(brw, ib_size, &bo, &offset); - - /* Straight upload - */ - drm_intel_gem_bo_map_gtt(bo); - memcpy((char *)bo->virtual + offset, index_buffer->ptr, ib_size); - drm_intel_gem_bo_unmap_gtt(bo); + intel_upload_data(&brw->intel, index_buffer->ptr, ib_size, ib_type_size, + &bo, &offset); + brw->ib.start_vertex_offset = offset / ib_type_size; } else { offset = (GLuint) (unsigned long) index_buffer->ptr; - brw->ib.start_vertex_offset = 0; /* If the index buffer isn't aligned to its element size, we have to * rebase it into a temporary. @@ -674,41 +743,42 @@ static void brw_prepare_indices(struct brw_context *brw) bufferobj); map += offset; - get_space(brw, ib_size, &bo, &offset); - - drm_intel_bo_subdata(bo, offset, ib_size, map); + intel_upload_data(&brw->intel, map, ib_size, ib_type_size, + &bo, &offset); + brw->ib.start_vertex_offset = offset / ib_type_size; ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER_ARB, bufferobj); } else { - bo = intel_bufferobj_buffer(intel, intel_buffer_object(bufferobj), - INTEL_READ); - drm_intel_bo_reference(bo); - /* Use CMD_3D_PRIM's start_vertex_offset to avoid re-uploading * the index buffer state when we're just moving the start index * of our drawing. */ brw->ib.start_vertex_offset = offset / ib_type_size; - offset = 0; - ib_size = bo->size; + + bo = intel_bufferobj_source(intel, + intel_buffer_object(bufferobj), + ib_type_size, + &offset); + drm_intel_bo_reference(bo); + + brw->ib.start_vertex_offset += offset / ib_type_size; } } - if (brw->ib.bo != bo || - brw->ib.offset != offset || - brw->ib.size != ib_size) - { + if (brw->ib.bo != bo) { drm_intel_bo_unreference(brw->ib.bo); brw->ib.bo = bo; - brw->ib.offset = offset; - brw->ib.size = ib_size; + brw_add_validated_bo(brw, brw->ib.bo); brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER; } else { drm_intel_bo_unreference(bo); } - brw_add_validated_bo(brw, brw->ib.bo); + if (index_buffer->type != brw->ib.type) { + brw->ib.type = index_buffer->type; + brw->state.dirty.brw |= BRW_NEW_INDEX_BUFFER; + } } const struct brw_tracked_state brw_indices = { @@ -728,29 +798,18 @@ static void brw_emit_index_buffer(struct brw_context *brw) if (index_buffer == NULL) return; - /* Emit the indexbuffer packet: - */ - { - struct brw_indexbuffer ib; - - memset(&ib, 0, sizeof(ib)); - - ib.header.bits.opcode = CMD_INDEX_BUFFER; - ib.header.bits.length = sizeof(ib)/4 - 2; - ib.header.bits.index_format = get_index_type(index_buffer->type); - ib.header.bits.cut_index_enable = 0; - - BEGIN_BATCH(4); - OUT_BATCH( ib.header.dword ); - OUT_RELOC(brw->ib.bo, - I915_GEM_DOMAIN_VERTEX, 0, - brw->ib.offset); - OUT_RELOC(brw->ib.bo, - I915_GEM_DOMAIN_VERTEX, 0, - brw->ib.offset + brw->ib.size - 1); - OUT_BATCH( 0 ); - ADVANCE_BATCH(); - } + BEGIN_BATCH(3); + OUT_BATCH(CMD_INDEX_BUFFER << 16 | + /* cut index enable << 10 */ + get_index_type(index_buffer->type) << 8 | + 1); + OUT_RELOC(brw->ib.bo, + I915_GEM_DOMAIN_VERTEX, 0, + 0); + OUT_RELOC(brw->ib.bo, + I915_GEM_DOMAIN_VERTEX, 0, + brw->ib.bo->size - 1); + ADVANCE_BATCH(); } const struct brw_tracked_state brw_index_buffer = { diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 4dbdc52210..119ffc7237 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -861,7 +861,8 @@ void brw_fb_WRITE(struct brw_compile *p, GLuint binding_table_index, GLuint msg_length, GLuint response_length, - GLboolean eot); + GLboolean eot, + GLboolean header_present); void brw_SAMPLE(struct brw_compile *p, struct brw_reg dest, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index f62fc7ebfb..88131c432e 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -536,6 +536,16 @@ brw_set_dp_read_message(struct brw_context *brw, insn->bits3.dp_read_gen5.end_of_thread = 0; insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; insn->bits2.send_gen5.end_of_thread = 0; + } else if (intel->is_g4x) { + insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ + insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ + insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/ + insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/ + insn->bits3.dp_read_g4x.response_length = response_length; /*16:19*/ + insn->bits3.dp_read_g4x.msg_length = msg_length; /*20:23*/ + insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ + insn->bits3.dp_read_g4x.pad1 = 0; + insn->bits3.dp_read_g4x.end_of_thread = 0; } else { insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ @@ -1708,29 +1718,22 @@ void brw_dp_READ_4_vs(struct brw_compile *p, GLuint location, GLuint bind_table_index) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint msg_reg_nr = 1; - struct brw_reg b; - /* - printf("vs const read msg, location %u, msg_reg_nr %d\n", - location, msg_reg_nr); - */ + if (intel->gen >= 6) + location /= 16; /* Setup MRF[1] with location/offset into const buffer */ brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - /* XXX I think we're setting all the dwords of MRF[1] to 'location'. - * when the docs say only dword[2] should be set. Hmmm. But it works. - */ - b = brw_message_reg(msg_reg_nr); - b = retype(b, BRW_REGISTER_TYPE_UD); - /*b = get_element_ud(b, 2);*/ - brw_MOV(p, b, brw_imm_ud(location)); - + brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2), + BRW_REGISTER_TYPE_UD), + brw_imm_ud(location)); brw_pop_insn_state(p); insn = next_insn(p, BRW_OPCODE_SEND); @@ -1741,7 +1744,11 @@ void brw_dp_READ_4_vs(struct brw_compile *p, insn->header.mask_control = BRW_MASK_DISABLE; brw_set_dest(p, insn, dest); - brw_set_src0(insn, brw_null_reg()); + if (intel->gen >= 6) { + brw_set_src0(insn, brw_message_reg(msg_reg_nr)); + } else { + brw_set_src0(insn, brw_null_reg()); + } brw_set_dp_read_message(p->brw, insn, @@ -1768,6 +1775,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, /* Setup MRF[1] with offset into const buffer */ brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_predicate_control(p, BRW_PREDICATE_NONE); @@ -1775,7 +1783,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, /* M1.0 is block offset 0, M1.4 is block offset 1, all other * fields ignored. */ - brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), + brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D), addr_reg, brw_imm_d(offset)); brw_pop_insn_state(p); @@ -1816,12 +1824,12 @@ void brw_fb_WRITE(struct brw_compile *p, GLuint binding_table_index, GLuint msg_length, GLuint response_length, - GLboolean eot) + GLboolean eot, + GLboolean header_present) { struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint msg_control, msg_type; - GLboolean header_present = GL_TRUE; if (intel->gen >= 6 && binding_table_index == 0) { insn = next_insn(p, BRW_OPCODE_SENDC); @@ -1833,9 +1841,6 @@ void brw_fb_WRITE(struct brw_compile *p, insn->header.compression_control = BRW_COMPRESSION_NONE; if (intel->gen >= 6) { - if (msg_length == 4) - header_present = GL_FALSE; - /* headerless version, just submit color payload */ src0 = brw_message_reg(msg_reg_nr); @@ -1940,7 +1945,8 @@ void brw_SAMPLE(struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_MOV(p, m1, brw_vec8_grf(0,0)); + brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD), + retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); brw_pop_insn_state(p); @@ -2001,7 +2007,8 @@ void brw_SAMPLE(struct brw_compile *p, */ brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, reg, reg); + brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD), + retype(reg, BRW_REGISTER_TYPE_UD)); brw_pop_insn_state(p); } @@ -2033,7 +2040,8 @@ void brw_urb_WRITE(struct brw_compile *p, if (intel->gen >= 6) { brw_push_insn_state(p); brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_MOV(p, brw_message_reg(msg_reg_nr), src0); + brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), + retype(src0, BRW_REGISTER_TYPE_UD)); brw_pop_insn_state(p); src0 = brw_message_reg(msg_reg_nr); } diff --git a/src/mesa/drivers/dri/i965/brw_fallback.c b/src/mesa/drivers/dri/i965/brw_fallback.c index 6796fb208d..d0b0c22abf 100644 --- a/src/mesa/drivers/dri/i965/brw_fallback.c +++ b/src/mesa/drivers/dri/i965/brw_fallback.c @@ -36,8 +36,6 @@ #include "swrast/swrast.h" #include "tnl/tnl.h" #include "brw_context.h" -#include "intel_fbo.h" -#include "intel_regions.h" #define FILE_DEBUG_FLAG DEBUG_FALLBACKS @@ -63,49 +61,14 @@ static GLboolean do_check_fallback(struct brw_context *brw) for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; if (texUnit->_ReallyEnabled) { - struct intel_texture_object *intelObj = intel_texture_object(texUnit->_Current); - struct gl_texture_image *texImage = intelObj->base.Image[0][intelObj->firstLevel]; + struct gl_texture_object *tex_obj = texUnit->_Current; + struct gl_texture_image *texImage = tex_obj->Image[0][tex_obj->BaseLevel]; if (texImage->Border) { DBG("FALLBACK: texture border\n"); return GL_TRUE; } } } - - /* _NEW_STENCIL - */ - if (ctx->Stencil._Enabled && - (ctx->DrawBuffer->Name == 0 && !brw->intel.hw_stencil)) { - DBG("FALLBACK: stencil\n"); - return GL_TRUE; - } - - /* _NEW_BUFFERS */ - if (!brw->has_surface_tile_offset) { - for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; - struct intel_renderbuffer *irb = intel_renderbuffer(rb); - - /* The original gen4 hardware couldn't set up WM surfaces pointing - * at an offset within a tile, which can happen when rendering to - * anything but the base level of a texture or the +X face/0 depth. - * This was fixed with the 4 Series hardware. - * - * For these original chips, you would have to make the depth and - * color destination surfaces include information on the texture - * type, LOD, face, and various limits to use them as a destination. - * I would have done this, but there's also a nasty requirement that - * the depth and the color surfaces all be of the same LOD, which - * may be a worse requirement than this alignment. (Also, we may - * want to just demote the texture to untiled, instead). - */ - if (irb->region && irb->region->tiling != I915_TILING_NONE && - (irb->region->draw_offset & 4095)) { - DBG("FALLBACK: non-tile-aligned destination for tiled FBO\n"); - return GL_TRUE; - } - } - } return GL_FALSE; } @@ -117,7 +80,7 @@ static void check_fallback(struct brw_context *brw) const struct brw_tracked_state brw_check_fallback = { .dirty = { - .mesa = _NEW_BUFFERS | _NEW_RENDERMODE | _NEW_TEXTURE | _NEW_STENCIL, + .mesa = _NEW_RENDERMODE | _NEW_TEXTURE | _NEW_STENCIL, .brw = 0, .cache = 0 }, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 6bb195b487..2c997b4eb3 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -41,13 +41,13 @@ extern "C" { #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" -#include "talloc.h" } #include "brw_fs.h" #include "../glsl/glsl_types.h" #include "../glsl/ir_optimization.h" #include "../glsl/ir_print_visitor.h" +#define MAX_INSTRUCTION (1 << 30) static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg); struct gl_shader * @@ -55,7 +55,7 @@ brw_new_shader(struct gl_context *ctx, GLuint name, GLuint type) { struct brw_shader *shader; - shader = talloc_zero(NULL, struct brw_shader); + shader = rzalloc(NULL, struct brw_shader); if (shader) { shader->base.Type = type; shader->base.Name = name; @@ -69,7 +69,7 @@ struct gl_shader_program * brw_new_shader_program(struct gl_context *ctx, GLuint name) { struct brw_shader_program *prog; - prog = talloc_zero(NULL, struct brw_shader_program); + prog = rzalloc(NULL, struct brw_shader_program); if (prog) { prog->base.Name = name; _mesa_init_shader_program(ctx, &prog->base); @@ -89,14 +89,17 @@ brw_compile_shader(struct gl_context *ctx, struct gl_shader *shader) GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) { + struct brw_context *brw = brw_context(ctx); + struct intel_context *intel = &brw->intel; + struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; if (shader != NULL) { - void *mem_ctx = talloc_new(NULL); + void *mem_ctx = ralloc_context(NULL); bool progress; if (shader->ir) - talloc_free(shader->ir); + ralloc_free(shader->ir); shader->ir = new(shader) exec_list; clone_ir_list(mem_ctx, shader->ir, shader->base.ir); @@ -107,8 +110,24 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) SUB_TO_ADD_NEG | EXP_TO_EXP2 | LOG_TO_LOG2); + + /* Pre-gen6 HW can only nest if-statements 16 deep. Beyond this, + * if-statements need to be flattened. + */ + if (intel->gen < 6) + lower_if_to_cond_assign(shader->ir, 16); + do_lower_texture_projection(shader->ir); + do_vec_index_to_cond_assign(shader->ir); brw_do_cubemap_normalize(shader->ir); + lower_noise(shader->ir); + lower_quadop_vector(shader->ir, false); + lower_variable_index_to_cond_assign(shader->ir, + GL_TRUE, /* input */ + GL_TRUE, /* output */ + GL_TRUE, /* temp */ + GL_TRUE /* uniform */ + ); do { progress = false; @@ -123,22 +142,12 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) ) || progress; progress = do_common_optimization(shader->ir, true, 32) || progress; - - progress = lower_noise(shader->ir) || progress; - progress = - lower_variable_index_to_cond_assign(shader->ir, - GL_TRUE, /* input */ - GL_TRUE, /* output */ - GL_TRUE, /* temp */ - GL_TRUE /* uniform */ - ) || progress; - progress = lower_quadop_vector(shader->ir, false) || progress; } while (progress); validate_ir_tree(shader->ir); reparent_ir(shader->ir, shader->ir); - talloc_free(mem_ctx); + ralloc_free(mem_ctx); } if (!_mesa_ir_link_shader(ctx, prog)) @@ -202,6 +211,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) return 2; case FS_OPCODE_TEX: case FS_OPCODE_TXB: + case FS_OPCODE_TXD: case FS_OPCODE_TXL: return 1; case FS_OPCODE_FB_WRITE: @@ -225,8 +235,8 @@ fs_visitor::virtual_grf_alloc(int size) virtual_grf_array_size = 16; else virtual_grf_array_size *= 2; - virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes, - int, virtual_grf_array_size); + virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, + virtual_grf_array_size); /* This slot is always unused. */ virtual_grf_sizes[0] = 0; @@ -304,7 +314,6 @@ int fs_visitor::setup_uniform_values(int loc, const glsl_type *type) { unsigned int offset = 0; - float *vec_values; if (type->is_matrix()) { const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT, @@ -323,7 +332,6 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type) case GLSL_TYPE_UINT: case GLSL_TYPE_INT: case GLSL_TYPE_BOOL: - vec_values = fp->Base.Parameters->ParameterValues[loc]; for (unsigned int i = 0; i < type->vector_elements; i++) { unsigned int param = c->prog_data.nr_params++; @@ -347,8 +355,8 @@ fs_visitor::setup_uniform_values(int loc, const glsl_type *type) c->prog_data.param_convert[param] = PARAM_NO_CONVERT; break; } - - c->prog_data.param[param] = &vec_values[i]; + this->param_index[param] = loc; + this->param_offset[param] = i; } return 1; @@ -419,7 +427,6 @@ fs_visitor::setup_builtin_uniform_values(ir_variable *ir) */ int index = _mesa_add_state_reference(this->fp->Base.Parameters, (gl_state_index *)tokens); - float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; /* Add each of the unique swizzles of the element as a * parameter. This'll end up matching the expected layout of @@ -434,7 +441,9 @@ fs_visitor::setup_builtin_uniform_values(ir_variable *ir) c->prog_data.param_convert[c->prog_data.nr_params] = PARAM_NO_CONVERT; - c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz]; + this->param_index[c->prog_data.nr_params] = index; + this->param_offset[c->prog_data.nr_params] = swiz; + c->prog_data.nr_params++; } } } @@ -474,8 +483,13 @@ fs_visitor::emit_fragcoord_interpolation(ir_variable *ir) wpos.reg_offset++; /* gl_FragCoord.z */ - emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, - interp_reg(FRAG_ATTRIB_WPOS, 2))); + if (intel->gen >= 6) { + emit(fs_inst(BRW_OPCODE_MOV, wpos, + fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); + } else { + emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y, + interp_reg(FRAG_ATTRIB_WPOS, 2))); + } wpos.reg_offset++; /* gl_FragCoord.w: Already set up in emit_interpolation */ @@ -518,25 +532,40 @@ fs_visitor::emit_general_interpolation(ir_variable *ir) continue; } - for (unsigned int c = 0; c < type->vector_elements; c++) { - struct brw_reg interp = interp_reg(location, c); - emit(fs_inst(FS_OPCODE_LINTERP, - attr, - this->delta_x, - this->delta_y, - fs_reg(interp))); - attr.reg_offset++; - } - - if (intel->gen < 6) { - attr.reg_offset -= type->vector_elements; + if (c->key.flat_shade && (location == FRAG_ATTRIB_COL0 || + location == FRAG_ATTRIB_COL1)) { + /* Constant interpolation (flat shading) case. The SF has + * handed us defined values in only the constant offset + * field of the setup reg. + */ for (unsigned int c = 0; c < type->vector_elements; c++) { - emit(fs_inst(BRW_OPCODE_MUL, - attr, + struct brw_reg interp = interp_reg(location, c); + interp = suboffset(interp, 3); + emit(fs_inst(FS_OPCODE_CINTERP, attr, fs_reg(interp))); + attr.reg_offset++; + } + } else { + /* Perspective interpolation case. */ + for (unsigned int c = 0; c < type->vector_elements; c++) { + struct brw_reg interp = interp_reg(location, c); + emit(fs_inst(FS_OPCODE_LINTERP, attr, - this->pixel_w)); + this->delta_x, + this->delta_y, + fs_reg(interp))); attr.reg_offset++; } + + if (intel->gen < 6) { + attr.reg_offset -= type->vector_elements; + for (unsigned int c = 0; c < type->vector_elements; c++) { + emit(fs_inst(BRW_OPCODE_MUL, + attr, + attr, + this->pixel_w)); + attr.reg_offset++; + } + } } location++; } @@ -631,14 +660,18 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) assert(opcode == FS_OPCODE_POW); if (intel->gen >= 6) { - /* Can't do hstride == 0 args to gen6 math, so expand it out. */ - if (src0.file == UNIFORM) { + /* Can't do hstride == 0 args to gen6 math, so expand it out. + * + * The hardware ignores source modifiers (negate and abs) on math + * instructions, so we also move to a temp to set those up. + */ + if (src0.file == UNIFORM || src0.abs || src0.negate) { fs_reg expanded = fs_reg(this, glsl_type::float_type); emit(fs_inst(BRW_OPCODE_MOV, expanded, src0)); src0 = expanded; } - if (src1.file == UNIFORM) { + if (src1.file == UNIFORM || src1.abs || src1.negate) { fs_reg expanded = fs_reg(this, glsl_type::float_type); emit(fs_inst(BRW_OPCODE_MOV, expanded, src1)); src1 = expanded; @@ -770,6 +803,30 @@ fs_visitor::try_emit_saturate(ir_expression *ir) return true; } +static uint32_t +brw_conditional_for_comparison(unsigned int op) +{ + switch (op) { + case ir_binop_less: + return BRW_CONDITIONAL_L; + case ir_binop_greater: + return BRW_CONDITIONAL_G; + case ir_binop_lequal: + return BRW_CONDITIONAL_LE; + case ir_binop_gequal: + return BRW_CONDITIONAL_GE; + case ir_binop_equal: + case ir_binop_all_equal: /* same as equal for scalars */ + return BRW_CONDITIONAL_Z; + case ir_binop_nequal: + case ir_binop_any_nequal: /* same as nequal for scalars */ + return BRW_CONDITIONAL_NZ; + default: + assert(!"not reached: bad operation for comparison"); + return BRW_CONDITIONAL_NZ; + } +} + void fs_visitor::visit(ir_expression *ir) { @@ -819,6 +876,7 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_abs: op[0].abs = true; + op[0].negate = false; this->result = op[0]; break; case ir_unop_sign: @@ -885,35 +943,20 @@ fs_visitor::visit(ir_expression *ir) break; case ir_binop_less: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_L; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; case ir_binop_greater: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_G; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; case ir_binop_lequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_LE; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; case ir_binop_gequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_GE; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; case ir_binop_equal: - case ir_binop_all_equal: /* same as nequal for scalars */ - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_Z; - emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); - break; + case ir_binop_all_equal: case ir_binop_nequal: - case ir_binop_any_nequal: /* same as nequal for scalars */ - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + case ir_binop_any_nequal: + temp = this->result; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], op[1])); + inst->conditional_mod = brw_conditional_for_comparison(ir->operation); emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1))); break; @@ -958,7 +1001,12 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_f2b: case ir_unop_i2b: - inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f))); + temp = this->result; + /* original gen4 does implicit conversion before comparison. */ + if (intel->gen < 5) + temp.type = op[0].type; + + inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f))); inst->conditional_mod = BRW_CONDITIONAL_NZ; inst = emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(1))); @@ -1151,6 +1199,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) } /* gen4's SIMD8 sampler always has the slots for u,v,r present. */ mlen += 3; + } else if (ir->op == ir_txd) { + assert(!"TXD isn't supported on gen4 yet."); } else { /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod * instructions. We'll need to do SIMD16 here. @@ -1204,6 +1254,8 @@ fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate) inst = emit(fs_inst(FS_OPCODE_TXL, dst)); break; case ir_txd: + inst = emit(fs_inst(FS_OPCODE_TXD, dst)); + break; case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; @@ -1292,6 +1344,37 @@ fs_visitor::visit(ir_texture *ir) ir->coordinate->accept(this); fs_reg coordinate = this->result; + if (ir->offset != NULL) { + ir_constant *offset = ir->offset->as_constant(); + assert(offset != NULL); + + signed char offsets[3]; + for (unsigned i = 0; i < ir->offset->type->vector_elements; i++) + offsets[i] = (signed char) offset->value.i[i]; + + /* Combine all three offsets into a single unsigned dword: + * + * bits 11:8 - U Offset (X component) + * bits 7:4 - V Offset (Y component) + * bits 3:0 - R Offset (Z component) + */ + unsigned offset_bits = 0; + for (unsigned i = 0; i < ir->offset->type->vector_elements; i++) { + const unsigned shift = 4 * (2 - i); + offset_bits |= (offsets[i] << shift) & (0xF << shift); + } + + /* Explicitly set up the message header by copying g0 to msg reg m1. */ + emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, 1, BRW_REGISTER_TYPE_UD), + fs_reg(GRF, 0, BRW_REGISTER_TYPE_UD))); + + /* Then set the offset bits in DWord 2 of the message header. */ + emit(fs_inst(BRW_OPCODE_MOV, + fs_reg(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, 1, 2), + BRW_REGISTER_TYPE_UD)), + fs_reg(brw_imm_uw(offset_bits)))); + } + /* Should be lowered by do_lower_texture_projection */ assert(!ir->projector); @@ -1323,10 +1406,13 @@ fs_visitor::visit(ir_texture *ir) fs_reg scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1); GLuint index = _mesa_add_state_reference(params, (gl_state_index *)tokens); - float *vec_values = this->fp->Base.Parameters->ParameterValues[index]; - c->prog_data.param[c->prog_data.nr_params++] = &vec_values[0]; - c->prog_data.param[c->prog_data.nr_params++] = &vec_values[1]; + this->param_index[c->prog_data.nr_params] = index; + this->param_offset[c->prog_data.nr_params] = 0; + c->prog_data.nr_params++; + this->param_index[c->prog_data.nr_params] = index; + this->param_offset[c->prog_data.nr_params] = 1; + c->prog_data.nr_params++; fs_reg dst = fs_reg(this, ir->coordinate->type); fs_reg src = coordinate; @@ -1349,6 +1435,14 @@ fs_visitor::visit(ir_texture *ir) inst = emit_texture_gen5(ir, dst, coordinate); } + /* If there's an offset, we already set up m1. To avoid the implied move, + * use the null register. Otherwise, we want an implied move from g0. + */ + if (ir->offset != NULL) + inst->src[0] = fs_reg(brw_null_reg()); + else + inst->src[0] = fs_reg(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); + inst->sampler = sampler; this->result = dst; @@ -1356,7 +1450,10 @@ fs_visitor::visit(ir_texture *ir) if (ir->shadow_comparitor) inst->shadow_compare = true; - if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) { + if (ir->type == glsl_type::float_type) { + /* Ignore DEPTH_TEXTURE_MODE swizzling. */ + assert(ir->sampler->type->sampler_shadow); + } else if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) { fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type); for (int i = 0; i < 4; i++) { @@ -1541,7 +1638,7 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], fs_reg(0.0f))); } else { - inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_d, op[0])); + inst = emit(fs_inst(BRW_OPCODE_MOV, reg_null_f, op[0])); } inst->conditional_mod = BRW_CONDITIONAL_NZ; break; @@ -1556,31 +1653,18 @@ fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir) break; case ir_binop_greater: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_G; - break; case ir_binop_gequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_GE; - break; case ir_binop_less: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_L; - break; case ir_binop_lequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_LE; - break; case ir_binop_equal: case ir_binop_all_equal: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_Z; - break; case ir_binop_nequal: case ir_binop_any_nequal: - inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, op[0], op[1])); + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); break; + default: assert(!"not reached"); this->fail = true; @@ -1659,30 +1743,16 @@ fs_visitor::emit_if_gen6(ir_if *ir) return; case ir_binop_greater: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_G; - return; case ir_binop_gequal: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_GE; - return; case ir_binop_less: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_L; - return; case ir_binop_lequal: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_LE; - return; case ir_binop_equal: case ir_binop_all_equal: - inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_Z; - return; case ir_binop_nequal: case ir_binop_any_nequal: inst = emit(fs_inst(BRW_OPCODE_IF, reg_null_d, op[0], op[1])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; + inst->conditional_mod = + brw_conditional_for_comparison(expr->operation); return; default: assert(!"not reached"); @@ -1764,32 +1834,9 @@ fs_visitor::visit(ir_loop *ir) this->base_ir = ir->to; ir->to->accept(this); - fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_d, + fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null_cmp, counter, this->result)); - switch (ir->cmp) { - case ir_binop_equal: - inst->conditional_mod = BRW_CONDITIONAL_Z; - break; - case ir_binop_nequal: - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - case ir_binop_gequal: - inst->conditional_mod = BRW_CONDITIONAL_GE; - break; - case ir_binop_lequal: - inst->conditional_mod = BRW_CONDITIONAL_LE; - break; - case ir_binop_greater: - inst->conditional_mod = BRW_CONDITIONAL_G; - break; - case ir_binop_less: - inst->conditional_mod = BRW_CONDITIONAL_L; - break; - default: - assert(!"not reached: unknown loop condition"); - this->fail = true; - break; - } + inst->conditional_mod = brw_conditional_for_comparison(ir->cmp); inst = emit(fs_inst(BRW_OPCODE_BREAK)); inst->predicated = true; @@ -2067,7 +2114,7 @@ fs_visitor::emit_fb_writes() } for (int target = 0; target < c->key.nr_color_regions; target++) { - this->current_annotation = talloc_asprintf(this->mem_ctx, + this->current_annotation = ralloc_asprintf(this->mem_ctx, "FB write target %d", target); if (this->frag_color || this->frag_data) { @@ -2093,6 +2140,17 @@ fs_visitor::emit_fb_writes() } if (c->key.nr_color_regions == 0) { + if (c->key.alpha_test && (this->frag_color || this->frag_data)) { + /* If the alpha test is enabled but there's no color buffer, + * we still need to send alpha out the pipeline to our null + * renderbuffer. + */ + color.reg_offset += 3; + emit(fs_inst(BRW_OPCODE_MOV, + fs_reg(MRF, color_mrf + 3), + color)); + } + fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE, reg_undef, reg_undef)); inst->base_mrf = 0; @@ -2158,7 +2216,8 @@ fs_visitor::generate_fb_write(fs_inst *inst) inst->target, inst->mlen, 0, - eot); + eot, + inst->header_present); } void @@ -2244,7 +2303,7 @@ fs_visitor::generate_math(fs_inst *inst, } void -fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst) +fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) { int msg_type = -1; int rlen = 4; @@ -2266,6 +2325,16 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst) msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5; } break; + case FS_OPCODE_TXL: + if (inst->shadow_compare) { + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE_GEN5; + } else { + msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_LOD_GEN5; + } + break; + case FS_OPCODE_TXD: + assert(!"TXD isn't supported on gen5+ yet."); + break; } } else { switch (inst->opcode) { @@ -2283,13 +2352,26 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst) case FS_OPCODE_TXB: if (inst->shadow_compare) { assert(inst->mlen == 6); - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE; } else { assert(inst->mlen == 9); msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; } break; + case FS_OPCODE_TXL: + if (inst->shadow_compare) { + assert(inst->mlen == 6); + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE; + } else { + assert(inst->mlen == 9); + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD; + simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; + } + break; + case FS_OPCODE_TXD: + assert(!"TXD isn't supported on gen4 yet."); + break; } } assert(msg_type != -1); @@ -2302,7 +2384,7 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst) brw_SAMPLE(p, retype(dst, BRW_REGISTER_TYPE_UW), inst->base_mrf, - retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + src, SURF_INDEX_TEXTURE(inst->sampler), inst->sampler, WRITEMASK_XYZW, @@ -2502,6 +2584,22 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst) } } +/** + * To be called after the last _mesa_add_state_reference() call, to + * set up prog_data.param[] for assign_curb_setup() and + * setup_pull_constants(). + */ +void +fs_visitor::setup_paramvalues_refs() +{ + /* Set up the pointers to ParamValues now that that array is finalized. */ + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { + c->prog_data.param[i] = + fp->Base.Parameters->ParameterValues[this->param_index[i]] + + this->param_offset[i]; + } +} + void fs_visitor::assign_curb_setup() { @@ -2575,12 +2673,15 @@ fs_visitor::assign_urb_setup() foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); - if (inst->opcode != FS_OPCODE_LINTERP) - continue; - - assert(inst->src[2].file == FIXED_HW_REG); + if (inst->opcode == FS_OPCODE_LINTERP) { + assert(inst->src[2].file == FIXED_HW_REG); + inst->src[2].fixed_hw_reg.nr += urb_start; + } - inst->src[2].fixed_hw_reg.nr += urb_start; + if (inst->opcode == FS_OPCODE_CINTERP) { + assert(inst->src[0].file == FIXED_HW_REG); + inst->src[0].fixed_hw_reg.nr += urb_start; + } } this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length; @@ -2628,10 +2729,7 @@ fs_visitor::split_virtual_grfs() fs_inst *inst = (fs_inst *)iter.get(); /* Texturing produces 4 contiguous registers, so no splitting. */ - if ((inst->opcode == FS_OPCODE_TEX || - inst->opcode == FS_OPCODE_TXB || - inst->opcode == FS_OPCODE_TXL) && - inst->dst.file == GRF) { + if (inst->is_tex()) { split_grf[inst->dst.reg] = false; } } @@ -2671,6 +2769,7 @@ fs_visitor::split_virtual_grfs() } } } + this->live_intervals_valid = false; } /** @@ -2739,14 +2838,17 @@ void fs_visitor::calculate_live_intervals() { int num_vars = this->virtual_grf_next; - int *def = talloc_array(mem_ctx, int, num_vars); - int *use = talloc_array(mem_ctx, int, num_vars); + int *def = ralloc_array(mem_ctx, int, num_vars); + int *use = ralloc_array(mem_ctx, int, num_vars); int loop_depth = 0; int loop_start = 0; int bb_header_ip = 0; + if (this->live_intervals_valid) + return; + for (int i = 0; i < num_vars; i++) { - def[i] = 1 << 30; + def[i] = MAX_INSTRUCTION; use[i] = -1; } @@ -2820,10 +2922,12 @@ fs_visitor::calculate_live_intervals() } } - talloc_free(this->virtual_grf_def); - talloc_free(this->virtual_grf_use); + ralloc_free(this->virtual_grf_def); + ralloc_free(this->virtual_grf_use); this->virtual_grf_def = def; this->virtual_grf_use = use; + + this->live_intervals_valid = true; } /** @@ -2839,6 +2943,8 @@ fs_visitor::propagate_constants() { bool progress = false; + calculate_live_intervals(); + foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); @@ -2896,6 +3002,7 @@ fs_visitor::propagate_constants() /* Fit this constant in by commuting the operands */ scan_inst->src[0] = scan_inst->src[1]; scan_inst->src[1] = inst->src[0]; + progress = true; } break; case BRW_OPCODE_CMP: @@ -2910,12 +3017,15 @@ fs_visitor::propagate_constants() if (scan_inst->dst.file == GRF && scan_inst->dst.reg == inst->dst.reg && (scan_inst->dst.reg_offset == inst->dst.reg_offset || - scan_inst->opcode == FS_OPCODE_TEX)) { + scan_inst->is_tex())) { break; } } } + if (progress) + this->live_intervals_valid = false; + return progress; } /** @@ -2930,6 +3040,8 @@ fs_visitor::dead_code_eliminate() bool progress = false; int pc = 0; + calculate_live_intervals(); + foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); @@ -2941,6 +3053,9 @@ fs_visitor::dead_code_eliminate() pc++; } + if (progress) + live_intervals_valid = false; + return progress; } @@ -2948,10 +3063,35 @@ bool fs_visitor::register_coalesce() { bool progress = false; + int if_depth = 0; + int loop_depth = 0; foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); + /* Make sure that we dominate the instructions we're going to + * scan for interfering with our coalescing, or we won't have + * scanned enough to see if anything interferes with our + * coalescing. We don't dominate the following instructions if + * we're in a loop or an if block. + */ + switch (inst->opcode) { + case BRW_OPCODE_DO: + loop_depth++; + break; + case BRW_OPCODE_WHILE: + loop_depth--; + break; + case BRW_OPCODE_IF: + if_depth++; + break; + case BRW_OPCODE_ENDIF: + if_depth--; + break; + } + if (loop_depth || if_depth) + continue; + if (inst->opcode != BRW_OPCODE_MOV || inst->predicated || inst->saturate || @@ -2959,6 +3099,8 @@ fs_visitor::register_coalesce() inst->dst.type != inst->src[0].type) continue; + bool has_source_modifiers = inst->src[0].abs || inst->src[0].negate; + /* Found a move of a GRF to a GRF. Let's see if we can coalesce * them: check for no writes to either one until the exit of the * program. @@ -2969,37 +3111,33 @@ fs_visitor::register_coalesce() for (; scan_iter.has_next(); scan_iter.next()) { fs_inst *scan_inst = (fs_inst *)scan_iter.get(); - if (scan_inst->opcode == BRW_OPCODE_DO || - scan_inst->opcode == BRW_OPCODE_WHILE || - scan_inst->opcode == BRW_OPCODE_ENDIF) { - interfered = true; - iter = scan_iter; - break; - } - if (scan_inst->dst.file == GRF) { if (scan_inst->dst.reg == inst->dst.reg && (scan_inst->dst.reg_offset == inst->dst.reg_offset || - scan_inst->opcode == FS_OPCODE_TEX)) { + scan_inst->is_tex())) { interfered = true; break; } if (scan_inst->dst.reg == inst->src[0].reg && (scan_inst->dst.reg_offset == inst->src[0].reg_offset || - scan_inst->opcode == FS_OPCODE_TEX)) { + scan_inst->is_tex())) { interfered = true; break; } } + + /* The gen6 MATH instruction can't handle source modifiers, so avoid + * coalescing those for now. We should do something more specific. + */ + if (intel->gen == 6 && scan_inst->is_math() && has_source_modifiers) { + interfered = true; + break; + } } if (interfered) { continue; } - /* Update live interval so we don't have to recalculate. */ - this->virtual_grf_use[inst->src[0].reg] = MAX2(virtual_grf_use[inst->src[0].reg], - virtual_grf_use[inst->dst.reg]); - /* Rewrite the later usage to point at the source of the move to * be removed. */ @@ -3024,6 +3162,9 @@ fs_visitor::register_coalesce() progress = true; } + if (progress) + live_intervals_valid = false; + return progress; } @@ -3034,6 +3175,8 @@ fs_visitor::compute_to_mrf() bool progress = false; int next_ip = 0; + calculate_live_intervals(); + foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); @@ -3066,7 +3209,7 @@ fs_visitor::compute_to_mrf() * into a compute-to-MRF. */ - if (scan_inst->opcode == FS_OPCODE_TEX) { + if (scan_inst->is_tex()) { /* texturing writes several continuous regs, so we can't * compute-to-mrf that. */ @@ -3087,14 +3230,7 @@ fs_visitor::compute_to_mrf() /* gen6 math instructions must have the destination be * GRF, so no compute-to-MRF for them. */ - if (scan_inst->opcode == FS_OPCODE_RCP || - scan_inst->opcode == FS_OPCODE_RSQ || - scan_inst->opcode == FS_OPCODE_SQRT || - scan_inst->opcode == FS_OPCODE_EXP2 || - scan_inst->opcode == FS_OPCODE_LOG2 || - scan_inst->opcode == FS_OPCODE_SIN || - scan_inst->opcode == FS_OPCODE_COS || - scan_inst->opcode == FS_OPCODE_POW) { + if (scan_inst->is_math()) { break; } } @@ -3116,6 +3252,7 @@ fs_visitor::compute_to_mrf() */ if (scan_inst->opcode == BRW_OPCODE_DO || scan_inst->opcode == BRW_OPCODE_WHILE || + scan_inst->opcode == BRW_OPCODE_ELSE || scan_inst->opcode == BRW_OPCODE_ENDIF) { break; } @@ -3202,7 +3339,7 @@ fs_visitor::remove_duplicate_mrf_writes() } if (inst->mlen > 0) { - /* Found a SEND instruction, which will include two of fewer + /* Found a SEND instruction, which will include two or fewer * implied MRF writes. We could do better here. */ for (int i = 0; i < implied_mrf_writes(inst); i++) { @@ -3237,15 +3374,16 @@ fs_visitor::virtual_grf_interferes(int a, int b) int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]); int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]); - /* For dead code, just check if the def interferes with the other range. */ - if (this->virtual_grf_use[a] == -1) { - return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] && - this->virtual_grf_def[a] < this->virtual_grf_use[b]); - } - if (this->virtual_grf_use[b] == -1) { - return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] && - this->virtual_grf_def[b] < this->virtual_grf_use[a]); - } + /* We can't handle dead register writes here, without iterating + * over the whole instruction stream to find every single dead + * write to that register to compare to the live interval of the + * other register. Just assert that dead_code_eliminate() has been + * called. + */ + assert((this->virtual_grf_use[a] != -1 || + this->virtual_grf_def[a] == MAX_INSTRUCTION) && + (this->virtual_grf_use[b] != -1 || + this->virtual_grf_def[b] == MAX_INSTRUCTION)); return start < end; } @@ -3280,6 +3418,7 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) break; default: assert(!"not reached"); + brw_reg = brw_null_reg(); break; } break; @@ -3294,6 +3433,10 @@ static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg) assert(!"not reached"); brw_reg = brw_null_reg(); break; + default: + assert(!"not reached"); + brw_reg = brw_null_reg(); + break; } if (reg->abs) brw_reg = brw_abs(brw_reg); @@ -3307,20 +3450,25 @@ void fs_visitor::generate_code() { int last_native_inst = 0; - struct brw_instruction *if_stack[16], *loop_stack[16]; - int if_stack_depth = 0, loop_stack_depth = 0; - int if_depth_in_loop[16]; const char *last_annotation_string = NULL; ir_instruction *last_annotation_ir = NULL; + int if_stack_array_size = 16; + int loop_stack_array_size = 16; + int if_stack_depth = 0, loop_stack_depth = 0; + brw_instruction **if_stack = + rzalloc_array(this->mem_ctx, brw_instruction *, if_stack_array_size); + brw_instruction **loop_stack = + rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size); + int *if_depth_in_loop = + rzalloc_array(this->mem_ctx, int, loop_stack_array_size); + + if (unlikely(INTEL_DEBUG & DEBUG_WM)) { printf("Native code for fragment shader %d:\n", ctx->Shader.CurrentFragmentProgram->Name); } - if_depth_in_loop[loop_stack_depth] = 0; - - memset(&if_stack, 0, sizeof(if_stack)); foreach_iter(exec_list_iterator, iter, this->instructions) { fs_inst *inst = (fs_inst *)iter.get(); struct brw_reg src[3], dst; @@ -3404,7 +3552,6 @@ fs_visitor::generate_code() break; case BRW_OPCODE_IF: - assert(if_stack_depth < 16); if (inst->src[0].file != BAD_FILE) { assert(intel->gen >= 6); if_stack[if_stack_depth] = brw_IF_gen6(p, inst->conditional_mod, src[0], src[1]); @@ -3413,6 +3560,11 @@ fs_visitor::generate_code() } if_depth_in_loop[loop_stack_depth]++; if_stack_depth++; + if (if_stack_array_size <= if_stack_depth) { + if_stack_array_size *= 2; + if_stack = reralloc(this->mem_ctx, if_stack, brw_instruction *, + if_stack_array_size); + } break; case BRW_OPCODE_ELSE: @@ -3427,6 +3579,13 @@ fs_visitor::generate_code() case BRW_OPCODE_DO: loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8); + if (loop_stack_array_size <= loop_stack_depth) { + loop_stack_array_size *= 2; + loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *, + loop_stack_array_size); + if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int, + loop_stack_array_size); + } if_depth_in_loop[loop_stack_depth] = 0; break; @@ -3480,13 +3639,17 @@ fs_visitor::generate_code() case FS_OPCODE_COS: generate_math(inst, dst, src); break; + case FS_OPCODE_CINTERP: + brw_MOV(p, dst, src[0]); + break; case FS_OPCODE_LINTERP: generate_linterp(inst, dst, src); break; case FS_OPCODE_TEX: case FS_OPCODE_TXB: + case FS_OPCODE_TXD: case FS_OPCODE_TXL: - generate_tex(inst, dst); + generate_tex(inst, dst, src[0]); break; case FS_OPCODE_DISCARD_NOT: generate_discard_not(inst, dst); @@ -3542,6 +3705,10 @@ fs_visitor::generate_code() last_native_inst = p->nr_insn; } + ralloc_free(if_stack); + ralloc_free(loop_stack); + ralloc_free(if_depth_in_loop); + brw_set_uip_jip(p); /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS @@ -3617,10 +3784,9 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) v.emit_fb_writes(); v.split_virtual_grfs(); - v.setup_pull_constants(); - v.assign_curb_setup(); - v.assign_urb_setup(); + v.setup_paramvalues_refs(); + v.setup_pull_constants(); bool progress; do { @@ -3628,20 +3794,23 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) progress = v.remove_duplicate_mrf_writes() || progress; - v.calculate_live_intervals(); progress = v.propagate_constants() || progress; progress = v.register_coalesce() || progress; progress = v.compute_to_mrf() || progress; progress = v.dead_code_eliminate() || progress; } while (progress); + v.schedule_instructions(); + + v.assign_curb_setup(); + v.assign_urb_setup(); + if (0) { /* Debug of register spilling: Go spill everything. */ int virtual_grf_count = v.virtual_grf_next; for (int i = 1; i < virtual_grf_count; i++) { v.spill_reg(i); } - v.calculate_live_intervals(); } if (0) @@ -3650,8 +3819,6 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) while (!v.assign_regs()) { if (v.fail) break; - - v.calculate_live_intervals(); } } } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index de7b15312a..dc030ae5b5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -41,7 +41,6 @@ extern "C" { #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" -#include "talloc.h" } #include "../glsl/glsl_types.h" #include "../glsl/ir.h" @@ -68,9 +67,11 @@ enum fs_opcodes { FS_OPCODE_COS, FS_OPCODE_DDX, FS_OPCODE_DDY, + FS_OPCODE_CINTERP, FS_OPCODE_LINTERP, FS_OPCODE_TEX, FS_OPCODE_TXB, + FS_OPCODE_TXD, FS_OPCODE_TXL, FS_OPCODE_DISCARD_NOT, FS_OPCODE_DISCARD_AND, @@ -82,13 +83,13 @@ enum fs_opcodes { class fs_reg { public: - /* Callers of this talloc-based new need not call delete. It's - * easier to just talloc_free 'ctx' (or any of its ancestors). */ + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ static void* operator new(size_t size, void *ctx) { void *node; - node = talloc_size(ctx, size); + node = ralloc_size(ctx, size); assert(node != NULL); return node; @@ -192,13 +193,13 @@ static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D); class fs_inst : public exec_node { public: - /* Callers of this talloc-based new need not call delete. It's - * easier to just talloc_free 'ctx' (or any of its ancestors). */ + /* Callers of this ralloc-based new need not call delete. It's + * easier to just ralloc_free 'ctx' (or any of its ancestors). */ static void* operator new(size_t size, void *ctx) { void *node; - node = talloc_zero_size(ctx, size); + node = rzalloc_size(ctx, size); assert(node != NULL); return node; @@ -305,6 +306,26 @@ public: offset == inst->offset); } + bool is_tex() + { + return (opcode == FS_OPCODE_TEX || + opcode == FS_OPCODE_TXB || + opcode == FS_OPCODE_TXD || + opcode == FS_OPCODE_TXL); + } + + bool is_math() + { + return (opcode == FS_OPCODE_RCP || + opcode == FS_OPCODE_RSQ || + opcode == FS_OPCODE_SQRT || + opcode == FS_OPCODE_EXP2 || + opcode == FS_OPCODE_LOG2 || + opcode == FS_OPCODE_SIN || + opcode == FS_OPCODE_COS || + opcode == FS_OPCODE_POW); + } + int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ fs_reg dst; fs_reg src[3]; @@ -341,13 +362,30 @@ public: this->fp = brw->fragment_program; this->intel = &brw->intel; this->ctx = &intel->ctx; - this->mem_ctx = talloc_new(NULL); + this->mem_ctx = ralloc_context(NULL); this->shader = shader; this->fail = false; this->variable_ht = hash_table_ctor(0, hash_table_pointer_hash, hash_table_pointer_compare); + /* There's a question that appears to be left open in the spec: + * How do implicit dst conversions interact with the CMP + * instruction or conditional mods? On gen6, the instruction: + * + * CMP null<d> src0<f> src1<f> + * + * will do src1 - src0 and compare that result as if it was an + * integer. On gen4, it will do src1 - src0 as float, convert + * the result to int, and compare as int. In between, it + * appears that it does src1 - src0 and does the compare in the + * execution type so dst type doesn't matter. + */ + if (this->intel->gen > 4) + this->reg_null_cmp = reg_null_d; + else + this->reg_null_cmp = reg_null_f; + this->frag_color = NULL; this->frag_data = NULL; this->frag_depth = NULL; @@ -361,13 +399,14 @@ public: this->virtual_grf_array_size = 0; this->virtual_grf_def = NULL; this->virtual_grf_use = NULL; + this->live_intervals_valid = false; this->kill_emitted = false; } ~fs_visitor() { - talloc_free(this->mem_ctx); + ralloc_free(this->mem_ctx); hash_table_dtor(this->variable_ht); } @@ -393,6 +432,7 @@ public: void visit(ir_function_signature *ir); fs_inst *emit(fs_inst inst); + void setup_paramvalues_refs(); void assign_curb_setup(); void calculate_urb_setup(); void assign_urb_setup(); @@ -409,11 +449,13 @@ public: bool dead_code_eliminate(); bool remove_duplicate_mrf_writes(); bool virtual_grf_interferes(int a, int b); + void schedule_instructions(); + void generate_code(); void generate_fb_write(fs_inst *inst); void generate_linterp(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); - void generate_tex(fs_inst *inst, struct brw_reg dst); + void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src); void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); void generate_discard_not(fs_inst *inst, struct brw_reg temp); void generate_discard_and(fs_inst *inst, struct brw_reg temp); @@ -457,11 +499,18 @@ public: void *mem_ctx; exec_list instructions; + /* Delayed setup of c->prog_data.params[] due to realloc of + * ParamValues[] during compile. + */ + int param_index[MAX_UNIFORMS * 4]; + int param_offset[MAX_UNIFORMS * 4]; + int *virtual_grf_sizes; int virtual_grf_next; int virtual_grf_array_size; int *virtual_grf_def; int *virtual_grf_use; + bool live_intervals_valid; struct hash_table *variable_ht; ir_variable *frag_color, *frag_data, *frag_depth; @@ -485,6 +534,7 @@ public: fs_reg pixel_w; fs_reg delta_x; fs_reg delta_y; + fs_reg reg_null_cmp; int grf_used; }; diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 20bfa4c3ea..7f3f52854d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -141,7 +141,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) return visit_continue; if (!this->mem_ctx) - this->mem_ctx = talloc_parent(ir); + this->mem_ctx = ralloc_parent(ir); for (i = 0; i < expr->get_num_operands(); i++) { if (expr->operands[i]->type->is_vector()) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index bbb210cd44..f027742317 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -41,7 +41,6 @@ extern "C" { #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" -#include "talloc.h" } #include "brw_fs.h" #include "../glsl/glsl_types.h" @@ -94,6 +93,8 @@ fs_visitor::assign_regs() int class_count = 0; int aligned_pair_class = -1; + calculate_live_intervals(); + /* Set up the register classes. * * The base registers store a scalar value. For texture samples, @@ -232,8 +233,8 @@ fs_visitor::assign_regs() } - talloc_free(g); - talloc_free(regs); + ralloc_free(g); + ralloc_free(regs); return false; } @@ -271,8 +272,8 @@ fs_visitor::assign_regs() this->grf_used = last_grf + 1; - talloc_free(g); - talloc_free(regs); + ralloc_free(g); + ralloc_free(regs); return true; } @@ -416,4 +417,6 @@ fs_visitor::spill_reg(int spill_reg) } } } + + this->live_intervals_valid = false; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp new file mode 100644 index 0000000000..bff8f82f3f --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -0,0 +1,488 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +extern "C" { + +#include <sys/types.h> + +#include "main/macros.h" +#include "main/shaderobj.h" +#include "main/uniforms.h" +#include "program/prog_optimize.h" +#include "program/register_allocate.h" +#include "program/sampler.h" +#include "program/hash_table.h" +#include "brw_context.h" +#include "brw_eu.h" +#include "brw_wm.h" +} +#include "brw_fs.h" +#include "../glsl/glsl_types.h" +#include "../glsl/ir_optimization.h" +#include "../glsl/ir_print_visitor.h" + +/** @file brw_fs_schedule_instructions.cpp + * + * List scheduling of FS instructions. + * + * The basic model of the list scheduler is to take a basic block, + * compute a DAG of the dependencies (RAW ordering with latency, WAW + * ordering, WAR ordering), and make a list of the DAG heads. + * Heuristically pick a DAG head, then put all the children that are + * now DAG heads into the list of things to schedule. + * + * The heuristic is the important part. We're trying to be cheap, + * since actually computing the optimal scheduling is NP complete. + * What we do is track a "current clock". When we schedule a node, we + * update the earliest-unblocked clock time of its children, and + * increment the clock. Then, when trying to schedule, we just pick + * the earliest-unblocked instruction to schedule. + * + * Note that often there will be many things which could execute + * immediately, and there are a range of heuristic options to choose + * from in picking among those. + */ + +class schedule_node : public exec_node +{ +public: + schedule_node(fs_inst *inst) + { + this->inst = inst; + this->child_array_size = 0; + this->children = NULL; + this->child_latency = NULL; + this->child_count = 0; + this->parent_count = 0; + this->unblocked_time = 0; + + int chans = 8; + int math_latency = 22; + + switch (inst->opcode) { + case FS_OPCODE_RCP: + this->latency = 1 * chans * math_latency; + break; + case FS_OPCODE_RSQ: + this->latency = 2 * chans * math_latency; + break; + case FS_OPCODE_SQRT: + case FS_OPCODE_LOG2: + /* full precision log. partial is 2. */ + this->latency = 3 * chans * math_latency; + break; + case FS_OPCODE_EXP2: + /* full precision. partial is 3, same throughput. */ + this->latency = 4 * chans * math_latency; + break; + case FS_OPCODE_POW: + this->latency = 8 * chans * math_latency; + break; + case FS_OPCODE_SIN: + case FS_OPCODE_COS: + /* minimum latency, max is 12 rounds. */ + this->latency = 5 * chans * math_latency; + break; + default: + this->latency = 2; + break; + } + } + + fs_inst *inst; + schedule_node **children; + int *child_latency; + int child_count; + int parent_count; + int child_array_size; + int unblocked_time; + int latency; +}; + +class instruction_scheduler { +public: + instruction_scheduler(fs_visitor *v, void *mem_ctx, int virtual_grf_count) + { + this->v = v; + this->mem_ctx = ralloc_context(mem_ctx); + this->virtual_grf_count = virtual_grf_count; + this->instructions.make_empty(); + this->instructions_to_schedule = 0; + } + + ~instruction_scheduler() + { + ralloc_free(this->mem_ctx); + } + void add_barrier_deps(schedule_node *n); + void add_dep(schedule_node *before, schedule_node *after, int latency); + + void add_inst(fs_inst *inst); + void calculate_deps(); + void schedule_instructions(fs_inst *next_block_header); + + void *mem_ctx; + + int instructions_to_schedule; + int virtual_grf_count; + exec_list instructions; + fs_visitor *v; +}; + +void +instruction_scheduler::add_inst(fs_inst *inst) +{ + schedule_node *n = new(mem_ctx) schedule_node(inst); + + assert(!inst->is_head_sentinel()); + assert(!inst->is_tail_sentinel()); + + this->instructions_to_schedule++; + + inst->remove(); + instructions.push_tail(n); +} + +/** + * Add a dependency between two instruction nodes. + * + * The @after node will be scheduled after @before. We will try to + * schedule it @latency cycles after @before, but no guarantees there. + */ +void +instruction_scheduler::add_dep(schedule_node *before, schedule_node *after, + int latency) +{ + if (!before || !after) + return; + + assert(before != after); + + for (int i = 0; i < before->child_count; i++) { + if (before->children[i] == after) { + before->child_latency[i] = MAX2(before->child_latency[i], latency); + return; + } + } + + if (before->child_array_size <= before->child_count) { + if (before->child_array_size < 16) + before->child_array_size = 16; + else + before->child_array_size *= 2; + + before->children = reralloc(mem_ctx, before->children, + schedule_node *, + before->child_array_size); + before->child_latency = reralloc(mem_ctx, before->child_latency, + int, before->child_array_size); + } + + before->children[before->child_count] = after; + before->child_latency[before->child_count] = latency; + before->child_count++; + after->parent_count++; +} + +/** + * Sometimes we really want this node to execute after everything that + * was before it and before everything that followed it. This adds + * the deps to do so. + */ +void +instruction_scheduler::add_barrier_deps(schedule_node *n) +{ + schedule_node *prev = (schedule_node *)n->prev; + schedule_node *next = (schedule_node *)n->next; + + if (prev) { + while (!prev->is_head_sentinel()) { + add_dep(prev, n, 0); + prev = (schedule_node *)prev->prev; + } + } + + if (next) { + while (!next->is_tail_sentinel()) { + add_dep(n, next, 0); + next = (schedule_node *)next->next; + } + } +} + +void +instruction_scheduler::calculate_deps() +{ + schedule_node *last_grf_write[virtual_grf_count]; + schedule_node *last_mrf_write[BRW_MAX_MRF]; + schedule_node *last_conditional_mod = NULL; + + /* The last instruction always needs to still be the last + * instruction. Either it's flow control (IF, ELSE, ENDIF, DO, + * WHILE) and scheduling other things after it would disturb the + * basic block, or it's FB_WRITE and we should do a better job at + * dead code elimination anyway. + */ + schedule_node *last = (schedule_node *)instructions.get_tail(); + add_barrier_deps(last); + + memset(last_grf_write, 0, sizeof(last_grf_write)); + memset(last_mrf_write, 0, sizeof(last_mrf_write)); + + /* top-to-bottom dependencies: RAW and WAW. */ + foreach_iter(exec_list_iterator, iter, instructions) { + schedule_node *n = (schedule_node *)iter.get(); + fs_inst *inst = n->inst; + + /* read-after-write deps. */ + for (int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF) { + if (last_grf_write[inst->src[i].reg]) { + add_dep(last_grf_write[inst->src[i].reg], n, + last_grf_write[inst->src[i].reg]->latency); + } + } else if (inst->src[i].file != BAD_FILE && + inst->src[i].file != IMM && + inst->src[i].file != UNIFORM) { + assert(inst->src[i].file != MRF); + add_barrier_deps(n); + } + } + + for (int i = 0; i < inst->mlen; i++) { + /* It looks like the MRF regs are released in the send + * instruction once it's sent, not when the result comes + * back. + */ + if (last_mrf_write[inst->base_mrf + i]) { + add_dep(last_mrf_write[inst->base_mrf + i], n, + last_mrf_write[inst->base_mrf + i]->latency); + } + } + + if (inst->predicated) { + assert(last_conditional_mod); + add_dep(last_conditional_mod, n, last_conditional_mod->latency); + } + + /* write-after-write deps. */ + if (inst->dst.file == GRF) { + if (last_grf_write[inst->dst.reg]) { + add_dep(last_grf_write[inst->dst.reg], n, + last_grf_write[inst->dst.reg]->latency); + } + last_grf_write[inst->dst.reg] = n; + } else if (inst->dst.file == MRF) { + if (last_mrf_write[inst->dst.hw_reg]) { + add_dep(last_mrf_write[inst->dst.hw_reg], n, + last_mrf_write[inst->dst.hw_reg]->latency); + } + last_mrf_write[inst->dst.hw_reg] = n; + } else if (inst->dst.file != BAD_FILE) { + add_barrier_deps(n); + } + + if (inst->mlen > 0) { + for (int i = 0; i < v->implied_mrf_writes(inst); i++) { + if (last_mrf_write[inst->base_mrf + i]) { + add_dep(last_mrf_write[inst->base_mrf + i], n, + last_mrf_write[inst->base_mrf + i]->latency); + } + last_mrf_write[inst->base_mrf + i] = n; + } + } + + if (inst->conditional_mod) { + add_dep(last_conditional_mod, n, 0); + last_conditional_mod = n; + } + } + + /* bottom-to-top dependencies: WAR */ + memset(last_grf_write, 0, sizeof(last_grf_write)); + memset(last_mrf_write, 0, sizeof(last_mrf_write)); + last_conditional_mod = NULL; + + exec_node *node; + exec_node *prev; + for (node = instructions.get_tail(), prev = node->prev; + !node->is_head_sentinel(); + node = prev, prev = node->prev) { + schedule_node *n = (schedule_node *)node; + fs_inst *inst = n->inst; + + /* write-after-read deps. */ + for (int i = 0; i < 3; i++) { + if (inst->src[i].file == GRF) { + if (last_grf_write[inst->src[i].reg]) { + add_dep(n, last_grf_write[inst->src[i].reg], n->latency); + } + } else if (inst->src[i].file != BAD_FILE && + inst->src[i].file != IMM && + inst->src[i].file != UNIFORM) { + assert(inst->src[i].file != MRF); + add_barrier_deps(n); + } + } + + for (int i = 0; i < inst->mlen; i++) { + /* It looks like the MRF regs are released in the send + * instruction once it's sent, not when the result comes + * back. + */ + add_dep(n, last_mrf_write[inst->base_mrf + i], 2); + } + + if (inst->predicated) { + if (last_conditional_mod) { + add_dep(n, last_conditional_mod, n->latency); + } + } + + /* Update the things this instruction wrote, so earlier reads + * can mark this as WAR dependency. + */ + if (inst->dst.file == GRF) { + last_grf_write[inst->dst.reg] = n; + } else if (inst->dst.file == MRF) { + last_mrf_write[inst->dst.hw_reg] = n; + } else if (inst->dst.file != BAD_FILE) { + add_barrier_deps(n); + } + + if (inst->mlen > 0) { + for (int i = 0; i < v->implied_mrf_writes(inst); i++) { + last_mrf_write[inst->base_mrf + i] = n; + } + } + + if (inst->conditional_mod) + last_conditional_mod = n; + } +} + +void +instruction_scheduler::schedule_instructions(fs_inst *next_block_header) +{ + int time = 0; + + /* Remove non-DAG heads from the list. */ + foreach_iter(exec_list_iterator, iter, instructions) { + schedule_node *n = (schedule_node *)iter.get(); + if (n->parent_count != 0) + n->remove(); + } + + while (!instructions.is_empty()) { + schedule_node *chosen = NULL; + int chosen_time = 0; + + foreach_iter(exec_list_iterator, iter, instructions) { + schedule_node *n = (schedule_node *)iter.get(); + + if (!chosen || n->unblocked_time < chosen_time) { + chosen = n; + chosen_time = n->unblocked_time; + } + } + + /* Schedule this instruction. */ + assert(chosen); + chosen->remove(); + next_block_header->insert_before(chosen->inst); + instructions_to_schedule--; + + /* Bump the clock. If we expected a delay for scheduling, then + * bump the clock to reflect that. + */ + time = MAX2(time + 1, chosen_time); + + /* Now that we've scheduled a new instruction, some of its + * children can be promoted to the list of instructions ready to + * be scheduled. Update the children's unblocked time for this + * DAG edge as we do so. + */ + for (int i = 0; i < chosen->child_count; i++) { + schedule_node *child = chosen->children[i]; + + child->unblocked_time = MAX2(child->unblocked_time, + time + chosen->child_latency[i]); + + child->parent_count--; + if (child->parent_count == 0) { + instructions.push_tail(child); + } + } + + /* Shared resource: the mathbox. There's one per EU (on later + * generations, it's even more limited pre-gen6), so if we send + * something off to it then the next math isn't going to make + * progress until the first is done. + */ + if (chosen->inst->is_math()) { + foreach_iter(exec_list_iterator, iter, instructions) { + schedule_node *n = (schedule_node *)iter.get(); + + if (n->inst->is_math()) + n->unblocked_time = MAX2(n->unblocked_time, + time + chosen->latency); + } + } + } + + assert(instructions_to_schedule == 0); +} + +void +fs_visitor::schedule_instructions() +{ + fs_inst *next_block_header = (fs_inst *)instructions.head; + instruction_scheduler sched(this, mem_ctx, this->virtual_grf_next); + + while (!next_block_header->is_tail_sentinel()) { + /* Add things to be scheduled until we get to a new BB. */ + while (!next_block_header->is_tail_sentinel()) { + fs_inst *inst = next_block_header; + next_block_header = (fs_inst *)next_block_header->next; + + sched.add_inst(inst); + if (inst->opcode == BRW_OPCODE_IF || + inst->opcode == BRW_OPCODE_ELSE || + inst->opcode == BRW_OPCODE_ENDIF || + inst->opcode == BRW_OPCODE_DO || + inst->opcode == BRW_OPCODE_WHILE || + inst->opcode == BRW_OPCODE_BREAK || + inst->opcode == BRW_OPCODE_CONTINUE) { + break; + } + } + sched.calculate_deps(); + sched.schedule_instructions(next_block_header); + } + + this->live_intervals_valid = false; +} diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp index 2be6b08b5c..530ffa2658 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp @@ -69,7 +69,7 @@ public: ir_variable *components[4]; - /** talloc_parent(this->var) -- the shader's talloc context. */ + /** ralloc_parent(this->var) -- the shader's ralloc context. */ void *mem_ctx; }; @@ -77,13 +77,13 @@ class ir_vector_reference_visitor : public ir_hierarchical_visitor { public: ir_vector_reference_visitor(void) { - this->mem_ctx = talloc_new(NULL); + this->mem_ctx = ralloc_context(NULL); this->variable_list.make_empty(); } ~ir_vector_reference_visitor(void) { - talloc_free(mem_ctx); + ralloc_free(mem_ctx); } virtual ir_visitor_status visit(ir_variable *); @@ -358,7 +358,7 @@ brw_do_vector_splitting(exec_list *instructions) if (refs.variable_list.is_empty()) return false; - void *mem_ctx = talloc_new(NULL); + void *mem_ctx = ralloc_context(NULL); /* Replace the decls of the vectors to be split with their split * components. @@ -368,10 +368,10 @@ brw_do_vector_splitting(exec_list *instructions) const struct glsl_type *type; type = glsl_type::get_instance(entry->var->type->base_type, 1, 1); - entry->mem_ctx = talloc_parent(entry->var); + entry->mem_ctx = ralloc_parent(entry->var); for (unsigned int i = 0; i < entry->var->type->vector_elements; i++) { - const char *name = talloc_asprintf(mem_ctx, "%s_%c", + const char *name = ralloc_asprintf(mem_ctx, "%s_%c", entry->var->name, "xyzw"[i]); @@ -386,7 +386,7 @@ brw_do_vector_splitting(exec_list *instructions) ir_vector_splitting_visitor split(&refs.variable_list); visit_list_elements(&split, instructions); - talloc_free(mem_ctx); + ralloc_free(mem_ctx); return true; } diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 73b41fdbce..70c451d071 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -96,6 +96,9 @@ static void compile_gs_prog( struct brw_context *brw, brw_gs_quad_strip( &c, key ); break; case GL_LINE_LOOP: + /* Gen6: LINELOOP is converted to LINESTRIP at the beginning of the 3D pipeline */ + if (intel->gen == 6) + return; brw_gs_lines( &c ); break; case GL_LINES: @@ -189,7 +192,7 @@ static void populate_key( struct brw_context *brw, } if (intel->gen == 6) - prim_gs_always = brw->primitive == GL_LINE_LOOP; + prim_gs_always = 0; else prim_gs_always = brw->primitive == GL_QUADS || brw->primitive == GL_QUAD_STRIP || diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index a91b0528fa..c768be23fa 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -74,7 +74,7 @@ static void upload_binding_table_pointers(struct brw_context *brw) struct intel_context *intel = &brw->intel; BEGIN_BATCH(6); - OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); + OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2)); OUT_BATCH(brw->vs.bind_bo_offset); OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ @@ -104,7 +104,7 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw) struct intel_context *intel = &brw->intel; BEGIN_BATCH(4); - OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | + OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | GEN6_BINDING_TABLE_MODIFY_VS | GEN6_BINDING_TABLE_MODIFY_GS | GEN6_BINDING_TABLE_MODIFY_PS | @@ -142,7 +142,7 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) } BEGIN_BATCH(7); - OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2)); + OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2)); OUT_RELOC(brw->vs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); if (brw->gs.prog_active) OUT_RELOC(brw->gs.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); @@ -151,7 +151,7 @@ static void upload_pipelined_state_pointers(struct brw_context *brw ) OUT_RELOC(brw->clip.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_RELOC(brw->wm.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, + OUT_RELOC(brw->intel.batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, brw->cc.state_offset); ADVANCE_BATCH(); @@ -214,7 +214,7 @@ static void emit_depthbuffer(struct brw_context *brw) if (region == NULL) { BEGIN_BATCH(len); - OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); + OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | (BRW_SURFACE_NULL << 29)); OUT_BATCH(0); @@ -251,7 +251,7 @@ static void emit_depthbuffer(struct brw_context *brw) assert(region->tiling != I915_TILING_NONE); BEGIN_BATCH(len); - OUT_BATCH(CMD_DEPTH_BUFFER << 16 | (len - 2)); + OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2)); OUT_BATCH(((region->pitch * region->cpp) - 1) | (format << 18) | (BRW_TILEWALK_YMAJOR << 26) | @@ -277,7 +277,7 @@ static void emit_depthbuffer(struct brw_context *brw) /* Initialize it for safety. */ if (intel->gen >= 6) { BEGIN_BATCH(2); - OUT_BATCH(CMD_3D_CLEAR_PARAMS << 16 | (2 - 2)); + OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | (2 - 2)); OUT_BATCH(0); ADVANCE_BATCH(); } @@ -301,16 +301,15 @@ const struct brw_tracked_state brw_depthbuffer = { static void upload_polygon_stipple(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; struct gl_context *ctx = &brw->intel.ctx; - struct brw_polygon_stipple bps; GLuint i; if (!ctx->Polygon.StippleFlag) return; - memset(&bps, 0, sizeof(bps)); - bps.header.opcode = CMD_POLY_STIPPLE_PATTERN; - bps.header.length = sizeof(bps)/4-2; + BEGIN_BATCH(33); + OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2)); /* Polygon stipple is provided in OpenGL order, i.e. bottom * row first. If we're rendering to a window (i.e. the @@ -321,14 +320,13 @@ static void upload_polygon_stipple(struct brw_context *brw) */ if (ctx->DrawBuffer->Name == 0) { for (i = 0; i < 32; i++) - bps.stipple[i] = ctx->PolygonStipple[31 - i]; /* invert */ + OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */ } else { for (i = 0; i < 32; i++) - bps.stipple[i] = ctx->PolygonStipple[i]; /* don't invert */ + OUT_BATCH(ctx->PolygonStipple[i]); } - - BRW_CACHED_BATCH_STRUCT(brw, &bps); + CACHED_BATCH(); } const struct brw_tracked_state brw_polygon_stipple = { @@ -347,15 +345,14 @@ const struct brw_tracked_state brw_polygon_stipple = { static void upload_polygon_stipple_offset(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; struct gl_context *ctx = &brw->intel.ctx; - struct brw_polygon_stipple_offset bpso; if (!ctx->Polygon.StippleFlag) return; - memset(&bpso, 0, sizeof(bpso)); - bpso.header.opcode = CMD_POLY_STIPPLE_OFFSET; - bpso.header.length = sizeof(bpso)/4-2; + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2)); /* If we're drawing to a system window (ctx->DrawBuffer->Name == 0), * we have to invert the Y axis in order to match the OpenGL @@ -365,16 +362,11 @@ static void upload_polygon_stipple_offset(struct brw_context *brw) * system works just fine, and there's no window system to * worry about. */ - if (brw->intel.ctx.DrawBuffer->Name == 0) { - bpso.bits0.x_offset = 0; - bpso.bits0.y_offset = (32 - (ctx->DrawBuffer->Height & 31)) & 31; - } - else { - bpso.bits0.y_offset = 0; - bpso.bits0.x_offset = 0; - } - - BRW_CACHED_BATCH_STRUCT(brw, &bpso); + if (brw->intel.ctx.DrawBuffer->Name == 0) + OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31); + else + OUT_BATCH(0); + CACHED_BATCH(); } #define _NEW_WINDOW_POS 0x40000000 @@ -393,18 +385,17 @@ const struct brw_tracked_state brw_polygon_stipple_offset = { */ static void upload_aa_line_parameters(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; struct gl_context *ctx = &brw->intel.ctx; - struct brw_aa_line_parameters balp; if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters) return; + OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2)); /* use legacy aa line coverage computation */ - memset(&balp, 0, sizeof(balp)); - balp.header.opcode = CMD_AA_LINE_PARAMETERS; - balp.header.length = sizeof(balp) / 4 - 2; - - BRW_CACHED_BATCH_STRUCT(brw, &balp); + OUT_BATCH(0); + OUT_BATCH(0); + CACHED_BATCH(); } const struct brw_tracked_state brw_aa_line_parameters = { @@ -422,28 +413,21 @@ const struct brw_tracked_state brw_aa_line_parameters = { static void upload_line_stipple(struct brw_context *brw) { + struct intel_context *intel = &brw->intel; struct gl_context *ctx = &brw->intel.ctx; - struct brw_line_stipple bls; GLfloat tmp; GLint tmpi; if (!ctx->Line.StippleFlag) return; - memset(&bls, 0, sizeof(bls)); - bls.header.opcode = CMD_LINE_STIPPLE_PATTERN; - bls.header.length = sizeof(bls)/4 - 2; - - bls.bits0.pattern = ctx->Line.StipplePattern; - bls.bits1.repeat_count = ctx->Line.StippleFactor; - + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2)); + OUT_BATCH(ctx->Line.StipplePattern); tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor; tmpi = tmp * (1<<13); - - - bls.bits1.inverse_repeat_count = tmpi; - - BRW_CACHED_BATCH_STRUCT(brw, &bls); + OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor); + CACHED_BATCH(); } const struct brw_tracked_state brw_line_stipple = { @@ -481,7 +465,7 @@ static void upload_invarient_state( struct brw_context *brw ) /* Disable depth offset clamping. */ - gdo.header.opcode = CMD_GLOBAL_DEPTH_OFFSET_CLAMP; + gdo.header.opcode = _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP; gdo.header.length = sizeof(gdo)/4 - 2; gdo.depth_offset_clamp = 0.0; @@ -492,20 +476,20 @@ static void upload_invarient_state( struct brw_context *brw ) int i; BEGIN_BATCH(3); - OUT_BATCH(CMD_3D_MULTISAMPLE << 16 | (3 - 2)); + OUT_BATCH(_3DSTATE_MULTISAMPLE << 16 | (3 - 2)); OUT_BATCH(MS_PIXEL_LOCATION_CENTER | MS_NUMSAMPLES_1); OUT_BATCH(0); /* positions for 4/8-sample */ ADVANCE_BATCH(); BEGIN_BATCH(2); - OUT_BATCH(CMD_3D_SAMPLE_MASK << 16 | (2 - 2)); + OUT_BATCH(_3DSTATE_SAMPLE_MASK << 16 | (2 - 2)); OUT_BATCH(1); ADVANCE_BATCH(); for (i = 0; i < 4; i++) { BEGIN_BATCH(4); - OUT_BATCH(CMD_GS_SVB_INDEX << 16 | (4 - 2)); + OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); OUT_BATCH(i << SVB_INDEX_SHIFT); OUT_BATCH(0); OUT_BATCH(0xffffffff); @@ -565,7 +549,7 @@ static void upload_state_base_address( struct brw_context *brw ) BEGIN_BATCH(10); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2)); OUT_BATCH(1); /* General state base address */ - OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0, + OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); /* Surface state base address */ OUT_BATCH(1); /* Dynamic state base address */ OUT_BATCH(1); /* Indirect object base address */ @@ -579,7 +563,7 @@ static void upload_state_base_address( struct brw_context *brw ) BEGIN_BATCH(8); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2)); OUT_BATCH(1); /* General state base address */ - OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0, + OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); /* Surface state base address */ OUT_BATCH(1); /* Indirect object base address */ OUT_BATCH(1); /* Instruction base address */ @@ -591,7 +575,7 @@ static void upload_state_base_address( struct brw_context *brw ) BEGIN_BATCH(6); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); OUT_BATCH(1); /* General state base address */ - OUT_RELOC(intel->batch->buf, I915_GEM_DOMAIN_SAMPLER, 0, + OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1); /* Surface state base address */ OUT_BATCH(1); /* Indirect object base address */ OUT_BATCH(1); /* General state upper bound */ diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 94efa79109..7d653327e3 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -36,7 +36,7 @@ #include "program/program.h" #include "program/programopt.h" #include "tnl/tnl.h" -#include "talloc.h" +#include "../glsl/ralloc.h" #include "brw_context.h" #include "brw_wm.h" @@ -115,7 +115,7 @@ shader_error(struct gl_context *ctx, struct gl_program *prog, const char *msg) shader = _mesa_lookup_shader_program(ctx, prog->Id); if (shader) { - shader->InfoLog = talloc_strdup_append(shader->InfoLog, msg); + ralloc_strcat(&shader->InfoLog, msg); shader->LinkStatus = GL_FALSE; } } diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index f28f28663e..b41d05dd43 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -177,7 +177,7 @@ brw_end_query(struct gl_context *ctx, struct gl_query_object *q) ADVANCE_BATCH(); } - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_flush(intel); } else { /* Flush the batchbuffer in case it has writes to our query BO. * Have later queries write to a new query BO so that further rendering @@ -185,7 +185,7 @@ brw_end_query(struct gl_context *ctx, struct gl_query_object *q) */ if (query->bo) { brw_emit_query_end(brw); - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_flush(intel); drm_intel_bo_unreference(brw->query.bo); brw->query.bo = NULL; @@ -232,6 +232,12 @@ brw_prepare_query_begin(struct brw_context *brw) brw->query.bo = NULL; brw->query.bo = drm_intel_bo_alloc(intel->bufmgr, "query", 4096, 1); + + /* clear target buffer */ + drm_intel_bo_map(brw->query.bo, GL_TRUE); + memset((char *)brw->query.bo->virtual, 0, 4096); + drm_intel_bo_unmap(brw->query.bo); + brw->query.index = 0; } diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 3beed16945..86b0caa4a4 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -164,25 +164,18 @@ void brw_destroy_caches( struct brw_context *brw ); /*********************************************************************** * brw_state_batch.c */ -#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data( brw->intel.batch, (s), sizeof(*(s))) -#define BRW_CACHED_BATCH_STRUCT(brw, s) brw_cached_batch_struct( brw, (s), sizeof(*(s)) ) - -GLboolean brw_cached_batch_struct( struct brw_context *brw, - const void *data, - GLuint sz ); -void brw_destroy_batch_cache( struct brw_context *brw ); -void brw_clear_batch_cache( struct brw_context *brw ); +#define BRW_BATCH_STRUCT(brw, s) intel_batchbuffer_data(&brw->intel, (s), \ + sizeof(*(s)), false) + void *brw_state_batch(struct brw_context *brw, int size, int alignment, - drm_intel_bo **out_bo, uint32_t *out_offset); /* brw_wm_surface_state.c */ void brw_create_constant_surface(struct brw_context *brw, drm_intel_bo *bo, int width, - drm_intel_bo **out_bo, uint32_t *out_offset); #endif diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index be3989eb7d..213c7a38d8 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -29,75 +29,10 @@ * Keith Whitwell <keith@tungstengraphics.com> */ - - #include "brw_state.h" #include "intel_batchbuffer.h" #include "main/imports.h" - - -/* A facility similar to the data caching code above, which aims to - * prevent identical commands being issued repeatedly. - */ -GLboolean brw_cached_batch_struct( struct brw_context *brw, - const void *data, - GLuint sz ) -{ - struct brw_cached_batch_item *item = brw->cached_batch_items; - struct header *newheader = (struct header *)data; - - if (brw->emit_state_always) { - intel_batchbuffer_data(brw->intel.batch, data, sz); - return GL_TRUE; - } - - while (item) { - if (item->header->opcode == newheader->opcode) { - if (item->sz == sz && memcmp(item->header, newheader, sz) == 0) - return GL_FALSE; - if (item->sz != sz) { - free(item->header); - item->header = malloc(sz); - item->sz = sz; - } - goto emit; - } - item = item->next; - } - - assert(!item); - item = CALLOC_STRUCT(brw_cached_batch_item); - item->header = malloc(sz); - item->sz = sz; - item->next = brw->cached_batch_items; - brw->cached_batch_items = item; - - emit: - memcpy(item->header, newheader, sz); - intel_batchbuffer_data(brw->intel.batch, data, sz); - return GL_TRUE; -} - -void brw_clear_batch_cache( struct brw_context *brw ) -{ - struct brw_cached_batch_item *item = brw->cached_batch_items; - - while (item) { - struct brw_cached_batch_item *next = item->next; - free((void *)item->header); - free(item); - item = next; - } - - brw->cached_batch_items = NULL; -} - -void brw_destroy_batch_cache( struct brw_context *brw ) -{ - brw_clear_batch_cache(brw); -} - /** * Allocates a block of space in the batchbuffer for indirect state. * @@ -116,13 +51,12 @@ void * brw_state_batch(struct brw_context *brw, int size, int alignment, - drm_intel_bo **out_bo, uint32_t *out_offset) { - struct intel_batchbuffer *batch = brw->intel.batch; + struct intel_batchbuffer *batch = &brw->intel.batch; uint32_t offset; - assert(size < batch->buf->size); + assert(size < batch->bo->size); offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment); /* If allocating from the top would wrap below the batchbuffer, or @@ -130,19 +64,13 @@ brw_state_batch(struct brw_context *brw, * space, then flush and try again. */ if (batch->state_batch_offset < size || - offset < batch->ptr - batch->map + batch->reserved_space) { - intel_batchbuffer_flush(batch); + offset < 4*batch->used + batch->reserved_space) { + intel_batchbuffer_flush(&brw->intel); offset = ROUND_DOWN_TO(batch->state_batch_offset - size, alignment); } batch->state_batch_offset = offset; - if (*out_bo != batch->buf) { - drm_intel_bo_unreference(*out_bo); - drm_intel_bo_reference(batch->buf); - *out_bo = batch->buf; - } - *out_offset = offset; - return batch->map + offset; + return batch->map + (offset>>2); } diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 58ff528d44..01eeb19a68 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -58,8 +58,6 @@ #include "main/imports.h" #include "brw_state.h" -#include "intel_batchbuffer.h" -#include "brw_wm.h" #define FILE_DEBUG_FLAG DEBUG_STATE @@ -433,8 +431,6 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache) void brw_state_cache_check_size(struct brw_context *brw) { - DBG("%s (n_items=%d)\n", __FUNCTION__, brw->cache.n_items); - /* un-tuned guess. Each object is generally a page, so 1000 of them is 4 MB of * state cache. */ diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index e262887471..fdce79da2f 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -26,6 +26,7 @@ */ #include "main/mtypes.h" +#include "intel_batchbuffer.h" #include "brw_context.h" #include "brw_defines.h" @@ -54,7 +55,8 @@ state_out(const char *name, void *data, uint32_t hw_offset, int index, /** Generic, undecoded state buffer debug printout */ static void -state_struct_out(const char *name, drm_intel_bo *buffer, unsigned int state_size) +state_struct_out(const char *name, drm_intel_bo *buffer, + unsigned int offset, unsigned int size) { int i; @@ -62,8 +64,8 @@ state_struct_out(const char *name, drm_intel_bo *buffer, unsigned int state_size return; drm_intel_bo_map(buffer, GL_FALSE); - for (i = 0; i < state_size / 4; i++) { - state_out(name, buffer->virtual, buffer->offset, i, + for (i = 0; i < size / 4; i++) { + state_out(name, buffer->virtual + offset, buffer->offset + offset, i, "dword %d\n", i); } drm_intel_bo_unmap(buffer); @@ -98,21 +100,25 @@ get_965_surface_format(unsigned int surface_format) static void dump_wm_surface_state(struct brw_context *brw) { + dri_bo *bo; + GLubyte *base; int i; + bo = brw->intel.batch.bo; + drm_intel_bo_map(bo, GL_FALSE); + base = bo->virtual; + for (i = 0; i < brw->wm.nr_surfaces; i++) { - drm_intel_bo *surf_bo = brw->wm.surf_bo[i]; unsigned int surfoff; struct brw_surface_state *surf; char name[20]; - if (surf_bo == NULL) { + if (brw->wm.surf_offset[i] == 0) { fprintf(stderr, "WM SURF%d: NULL\n", i); continue; } - drm_intel_bo_map(surf_bo, GL_FALSE); - surfoff = surf_bo->offset + brw->wm.surf_offset[i]; - surf = (struct brw_surface_state *)(surf_bo->virtual + brw->wm.surf_offset[i]); + surfoff = bo->offset + brw->wm.surf_offset[i]; + surf = (struct brw_surface_state *)(base + brw->wm.surf_offset[i]); sprintf(name, "WM SURF%d", i); state_out(name, surf, surfoff, 0, "%s %s\n", @@ -127,9 +133,8 @@ static void dump_wm_surface_state(struct brw_context *brw) surf->ss4.min_lod); state_out(name, surf, surfoff, 5, "x,y offset: %d,%d\n", surf->ss5.x_offset, surf->ss5.y_offset); - - drm_intel_bo_unmap(surf_bo); } + drm_intel_bo_unmap(bo); } @@ -280,13 +285,14 @@ static void dump_cc_state(struct brw_context *brw) const char *name = "CC"; struct gen6_color_calc_state *cc; uint32_t cc_off; + dri_bo *bo = brw->intel.batch.bo; - if (brw->cc.state_bo == NULL) + if (brw->cc.state_offset == 0) return; - drm_intel_bo_map(brw->cc.state_bo, GL_FALSE); - cc = brw->cc.state_bo->virtual; - cc_off = brw->cc.state_bo->offset; + drm_intel_bo_map(bo, GL_FALSE); + cc = bo->virtual; + cc_off = bo->offset; state_out(name, cc, cc_off, 0, "alpha test format %s, round disable %d, stencil ref %d," "bf stencil ref %d\n", @@ -300,7 +306,7 @@ static void dump_cc_state(struct brw_context *brw) state_out(name, cc, cc_off, 4, "constant blue %f\n", cc->constant_b); state_out(name, cc, cc_off, 5, "constant alpha %f\n", cc->constant_a); - drm_intel_bo_unmap(brw->cc.state_bo); + drm_intel_bo_unmap(bo); } @@ -369,26 +375,29 @@ void brw_debug_batch(struct intel_context *intel) { struct brw_context *brw = brw_context(&intel->ctx); - state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces); + state_struct_out("WM bind", + brw->intel.batch.bo, + brw->wm.bind_bo_offset, + 4 * brw->wm.nr_surfaces); dump_wm_surface_state(brw); dump_wm_sampler_state(brw); if (intel->gen < 6) - state_struct_out("VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state)); + state_struct_out("VS", brw->vs.state_bo, 0, sizeof(struct brw_vs_unit_state)); brw_debug_prog("VS prog", brw->vs.prog_bo); if (intel->gen < 6) - state_struct_out("GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state)); + state_struct_out("GS", brw->gs.state_bo, 0, sizeof(struct brw_gs_unit_state)); brw_debug_prog("GS prog", brw->gs.prog_bo); if (intel->gen < 6) { - state_struct_out("SF", brw->sf.state_bo, sizeof(struct brw_sf_unit_state)); + state_struct_out("SF", brw->sf.state_bo, 0, sizeof(struct brw_sf_unit_state)); brw_debug_prog("SF prog", brw->sf.prog_bo); } dump_sf_viewport_state(brw); if (intel->gen < 6) - state_struct_out("WM", brw->wm.state_bo, sizeof(struct brw_wm_unit_state)); + state_struct_out("WM", brw->wm.state_bo, 0, sizeof(struct brw_wm_unit_state)); brw_debug_prog("WM prog", brw->wm.prog_bo); if (intel->gen >= 6) { diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index eba4411ca7..6f521be659 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -104,7 +104,7 @@ static const struct brw_tracked_state *gen4_atoms[] = &brw_constant_buffer }; -const struct brw_tracked_state *gen6_atoms[] = +static const struct brw_tracked_state *gen6_atoms[] = { &brw_check_fallback, @@ -169,25 +169,49 @@ const struct brw_tracked_state *gen6_atoms[] = void brw_init_state( struct brw_context *brw ) { + const struct brw_tracked_state **atoms; + int num_atoms; + brw_init_caches(brw); + + if (brw->intel.gen >= 6) { + atoms = gen6_atoms; + num_atoms = ARRAY_SIZE(gen6_atoms); + } else { + atoms = gen4_atoms; + num_atoms = ARRAY_SIZE(gen4_atoms); + } + + while (num_atoms--) { + assert((*atoms)->dirty.mesa | + (*atoms)->dirty.brw | + (*atoms)->dirty.cache); + + if ((*atoms)->prepare) + brw->prepare_atoms[brw->num_prepare_atoms++] = **atoms; + if ((*atoms)->emit) + brw->emit_atoms[brw->num_emit_atoms++] = **atoms; + atoms++; + } + assert(brw->num_emit_atoms <= ARRAY_SIZE(brw->emit_atoms)); + assert(brw->num_prepare_atoms <= ARRAY_SIZE(brw->prepare_atoms)); } void brw_destroy_state( struct brw_context *brw ) { brw_destroy_caches(brw); - brw_destroy_batch_cache(brw); } /*********************************************************************** */ -static GLboolean check_state( const struct brw_state_flags *a, - const struct brw_state_flags *b ) +static GLuint check_state( const struct brw_state_flags *a, + const struct brw_state_flags *b ) { - return ((a->mesa & b->mesa) || - (a->brw & b->brw) || - (a->cache & b->cache)); + return ((a->mesa & b->mesa) | + (a->brw & b->brw) | + (a->cache & b->cache)) != 0; } static void accumulate_state( struct brw_state_flags *a, @@ -233,7 +257,6 @@ static struct dirty_bit_map mesa_bits[] = { DEFINE_BIT(_NEW_MODELVIEW), DEFINE_BIT(_NEW_PROJECTION), DEFINE_BIT(_NEW_TEXTURE_MATRIX), - DEFINE_BIT(_NEW_ACCUM), DEFINE_BIT(_NEW_COLOR), DEFINE_BIT(_NEW_DEPTH), DEFINE_BIT(_NEW_EVAL), @@ -279,6 +302,10 @@ static struct dirty_bit_map brw_bits[] = { DEFINE_BIT(BRW_NEW_VERTICES), DEFINE_BIT(BRW_NEW_BATCH), DEFINE_BIT(BRW_NEW_DEPTH_BUFFER), + DEFINE_BIT(BRW_NEW_NR_WM_SURFACES), + DEFINE_BIT(BRW_NEW_NR_VS_SURFACES), + DEFINE_BIT(BRW_NEW_VS_CONSTBUF), + DEFINE_BIT(BRW_NEW_WM_CONSTBUF), {0, 0, 0} }; @@ -340,24 +367,16 @@ void brw_validate_state( struct brw_context *brw ) struct gl_context *ctx = &brw->intel.ctx; struct intel_context *intel = &brw->intel; struct brw_state_flags *state = &brw->state.dirty; + const struct brw_tracked_state *atoms = brw->prepare_atoms; + int num_atoms = brw->num_prepare_atoms; GLuint i; - const struct brw_tracked_state **atoms; - int num_atoms; brw_clear_validated_bos(brw); state->mesa |= brw->intel.NewGLState; brw->intel.NewGLState = 0; - brw_add_validated_bo(brw, intel->batch->buf); - - if (intel->gen >= 6) { - atoms = gen6_atoms; - num_atoms = ARRAY_SIZE(gen6_atoms); - } else { - atoms = gen4_atoms; - num_atoms = ARRAY_SIZE(gen4_atoms); - } + brw_add_validated_bo(brw, intel->batch.bo); if (brw->emit_state_always) { state->mesa |= ~0; @@ -375,27 +394,20 @@ void brw_validate_state( struct brw_context *brw ) brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; } - if (state->mesa == 0 && - state->cache == 0 && - state->brw == 0) + if ((state->mesa | state->cache | state->brw) == 0) return; - if (brw->state.dirty.brw & BRW_NEW_CONTEXT) - brw_clear_batch_cache(brw); - brw->intel.Fallback = GL_FALSE; /* boolean, not bitfield */ /* do prepare stage for all atoms */ for (i = 0; i < num_atoms; i++) { - const struct brw_tracked_state *atom = atoms[i]; - - if (brw->intel.Fallback) - break; + const struct brw_tracked_state *atom = &atoms[i]; if (check_state(state, &atom->dirty)) { - if (atom->prepare) { - atom->prepare(brw); - } + atom->prepare(brw); + + if (brw->intel.Fallback) + break; } } @@ -418,20 +430,11 @@ void brw_validate_state( struct brw_context *brw ) void brw_upload_state(struct brw_context *brw) { - struct intel_context *intel = &brw->intel; struct brw_state_flags *state = &brw->state.dirty; + const struct brw_tracked_state *atoms = brw->emit_atoms; + int num_atoms = brw->num_emit_atoms; int i; static int dirty_count = 0; - const struct brw_tracked_state **atoms; - int num_atoms; - - if (intel->gen >= 6) { - atoms = gen6_atoms; - num_atoms = ARRAY_SIZE(gen6_atoms); - } else { - atoms = gen4_atoms; - num_atoms = ARRAY_SIZE(gen4_atoms); - } brw_clear_validated_bos(brw); @@ -445,20 +448,14 @@ void brw_upload_state(struct brw_context *brw) prev = *state; for (i = 0; i < num_atoms; i++) { - const struct brw_tracked_state *atom = atoms[i]; + const struct brw_tracked_state *atom = &atoms[i]; struct brw_state_flags generated; - assert(atom->dirty.mesa || - atom->dirty.brw || - atom->dirty.cache); - if (brw->intel.Fallback) break; if (check_state(state, &atom->dirty)) { - if (atom->emit) { - atom->emit( brw ); - } + atom->emit(brw); } accumulate_state(&examined, &atom->dirty); @@ -474,15 +471,13 @@ void brw_upload_state(struct brw_context *brw) } else { for (i = 0; i < num_atoms; i++) { - const struct brw_tracked_state *atom = atoms[i]; + const struct brw_tracked_state *atom = &atoms[i]; if (brw->intel.Fallback) break; if (check_state(state, &atom->dirty)) { - if (atom->emit) { - atom->emit( brw ); - } + atom->emit(brw); } } } diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 8f97bd136f..6687a89e80 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -1017,7 +1017,14 @@ struct brw_wm_unit_state GLuint enable_32_pix:1; GLuint enable_con_32_pix:1; GLuint enable_con_64_pix:1; - GLuint pad0:5; + GLuint pad0:1; + + /* These next four bits are for Ironlake+ */ + GLuint fast_span_coverage_enable:1; + GLuint depth_buffer_clear:1; + GLuint depth_buffer_resolve_enable:1; + GLuint hierarchical_depth_buffer_resolve_enable:1; + GLuint legacy_global_depth_bias:1; GLuint line_stipple:1; GLuint depth_offset:1; @@ -1064,6 +1071,15 @@ struct brw_sampler_default_color { GLfloat color[4]; }; +struct gen5_sampler_default_color { + uint8_t ub[4]; + float f[4]; + uint16_t hf[4]; + uint16_t us[4]; + int16_t s[4]; + uint8_t b[4]; +}; + struct brw_sampler_state { @@ -1169,7 +1185,12 @@ struct brw_surface_state GLuint cube_neg_y:1; GLuint cube_pos_x:1; GLuint cube_neg_x:1; - GLuint pad:4; + GLuint pad:2; + /* Required on gen6 for surfaces accessed through render cache messages. + */ + GLuint render_cache_read_write:1; + /* Ironlake and newer: instead of replicating one of the texels */ + GLuint cube_corner_average:1; GLuint mipmap_layout_mode:1; GLuint vert_line_stride_ofs:1; GLuint vert_line_stride:1; @@ -1651,6 +1672,18 @@ struct brw_instruction struct { GLuint binding_table_index:8; + GLuint msg_control:3; + GLuint msg_type:3; + GLuint target_cache:2; + GLuint response_length:4; + GLuint msg_length:4; + GLuint msg_target:4; + GLuint pad1:3; + GLuint end_of_thread:1; + } dp_read_g4x; + + struct { + GLuint binding_table_index:8; GLuint msg_control:3; GLuint msg_type:3; GLuint target_cache:2; diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c index dfc1551aca..b0419d8a42 100644 --- a/src/mesa/drivers/dri/i965/brw_urb.c +++ b/src/mesa/drivers/dri/i965/brw_urb.c @@ -248,5 +248,13 @@ void brw_upload_urb_fence(struct brw_context *brw) uf.bits1.sf_fence = brw->urb.cs_start; uf.bits1.cs_fence = brw->urb.size; + /* erratum: URB_FENCE must not cross a 64byte cacheline */ + if ((brw->intel.batch.used & 15) > 12) { + int pad = 16 - (brw->intel.batch.used & 15); + do + brw->intel.batch.map[brw->intel.batch.used++] = MI_NOOP; + while (--pad); + } + BRW_BATCH_STRUCT(brw, &uf); } diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c index e878da3850..d28d9abcb3 100644 --- a/src/mesa/drivers/dri/i965/brw_util.c +++ b/src/mesa/drivers/dri/i965/brw_util.c @@ -37,16 +37,6 @@ #include "brw_util.h" #include "brw_defines.h" -GLuint brw_count_bits(uint64_t val) -{ - GLuint i; - for (i = 0; val ; val >>= 1) - if (val & 1) - i++; - return i; -} - - GLuint brw_translate_blend_equation( GLenum mode ) { switch (mode) { diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h index 04f3175d3e..940a871550 100644 --- a/src/mesa/drivers/dri/i965/brw_util.h +++ b/src/mesa/drivers/dri/i965/brw_util.h @@ -35,7 +35,14 @@ #include "main/mtypes.h" -extern GLuint brw_count_bits(uint64_t val); +#ifdef __GNUC__ +#define brw_count_bits(v) __builtin_popcount(v) +#else +static inline GLuint brw_count_bits(uint64_t v) +{ + return _mesa_popcount(v>>32) + _mesa_popcount(v&0xffffffff); +} +#endif extern GLuint brw_parameter_list_state_flags(struct gl_program_parameter_list *paramList); extern GLuint brw_translate_blend_factor( GLenum factor ); extern GLuint brw_translate_blend_equation( GLenum mode ); diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 59f270d675..6ae75d22c1 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -130,6 +130,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) key.nr_userclip = brw_count_bits(ctx->Transform.ClipPlanesEnabled); key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || ctx->Polygon.BackMode != GL_FILL); + key.two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); /* _NEW_POINT */ if (ctx->Point.PointSprite) { @@ -157,7 +158,7 @@ static void brw_upload_vs_prog(struct brw_context *brw) */ const struct brw_tracked_state brw_vs_prog = { .dirty = { - .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT, + .mesa = _NEW_TRANSFORM | _NEW_POLYGON | _NEW_POINT | _NEW_LIGHT, .brw = BRW_NEW_VERTEX_PROGRAM, .cache = 0 }, diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 9338a6b7db..0b88cc1ec7 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -44,6 +44,7 @@ struct brw_vs_prog_key { GLuint nr_userclip:4; GLuint copy_edgeflag:1; GLuint point_coord_replace:8; + GLuint two_side_color: 1; }; diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index e1a3f33393..6ec62554cc 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -140,11 +140,13 @@ clear_current_const(struct brw_vs_compile *c) static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { struct intel_context *intel = &c->func.brw->intel; - GLuint i, reg = 0, mrf; + GLuint i, reg = 0, mrf, j; int attributes_in_vue; int first_reladdr_output; int max_constant; int constant = 0; + int vert_result_reoder[VERT_RESULT_MAX]; + int bfc = 0; /* Determine whether to use a real constant buffer or use a block * of GRF registers for constants. The later is faster but only @@ -254,7 +256,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } reg += (constant + 1) / 2; c->prog_data.curb_read_length = reg - 1; - c->prog_data.nr_params = constant; + c->prog_data.nr_params = constant * 4; /* XXX 0 causes a bug elsewhere... */ if (intel->gen < 6 && c->prog_data.nr_params == 0) c->prog_data.nr_params = 4; @@ -291,7 +293,36 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) mrf = 4; first_reladdr_output = get_first_reladdr_output(&c->vp->program); - for (i = 0; i < VERT_RESULT_MAX; i++) { + + for (i = 0; i < VERT_RESULT_MAX; i++) + vert_result_reoder[i] = i; + + /* adjust attribute order in VUE for BFC0/BFC1 on Gen6+ */ + if (intel->gen >= 6 && c->key.two_side_color) { + if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) && + (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) { + assert(c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)); + assert(c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0)); + bfc = 2; + } else if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) && + (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0))) + bfc = 1; + + if (bfc) { + for (i = 0; i < bfc; i++) { + vert_result_reoder[VERT_RESULT_COL0 + i * 2 + 0] = VERT_RESULT_COL0 + i; + vert_result_reoder[VERT_RESULT_COL0 + i * 2 + 1] = VERT_RESULT_BFC0 + i; + } + + for (i = VERT_RESULT_COL0 + bfc * 2; i < VERT_RESULT_BFC0 + bfc; i++) { + vert_result_reoder[i] = i - bfc; + } + } + } + + for (j = 0; j < VERT_RESULT_MAX; j++) { + i = vert_result_reoder[j]; + if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) { c->nr_outputs++; assert(i < Elements(c->regs[PROGRAM_OUTPUT])); @@ -627,6 +658,22 @@ static void emit_min( struct brw_compile *p, } } +static void emit_arl(struct brw_compile *p, + struct brw_reg dst, + struct brw_reg src) +{ + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6) { + struct brw_reg dst_f = retype(dst, BRW_REGISTER_TYPE_F); + + brw_RNDD(p, dst_f, src); + brw_MOV(p, dst, dst_f); + } else { + brw_RNDD(p, dst, src); + } +} + static void emit_math1_gen4(struct brw_vs_compile *c, GLuint function, struct brw_reg dst, @@ -1072,8 +1119,6 @@ get_constant(struct brw_vs_compile *c, assert(argIndex < 3); - assert(c->func.brw->intel.gen < 6); /* FINISHME */ - if (c->current_const[argIndex].index != src->Index) { /* Keep track of the last constant loaded in this slot, for reuse. */ c->current_const[argIndex].index = src->Index; @@ -1091,7 +1136,7 @@ get_constant(struct brw_vs_compile *c, } /* replicate lower four floats into upper half (to get XYZWXYZW) */ - const_reg = stride(const_reg, 0, 4, 0); + const_reg = stride(const_reg, 0, 4, 1); const_reg.subnr = 0; return const_reg; @@ -1104,14 +1149,14 @@ get_reladdr_constant(struct brw_vs_compile *c, { const struct prog_src_register *src = &inst->SrcReg[argIndex]; struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; + struct intel_context *intel = &brw->intel; struct brw_reg const_reg = c->current_const[argIndex].reg; - struct brw_reg addrReg = c->regs[PROGRAM_ADDRESS][0]; - struct brw_reg byte_addr_reg = retype(get_tmp(c), BRW_REGISTER_TYPE_D); + struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0]; + uint32_t offset; assert(argIndex < 3); - assert(c->func.brw->intel.gen < 6); /* FINISHME */ - /* Can't reuse a reladdr constant load. */ c->current_const[argIndex].index = -1; @@ -1120,15 +1165,21 @@ get_reladdr_constant(struct brw_vs_compile *c, src->Index, argIndex, c->current_const[argIndex].reg.nr); #endif - brw_MUL(p, byte_addr_reg, addrReg, brw_imm_ud(16)); + if (intel->gen >= 6) { + offset = src->Index; + } else { + struct brw_reg byte_addr_reg = retype(get_tmp(c), BRW_REGISTER_TYPE_D); + brw_MUL(p, byte_addr_reg, addr_reg, brw_imm_d(16)); + addr_reg = byte_addr_reg; + offset = 16 * src->Index; + } /* fetch the first vec4 */ brw_dp_READ_4_vs_relative(p, - const_reg, /* writeback dest */ - byte_addr_reg, /* address register */ - 16 * src->Index, /* byte offset */ - SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ - ); + const_reg, + addr_reg, + offset, + SURF_INDEX_VERT_CONST_BUFFER); return const_reg; } @@ -1375,11 +1426,10 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, GET_SWZ(src->Swizzle, 1), GET_SWZ(src->Swizzle, 2), GET_SWZ(src->Swizzle, 3)); - } - /* Note this is ok for non-swizzle instructions: - */ - reg.negate = src->Negate ? 1 : 0; + /* Note this is ok for non-swizzle ARB_vp instructions */ + reg.negate = src->Negate ? 1 : 0; + } return reg; } @@ -1511,6 +1561,7 @@ static void emit_vertex_write( struct brw_vs_compile *c) int eot; GLuint len_vertex_header = 2; int next_mrf, i; + int msg_len; if (c->key.copy_edgeflag) { brw_MOV(p, @@ -1677,13 +1728,20 @@ static void emit_vertex_write( struct brw_vs_compile *c) eot = (c->first_overflow_output == 0); + msg_len = c->nr_outputs + 2 + len_vertex_header; + if (intel->gen >= 6) { + /* interleaved urb write message length for gen6 should be multiple of 2 */ + if ((msg_len % 2) != 0) + msg_len++; + } + brw_urb_WRITE(p, brw_null_reg(), /* dest */ 0, /* starting mrf reg nr */ c->r0, /* src */ 0, /* allocate */ 1, /* used */ - MIN2(c->nr_outputs + 1 + len_vertex_header, (BRW_MAX_MRF-1)), /* msg len */ + MIN2(msg_len - 1, (BRW_MAX_MRF - 1)), /* msg len */ 0, /* response len */ eot, /* eot */ eot, /* writes complete */ @@ -1892,6 +1950,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) switch (inst->Opcode) { case OPCODE_ABS: + args[0].negate = false; brw_MOV(p, dst, brw_abs(args[0])); break; case OPCODE_ADD: @@ -1928,7 +1987,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL); break; case OPCODE_ARL: - brw_RNDD(p, dst, args[0]); + emit_arl(p, dst, args[0]); break; case OPCODE_FLR: brw_RNDD(p, dst, args[0]); diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index be92313861..c3a7cc247c 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -96,7 +96,14 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) * and those dwords will be written to the second URB handle when we * brw_urb_WRITE() results. */ - vs.thread1.single_program_flow = 0; + /* Disable single program flow on Ironlake. We cannot reliably get + * all applications working without it. See: + * https://bugs.freedesktop.org/show_bug.cgi?id=29172 + * + * The most notable and reliably failing application is the Humus + * demo "CelShading" + */ + vs.thread1.single_program_flow = (intel->gen == 5); if (intel->gen == 5) vs.thread1.binding_table_entry_count = 0; /* hardware requirement */ diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c index eabac51160..48cf265e51 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c @@ -82,6 +82,15 @@ prepare_vs_constants(struct brw_context *brw) params->ParameterValues[i], 4 * sizeof(float)); } + + if (0) { + for (i = 0; i < params->NumParameters; i++) { + float *row = (float *)brw->vs.const_bo->virtual + i * 4; + printf("vs const surface %3d: %4.3f %4.3f %4.3f %4.3f\n", + i, row[0], row[1], row[2], row[3]); + } + } + drm_intel_gem_bo_unmap_gtt(brw->vs.const_bo); brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF; } @@ -115,13 +124,11 @@ brw_update_vs_constant_surface( struct gl_context *ctx, * it. */ if (brw->vs.const_bo == NULL) { - drm_intel_bo_unreference(brw->vs.surf_bo[surf]); - brw->vs.surf_bo[surf] = NULL; + brw->vs.surf_offset[surf] = 0; return; } brw_create_constant_surface(brw, brw->vs.const_bo, params->NumParameters, - &brw->vs.surf_bo[surf], &brw->vs.surf_offset[surf]); } @@ -157,11 +164,10 @@ static void upload_vs_surfaces(struct brw_context *brw) /* BRW_NEW_NR_VS_SURFACES */ if (brw->vs.nr_surfaces == 0) { - if (brw->vs.bind_bo) { - drm_intel_bo_unreference(brw->vs.bind_bo); - brw->vs.bind_bo = NULL; + if (brw->vs.bind_bo_offset) { brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE; } + brw->vs.bind_bo_offset = 0; return; } @@ -171,15 +177,11 @@ static void upload_vs_surfaces(struct brw_context *brw) * space for the binding table. (once we have vs samplers) */ bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_VS_MAX_SURF, - 32, &brw->vs.bind_bo, &brw->vs.bind_bo_offset); + 32, &brw->vs.bind_bo_offset); for (i = 0; i < BRW_VS_MAX_SURF; i++) { /* BRW_NEW_VS_CONSTBUF */ - if (brw->vs.surf_bo[i]) { - bind[i] = brw->vs.surf_offset[i]; - } else { - bind[i] = 0; - } + bind[i] = brw->vs.surf_offset[i]; } brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE; diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 3d7a98c981..152ee14156 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -73,15 +73,11 @@ static void brw_destroy_context( struct intel_context *intel ) free(brw->wm.compile_data); } - for (i = 0; i < brw->state.nr_color_regions; i++) - intel_region_release(&brw->state.color_regions[i]); - brw->state.nr_color_regions = 0; intel_region_release(&brw->state.depth_region); dri_bo_release(&brw->curbe.curbe_bo); dri_bo_release(&brw->vs.prog_bo); dri_bo_release(&brw->vs.state_bo); - dri_bo_release(&brw->vs.bind_bo); dri_bo_release(&brw->vs.const_bo); dri_bo_release(&brw->gs.prog_bo); dri_bo_release(&brw->gs.state_bo); @@ -93,16 +89,12 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->sf.vp_bo); for (i = 0; i < BRW_MAX_TEX_UNIT; i++) dri_bo_release(&brw->wm.sdc_bo[i]); - dri_bo_release(&brw->wm.bind_bo); - for (i = 0; i < BRW_WM_MAX_SURF; i++) - dri_bo_release(&brw->wm.surf_bo[i]); dri_bo_release(&brw->wm.sampler_bo); dri_bo_release(&brw->wm.prog_bo); dri_bo_release(&brw->wm.state_bo); dri_bo_release(&brw->wm.const_bo); dri_bo_release(&brw->wm.push_const_bo); dri_bo_release(&brw->cc.prog_bo); - dri_bo_release(&brw->cc.state_bo); dri_bo_release(&brw->cc.vp_bo); dri_bo_release(&brw->cc.blend_state_bo); dri_bo_release(&brw->cc.depth_stencil_state_bo); @@ -122,20 +114,14 @@ static void brw_set_draw_region( struct intel_context *intel, GLuint num_color_regions) { struct brw_context *brw = brw_context(&intel->ctx); - GLuint i; /* release old color/depth regions */ if (brw->state.depth_region != depth_region) brw->state.dirty.brw |= BRW_NEW_DEPTH_BUFFER; - for (i = 0; i < brw->state.nr_color_regions; i++) - intel_region_release(&brw->state.color_regions[i]); intel_region_release(&brw->state.depth_region); /* reference new color/depth regions */ - for (i = 0; i < num_color_regions; i++) - intel_region_reference(&brw->state.color_regions[i], color_regions[i]); intel_region_reference(&brw->state.depth_region, depth_region); - brw->state.nr_color_regions = num_color_regions; } @@ -173,14 +159,7 @@ static void brw_new_batch( struct intel_context *intel ) brw->state.dirty.brw |= ~0; brw->state.dirty.cache |= ~0; - /* Move to the end of the current upload buffer so that we'll force choosing - * a new buffer next time. - */ - if (brw->vb.upload.bo != NULL) { - drm_intel_bo_unreference(brw->vb.upload.bo); - brw->vb.upload.bo = NULL; - brw->vb.upload.offset = 0; - } + brw->vb.nr_current_buffers = 0; } static void brw_invalidate_state( struct intel_context *intel, GLuint new_state ) @@ -203,4 +182,5 @@ void brwInitVtbl( struct brw_context *brw ) brw->intel.vtbl.destroy = brw_destroy_context; brw->intel.vtbl.set_draw_region = brw_set_draw_region; brw->intel.vtbl.debug_batch = brw_debug_batch; + brw->intel.vtbl.render_target_supported = brw_render_target_supported; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index e0aa3fd7f2..ca51d1599a 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -284,6 +284,7 @@ static void brw_wm_populate_key( struct brw_context *brw, /* Build the index for table lookup */ /* _NEW_COLOR */ + key->alpha_test = ctx->Color.AlphaEnabled; if (fp->program.UsesKill || ctx->Color.AlphaEnabled) lookup |= IZ_PS_KILL_ALPHATEST_BIT; @@ -364,8 +365,6 @@ static void brw_wm_populate_key( struct brw_context *brw, SWIZZLE_NIL }; - key->tex_swizzles[i] = SWIZZLE_NOOP; - /* GL_DEPTH_TEXTURE_MODE is normally handled through * brw_wm_surface_state, but it applies to shadow compares as * well and our shadow compares always return the result in @@ -378,6 +377,11 @@ static void brw_wm_populate_key( struct brw_context *brw, swizzles[2] = SWIZZLE_ZERO; } else if (t->DepthMode == GL_LUMINANCE) { swizzles[3] = SWIZZLE_ONE; + } else if (t->DepthMode == GL_RED) { + /* See table 3.23 of the GL 3.0 spec. */ + swizzles[1] = SWIZZLE_ZERO; + swizzles[2] = SWIZZLE_ZERO; + swizzles[3] = SWIZZLE_ONE; } } @@ -427,7 +431,8 @@ static void brw_wm_populate_key( struct brw_context *brw, key->render_to_fbo = ctx->DrawBuffer->Name != 0; } - key->nr_color_regions = brw->state.nr_color_regions; + /* _NEW_BUFFERS */ + key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers; /* CACHE_NEW_VS_PROG */ key->vp_outputs_written = brw->vs.prog_data->outputs_written; diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index e7f3cfbb75..90771e1f50 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -64,6 +64,7 @@ struct brw_wm_prog_key { GLuint linear_color:1; /**< linear interpolation vs perspective interp */ GLuint nr_color_regions:5; GLuint render_to_fbo:1; + GLuint alpha_test:1; GLbitfield proj_attrib_mask; /**< one bit per fragment program attribute */ GLuint shadowtex_mask:16; @@ -474,5 +475,6 @@ struct gl_shader *brw_new_shader(struct gl_context *ctx, GLuint name, GLuint typ struct gl_shader_program *brw_new_shader_program(struct gl_context *ctx, GLuint name); bool brw_color_buffer_write_enabled(struct brw_context *brw); +bool brw_render_target_supported(gl_format format); #endif diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c index a0e86034e1..2336e27c1e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_emit.c +++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c @@ -219,43 +219,45 @@ void emit_wpos_xy(struct brw_wm_compile *c, const struct brw_reg *arg0) { struct brw_compile *p = &c->func; + struct intel_context *intel = &p->brw->intel; + struct brw_reg delta_x = retype(arg0[0], BRW_REGISTER_TYPE_W); + struct brw_reg delta_y = retype(arg0[1], BRW_REGISTER_TYPE_W); if (mask & WRITEMASK_X) { + if (intel->gen >= 6) { + struct brw_reg delta_x_f = retype(delta_x, BRW_REGISTER_TYPE_F); + brw_MOV(p, delta_x_f, delta_x); + delta_x = delta_x_f; + } + if (c->fp->program.PixelCenterInteger) { /* X' = X */ - brw_MOV(p, - dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_W)); + brw_MOV(p, dst[0], delta_x); } else { /* X' = X + 0.5 */ - brw_ADD(p, - dst[0], - retype(arg0[0], BRW_REGISTER_TYPE_W), - brw_imm_f(0.5)); + brw_ADD(p, dst[0], delta_x, brw_imm_f(0.5)); } } if (mask & WRITEMASK_Y) { + if (intel->gen >= 6) { + struct brw_reg delta_y_f = retype(delta_y, BRW_REGISTER_TYPE_F); + brw_MOV(p, delta_y_f, delta_y); + delta_y = delta_y_f; + } + if (c->fp->program.OriginUpperLeft) { if (c->fp->program.PixelCenterInteger) { /* Y' = Y */ - brw_MOV(p, - dst[1], - retype(arg0[1], BRW_REGISTER_TYPE_W)); + brw_MOV(p, dst[1], delta_y); } else { - /* Y' = Y + 0.5 */ - brw_ADD(p, - dst[1], - retype(arg0[1], BRW_REGISTER_TYPE_W), - brw_imm_f(0.5)); + brw_ADD(p, dst[1], delta_y, brw_imm_f(0.5)); } } else { float center_offset = c->fp->program.PixelCenterInteger ? 0.0 : 0.5; /* Y' = (height - 1) - Y + center */ - brw_ADD(p, - dst[1], - negate(retype(arg0[1], BRW_REGISTER_TYPE_W)), + brw_ADD(p, dst[1], negate(delta_y), brw_imm_f(c->key.drawable_height - 1 + center_offset)); } } @@ -971,34 +973,23 @@ void emit_math2(struct brw_wm_compile *c, struct brw_reg temp_dst = dst[dst_chan]; if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - /* Both scalar arguments. Do scalar calc. */ - src0.hstride = BRW_HORIZONTAL_STRIDE_1; - src1.hstride = BRW_HORIZONTAL_STRIDE_1; - temp_dst.hstride = BRW_HORIZONTAL_STRIDE_1; - temp_dst.width = BRW_WIDTH_1; - - if (arg0[0].subnr != 0) { - brw_MOV(p, temp_dst, src0); - src0 = temp_dst; - - /* Ouch. We've used the temp as a dst, and we still - * need a temp to store arg1 in, because src and dst - * offsets have to be equal. Leaving this up to - * glsl2-965 to handle correctly. - */ - assert(arg1[0].subnr == 0); - } else if (arg1[0].subnr != 0) { - brw_MOV(p, temp_dst, src1); - src1 = temp_dst; - } - } else { - brw_MOV(p, temp_dst, src0); - src0 = temp_dst; - } - } else if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - brw_MOV(p, temp_dst, src1); - src1 = temp_dst; + brw_MOV(p, temp_dst, src0); + src0 = temp_dst; + } + + if (arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { + /* This is a heinous hack to get a temporary register for use + * in case both arg0 and arg1 are constants. Why you're + * doing exponentiation on constant values in the shader, we + * don't know. + * + * max_wm_grf is almost surely less than the maximum GRF, and + * gen6 doesn't care about the number of GRFs used in a + * shader like pre-gen6 did. + */ + struct brw_reg temp = brw_vec8_grf(c->max_wm_grf, 0); + brw_MOV(p, temp, src1); + src1 = temp; } brw_set_saturate(p, (mask & SATURATE) ? 1 : 0); @@ -1016,14 +1007,6 @@ void emit_math2(struct brw_wm_compile *c, sechalf(src0), sechalf(src1)); } - - /* Splat a scalar result into all the channels. */ - if (arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 && - arg1[0].hstride == BRW_HORIZONTAL_STRIDE_0) { - temp_dst.hstride = BRW_HORIZONTAL_STRIDE_0; - temp_dst.vstride = BRW_VERTICAL_STRIDE_0; - brw_MOV(p, dst[dst_chan], temp_dst); - } } else { GLuint saturate = ((mask & SATURATE) ? BRW_MATH_SATURATE_SATURATE : @@ -1350,9 +1333,11 @@ static void fire_fb_write( struct brw_wm_compile *c, dst = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW); /* Pass through control information: + * + * Gen6 has done m1 mov in emit_fb_write() for current SIMD16 case. */ /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */ - if (intel->gen < 6) /* gen6, use headerless for fb write */ + if (intel->gen < 6) { brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */ @@ -1373,7 +1358,8 @@ static void fire_fb_write( struct brw_wm_compile *c, target, nr, 0, - eot); + eot, + GL_TRUE); } @@ -1518,7 +1504,8 @@ void emit_fb_write(struct brw_wm_compile *c, */ brw_push_insn_state(p); brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_MOV(p, brw_message_reg(0), brw_vec8_grf(0, 0)); + brw_MOV(p, retype(brw_message_reg(0), BRW_REGISTER_TYPE_UD), + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); brw_pop_insn_state(p); if (target != 0) { diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index fea96d3538..30672b4251 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -69,12 +69,43 @@ static GLuint translate_wrap_mode( GLenum wrap ) static drm_intel_bo *upload_default_color( struct brw_context *brw, const GLfloat *color ) { - struct brw_sampler_default_color sdc; + struct intel_context *intel = &brw->intel; - COPY_4V(sdc.color, color); - - return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, - &sdc, sizeof(sdc)); + if (intel->gen >= 5) { + struct gen5_sampler_default_color sdc; + + memset(&sdc, 0, sizeof(sdc)); + + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[0], color[0]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[1], color[1]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[2], color[2]); + UNCLAMPED_FLOAT_TO_UBYTE(sdc.ub[3], color[3]); + + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[0], color[0]); + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[1], color[1]); + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[2], color[2]); + UNCLAMPED_FLOAT_TO_USHORT(sdc.us[3], color[3]); + + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[0], color[0]); + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[1], color[1]); + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[2], color[2]); + UNCLAMPED_FLOAT_TO_SHORT(sdc.s[3], color[3]); + + /* XXX: Fill in half floats */ + /* XXX: Fill in signed bytes */ + + COPY_4V(sdc.f, color); + + return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, + &sdc, sizeof(sdc)); + } else { + struct brw_sampler_default_color sdc; + + COPY_4V(sdc.color, color); + + return brw_cache_data(&brw->cache, BRW_SAMPLER_DEFAULT_COLOR, + &sdc, sizeof(sdc)); + } } @@ -245,9 +276,8 @@ brw_wm_sampler_populate_key(struct brw_context *brw, struct wm_sampler_entry *entry = &key->sampler[unit]; struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit]; struct gl_texture_object *texObj = texUnit->_Current; - struct intel_texture_object *intelObj = intel_texture_object(texObj); struct gl_texture_image *firstImage = - texObj->Image[0][intelObj->firstLevel]; + texObj->Image[0][texObj->BaseLevel]; memset(last_entry_end, 0, (char*)entry - last_entry_end + sizeof(*entry)); diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 82835470a3..5b5afc4626 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -47,7 +47,6 @@ struct brw_wm_unit_key { unsigned int dispatch_grf_start_reg; unsigned int curbe_offset; - unsigned int urb_size; unsigned int nr_surfaces, sampler_count; GLboolean uses_depth, computes_depth, uses_kill, is_glsl; @@ -87,7 +86,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) { struct gl_context *ctx = &brw->intel.ctx; const struct gl_fragment_program *fp = brw->fragment_program; - const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp; struct intel_context *intel = &brw->intel; memset(key, 0, sizeof(*key)); @@ -99,9 +97,6 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; key->total_scratch = brw->wm.prog_data->total_scratch; - /* BRW_NEW_URB_FENCE */ - key->urb_size = brw->urb.vsize; - /* BRW_NEW_CURBE_OFFSETS */ key->curbe_offset = brw->curbe.wm_start; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 76fc94df1f..e1f8f57a9d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -42,7 +42,7 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" - +#include "brw_wm.h" static GLuint translate_tex_target( GLenum target ) { @@ -68,104 +68,74 @@ static GLuint translate_tex_target( GLenum target ) } } +static uint32_t brw_format_for_mesa_format[MESA_FORMAT_COUNT] = +{ + [MESA_FORMAT_L8] = BRW_SURFACEFORMAT_L8_UNORM, + [MESA_FORMAT_I8] = BRW_SURFACEFORMAT_I8_UNORM, + [MESA_FORMAT_A8] = BRW_SURFACEFORMAT_A8_UNORM, + [MESA_FORMAT_AL88] = BRW_SURFACEFORMAT_L8A8_UNORM, + [MESA_FORMAT_AL1616] = BRW_SURFACEFORMAT_L16A16_UNORM, + [MESA_FORMAT_R8] = BRW_SURFACEFORMAT_R8_UNORM, + [MESA_FORMAT_R16] = BRW_SURFACEFORMAT_R16_UNORM, + [MESA_FORMAT_RG88] = BRW_SURFACEFORMAT_R8G8_UNORM, + [MESA_FORMAT_RG1616] = BRW_SURFACEFORMAT_R16G16_UNORM, + [MESA_FORMAT_ARGB8888] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM, + [MESA_FORMAT_XRGB8888] = BRW_SURFACEFORMAT_B8G8R8X8_UNORM, + [MESA_FORMAT_RGB565] = BRW_SURFACEFORMAT_B5G6R5_UNORM, + [MESA_FORMAT_ARGB1555] = BRW_SURFACEFORMAT_B5G5R5A1_UNORM, + [MESA_FORMAT_ARGB4444] = BRW_SURFACEFORMAT_B4G4R4A4_UNORM, + [MESA_FORMAT_YCBCR_REV] = BRW_SURFACEFORMAT_YCRCB_NORMAL, + [MESA_FORMAT_YCBCR] = BRW_SURFACEFORMAT_YCRCB_SWAPUVY, + [MESA_FORMAT_RGB_FXT1] = BRW_SURFACEFORMAT_FXT1, + [MESA_FORMAT_RGBA_FXT1] = BRW_SURFACEFORMAT_FXT1, + [MESA_FORMAT_RGB_DXT1] = BRW_SURFACEFORMAT_DXT1_RGB, + [MESA_FORMAT_RGBA_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM, + [MESA_FORMAT_RGBA_DXT3] = BRW_SURFACEFORMAT_BC2_UNORM, + [MESA_FORMAT_RGBA_DXT5] = BRW_SURFACEFORMAT_BC3_UNORM, + [MESA_FORMAT_SRGB_DXT1] = BRW_SURFACEFORMAT_DXT1_RGB_SRGB, + [MESA_FORMAT_SRGBA_DXT1] = BRW_SURFACEFORMAT_BC1_UNORM_SRGB, + [MESA_FORMAT_SRGBA_DXT3] = BRW_SURFACEFORMAT_BC2_UNORM_SRGB, + [MESA_FORMAT_SRGBA_DXT5] = BRW_SURFACEFORMAT_BC3_UNORM_SRGB, + [MESA_FORMAT_SARGB8] = BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB, + [MESA_FORMAT_SLA8] = BRW_SURFACEFORMAT_L8A8_UNORM_SRGB, + [MESA_FORMAT_SL8] = BRW_SURFACEFORMAT_L8_UNORM_SRGB, + [MESA_FORMAT_DUDV8] = BRW_SURFACEFORMAT_R8G8_SNORM, + [MESA_FORMAT_SIGNED_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_SNORM, + [MESA_FORMAT_RGBA8888_REV] = BRW_SURFACEFORMAT_R8G8B8A8_UNORM, +}; + +bool +brw_render_target_supported(gl_format format) +{ + if (format == MESA_FORMAT_S8_Z24 || + format == MESA_FORMAT_X8_Z24 || + format == MESA_FORMAT_Z16) { + return true; + } + + /* Not exactly true, as some of those formats are not renderable. + * But at least we know how to translate them. + */ + return brw_format_for_mesa_format[format] != 0; +} static GLuint translate_tex_format( gl_format mesa_format, GLenum internal_format, - GLenum depth_mode ) + GLenum depth_mode, + GLenum srgb_decode ) { switch( mesa_format ) { - case MESA_FORMAT_L8: - return BRW_SURFACEFORMAT_L8_UNORM; - - case MESA_FORMAT_I8: - return BRW_SURFACEFORMAT_I8_UNORM; - - case MESA_FORMAT_A8: - return BRW_SURFACEFORMAT_A8_UNORM; - - case MESA_FORMAT_AL88: - return BRW_SURFACEFORMAT_L8A8_UNORM; - - case MESA_FORMAT_AL1616: - return BRW_SURFACEFORMAT_L16A16_UNORM; - - case MESA_FORMAT_R8: - return BRW_SURFACEFORMAT_R8_UNORM; - - case MESA_FORMAT_R16: - return BRW_SURFACEFORMAT_R16_UNORM; - - case MESA_FORMAT_RG88: - return BRW_SURFACEFORMAT_R8G8_UNORM; - - case MESA_FORMAT_RG1616: - return BRW_SURFACEFORMAT_R16G16_UNORM; - - case MESA_FORMAT_RGB888: - assert(0); /* not supported for sampling */ - return BRW_SURFACEFORMAT_R8G8B8_UNORM; - - case MESA_FORMAT_ARGB8888: - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - - case MESA_FORMAT_XRGB8888: - return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; - - case MESA_FORMAT_RGBA8888_REV: - _mesa_problem(NULL, "unexpected format in i965:translate_tex_format()"); - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; - - case MESA_FORMAT_RGB565: - return BRW_SURFACEFORMAT_B5G6R5_UNORM; - - case MESA_FORMAT_ARGB1555: - return BRW_SURFACEFORMAT_B5G5R5A1_UNORM; - - case MESA_FORMAT_ARGB4444: - return BRW_SURFACEFORMAT_B4G4R4A4_UNORM; - - case MESA_FORMAT_YCBCR_REV: - return BRW_SURFACEFORMAT_YCRCB_NORMAL; - - case MESA_FORMAT_YCBCR: - return BRW_SURFACEFORMAT_YCRCB_SWAPUVY; - - case MESA_FORMAT_RGB_FXT1: - case MESA_FORMAT_RGBA_FXT1: - return BRW_SURFACEFORMAT_FXT1; case MESA_FORMAT_Z16: if (depth_mode == GL_INTENSITY) return BRW_SURFACEFORMAT_I16_UNORM; else if (depth_mode == GL_ALPHA) return BRW_SURFACEFORMAT_A16_UNORM; + else if (depth_mode == GL_RED) + return BRW_SURFACEFORMAT_R16_UNORM; else return BRW_SURFACEFORMAT_L16_UNORM; - case MESA_FORMAT_RGB_DXT1: - return BRW_SURFACEFORMAT_DXT1_RGB; - - case MESA_FORMAT_RGBA_DXT1: - return BRW_SURFACEFORMAT_BC1_UNORM; - - case MESA_FORMAT_RGBA_DXT3: - return BRW_SURFACEFORMAT_BC2_UNORM; - - case MESA_FORMAT_RGBA_DXT5: - return BRW_SURFACEFORMAT_BC3_UNORM; - - case MESA_FORMAT_SARGB8: - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; - - case MESA_FORMAT_SLA8: - return BRW_SURFACEFORMAT_L8A8_UNORM_SRGB; - - case MESA_FORMAT_SL8: - return BRW_SURFACEFORMAT_L8_UNORM_SRGB; - - case MESA_FORMAT_SRGB_DXT1: - return BRW_SURFACEFORMAT_BC1_UNORM_SRGB; - case MESA_FORMAT_S8_Z24: /* XXX: these different surface formats don't seem to * make any difference for shadow sampler/compares. @@ -174,18 +144,21 @@ static GLuint translate_tex_format( gl_format mesa_format, return BRW_SURFACEFORMAT_I24X8_UNORM; else if (depth_mode == GL_ALPHA) return BRW_SURFACEFORMAT_A24X8_UNORM; + else if (depth_mode == GL_RED) + return BRW_SURFACEFORMAT_R24_UNORM_X8_TYPELESS; else return BRW_SURFACEFORMAT_L24X8_UNORM; - - case MESA_FORMAT_DUDV8: - return BRW_SURFACEFORMAT_R8G8_SNORM; - - case MESA_FORMAT_SIGNED_RGBA8888_REV: - return BRW_SURFACEFORMAT_R8G8B8A8_SNORM; - + + case MESA_FORMAT_SARGB8: + case MESA_FORMAT_SLA8: + case MESA_FORMAT_SL8: + if (srgb_decode == GL_DECODE_EXT) + return brw_format_for_mesa_format[mesa_format]; + else if (srgb_decode == GL_SKIP_DECODE_EXT) + return brw_format_for_mesa_format[_mesa_get_srgb_format_linear(mesa_format)]; default: - assert(0); - return 0; + assert(brw_format_for_mesa_format[mesa_format] != 0); + return brw_format_for_mesa_format[mesa_format]; } } @@ -214,49 +187,45 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit ) struct brw_context *brw = brw_context(ctx); struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); - struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; + struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel]; const GLuint surf_index = SURF_INDEX_TEXTURE(unit); - struct brw_surface_state surf; - void *map; + struct brw_surface_state *surf; - memset(&surf, 0, sizeof(surf)); + surf = brw_state_batch(brw, sizeof(*surf), 32, + &brw->wm.surf_offset[surf_index]); + memset(surf, 0, sizeof(*surf)); - surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - surf.ss0.surface_type = translate_tex_target(tObj->Target); - surf.ss0.surface_format = translate_tex_format(firstImage->TexFormat, + surf->ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf->ss0.surface_type = translate_tex_target(tObj->Target); + surf->ss0.surface_format = translate_tex_format(firstImage->TexFormat, firstImage->InternalFormat, - tObj->DepthMode); + tObj->DepthMode, tObj->sRGBDecode); /* This is ok for all textures with channel width 8bit or less: */ -/* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ - surf.ss1.base_addr = intelObj->mt->region->buffer->offset; /* reloc */ +/* surf->ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ + surf->ss1.base_addr = intelObj->mt->region->buffer->offset; /* reloc */ - surf.ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel; - surf.ss2.width = firstImage->Width - 1; - surf.ss2.height = firstImage->Height - 1; - brw_set_surface_tiling(&surf, intelObj->mt->region->tiling); - surf.ss3.pitch = (intelObj->mt->region->pitch * intelObj->mt->cpp) - 1; - surf.ss3.depth = firstImage->Depth - 1; + surf->ss2.mip_count = intelObj->_MaxLevel - tObj->BaseLevel; + surf->ss2.width = firstImage->Width - 1; + surf->ss2.height = firstImage->Height - 1; + brw_set_surface_tiling(surf, intelObj->mt->region->tiling); + surf->ss3.pitch = (intelObj->mt->region->pitch * intelObj->mt->cpp) - 1; + surf->ss3.depth = firstImage->Depth - 1; - surf.ss4.min_lod = 0; + surf->ss4.min_lod = 0; if (tObj->Target == GL_TEXTURE_CUBE_MAP) { - surf.ss0.cube_pos_x = 1; - surf.ss0.cube_pos_y = 1; - surf.ss0.cube_pos_z = 1; - surf.ss0.cube_neg_x = 1; - surf.ss0.cube_neg_y = 1; - surf.ss0.cube_neg_z = 1; + surf->ss0.cube_pos_x = 1; + surf->ss0.cube_pos_y = 1; + surf->ss0.cube_pos_z = 1; + surf->ss0.cube_neg_x = 1; + surf->ss0.cube_neg_y = 1; + surf->ss0.cube_neg_z = 1; } - map = brw_state_batch(brw, sizeof(surf), 32, - &brw->wm.surf_bo[surf_index], - &brw->wm.surf_offset[surf_index]); - memcpy(map, &surf, sizeof(surf)); - /* Emit relocation to surface contents */ - drm_intel_bo_emit_reloc(brw->wm.surf_bo[surf_index], + drm_intel_bo_emit_reloc(brw->intel.batch.bo, brw->wm.surf_offset[surf_index] + offsetof(struct brw_surface_state, ss1), intelObj->mt->region->buffer, 0, @@ -271,37 +240,38 @@ void brw_create_constant_surface(struct brw_context *brw, drm_intel_bo *bo, int width, - drm_intel_bo **out_bo, uint32_t *out_offset) { + struct intel_context *intel = &brw->intel; const GLint w = width - 1; - struct brw_surface_state surf; - void *map; + struct brw_surface_state *surf; - memset(&surf, 0, sizeof(surf)); + surf = brw_state_batch(brw, sizeof(*surf), 32, out_offset); + memset(surf, 0, sizeof(*surf)); - surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - surf.ss0.surface_type = BRW_SURFACE_BUFFER; - surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + surf->ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf->ss0.surface_type = BRW_SURFACE_BUFFER; + surf->ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; - assert(bo); - surf.ss1.base_addr = bo->offset; /* reloc */ + if (intel->gen >= 6) + surf->ss0.render_cache_read_write = 1; - surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ - surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ - surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ - surf.ss3.pitch = (width * 16) - 1; /* ignored?? */ - brw_set_surface_tiling(&surf, I915_TILING_NONE); /* tiling now allowed */ + assert(bo); + surf->ss1.base_addr = bo->offset; /* reloc */ - map = brw_state_batch(brw, sizeof(surf), 32, out_bo, out_offset); - memcpy(map, &surf, sizeof(surf)); + surf->ss2.width = w & 0x7f; /* bits 6:0 of size or width */ + surf->ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ + surf->ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ + surf->ss3.pitch = (width * 16) - 1; /* ignored?? */ + brw_set_surface_tiling(surf, I915_TILING_NONE); /* tiling now allowed */ /* Emit relocation to surface contents. Section 5.1.1 of the gen4 * bspec ("Data Cache") says that the data cache does not exist as * a separate cache and is just the sampler cache. */ - drm_intel_bo_emit_reloc(*out_bo, (*out_offset + - offsetof(struct brw_surface_state, ss1)), + drm_intel_bo_emit_reloc(brw->intel.batch.bo, + (*out_offset + + offsetof(struct brw_surface_state, ss1)), bo, 0, I915_GEM_DOMAIN_SAMPLER, 0); } @@ -380,16 +350,14 @@ static void upload_wm_constant_surface(struct brw_context *brw ) * it. */ if (brw->wm.const_bo == 0) { - if (brw->wm.surf_bo[surf] != NULL) { - drm_intel_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = NULL; + if (brw->wm.surf_offset[surf]) { brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; + brw->wm.surf_offset[surf] = 0; } return; } brw_create_constant_surface(brw, brw->wm.const_bo, params->NumParameters, - &brw->wm.surf_bo[surf], &brw->wm.surf_offset[surf]); brw->state.dirty.brw |= BRW_NEW_WM_SURFACES; } @@ -404,6 +372,28 @@ const struct brw_tracked_state brw_wm_constant_surface = { .emit = upload_wm_constant_surface, }; +static void +brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) +{ + struct intel_context *intel = &brw->intel; + struct brw_surface_state *surf; + + surf = brw_state_batch(brw, sizeof(*surf), 32, + &brw->wm.surf_offset[unit]); + memset(surf, 0, sizeof(*surf)); + + surf->ss0.surface_type = BRW_SURFACE_NULL; + surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + + if (intel->gen < 6) { + /* _NEW_COLOR */ + surf->ss0.color_blend = 0; + surf->ss0.writedisable_red = 1; + surf->ss0.writedisable_green = 1; + surf->ss0.writedisable_blue = 1; + surf->ss0.writedisable_alpha = 1; + } +} /** * Sets up a surface state structure to point at the given region. @@ -417,123 +407,57 @@ brw_update_renderbuffer_surface(struct brw_context *brw, { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; - drm_intel_bo *region_bo = NULL; struct intel_renderbuffer *irb = intel_renderbuffer(rb); - struct intel_region *region = irb ? irb->region : NULL; - struct { - unsigned int surface_type; - unsigned int surface_format; - unsigned int width, height, pitch, cpp; - GLubyte color_mask[4]; - GLboolean color_blend; - uint32_t tiling; - uint32_t draw_x; - uint32_t draw_y; - } key; - struct brw_surface_state surf; - void *map; - - memset(&key, 0, sizeof(key)); - - if (region != NULL) { - region_bo = region->buffer; - - key.surface_type = BRW_SURFACE_2D; - switch (irb->Base.Format) { - /* XRGB and ARGB are treated the same here because the chips in this - * family cannot render to XRGB targets. This means that we have to - * mask writes to alpha (ala glColorMask) and reconfigure the alpha - * blending hardware to use GL_ONE (or GL_ZERO) for cases where - * GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is used. - */ - case MESA_FORMAT_ARGB8888: - case MESA_FORMAT_XRGB8888: - key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - break; - case MESA_FORMAT_SARGB8: - key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB; - break; - case MESA_FORMAT_RGB565: - key.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; - break; - case MESA_FORMAT_ARGB1555: - key.surface_format = BRW_SURFACEFORMAT_B5G5R5A1_UNORM; - break; - case MESA_FORMAT_ARGB4444: - key.surface_format = BRW_SURFACEFORMAT_B4G4R4A4_UNORM; - break; - case MESA_FORMAT_A8: - key.surface_format = BRW_SURFACEFORMAT_A8_UNORM; - break; - case MESA_FORMAT_R8: - key.surface_format = BRW_SURFACEFORMAT_R8_UNORM; - break; - case MESA_FORMAT_R16: - key.surface_format = BRW_SURFACEFORMAT_R16_UNORM; - break; - case MESA_FORMAT_RG88: - key.surface_format = BRW_SURFACEFORMAT_R8G8_UNORM; - break; - case MESA_FORMAT_RG1616: - key.surface_format = BRW_SURFACEFORMAT_R16G16_UNORM; - break; - default: - _mesa_problem(ctx, "Bad renderbuffer format: %d\n", irb->Base.Format); - } - key.tiling = region->tiling; - key.width = rb->Width; - key.height = rb->Height; - key.pitch = region->pitch; - key.cpp = region->cpp; - key.draw_x = region->draw_x; - key.draw_y = region->draw_y; - } else { - key.surface_type = BRW_SURFACE_NULL; - key.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; - key.tiling = I915_TILING_X; - key.width = 1; - key.height = 1; - key.cpp = 4; - key.draw_x = 0; - key.draw_y = 0; - } + struct intel_region *region = irb->region; + struct brw_surface_state *surf; - if (intel->gen < 6) { - /* _NEW_COLOR */ - memcpy(key.color_mask, ctx->Color.ColorMask[unit], - sizeof(key.color_mask)); + surf = brw_state_batch(brw, sizeof(*surf), 32, + &brw->wm.surf_offset[unit]); + memset(surf, 0, sizeof(*surf)); - /* As mentioned above, disable writes to the alpha component when the - * renderbuffer is XRGB. + switch (irb->Base.Format) { + case MESA_FORMAT_XRGB8888: + /* XRGB is handled as ARGB because the chips in this family + * cannot render to XRGB targets. This means that we have to + * mask writes to alpha (ala glColorMask) and reconfigure the + * alpha blending hardware to use GL_ONE (or GL_ZERO) for + * cases where GL_DST_ALPHA (or GL_ONE_MINUS_DST_ALPHA) is + * used. */ - if (ctx->DrawBuffer->Visual.alphaBits == 0) - key.color_mask[3] = GL_FALSE; - - key.color_blend = (!ctx->Color._LogicOpEnabled && - (ctx->Color.BlendEnabled & (1 << unit))); + surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + break; + case MESA_FORMAT_SARGB8: + /* without GL_EXT_framebuffer_sRGB we shouldn't bind sRGB + surfaces to the blend/update as sRGB */ + if (ctx->Color.sRGBEnabled) + surf->ss0.surface_format = brw_format_for_mesa_format[irb->Base.Format]; + else + surf->ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + break; + default: + surf->ss0.surface_format = brw_format_for_mesa_format[irb->Base.Format]; + assert(surf->ss0.surface_format != 0); } - memset(&surf, 0, sizeof(surf)); - - surf.ss0.surface_format = key.surface_format; - surf.ss0.surface_type = key.surface_type; - if (key.tiling == I915_TILING_NONE) { - surf.ss1.base_addr = (key.draw_x + key.draw_y * key.pitch) * key.cpp; + surf->ss0.surface_type = BRW_SURFACE_2D; + if (region->tiling == I915_TILING_NONE) { + surf->ss1.base_addr = (region->draw_x + + region->draw_y * region->pitch) * region->cpp; } else { uint32_t tile_base, tile_x, tile_y; - uint32_t pitch = key.pitch * key.cpp; + uint32_t pitch = region->pitch * region->cpp; - if (key.tiling == I915_TILING_X) { - tile_x = key.draw_x % (512 / key.cpp); - tile_y = key.draw_y % 8; - tile_base = ((key.draw_y / 8) * (8 * pitch)); - tile_base += (key.draw_x - tile_x) / (512 / key.cpp) * 4096; + if (region->tiling == I915_TILING_X) { + tile_x = region->draw_x % (512 / region->cpp); + tile_y = region->draw_y % 8; + tile_base = ((region->draw_y / 8) * (8 * pitch)); + tile_base += (region->draw_x - tile_x) / (512 / region->cpp) * 4096; } else { /* Y */ - tile_x = key.draw_x % (128 / key.cpp); - tile_y = key.draw_y % 32; - tile_base = ((key.draw_y / 32) * (32 * pitch)); - tile_base += (key.draw_x - tile_x) / (128 / key.cpp) * 4096; + tile_x = region->draw_x % (128 / region->cpp); + tile_y = region->draw_y % 32; + tile_base = ((region->draw_y / 32) * (32 * pitch)); + tile_base += (region->draw_x - tile_x) / (128 / region->cpp) * 4096; } assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0)); assert(tile_x % 4 == 0); @@ -541,41 +465,40 @@ brw_update_renderbuffer_surface(struct brw_context *brw, /* Note that the low bits of these fields are missing, so * there's the possibility of getting in trouble. */ - surf.ss1.base_addr = tile_base; - surf.ss5.x_offset = tile_x / 4; - surf.ss5.y_offset = tile_y / 2; + surf->ss1.base_addr = tile_base; + surf->ss5.x_offset = tile_x / 4; + surf->ss5.y_offset = tile_y / 2; } - if (region_bo != NULL) - surf.ss1.base_addr += region_bo->offset; /* reloc */ + surf->ss1.base_addr += region->buffer->offset; /* reloc */ - surf.ss2.width = key.width - 1; - surf.ss2.height = key.height - 1; - brw_set_surface_tiling(&surf, key.tiling); - surf.ss3.pitch = (key.pitch * key.cpp) - 1; + surf->ss2.width = rb->Width - 1; + surf->ss2.height = rb->Height - 1; + brw_set_surface_tiling(surf, region->tiling); + surf->ss3.pitch = (region->pitch * region->cpp) - 1; if (intel->gen < 6) { /* _NEW_COLOR */ - surf.ss0.color_blend = key.color_blend; - surf.ss0.writedisable_red = !key.color_mask[0]; - surf.ss0.writedisable_green = !key.color_mask[1]; - surf.ss0.writedisable_blue = !key.color_mask[2]; - surf.ss0.writedisable_alpha = !key.color_mask[3]; + surf->ss0.color_blend = (!ctx->Color._LogicOpEnabled && + (ctx->Color.BlendEnabled & (1 << unit))); + surf->ss0.writedisable_red = !ctx->Color.ColorMask[unit][0]; + surf->ss0.writedisable_green = !ctx->Color.ColorMask[unit][1]; + surf->ss0.writedisable_blue = !ctx->Color.ColorMask[unit][2]; + /* As mentioned above, disable writes to the alpha component when the + * renderbuffer is XRGB. + */ + if (ctx->DrawBuffer->Visual.alphaBits == 0) + surf->ss0.writedisable_alpha = 1; + else + surf->ss0.writedisable_alpha = !ctx->Color.ColorMask[unit][3]; } - map = brw_state_batch(brw, sizeof(surf), 32, - &brw->wm.surf_bo[unit], - &brw->wm.surf_offset[unit]); - memcpy(map, &surf, sizeof(surf)); - - if (region_bo != NULL) { - drm_intel_bo_emit_reloc(brw->wm.surf_bo[unit], - brw->wm.surf_offset[unit] + - offsetof(struct brw_surface_state, ss1), - region_bo, - surf.ss1.base_addr - region_bo->offset, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER); - } + drm_intel_bo_emit_reloc(brw->intel.batch.bo, + brw->wm.surf_offset[unit] + + offsetof(struct brw_surface_state, ss1), + region->buffer, + surf->ss1.base_addr - region->buffer->offset, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER); } static void @@ -591,6 +514,11 @@ prepare_wm_surfaces(struct brw_context *brw) struct intel_renderbuffer *irb = intel_renderbuffer(rb); struct intel_region *region = irb ? irb->region : NULL; + if (region == NULL || region->buffer == NULL) { + brw->intel.Fallback = GL_TRUE; /* boolean, not bitfield */ + return; + } + brw_add_validated_bo(brw, region->buffer); nr_surfaces = SURF_INDEX_DRAW(i) + 1; } @@ -635,12 +563,16 @@ upload_wm_surfaces(struct brw_context *brw) /* Update surfaces for drawing buffers */ if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { - brw_update_renderbuffer_surface(brw, - ctx->DrawBuffer->_ColorDrawBuffers[i], - i); + if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) { + brw_update_renderbuffer_surface(brw, + ctx->DrawBuffer->_ColorDrawBuffers[i], + i); + } else { + brw_update_null_renderbuffer_surface(brw, i); + } } } else { - brw_update_renderbuffer_surface(brw, NULL, 0); + brw_update_null_renderbuffer_surface(brw, 0); } /* Update surfaces for textures */ @@ -652,8 +584,7 @@ upload_wm_surfaces(struct brw_context *brw) if (texUnit->_ReallyEnabled) { brw_update_texture_surface(ctx, i); } else { - drm_intel_bo_unreference(brw->wm.surf_bo[surf]); - brw->wm.surf_bo[surf] = NULL; + brw->wm.surf_offset[surf] = 0; } } @@ -686,16 +617,11 @@ brw_wm_upload_binding_table(struct brw_context *brw) * space for the binding table. */ bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_WM_MAX_SURF, - 32, &brw->wm.bind_bo, &brw->wm.bind_bo_offset); + 32, &brw->wm.bind_bo_offset); for (i = 0; i < BRW_WM_MAX_SURF; i++) { /* BRW_NEW_WM_SURFACES */ bind[i] = brw->wm.surf_offset[i]; - if (brw->wm.surf_bo[i]) { - bind[i] = brw->wm.surf_offset[i]; - } else { - bind[i] = 0; - } } brw->state.dirty.brw |= BRW_NEW_BINDING_TABLE; diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c index c2631a7b4d..d1648a102d 100644 --- a/src/mesa/drivers/dri/i965/gen6_cc.c +++ b/src/mesa/drivers/dri/i965/gen6_cc.c @@ -66,12 +66,12 @@ blend_state_populate_key(struct brw_context *brw, /* _NEW_COLOR */ key->color_blend = ctx->Color.BlendEnabled; if (key->color_blend) { - key->blend_eq_rgb = ctx->Color.BlendEquationRGB; - key->blend_eq_a = ctx->Color.BlendEquationA; - key->blend_src_rgb = ctx->Color.BlendSrcRGB; - key->blend_dst_rgb = ctx->Color.BlendDstRGB; - key->blend_src_a = ctx->Color.BlendSrcA; - key->blend_dst_a = ctx->Color.BlendDstA; + key->blend_eq_rgb = ctx->Color.Blend[0].EquationRGB; + key->blend_eq_a = ctx->Color.Blend[0].EquationA; + key->blend_src_rgb = ctx->Color.Blend[0].SrcRGB; + key->blend_dst_rgb = ctx->Color.Blend[0].DstRGB; + key->blend_src_a = ctx->Color.Blend[0].SrcA; + key->blend_dst_a = ctx->Color.Blend[0].DstA; } /* _NEW_COLOR */ @@ -254,14 +254,14 @@ prepare_color_calc_state(struct brw_context *brw) color_calc_state_populate_key(brw, &key); - drm_intel_bo_unreference(brw->cc.state_bo); - brw->cc.state_bo = brw_search_cache(&brw->cache, BRW_COLOR_CALC_STATE, + drm_intel_bo_unreference(brw->cc.color_calc_state_bo); + brw->cc.color_calc_state_bo = brw_search_cache(&brw->cache, BRW_COLOR_CALC_STATE, &key, sizeof(key), NULL, 0, NULL); - if (brw->cc.state_bo == NULL) - brw->cc.state_bo = color_calc_state_create_from_key(brw, &key); + if (brw->cc.color_calc_state_bo == NULL) + brw->cc.color_calc_state_bo = color_calc_state_create_from_key(brw, &key); } const struct brw_tracked_state gen6_color_calc_state = { @@ -278,17 +278,17 @@ static void upload_cc_state_pointers(struct brw_context *brw) struct intel_context *intel = &brw->intel; BEGIN_BATCH(4); - OUT_BATCH(CMD_3D_CC_STATE_POINTERS << 16 | (4 - 2)); + OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2)); OUT_RELOC(brw->cc.blend_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); OUT_RELOC(brw->cc.depth_stencil_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); - OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); + OUT_RELOC(brw->cc.color_calc_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); ADVANCE_BATCH(); } static void prepare_cc_state_pointers(struct brw_context *brw) { - brw_add_validated_bo(brw, brw->cc.state_bo); + brw_add_validated_bo(brw, brw->cc.color_calc_state_bo); brw_add_validated_bo(brw, brw->cc.blend_state_bo); brw_add_validated_bo(brw, brw->cc.depth_stencil_state_bo); } diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index c7c4eb1f27..d6c1f1c893 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -43,7 +43,10 @@ upload_clip_state(struct brw_context *brw) depth_clamp = GEN6_CLIP_Z_TEST; if (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION) { - provoking = 0; + provoking = + (0 << GEN6_CLIP_TRI_PROVOKE_SHIFT) | + (1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT) | + (0 << GEN6_CLIP_LINE_PROVOKE_SHIFT); } else { provoking = (2 << GEN6_CLIP_TRI_PROVOKE_SHIFT) | @@ -55,7 +58,7 @@ upload_clip_state(struct brw_context *brw) userclip = (1 << brw_count_bits(ctx->Transform.ClipPlanesEnabled)) - 1; BEGIN_BATCH(4); - OUT_BATCH(CMD_3D_CLIP_STATE << 16 | (4 - 2)); + OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE); OUT_BATCH(GEN6_CLIP_ENABLE | GEN6_CLIP_API_OGL | @@ -65,7 +68,7 @@ upload_clip_state(struct brw_context *brw) depth_clamp | provoking); OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT | - U_FIXED(225.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT | + U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT | GEN6_CLIP_FORCE_ZERO_RTAINDEX); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c index 6127b9197a..7296c7cd1b 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c @@ -37,7 +37,7 @@ upload_gs_state(struct brw_context *brw) /* Disable all the constant buffers. */ BEGIN_BATCH(5); - OUT_BATCH(CMD_3D_CONSTANT_GS_STATE << 16 | (5 - 2)); + OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2)); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); @@ -46,7 +46,7 @@ upload_gs_state(struct brw_context *brw) if (brw->gs.prog_bo) { BEGIN_BATCH(7); - OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2)); + OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); OUT_RELOC(brw->gs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_BATCH(GEN6_GS_SPF_MODE | (0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | @@ -62,7 +62,7 @@ upload_gs_state(struct brw_context *brw) ADVANCE_BATCH(); } else { BEGIN_BATCH(7); - OUT_BATCH(CMD_3D_GS_STATE << 16 | (7 - 2)); + OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); OUT_BATCH(0); /* prog_bo */ OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) | (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); diff --git a/src/mesa/drivers/dri/i965/gen6_sampler_state.c b/src/mesa/drivers/dri/i965/gen6_sampler_state.c index fc5d391c3c..f65c651bdf 100644 --- a/src/mesa/drivers/dri/i965/gen6_sampler_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sampler_state.c @@ -36,7 +36,7 @@ upload_sampler_state_pointers(struct brw_context *brw) struct intel_context *intel = &brw->intel; BEGIN_BATCH(4); - OUT_BATCH(CMD_3D_SAMPLER_STATE_POINTERS << 16 | + OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS << 16 | VS_SAMPLER_STATE_CHANGE | GS_SAMPLER_STATE_CHANGE | PS_SAMPLER_STATE_CHANGE | diff --git a/src/mesa/drivers/dri/i965/gen6_scissor_state.c b/src/mesa/drivers/dri/i965/gen6_scissor_state.c index b57126c793..12b65826ae 100644 --- a/src/mesa/drivers/dri/i965/gen6_scissor_state.c +++ b/src/mesa/drivers/dri/i965/gen6_scissor_state.c @@ -92,7 +92,7 @@ static void upload_scissor_state_pointers(struct brw_context *brw) struct intel_context *intel = &brw->intel; BEGIN_BATCH(2); - OUT_BATCH(CMD_3D_SCISSOR_STATE_POINTERS << 16 | (2 - 2)); + OUT_BATCH(_3DSTATE_SCISSOR_STATE_POINTERS << 16 | (2 - 2)); OUT_RELOC(brw->sf.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 4cd2d69583..50a5ad38c6 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -33,9 +33,10 @@ #include "intel_batchbuffer.h" static uint32_t -get_attr_override(struct brw_context *brw, int fs_attr) +get_attr_override(struct brw_context *brw, int fs_attr, int two_side_color) { int attr_index = 0, i, vs_attr; + int bfc = 0; if (fs_attr <= FRAG_ATTRIB_TEX7) vs_attr = fs_attr; @@ -53,10 +54,36 @@ get_attr_override(struct brw_context *brw, int fs_attr) * be FRAG_ATTRIB_*. */ for (i = 1; i < vs_attr; i++) { + if (i == VERT_RESULT_PSIZ) + continue; if (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(i)) attr_index++; } + assert(attr_index < 32); + + if (two_side_color) { + if ((brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL1)) && + (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC1))) { + assert(brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)); + assert(brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0)); + bfc = 2; + } else if ((brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_COL0)) && + (brw->vs.prog_data->outputs_written & BITFIELD64_BIT(VERT_RESULT_BFC0))) + bfc = 1; + } + + if (bfc && (fs_attr <= FRAG_ATTRIB_TEX7 && fs_attr > FRAG_ATTRIB_WPOS)) { + if (fs_attr == FRAG_ATTRIB_COL0) + attr_index |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT); + else if (fs_attr == FRAG_ATTRIB_COL1 && bfc == 2) { + attr_index++; + attr_index |= (ATTRIBUTE_SWIZZLE_INPUTATTR_FACING << ATTRIBUTE_SWIZZLE_SHIFT); + } else { + attr_index += bfc; + } + } + return attr_index; } @@ -75,6 +102,7 @@ upload_sf_state(struct brw_context *brw) GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; int attr = 0; int urb_start; + int two_side_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide); /* _NEW_TRANSFORM */ if (ctx->Transform.ClipPlanesEnabled) @@ -181,7 +209,7 @@ upload_sf_state(struct brw_context *brw) ctx->Point._Attenuated)) dw4 |= GEN6_SF_USE_STATE_POINT_WIDTH; - dw4 |= U_FIXED(CLAMP(ctx->Point.Size, 0.125, 225.875), 3) << + dw4 |= U_FIXED(CLAMP(ctx->Point.Size, 0.125, 255.875), 3) << GEN6_SF_POINT_WIDTH_SHIFT; if (ctx->Point.SpriteOrigin == GL_LOWER_LEFT) dw1 |= GEN6_SF_POINT_SPRITE_LOWERLEFT; @@ -211,7 +239,7 @@ upload_sf_state(struct brw_context *brw) } BEGIN_BATCH(20); - OUT_BATCH(CMD_3D_SF_STATE << 16 | (20 - 2)); + OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2)); OUT_BATCH(dw1); OUT_BATCH(dw2); OUT_BATCH(dw3); @@ -224,7 +252,7 @@ upload_sf_state(struct brw_context *brw) for (; attr < 64; attr++) { if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr); + attr_overrides |= get_attr_override(brw, attr, two_side_color); attr++; break; } @@ -232,7 +260,7 @@ upload_sf_state(struct brw_context *brw) for (; attr < 64; attr++) { if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)) { - attr_overrides |= get_attr_override(brw, attr) << 16; + attr_overrides |= get_attr_override(brw, attr, two_side_color) << 16; attr++; break; } diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c index de97fd3783..c3819f9b36 100644 --- a/src/mesa/drivers/dri/i965/gen6_urb.c +++ b/src/mesa/drivers/dri/i965/gen6_urb.c @@ -34,19 +34,26 @@ static void prepare_urb( struct brw_context *brw ) { - brw->urb.nr_vs_entries = 24; - if (brw->gs.prog_bo) - brw->urb.nr_gs_entries = 4; - else - brw->urb.nr_gs_entries = 0; + int urb_size, max_urb_entry; + struct intel_context *intel = &brw->intel; + + if (IS_GT1(intel->intelScreen->deviceID)) { + urb_size = 32 * 1024; + max_urb_entry = 128; + } else { + urb_size = 64 * 1024; + max_urb_entry = 256; + } + + brw->urb.nr_vs_entries = max_urb_entry; + brw->urb.nr_gs_entries = max_urb_entry; + /* CACHE_NEW_VS_PROG */ brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1); - /* Check that the number of URB rows (8 floats each) allocated is less - * than the URB space. - */ - assert((brw->urb.nr_vs_entries + - brw->urb.nr_gs_entries) * brw->urb.vs_size * 8 < 64 * 1024); + if (2 * brw->urb.vs_size > urb_size) + brw->urb.nr_vs_entries = brw->urb.nr_gs_entries = + (urb_size ) / (2 * brw->urb.vs_size); } static void @@ -60,7 +67,7 @@ upload_urb(struct brw_context *brw) assert(!brw->gs.prog_bo || brw->urb.vs_size < 5); BEGIN_BATCH(3); - OUT_BATCH(CMD_URB << 16 | (3 - 2)); + OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) | ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT)); OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) | diff --git a/src/mesa/drivers/dri/i965/gen6_viewport_state.c b/src/mesa/drivers/dri/i965/gen6_viewport_state.c index d691bbebc8..cd7d209e3e 100644 --- a/src/mesa/drivers/dri/i965/gen6_viewport_state.c +++ b/src/mesa/drivers/dri/i965/gen6_viewport_state.c @@ -117,7 +117,7 @@ static void upload_viewport_state_pointers(struct brw_context *brw) struct intel_context *intel = &brw->intel; BEGIN_BATCH(4); - OUT_BATCH(CMD_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) | + OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) | GEN6_CC_VIEWPORT_MODIFY | GEN6_SF_VIEWPORT_MODIFY | GEN6_CLIP_VIEWPORT_MODIFY); diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c index 4ef9e2e607..ce0b8ea7ea 100644 --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c @@ -47,7 +47,7 @@ upload_vs_state(struct brw_context *brw) if (brw->vs.prog_data->nr_params == 0 && !ctx->Transform.ClipPlanesEnabled) { /* Disable the push constant buffers. */ BEGIN_BATCH(5); - OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | (5 - 2)); + OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2)); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); @@ -112,7 +112,7 @@ upload_vs_state(struct brw_context *brw) assert(param_regs <= 32); BEGIN_BATCH(5); - OUT_BATCH(CMD_3D_CONSTANT_VS_STATE << 16 | + OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | GEN6_CONSTANT_BUFFER_0_ENABLE | (5 - 2)); OUT_RELOC(constant_bo, @@ -127,15 +127,17 @@ upload_vs_state(struct brw_context *brw) } BEGIN_BATCH(6); - OUT_BATCH(CMD_3D_VS_STATE << 16 | (6 - 2)); + OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); OUT_RELOC(brw->vs.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | + GEN6_VS_FLOATING_POINT_MODE_ALT | (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT)); OUT_BATCH(0); /* scratch space base offset */ OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) | (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) | (0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT)); - OUT_BATCH((0 << GEN6_VS_MAX_THREADS_SHIFT) | + + OUT_BATCH(((60 - 1) << GEN6_VS_MAX_THREADS_SHIFT) | /* max 60 threads for gen6 */ GEN6_VS_STATISTICS_ENABLE | GEN6_VS_ENABLE); ADVANCE_BATCH(); diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c index d80df4e254..78901ecac5 100644 --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c @@ -107,7 +107,7 @@ upload_wm_state(struct brw_context *brw) if (brw->wm.prog_data->nr_params == 0) { /* Disable the push constant buffers. */ BEGIN_BATCH(5); - OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | (5 - 2)); + OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2)); OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); @@ -115,7 +115,7 @@ upload_wm_state(struct brw_context *brw) ADVANCE_BATCH(); } else { BEGIN_BATCH(5); - OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | + OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | GEN6_CONSTANT_BUFFER_0_ENABLE | (5 - 2)); OUT_RELOC(brw->wm.push_const_bo, @@ -133,6 +133,9 @@ upload_wm_state(struct brw_context *brw) dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0; dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5; + /* OpenGL non-ieee floating point mode */ + dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT; + /* BRW_NEW_NR_WM_SURFACES */ dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT; @@ -178,7 +181,7 @@ upload_wm_state(struct brw_context *brw) GEN6_WM_NUM_SF_OUTPUTS_SHIFT; BEGIN_BATCH(9); - OUT_BATCH(CMD_3D_WM_STATE << 16 | (9 - 2)); + OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); OUT_BATCH(dw2); OUT_BATCH(0); /* scratch space base offset */ diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.c b/src/mesa/drivers/dri/intel/intel_batchbuffer.c index 21fc9ece88..42b4f923e0 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.c @@ -27,81 +27,89 @@ #include "intel_context.h" #include "intel_batchbuffer.h" +#include "intel_buffer_objects.h" #include "intel_decode.h" #include "intel_reg.h" #include "intel_bufmgr.h" #include "intel_buffers.h" -void -intel_batchbuffer_reset(struct intel_batchbuffer *batch) +struct cached_batch_item { + struct cached_batch_item *next; + uint16_t header; + uint16_t size; +}; + +static void clear_cache( struct intel_context *intel ) { - struct intel_context *intel = batch->intel; + struct cached_batch_item *item = intel->batch.cached_items; - if (batch->buf != NULL) { - drm_intel_bo_unreference(batch->buf); - batch->buf = NULL; + while (item) { + struct cached_batch_item *next = item->next; + free(item); + item = next; } - batch->buf = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer", - intel->maxBatchSize, 4096); - drm_intel_gem_bo_map_gtt(batch->buf); - batch->map = batch->buf->virtual; - - batch->size = intel->maxBatchSize; - batch->ptr = batch->map; - batch->reserved_space = BATCH_RESERVED; - batch->dirty_state = ~0; - batch->state_batch_offset = batch->size; + intel->batch.cached_items = NULL; } -struct intel_batchbuffer * -intel_batchbuffer_alloc(struct intel_context *intel) +void +intel_batchbuffer_reset(struct intel_context *intel) { - struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1); + if (intel->batch.bo != NULL) { + drm_intel_bo_unreference(intel->batch.bo); + intel->batch.bo = NULL; + } + clear_cache(intel); - batch->intel = intel; - intel_batchbuffer_reset(batch); + intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer", + intel->maxBatchSize, 4096); - return batch; + intel->batch.reserved_space = BATCH_RESERVED; + intel->batch.state_batch_offset = intel->batch.bo->size; + intel->batch.used = 0; } void -intel_batchbuffer_free(struct intel_batchbuffer *batch) +intel_batchbuffer_free(struct intel_context *intel) { - if (batch->map) { - drm_intel_gem_bo_unmap_gtt(batch->buf); - batch->map = NULL; - } - dri_bo_unreference(batch->buf); - batch->buf = NULL; - free(batch); + drm_intel_bo_unreference(intel->batch.bo); + clear_cache(intel); } - /* TODO: Push this whole function into bufmgr. */ static void -do_flush_locked(struct intel_batchbuffer *batch, GLuint used) +do_flush_locked(struct intel_context *intel) { - struct intel_context *intel = batch->intel; + struct intel_batchbuffer *batch = &intel->batch; int ret = 0; - int x_off = 0, y_off = 0; - - drm_intel_gem_bo_unmap_gtt(batch->buf); - - batch->ptr = NULL; if (!intel->intelScreen->no_hw) { - drm_intel_bo_exec(batch->buf, used, NULL, 0, - (x_off & 0xffff) | (y_off << 16)); + int ring; + + if (intel->gen < 6 || !batch->is_blit) { + ring = I915_EXEC_RENDER; + } else { + ring = I915_EXEC_BLT; + } + + ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map); + if (ret == 0 && batch->state_batch_offset != batch->bo->size) { + ret = drm_intel_bo_subdata(batch->bo, + batch->state_batch_offset, + batch->bo->size - batch->state_batch_offset, + (char *)batch->map + batch->state_batch_offset); + } + + if (ret == 0) + ret = drm_intel_bo_mrb_exec(batch->bo, 4*batch->used, NULL, 0, 0, ring); } if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) { - drm_intel_bo_map(batch->buf, GL_FALSE); - intel_decode(batch->buf->virtual, used / 4, batch->buf->offset, + intel_decode(batch->map, batch->used, + batch->bo->offset, intel->intelScreen->deviceID, GL_TRUE); - drm_intel_bo_unmap(batch->buf); if (intel->vtbl.debug_batch != NULL) intel->vtbl.debug_batch(intel); @@ -114,80 +122,54 @@ do_flush_locked(struct intel_batchbuffer *batch, GLuint used) } void -_intel_batchbuffer_flush(struct intel_batchbuffer *batch, const char *file, - int line) +_intel_batchbuffer_flush(struct intel_context *intel, + const char *file, int line) { - struct intel_context *intel = batch->intel; - GLuint used = batch->ptr - batch->map; - - if (intel->first_post_swapbuffers_batch == NULL) { - intel->first_post_swapbuffers_batch = intel->batch->buf; - drm_intel_bo_reference(intel->first_post_swapbuffers_batch); - } - - if (used == 0) + if (intel->batch.used == 0) return; if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line, - used); + 4*intel->batch.used); - batch->reserved_space = 0; + intel->batch.reserved_space = 0; if (intel->always_flush_cache) { - intel_batchbuffer_emit_mi_flush(batch); - used = batch->ptr - batch->map; - } - - /* Round batchbuffer usage to 2 DWORDs. */ - - if ((used & 4) == 0) { - *(GLuint *) (batch->ptr) = 0; /* noop */ - batch->ptr += 4; - used = batch->ptr - batch->map; + intel_batchbuffer_emit_mi_flush(intel); } /* Mark the end of the buffer. */ - *(GLuint *) (batch->ptr) = MI_BATCH_BUFFER_END; - batch->ptr += 4; - used = batch->ptr - batch->map; - assert (used <= batch->buf->size); - - /* Workaround for recursive batchbuffer flushing: If the window is - * moved, we can get into a case where we try to flush during a - * flush. What happens is that when we try to grab the lock for - * the first flush, we detect that the window moved which then - * causes another flush (from the intel_draw_buffer() call in - * intelUpdatePageFlipping()). To work around this we reset the - * batchbuffer tail pointer before trying to get the lock. This - * prevent the nested buffer flush, but a better fix would be to - * avoid that in the first place. */ - batch->ptr = batch->map; + intel_batchbuffer_emit_dword(intel, MI_BATCH_BUFFER_END); + if (intel->batch.used & 1) { + /* Round batchbuffer usage to 2 DWORDs. */ + intel_batchbuffer_emit_dword(intel, MI_NOOP); + } if (intel->vtbl.finish_batch) intel->vtbl.finish_batch(intel); + intel_upload_finish(intel); + /* Check that we didn't just wrap our batchbuffer at a bad time. */ assert(!intel->no_batch_wrap); - do_flush_locked(batch, used); + do_flush_locked(intel); if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) { fprintf(stderr, "waiting for idle\n"); - drm_intel_bo_map(batch->buf, GL_TRUE); - drm_intel_bo_unmap(batch->buf); + drm_intel_bo_wait_rendering(intel->batch.bo); } /* Reset the buffer: */ - intel_batchbuffer_reset(batch); + intel_batchbuffer_reset(intel); } /* This is the only way buffers get added to the validate list. */ GLboolean -intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, +intel_batchbuffer_emit_reloc(struct intel_context *intel, drm_intel_bo *buffer, uint32_t read_domains, uint32_t write_domain, uint32_t delta) @@ -196,58 +178,98 @@ intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, assert(delta < buffer->size); - if (batch->ptr - batch->map > batch->buf->size) - printf ("bad relocation ptr %p map %p offset %d size %lu\n", - batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); - ret = drm_intel_bo_emit_reloc(batch->buf, batch->ptr - batch->map, + ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used, buffer, delta, read_domains, write_domain); + assert(ret == 0); + (void)ret; /* * Using the old buffer offset, write in what the right data would be, in case * the buffer doesn't move and we can short-circuit the relocation processing * in the kernel */ - intel_batchbuffer_emit_dword (batch, buffer->offset + delta); + intel_batchbuffer_emit_dword(intel, buffer->offset + delta); return GL_TRUE; } GLboolean -intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch, +intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel, drm_intel_bo *buffer, - uint32_t read_domains, uint32_t write_domain, + uint32_t read_domains, + uint32_t write_domain, uint32_t delta) { int ret; assert(delta < buffer->size); - if (batch->ptr - batch->map > batch->buf->size) - printf ("bad relocation ptr %p map %p offset %d size %lu\n", - batch->ptr, batch->map, batch->ptr - batch->map, batch->buf->size); - ret = drm_intel_bo_emit_reloc_fence(batch->buf, batch->ptr - batch->map, + ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used, buffer, delta, read_domains, write_domain); + assert(ret == 0); + (void)ret; /* * Using the old buffer offset, write in what the right data would * be, in case the buffer doesn't move and we can short-circuit the * relocation processing in the kernel */ - intel_batchbuffer_emit_dword (batch, buffer->offset + delta); + intel_batchbuffer_emit_dword(intel, buffer->offset + delta); return GL_TRUE; } void -intel_batchbuffer_data(struct intel_batchbuffer *batch, - const void *data, GLuint bytes) +intel_batchbuffer_data(struct intel_context *intel, + const void *data, GLuint bytes, bool is_blit) { assert((bytes & 3) == 0); - intel_batchbuffer_require_space(batch, bytes); - __memcpy(batch->ptr, data, bytes); - batch->ptr += bytes; + intel_batchbuffer_require_space(intel, bytes, is_blit); + __memcpy(intel->batch.map + intel->batch.used, data, bytes); + intel->batch.used += bytes >> 2; +} + +void +intel_batchbuffer_cached_advance(struct intel_context *intel) +{ + struct cached_batch_item **prev = &intel->batch.cached_items, *item; + uint32_t sz = (intel->batch.used - intel->batch.emit) * sizeof(uint32_t); + uint32_t *start = intel->batch.map + intel->batch.emit; + uint16_t op = *start >> 16; + + while (*prev) { + uint32_t *old; + + item = *prev; + old = intel->batch.map + item->header; + if (op == *old >> 16) { + if (item->size == sz && memcmp(old, start, sz) == 0) { + if (prev != &intel->batch.cached_items) { + *prev = item->next; + item->next = intel->batch.cached_items; + intel->batch.cached_items = item; + } + intel->batch.used = intel->batch.emit; + return; + } + + goto emit; + } + prev = &item->next; + } + + item = malloc(sizeof(struct cached_batch_item)); + if (item == NULL) + return; + + item->next = intel->batch.cached_items; + intel->batch.cached_items = item; + +emit: + item->size = sz; + item->header = intel->batch.emit; } /* Emit a pipelined flush to either flush render and texture cache for @@ -257,27 +279,35 @@ intel_batchbuffer_data(struct intel_batchbuffer *batch, * This is also used for the always_flush_cache driconf debug option. */ void -intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch) +intel_batchbuffer_emit_mi_flush(struct intel_context *intel) { - struct intel_context *intel = batch->intel; - if (intel->gen >= 6) { - BEGIN_BATCH(8); - - /* XXX workaround: issue any post sync != 0 before write cache flush = 1 */ - OUT_BATCH(_3DSTATE_PIPE_CONTROL); - OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); - OUT_BATCH(0); /* write address */ - OUT_BATCH(0); /* write data */ - - OUT_BATCH(_3DSTATE_PIPE_CONTROL); - OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH | - PIPE_CONTROL_WRITE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_NO_WRITE); - OUT_BATCH(0); /* write address */ - OUT_BATCH(0); /* write data */ - ADVANCE_BATCH(); + if (intel->batch.is_blit) { + BEGIN_BATCH_BLT(4); + OUT_BATCH(MI_FLUSH_DW); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(8); + /* XXX workaround: issue any post sync != 0 before write + * cache flush = 1 + */ + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + + OUT_BATCH(_3DSTATE_PIPE_CONTROL); + OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH | + PIPE_CONTROL_WRITE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_NO_WRITE); + OUT_BATCH(0); /* write address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + } } else if (intel->gen >= 4) { BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_PIPE_CONTROL | diff --git a/src/mesa/drivers/dri/intel/intel_batchbuffer.h b/src/mesa/drivers/dri/intel/intel_batchbuffer.h index 428c027c2f..a0a5c9841c 100644 --- a/src/mesa/drivers/dri/intel/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/intel/intel_batchbuffer.h @@ -7,70 +7,37 @@ #include "intel_bufmgr.h" #include "intel_reg.h" -#define BATCH_SZ 16384 #define BATCH_RESERVED 16 +void intel_batchbuffer_reset(struct intel_context *intel); +void intel_batchbuffer_free(struct intel_context *intel); -struct intel_batchbuffer -{ - struct intel_context *intel; - - drm_intel_bo *buf; - - GLubyte *map; - GLubyte *ptr; - - GLuint size; - uint32_t state_batch_offset; - -#ifdef DEBUG - /** Tracking of BEGIN_BATCH()/OUT_BATCH()/ADVANCE_BATCH() debugging */ - struct { - GLuint total; - GLubyte *start_ptr; - } emit; -#endif - - GLuint dirty_state; - GLuint reserved_space; -}; - -struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context - *intel); - -void intel_batchbuffer_free(struct intel_batchbuffer *batch); - - -void _intel_batchbuffer_flush(struct intel_batchbuffer *batch, +void _intel_batchbuffer_flush(struct intel_context *intel, const char *file, int line); -#define intel_batchbuffer_flush(batch) \ - _intel_batchbuffer_flush(batch, __FILE__, __LINE__) +#define intel_batchbuffer_flush(intel) \ + _intel_batchbuffer_flush(intel, __FILE__, __LINE__) -void intel_batchbuffer_reset(struct intel_batchbuffer *batch); /* Unlike bmBufferData, this currently requires the buffer be mapped. * Consider it a convenience function wrapping multple * intel_buffer_dword() calls. */ -void intel_batchbuffer_data(struct intel_batchbuffer *batch, - const void *data, GLuint bytes); - -void intel_batchbuffer_release_space(struct intel_batchbuffer *batch, - GLuint bytes); +void intel_batchbuffer_data(struct intel_context *intel, + const void *data, GLuint bytes, bool is_blit); -GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, +GLboolean intel_batchbuffer_emit_reloc(struct intel_context *intel, drm_intel_bo *buffer, uint32_t read_domains, uint32_t write_domain, uint32_t offset); -GLboolean intel_batchbuffer_emit_reloc_fenced(struct intel_batchbuffer *batch, +GLboolean intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel, drm_intel_bo *buffer, uint32_t read_domains, uint32_t write_domain, uint32_t offset); -void intel_batchbuffer_emit_mi_flush(struct intel_batchbuffer *batch); +void intel_batchbuffer_emit_mi_flush(struct intel_context *intel); static INLINE uint32_t float_as_int(float f) { @@ -89,83 +56,93 @@ static INLINE uint32_t float_as_int(float f) * work... */ static INLINE GLint -intel_batchbuffer_space(struct intel_batchbuffer *batch) +intel_batchbuffer_space(struct intel_context *intel) { - return (batch->state_batch_offset - batch->reserved_space) - - (batch->ptr - batch->map); + return (intel->batch.state_batch_offset - intel->batch.reserved_space) - intel->batch.used*4; } static INLINE void -intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword) +intel_batchbuffer_emit_dword(struct intel_context *intel, GLuint dword) { #ifdef DEBUG - assert(intel_batchbuffer_space(batch) >= 4); + assert(intel_batchbuffer_space(intel) >= 4); #endif - *(GLuint *) (batch->ptr) = dword; - batch->ptr += 4; + intel->batch.map[intel->batch.used++] = dword; } static INLINE void -intel_batchbuffer_emit_float(struct intel_batchbuffer *batch, float f) +intel_batchbuffer_emit_float(struct intel_context *intel, float f) { - intel_batchbuffer_emit_dword(batch, float_as_int(f)); + intel_batchbuffer_emit_dword(intel, float_as_int(f)); } static INLINE void -intel_batchbuffer_require_space(struct intel_batchbuffer *batch, - GLuint sz) +intel_batchbuffer_require_space(struct intel_context *intel, + GLuint sz, int is_blit) { + + if (intel->gen >= 6 && + intel->batch.is_blit != is_blit && intel->batch.used) { + intel_batchbuffer_flush(intel); + } + + intel->batch.is_blit = is_blit; + #ifdef DEBUG - assert(sz < batch->size - 8); + assert(sz < sizeof(intel->batch.map) - BATCH_RESERVED); #endif - if (intel_batchbuffer_space(batch) < sz) - intel_batchbuffer_flush(batch); + if (intel_batchbuffer_space(intel) < sz) + intel_batchbuffer_flush(intel); } static INLINE void -intel_batchbuffer_begin(struct intel_batchbuffer *batch, int n) +intel_batchbuffer_begin(struct intel_context *intel, int n, bool is_blit) { - intel_batchbuffer_require_space(batch, n * 4); + intel_batchbuffer_require_space(intel, n * 4, is_blit); + + intel->batch.emit = intel->batch.used; #ifdef DEBUG - assert(batch->map); - assert(batch->emit.start_ptr == NULL); - batch->emit.total = n * 4; - batch->emit.start_ptr = batch->ptr; + intel->batch.total = n; #endif } static INLINE void -intel_batchbuffer_advance(struct intel_batchbuffer *batch) +intel_batchbuffer_advance(struct intel_context *intel) { #ifdef DEBUG - unsigned int _n = batch->ptr - batch->emit.start_ptr; - assert(batch->emit.start_ptr != NULL); - if (_n != batch->emit.total) { + struct intel_batchbuffer *batch = &intel->batch; + unsigned int _n = batch->used - batch->emit; + assert(batch->total != 0); + if (_n != batch->total) { fprintf(stderr, "ADVANCE_BATCH: %d of %d dwords emitted\n", - _n, batch->emit.total); + _n, batch->total); abort(); } - batch->emit.start_ptr = NULL; + batch->total = 0; #endif } +void intel_batchbuffer_cached_advance(struct intel_context *intel); + /* Here are the crusty old macros, to be removed: */ #define BATCH_LOCALS -#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel->batch, n) -#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel->batch, d) -#define OUT_BATCH_F(f) intel_batchbuffer_emit_float(intel->batch,f) +#define BEGIN_BATCH(n) intel_batchbuffer_begin(intel, n, false) +#define BEGIN_BATCH_BLT(n) intel_batchbuffer_begin(intel, n, true) +#define OUT_BATCH(d) intel_batchbuffer_emit_dword(intel, d) +#define OUT_BATCH_F(f) intel_batchbuffer_emit_float(intel,f) #define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ - intel_batchbuffer_emit_reloc(intel->batch, buf, \ + intel_batchbuffer_emit_reloc(intel, buf, \ read_domains, write_domain, delta); \ } while (0) #define OUT_RELOC_FENCED(buf, read_domains, write_domain, delta) do { \ - intel_batchbuffer_emit_reloc_fenced(intel->batch, buf, \ + intel_batchbuffer_emit_reloc_fenced(intel, buf, \ read_domains, write_domain, delta); \ } while (0) -#define ADVANCE_BATCH() intel_batchbuffer_advance(intel->batch); +#define ADVANCE_BATCH() intel_batchbuffer_advance(intel); +#define CACHED_BATCH() intel_batchbuffer_cached_advance(intel); #endif diff --git a/src/mesa/drivers/dri/intel/intel_blit.c b/src/mesa/drivers/dri/intel/intel_blit.c index c2917e9b07..e1ab7f1637 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.c +++ b/src/mesa/drivers/dri/intel/intel_blit.c @@ -38,6 +38,7 @@ #include "intel_reg.h" #include "intel_regions.h" #include "intel_batchbuffer.h" +#include "intel_mipmap_tree.h" #define FILE_DEBUG_FLAG DEBUG_BLIT @@ -107,10 +108,6 @@ intelEmitCopyBlit(struct intel_context *intel, drm_intel_bo *aper_array[3]; BATCH_LOCALS; - /* Blits are in a different ringbuffer so we don't use them. */ - if (intel->gen >= 6) - return GL_FALSE; - if (dst_tiling != I915_TILING_NONE) { if (dst_offset & 4095) return GL_FALSE; @@ -126,12 +123,12 @@ intelEmitCopyBlit(struct intel_context *intel, /* do space check before going any further */ do { - aper_array[0] = intel->batch->buf; + aper_array[0] = intel->batch.bo; aper_array[1] = dst_buffer; aper_array[2] = src_buffer; if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_flush(intel); pass++; } else break; @@ -140,7 +137,7 @@ intelEmitCopyBlit(struct intel_context *intel, if (pass >= 2) return GL_FALSE; - intel_batchbuffer_require_space(intel->batch, 8 * 4); + intel_batchbuffer_require_space(intel, 8 * 4, true); DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", __FUNCTION__, src_buffer, src_pitch, src_offset, src_x, src_y, @@ -181,7 +178,7 @@ intelEmitCopyBlit(struct intel_context *intel, assert(dst_x < dst_x2); assert(dst_y < dst_y2); - BEGIN_BATCH(8); + BEGIN_BATCH_BLT(8); OUT_BATCH(CMD); OUT_BATCH(BR13 | (uint16_t)dst_pitch); OUT_BATCH((dst_y << 16) | dst_x); @@ -196,7 +193,7 @@ intelEmitCopyBlit(struct intel_context *intel, src_offset); ADVANCE_BATCH(); - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel); return GL_TRUE; } @@ -209,7 +206,7 @@ intelEmitCopyBlit(struct intel_context *intel, * which we're clearing with triangles. * \param mask bitmask of BUFFER_BIT_* values indicating buffers to clear */ -void +GLbitfield intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) { struct intel_context *intel = intel_context(ctx); @@ -217,11 +214,9 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) GLuint clear_depth; GLboolean all; GLint cx, cy, cw, ch; + GLbitfield fail_mask = 0; BATCH_LOCALS; - /* Blits are in a different ringbuffer so we don't use them. */ - assert(intel->gen < 6); - /* * Compute values for clearing the buffers. */ @@ -242,7 +237,7 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) ch = fb->_Ymax - fb->_Ymin; if (cw == 0 || ch == 0) - return; + return 0; GLuint buf; all = (cw == fb->Width && ch == fb->Height); @@ -338,9 +333,9 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) clear[3], clear[3]); break; default: - _mesa_problem(ctx, "Unexpected renderbuffer format: %d\n", - irb->Base.Format); - clear_val = 0; + fail_mask |= bufBit; + mask &= ~bufBit; + continue; } } @@ -348,15 +343,15 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) assert(y1 < y2); /* do space check before going any further */ - aper_array[0] = intel->batch->buf; + aper_array[0] = intel->batch.bo; aper_array[1] = write_buffer; if (drm_intel_bufmgr_check_aperture_space(aper_array, ARRAY_SIZE(aper_array)) != 0) { - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_flush(intel); } - BEGIN_BATCH(6); + BEGIN_BATCH_BLT(6); OUT_BATCH(CMD); OUT_BATCH(BR13); OUT_BATCH((y1 << 16) | x1); @@ -368,13 +363,15 @@ intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) ADVANCE_BATCH(); if (intel->always_flush_cache) - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel); if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); else mask &= ~bufBit; /* turn off bit, for faster loop exit */ } + + return fail_mask; } GLboolean @@ -393,10 +390,6 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, int dwords = ALIGN(src_size, 8) / 4; uint32_t opcode, br13, blit_cmd; - /* Blits are in a different ringbuffer so we don't use them. */ - if (intel->gen >= 6) - return GL_FALSE; - if (dst_tiling != I915_TILING_NONE) { if (dst_offset & 4095) return GL_FALSE; @@ -417,10 +410,10 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords); - intel_batchbuffer_require_space( intel->batch, - (8 * 4) + - (3 * 4) + - dwords * 4 ); + intel_batchbuffer_require_space(intel, + (8 * 4) + + (3 * 4) + + dwords * 4, true); opcode = XY_SETUP_BLT_CMD; if (cpp == 4) @@ -439,7 +432,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, if (dst_tiling != I915_TILING_NONE) blit_cmd |= XY_DST_TILED; - BEGIN_BATCH(8 + 3); + BEGIN_BATCH_BLT(8 + 3); OUT_BATCH(opcode); OUT_BATCH(br13); OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ @@ -456,11 +449,9 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel, OUT_BATCH(((y + h) << 16) | (x + w)); ADVANCE_BATCH(); - intel_batchbuffer_data( intel->batch, - src_bits, - dwords * 4 ); + intel_batchbuffer_data(intel, src_bits, dwords * 4, true); - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel); return GL_TRUE; } @@ -480,9 +471,6 @@ intel_emit_linear_blit(struct intel_context *intel, GLuint pitch, height; GLboolean ok; - /* Blits are in a different ringbuffer so we don't use them. */ - assert(intel->gen < 6); - /* The pitch given to the GPU must be DWORD aligned, and * we want width to match pitch. Max width is (1 << 15 - 1), * rounding that down to the nearest DWORD is 1 << 15 - 4 @@ -514,3 +502,81 @@ intel_emit_linear_blit(struct intel_context *intel, assert(ok); } } + +/** + * Used to initialize the alpha value of an ARGB8888 teximage after + * loading it from an XRGB8888 source. + * + * This is very common with glCopyTexImage2D(). + */ +void +intel_set_teximage_alpha_to_one(struct gl_context *ctx, + struct intel_texture_image *intel_image) +{ + struct intel_context *intel = intel_context(ctx); + unsigned int image_x, image_y; + uint32_t x1, y1, x2, y2; + uint32_t BR13, CMD; + int pitch, cpp; + drm_intel_bo *aper_array[2]; + struct intel_region *region = intel_image->mt->region; + BATCH_LOCALS; + + assert(intel_image->base.TexFormat == MESA_FORMAT_ARGB8888); + + /* get dest x/y in destination texture */ + intel_miptree_get_image_offset(intel_image->mt, + intel_image->level, + intel_image->face, + 0, + &image_x, &image_y); + + x1 = image_x; + y1 = image_y; + x2 = image_x + intel_image->base.Width; + y2 = image_y + intel_image->base.Height; + + pitch = region->pitch; + cpp = region->cpp; + + DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", + __FUNCTION__, + intel_image->mt->region->buffer, (pitch * cpp), + x1, y1, x2 - x1, y2 - y1); + + BR13 = br13_for_cpp(cpp) | 0xf0 << 16; + CMD = XY_COLOR_BLT_CMD; + CMD |= XY_BLT_WRITE_ALPHA; + + assert(region->tiling != I915_TILING_Y); + +#ifndef I915 + if (region->tiling != I915_TILING_NONE) { + CMD |= XY_DST_TILED; + pitch /= 4; + } +#endif + BR13 |= (pitch * cpp); + + /* do space check before going any further */ + aper_array[0] = intel->batch.bo; + aper_array[1] = region->buffer; + + if (drm_intel_bufmgr_check_aperture_space(aper_array, + ARRAY_SIZE(aper_array)) != 0) { + intel_batchbuffer_flush(intel); + } + + BEGIN_BATCH_BLT(6); + OUT_BATCH(CMD); + OUT_BATCH(BR13); + OUT_BATCH((y1 << 16) | x1); + OUT_BATCH((y2 << 16) | x2); + OUT_RELOC_FENCED(region->buffer, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + OUT_BATCH(0xffffffff); /* white, but only alpha gets written */ + ADVANCE_BATCH(); + + intel_batchbuffer_emit_mi_flush(intel); +} diff --git a/src/mesa/drivers/dri/intel/intel_blit.h b/src/mesa/drivers/dri/intel/intel_blit.h index 0163146573..88322c7b49 100644 --- a/src/mesa/drivers/dri/intel/intel_blit.h +++ b/src/mesa/drivers/dri/intel/intel_blit.h @@ -33,7 +33,7 @@ extern void intelCopyBuffer(const __DRIdrawable * dpriv, const drm_clip_rect_t * rect); -extern void intelClearWithBlit(struct gl_context * ctx, GLbitfield mask); +extern GLbitfield intelClearWithBlit(struct gl_context * ctx, GLbitfield mask); GLboolean intelEmitCopyBlit(struct intel_context *intel, @@ -69,5 +69,7 @@ void intel_emit_linear_blit(struct intel_context *intel, drm_intel_bo *src_bo, unsigned int src_offset, unsigned int size); +void intel_set_teximage_alpha_to_one(struct gl_context *ctx, + struct intel_texture_image *intel_image); #endif diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c index 87da60a771..439d6fc824 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c @@ -27,6 +27,7 @@ #include "main/imports.h" +#include "main/mfeatures.h" #include "main/mtypes.h" #include "main/macros.h" #include "main/bufferobj.h" @@ -52,6 +53,15 @@ intel_bufferobj_alloc_buffer(struct intel_context *intel, intel_obj->Base.Size, 64); } +static void +release_buffer(struct intel_buffer_object *intel_obj) +{ + drm_intel_bo_unreference(intel_obj->buffer); + intel_obj->buffer = NULL; + intel_obj->offset = 0; + intel_obj->source = 0; +} + /** * There is some duplication between mesa's bufferobjects and our * bufmgr buffers. Both have an integer handle and a hashtable to @@ -80,8 +90,7 @@ intel_bufferobj_release_region(struct intel_context *intel, intel_obj->region->pbo = NULL; intel_obj->region = NULL; - drm_intel_bo_unreference(intel_obj->buffer); - intel_obj->buffer = NULL; + release_buffer(intel_obj); } /* Break the COW tie to the region. Both the pbo and the region end @@ -119,10 +128,8 @@ intel_bufferobj_free(struct gl_context * ctx, struct gl_buffer_object *obj) if (intel_obj->region) { intel_bufferobj_release_region(intel, intel_obj); } - else if (intel_obj->buffer) { - drm_intel_bo_unreference(intel_obj->buffer); - } + drm_intel_bo_unreference(intel_obj->buffer); free(intel_obj); } @@ -153,19 +160,22 @@ intel_bufferobj_data(struct gl_context * ctx, if (intel_obj->region) intel_bufferobj_release_region(intel, intel_obj); - if (intel_obj->buffer != NULL) { - drm_intel_bo_unreference(intel_obj->buffer); - intel_obj->buffer = NULL; - } + if (intel_obj->buffer != NULL) + release_buffer(intel_obj); + free(intel_obj->sys_buffer); intel_obj->sys_buffer = NULL; if (size != 0) { + if (usage == GL_DYNAMIC_DRAW #ifdef I915 - /* On pre-965, stick VBOs in system memory, as we're always doing swtnl - * with their contents anyway. - */ - if (target == GL_ARRAY_BUFFER || target == GL_ELEMENT_ARRAY_BUFFER) { + /* On pre-965, stick VBOs in system memory, as we're always doing + * swtnl with their contents anyway. + */ + || target == GL_ARRAY_BUFFER || target == GL_ELEMENT_ARRAY_BUFFER +#endif + ) + { intel_obj->sys_buffer = malloc(size); if (intel_obj->sys_buffer != NULL) { if (data != NULL) @@ -173,7 +183,6 @@ intel_bufferobj_data(struct gl_context * ctx, return GL_TRUE; } } -#endif intel_bufferobj_alloc_buffer(intel, intel_obj); if (!intel_obj->buffer) return GL_FALSE; @@ -201,6 +210,7 @@ intel_bufferobj_subdata(struct gl_context * ctx, { struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + bool busy; if (size == 0) return; @@ -210,34 +220,53 @@ intel_bufferobj_subdata(struct gl_context * ctx, if (intel_obj->region) intel_bufferobj_cow(intel, intel_obj); - if (intel_obj->sys_buffer) - memcpy((char *)intel_obj->sys_buffer + offset, data, size); - else { - /* Flush any existing batchbuffer that might reference this data. */ - if (intel->gen < 6) { - if (drm_intel_bo_busy(intel_obj->buffer) || - drm_intel_bo_references(intel->batch->buf, intel_obj->buffer)) { - drm_intel_bo *temp_bo; + /* If we have a single copy in system memory, update that */ + if (intel_obj->sys_buffer) { + if (intel_obj->source) + release_buffer(intel_obj); - temp_bo = drm_intel_bo_alloc(intel->bufmgr, "subdata temp", size, 64); + if (intel_obj->buffer == NULL) { + memcpy((char *)intel_obj->sys_buffer + offset, data, size); + return; + } - drm_intel_bo_subdata(temp_bo, 0, size, data); + free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; + } - intel_emit_linear_blit(intel, - intel_obj->buffer, offset, - temp_bo, 0, - size); + /* Otherwise we need to update the copy in video memory. */ + busy = + drm_intel_bo_busy(intel_obj->buffer) || + drm_intel_bo_references(intel->batch.bo, intel_obj->buffer); - drm_intel_bo_unreference(temp_bo); - } else { - drm_intel_bo_subdata(intel_obj->buffer, offset, size, data); - } + /* replace the current busy bo with fresh data */ + if (busy && size == intel_obj->Base.Size) { + drm_intel_bo_unreference(intel_obj->buffer); + intel_bufferobj_alloc_buffer(intel, intel_obj); + drm_intel_bo_subdata(intel_obj->buffer, 0, size, data); + } else if (intel->gen < 6) { + if (busy) { + drm_intel_bo *temp_bo; + + temp_bo = drm_intel_bo_alloc(intel->bufmgr, "subdata temp", size, 64); + + drm_intel_bo_subdata(temp_bo, 0, size, data); + + intel_emit_linear_blit(intel, + intel_obj->buffer, offset, + temp_bo, 0, + size); + + drm_intel_bo_unreference(temp_bo); } else { - if (drm_intel_bo_references(intel->batch->buf, intel_obj->buffer)) { - intel_batchbuffer_flush(intel->batch); - } drm_intel_bo_subdata(intel_obj->buffer, offset, size, data); } + } else { + /* Can't use the blit to modify the buffer in the middle of batch. */ + if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) { + intel_batchbuffer_flush(intel); + } + drm_intel_bo_subdata(intel_obj->buffer, offset, size, data); } } @@ -279,14 +308,23 @@ intel_bufferobj_map(struct gl_context * ctx, assert(intel_obj); if (intel_obj->sys_buffer) { - obj->Pointer = intel_obj->sys_buffer; - obj->Length = obj->Size; - obj->Offset = 0; - return obj->Pointer; + if (!read_only && intel_obj->source) { + release_buffer(intel_obj); + } + + if (!intel_obj->buffer || intel_obj->source) { + obj->Pointer = intel_obj->sys_buffer; + obj->Length = obj->Size; + obj->Offset = 0; + return obj->Pointer; + } + + free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; } /* Flush any existing batchbuffer that might reference this data. */ - if (drm_intel_bo_references(intel->batch->buf, intel_obj->buffer)) + if (drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) intel_flush(ctx); if (intel_obj->region) @@ -335,6 +373,7 @@ intel_bufferobj_map_range(struct gl_context * ctx, { struct intel_context *intel = intel_context(ctx); struct intel_buffer_object *intel_obj = intel_buffer_object(obj); + GLboolean read_only = (access == GL_READ_ONLY_ARB); assert(intel_obj); @@ -346,8 +385,16 @@ intel_bufferobj_map_range(struct gl_context * ctx, obj->AccessFlags = access; if (intel_obj->sys_buffer) { - obj->Pointer = intel_obj->sys_buffer + offset; - return obj->Pointer; + if (!read_only && intel_obj->source) + release_buffer(intel_obj); + + if (!intel_obj->buffer || intel_obj->source) { + obj->Pointer = intel_obj->sys_buffer + offset; + return obj->Pointer; + } + + free(intel_obj->sys_buffer); + intel_obj->sys_buffer = NULL; } if (intel_obj->region) @@ -358,7 +405,7 @@ intel_bufferobj_map_range(struct gl_context * ctx, * syncing. */ if (!(access & GL_MAP_UNSYNCHRONIZED_BIT) && - drm_intel_bo_references(intel->batch->buf, intel_obj->buffer)) + drm_intel_bo_references(intel->batch.bo, intel_obj->buffer)) intel_flush(ctx); if (intel_obj->buffer == NULL) { @@ -373,8 +420,7 @@ intel_bufferobj_map_range(struct gl_context * ctx, (access & GL_MAP_INVALIDATE_BUFFER_BIT) && drm_intel_bo_busy(intel_obj->buffer)) { drm_intel_bo_unreference(intel_obj->buffer); - intel_obj->buffer = drm_intel_bo_alloc(intel->bufmgr, "bufferobj", - intel_obj->Base.Size, 64); + intel_bufferobj_alloc_buffer(intel, intel_obj); } /* If the user is mapping a range of an active buffer object but @@ -472,7 +518,7 @@ intel_bufferobj_unmap(struct gl_context * ctx, * flush. Once again, we wish for a domain tracker in libdrm to cover * usage inside of a batchbuffer. */ - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel); free(intel_obj->range_map_buffer); intel_obj->range_map_buffer = NULL; } else if (intel_obj->range_map_bo != NULL) { @@ -492,7 +538,7 @@ intel_bufferobj_unmap(struct gl_context * ctx, * flush. Once again, we wish for a domain tracker in libdrm to cover * usage inside of a batchbuffer. */ - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel); drm_intel_bo_unreference(intel_obj->range_map_bo); intel_obj->range_map_bo = NULL; @@ -512,7 +558,8 @@ intel_bufferobj_unmap(struct gl_context * ctx, drm_intel_bo * intel_bufferobj_buffer(struct intel_context *intel, - struct intel_buffer_object *intel_obj, GLuint flag) + struct intel_buffer_object *intel_obj, + GLuint flag) { if (intel_obj->region) { if (flag == INTEL_WRITE_PART) @@ -523,23 +570,169 @@ intel_bufferobj_buffer(struct intel_context *intel, } } - if (intel_obj->buffer == NULL) { - void *sys_buffer = intel_obj->sys_buffer; + if (intel_obj->source) + release_buffer(intel_obj); - /* only one of buffer and sys_buffer could be non-NULL */ + if (intel_obj->buffer == NULL) { intel_bufferobj_alloc_buffer(intel, intel_obj); - intel_obj->sys_buffer = NULL; + drm_intel_bo_subdata(intel_obj->buffer, + 0, intel_obj->Base.Size, + intel_obj->sys_buffer); - intel_bufferobj_subdata(&intel->ctx, - GL_ARRAY_BUFFER_ARB, - 0, - intel_obj->Base.Size, - sys_buffer, - &intel_obj->Base); - free(sys_buffer); + free(intel_obj->sys_buffer); intel_obj->sys_buffer = NULL; + intel_obj->offset = 0; + } + + return intel_obj->buffer; +} + +#define INTEL_UPLOAD_SIZE (64*1024) + +void +intel_upload_finish(struct intel_context *intel) +{ + if (!intel->upload.bo) + return; + + if (intel->upload.buffer_len) { + drm_intel_bo_subdata(intel->upload.bo, + intel->upload.buffer_offset, + intel->upload.buffer_len, + intel->upload.buffer); + intel->upload.buffer_len = 0; + } + + drm_intel_bo_unreference(intel->upload.bo); + intel->upload.bo = NULL; +} + +static void wrap_buffers(struct intel_context *intel, GLuint size) +{ + intel_upload_finish(intel); + + if (size < INTEL_UPLOAD_SIZE) + size = INTEL_UPLOAD_SIZE; + + intel->upload.bo = drm_intel_bo_alloc(intel->bufmgr, "upload", size, 0); + intel->upload.offset = 0; +} + +void intel_upload_data(struct intel_context *intel, + const void *ptr, GLuint size, GLuint align, + drm_intel_bo **return_bo, + GLuint *return_offset) +{ + GLuint base, delta; + + base = (intel->upload.offset + align - 1) / align * align; + if (intel->upload.bo == NULL || base + size > intel->upload.bo->size) { + wrap_buffers(intel, size); + base = 0; + } + + drm_intel_bo_reference(intel->upload.bo); + *return_bo = intel->upload.bo; + *return_offset = base; + + delta = base - intel->upload.offset; + if (intel->upload.buffer_len && + intel->upload.buffer_len + delta + size > sizeof(intel->upload.buffer)) + { + drm_intel_bo_subdata(intel->upload.bo, + intel->upload.buffer_offset, + intel->upload.buffer_len, + intel->upload.buffer); + intel->upload.buffer_len = 0; + } + + if (size < sizeof(intel->upload.buffer)) + { + if (intel->upload.buffer_len == 0) + intel->upload.buffer_offset = base; + else + intel->upload.buffer_len += delta; + + memcpy(intel->upload.buffer + intel->upload.buffer_len, ptr, size); + intel->upload.buffer_len += size; + } + else + { + drm_intel_bo_subdata(intel->upload.bo, base, size, ptr); + } + + intel->upload.offset = base + size; +} + +void *intel_upload_map(struct intel_context *intel, GLuint size, GLuint align) +{ + GLuint base, delta; + char *ptr; + + base = (intel->upload.offset + align - 1) / align * align; + if (intel->upload.bo == NULL || base + size > intel->upload.bo->size) { + wrap_buffers(intel, size); + base = 0; + } + + delta = base - intel->upload.offset; + if (intel->upload.buffer_len && + intel->upload.buffer_len + delta + size > sizeof(intel->upload.buffer)) + { + drm_intel_bo_subdata(intel->upload.bo, + intel->upload.buffer_offset, + intel->upload.buffer_len, + intel->upload.buffer); + intel->upload.buffer_len = 0; + } + + if (size <= sizeof(intel->upload.buffer)) { + if (intel->upload.buffer_len == 0) + intel->upload.buffer_offset = base; + else + intel->upload.buffer_len += delta; + + ptr = intel->upload.buffer + intel->upload.buffer_len; + intel->upload.buffer_len += size; + } else + ptr = malloc(size); + + return ptr; +} + +void intel_upload_unmap(struct intel_context *intel, + const void *ptr, GLuint size, GLuint align, + drm_intel_bo **return_bo, + GLuint *return_offset) +{ + GLuint base; + + base = (intel->upload.offset + align - 1) / align * align; + if (size > sizeof(intel->upload.buffer)) { + drm_intel_bo_subdata(intel->upload.bo, base, size, ptr); + free((void*)ptr); + } + + drm_intel_bo_reference(intel->upload.bo); + *return_bo = intel->upload.bo; + *return_offset = base; + + intel->upload.offset = base + size; +} + +drm_intel_bo * +intel_bufferobj_source(struct intel_context *intel, + struct intel_buffer_object *intel_obj, + GLuint align, GLuint *offset) +{ + if (intel_obj->buffer == NULL) { + intel_upload_data(intel, + intel_obj->sys_buffer, intel_obj->Base.Size, align, + &intel_obj->buffer, &intel_obj->offset); + intel_obj->source = 1; } + *offset = intel_obj->offset; return intel_obj->buffer; } @@ -554,6 +747,7 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx, struct intel_buffer_object *intel_src = intel_buffer_object(src); struct intel_buffer_object *intel_dst = intel_buffer_object(dst); drm_intel_bo *src_bo, *dst_bo; + GLuint src_offset; if (size == 0) return; @@ -566,7 +760,7 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx, if (src == dst) { char *ptr = intel_bufferobj_map(ctx, GL_COPY_WRITE_BUFFER, GL_READ_WRITE, dst); - memcpy(ptr + write_offset, ptr + read_offset, size); + memmove(ptr + write_offset, ptr + read_offset, size); intel_bufferobj_unmap(ctx, GL_COPY_WRITE_BUFFER, dst); } else { const char *src_ptr; @@ -588,18 +782,18 @@ intel_bufferobj_copy_subdata(struct gl_context *ctx, /* Otherwise, we have real BOs, so blit them. */ dst_bo = intel_bufferobj_buffer(intel, intel_dst, INTEL_WRITE_PART); - src_bo = intel_bufferobj_buffer(intel, intel_src, INTEL_READ); + src_bo = intel_bufferobj_source(intel, intel_src, 64, &src_offset); intel_emit_linear_blit(intel, dst_bo, write_offset, - src_bo, read_offset, size); + src_bo, read_offset + src_offset, size); /* Since we've emitted some blits to buffers that will (likely) be used * in rendering operations in other cache domains in this batch, emit a * flush. Once again, we wish for a domain tracker in libdrm to cover * usage inside of a batchbuffer. */ - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel); } #if FEATURE_APPLE_object_purgeable diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.h b/src/mesa/drivers/dri/intel/intel_buffer_objects.h index b15c192106..81ee21f062 100644 --- a/src/mesa/drivers/dri/intel/intel_buffer_objects.h +++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.h @@ -42,6 +42,8 @@ struct intel_buffer_object { struct gl_buffer_object Base; drm_intel_bo *buffer; /* the low-level buffer manager's buffer handle */ + GLuint offset; /* any offset into that buffer */ + /** System memory buffer data, if not using a BO to store the data. */ void *sys_buffer; @@ -55,6 +57,7 @@ struct intel_buffer_object GLsizei range_map_size; GLboolean mapped_gtt; + GLboolean source; }; @@ -63,8 +66,26 @@ struct intel_buffer_object drm_intel_bo *intel_bufferobj_buffer(struct intel_context *intel, struct intel_buffer_object *obj, GLuint flag); +drm_intel_bo *intel_bufferobj_source(struct intel_context *intel, + struct intel_buffer_object *obj, + GLuint align, + GLuint *offset); + +void intel_upload_data(struct intel_context *intel, + const void *ptr, GLuint size, GLuint align, + drm_intel_bo **return_bo, + GLuint *return_offset); + +void *intel_upload_map(struct intel_context *intel, + GLuint size, GLuint align); +void intel_upload_unmap(struct intel_context *intel, + const void *ptr, GLuint size, GLuint align, + drm_intel_bo **return_bo, + GLuint *return_offset); + +void intel_upload_finish(struct intel_context *intel); -/* Hook the bufferobject implementation into mesa: +/* Hook the bufferobject implementation into mesa: */ void intelInitBufferObjectFuncs(struct dd_function_table *functions); diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h index 4fecdbed20..4ff9140d56 100644 --- a/src/mesa/drivers/dri/intel/intel_chipset.h +++ b/src/mesa/drivers/dri/intel/intel_chipset.h @@ -133,6 +133,10 @@ devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \ devid == PCI_CHIP_SANDYBRIDGE_S) +#define IS_GT1(devid) (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \ + devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \ + devid == PCI_CHIP_SANDYBRIDGE_S) + #define IS_965(devid) (IS_GEN4(devid) || \ IS_G4X(devid) || \ IS_GEN5(devid) || \ diff --git a/src/mesa/drivers/dri/intel/intel_clear.c b/src/mesa/drivers/dri/intel/intel_clear.c index fa451f0045..82d29e7671 100644 --- a/src/mesa/drivers/dri/intel/intel_clear.c +++ b/src/mesa/drivers/dri/intel/intel_clear.c @@ -85,6 +85,8 @@ intelClear(struct gl_context *ctx, GLbitfield mask) GLbitfield blit_mask = 0; GLbitfield swrast_mask = 0; struct gl_framebuffer *fb = ctx->DrawBuffer; + struct intel_renderbuffer *irb; + int i; if (mask & (BUFFER_BIT_FRONT_LEFT | BUFFER_BIT_FRONT_RIGHT)) { intel->front_buffer_dirty = GL_TRUE; @@ -93,6 +95,22 @@ intelClear(struct gl_context *ctx, GLbitfield mask) if (0) fprintf(stderr, "%s\n", __FUNCTION__); + /* Get SW clears out of the way: Anything without an intel_renderbuffer */ + for (i = 0; i < BUFFER_COUNT; i++) { + if (!(mask & (1 << i))) + continue; + + irb = intel_get_renderbuffer(fb, i); + if (unlikely(!irb)) { + swrast_mask |= (1 << i); + mask &= ~(1 << i); + } + } + if (unlikely(swrast_mask)) { + debug_mask("swrast", swrast_mask); + _swrast_Clear(ctx, swrast_mask); + } + /* HW color buffers (front, back, aux, generic FBO, etc) */ if (colorMask == ~0) { /* clear all R,G,B,A */ @@ -151,44 +169,18 @@ intelClear(struct gl_context *ctx, GLbitfield mask) } } - if (intel->gen >= 6) { - /* Blits are in a different ringbuffer so we don't use them. */ - tri_mask |= blit_mask; - blit_mask = 0; - } - - /* SW fallback clearing */ - swrast_mask = mask & ~tri_mask & ~blit_mask; - - { - /* look for non-Intel renderbuffers (clear them with swrast) */ - GLbitfield blit_or_tri = blit_mask | tri_mask; - while (blit_or_tri) { - GLuint i = _mesa_ffs(blit_or_tri) - 1; - GLbitfield bufBit = 1 << i; - if (!fb->Attachment[i].Renderbuffer->ClassID) { - blit_mask &= ~bufBit; - tri_mask &= ~bufBit; - swrast_mask |= bufBit; - } - blit_or_tri ^= bufBit; - } - } + /* Anything left, just use tris */ + tri_mask |= mask & ~blit_mask; if (blit_mask) { debug_mask("blit", blit_mask); - intelClearWithBlit(ctx, blit_mask); + tri_mask |= intelClearWithBlit(ctx, blit_mask); } if (tri_mask) { debug_mask("tri", tri_mask); _mesa_meta_Clear(&intel->ctx, tri_mask); } - - if (swrast_mask) { - debug_mask("swrast", swrast_mask); - _swrast_Clear(ctx, swrast_mask); - } } diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 968f3c6e46..7c422c4c3a 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -496,11 +496,9 @@ intel_prepare_render(struct intel_context *intel) * the swap, and getting our hands on that doesn't seem worth it, * so we just us the first batch we emitted after the last swap. */ - if (intel->need_throttle && intel->first_post_swapbuffers_batch) { - drm_intel_bo_wait_rendering(intel->first_post_swapbuffers_batch); - drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); - intel->first_post_swapbuffers_batch = NULL; - intel->need_throttle = GL_FALSE; + if (intel->need_throttle) { + drmCommandNone(intel->driFd, DRM_I915_GEM_THROTTLE); + intel->need_throttle = GL_FALSE; } } @@ -513,7 +511,7 @@ intel_viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei w, GLsizei h) if (intel->saved_viewport) intel->saved_viewport(ctx, x, y, w, h); - if (!intel->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) { + if (ctx->DrawBuffer->Name == 0) { dri2InvalidateDrawable(driContext->driDrawablePriv); dri2InvalidateDrawable(driContext->driReadablePriv); } @@ -580,8 +578,8 @@ intel_flush(struct gl_context *ctx) if (intel->gen < 4) INTEL_FIREVERTICES(intel); - if (intel->batch->map != intel->batch->ptr) - intel_batchbuffer_flush(intel->batch); + if (intel->batch.used) + intel_batchbuffer_flush(intel); } static void @@ -591,7 +589,8 @@ intel_glFlush(struct gl_context *ctx) intel_flush(ctx); intel_flush_front(ctx); - intel->need_throttle = GL_TRUE; + if (intel->is_front_buffer_rendering) + intel->need_throttle = GL_TRUE; } void @@ -608,7 +607,7 @@ intelFinish(struct gl_context * ctx) irb = intel_renderbuffer(fb->_ColorDrawBuffers[i]); - if (irb && irb->region) + if (irb && irb->region && irb->region->buffer) drm_intel_bo_wait_rendering(irb->region->buffer); } if (fb->_DepthBuffer) { @@ -669,8 +668,8 @@ intelInitContext(struct intel_context *intel, mesaVis = &visual; } - if (!_mesa_initialize_context_for_api(&intel->ctx, api, mesaVis, shareCtx, - functions, (void *) intel)) { + if (!_mesa_initialize_context(&intel->ctx, api, mesaVis, shareCtx, + functions, (void *) intel)) { printf("%s: failed to init mesa context\n", __FUNCTION__); return GL_FALSE; } @@ -708,12 +707,75 @@ intelInitContext(struct intel_context *intel, } } + memset(&ctx->TextureFormatSupported, 0, + sizeof(ctx->TextureFormatSupported)); + ctx->TextureFormatSupported[MESA_FORMAT_ARGB8888] = GL_TRUE; + if (intel->has_xrgb_textures) + ctx->TextureFormatSupported[MESA_FORMAT_XRGB8888] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_ARGB4444] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_ARGB1555] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RGB565] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_L8] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_A8] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_I8] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_AL88] = GL_TRUE; + if (intel->gen >= 4) + ctx->TextureFormatSupported[MESA_FORMAT_AL1616] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_S8_Z24] = GL_TRUE; + /* + * This was disabled in initial FBO enabling to avoid combinations + * of depth+stencil that wouldn't work together. We since decided + * that it was OK, since it's up to the app to come up with the + * combo that actually works, so this can probably be re-enabled. + */ + /* + ctx->TextureFormatSupported[MESA_FORMAT_Z16] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_Z24] = GL_TRUE; + */ + + /* ctx->Extensions.MESA_ycbcr_texture */ + ctx->TextureFormatSupported[MESA_FORMAT_YCBCR] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_YCBCR_REV] = GL_TRUE; + + /* GL_3DFX_texture_compression_FXT1 */ + ctx->TextureFormatSupported[MESA_FORMAT_RGB_FXT1] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RGBA_FXT1] = GL_TRUE; + + /* GL_EXT_texture_compression_s3tc */ + ctx->TextureFormatSupported[MESA_FORMAT_RGB_DXT1] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RGBA_DXT1] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RGBA_DXT3] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RGBA_DXT5] = GL_TRUE; + +#ifndef I915 + /* GL_ARB_texture_rg */ + ctx->TextureFormatSupported[MESA_FORMAT_R8] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_R16] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RG88] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_RG1616] = GL_TRUE; + + ctx->TextureFormatSupported[MESA_FORMAT_DUDV8] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_SIGNED_RGBA8888_REV] = GL_TRUE; + + /* GL_EXT_texture_sRGB */ + ctx->TextureFormatSupported[MESA_FORMAT_SARGB8] = GL_TRUE; + if (intel->gen >= 5 || intel->is_g4x) + ctx->TextureFormatSupported[MESA_FORMAT_SRGB_DXT1] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_SRGBA_DXT1] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_SRGBA_DXT3] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_SRGBA_DXT5] = GL_TRUE; + if (intel->has_luminance_srgb) { + ctx->TextureFormatSupported[MESA_FORMAT_SL8] = GL_TRUE; + ctx->TextureFormatSupported[MESA_FORMAT_SLA8] = GL_TRUE; + } +#endif + driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache, sPriv->myNum, (intel->gen >= 4) ? "i965" : "i915"); - if (intelScreen->deviceID == PCI_CHIP_I865_G) + if (intel->gen < 4) intel->maxBatchSize = 4096; else - intel->maxBatchSize = BATCH_SZ; + intel->maxBatchSize = sizeof(intel->batch.map); intel->bufmgr = intelScreen->bufmgr; @@ -767,8 +829,8 @@ intelInitContext(struct intel_context *intel, */ _mesa_init_point(ctx); - meta_init_metaops(ctx, &intel->meta); if (intel->gen >= 4) { + ctx->Const.sRGBCapable = GL_TRUE; if (MAX_WIDTH > 8192) ctx->Const.MaxRenderbufferSize = 8192; } else { @@ -826,7 +888,7 @@ intelInitContext(struct intel_context *intel, if (INTEL_DEBUG & DEBUG_BUFMGR) dri_bufmgr_set_debug(intel->bufmgr, GL_TRUE); - intel->batch = intel_batchbuffer_alloc(intel); + intel_batchbuffer_reset(intel); intel_fbo_init(intel); @@ -874,8 +936,6 @@ intelDestroyContext(__DRIcontext * driContextPriv) _mesa_meta_free(&intel->ctx); - meta_destroy_metaops(&intel->meta); - intel->vtbl.destroy(intel); _swsetup_DestroyContext(&intel->ctx); @@ -885,15 +945,12 @@ intelDestroyContext(__DRIcontext * driContextPriv) _swrast_DestroyContext(&intel->ctx); intel->Fallback = 0x0; /* don't call _swrast_Flush later */ - intel_batchbuffer_free(intel->batch); - intel->batch = NULL; + intel_batchbuffer_free(intel); free(intel->prim.vb); intel->prim.vb = NULL; drm_intel_bo_unreference(intel->prim.vb_bo); intel->prim.vb_bo = NULL; - drm_intel_bo_unreference(intel->first_post_swapbuffers_batch); - intel->first_post_swapbuffers_batch = NULL; driDestroyOptionCache(&intel->optionCache); diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index 96493c0f2b..772b2fba5a 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -29,10 +29,9 @@ #define INTELCONTEXT_INC - +#include <stdbool.h> #include "main/mtypes.h" #include "main/mm.h" -#include "dri_metaops.h" #ifdef __cplusplus extern "C" { @@ -149,10 +148,9 @@ struct intel_context void (*assert_not_dirty) (struct intel_context *intel); void (*debug_batch)(struct intel_context *intel); + bool (*render_target_supported)(gl_format format); } vtbl; - struct dri_metaops meta; - GLbitfield Fallback; /**< mask of INTEL_FALLBACK_x bits */ GLuint NewGLState; @@ -171,17 +169,28 @@ struct intel_context int urb_size; - struct intel_batchbuffer *batch; - drm_intel_bo *first_post_swapbuffers_batch; + struct intel_batchbuffer { + drm_intel_bo *bo; + struct cached_batch_item *cached_items; + + uint16_t emit, total; + uint16_t used, reserved_space; + uint32_t map[8192]; +#define BATCH_SZ (8192*sizeof(uint32_t)) + + uint32_t state_batch_offset; + bool is_blit; + } batch; + GLboolean need_throttle; GLboolean no_batch_wrap; struct { GLuint id; + uint32_t start_ptr; /**< for i8xx */ uint32_t primitive; /**< Current hardware primitive type */ void (*flush) (struct intel_context *); - GLubyte *start_ptr; /**< for i8xx */ drm_intel_bo *vb_bo; uint8_t *vb; unsigned int start_offset; /**< Byte offset of primitive sequence */ @@ -189,6 +198,14 @@ struct intel_context unsigned int count; /**< Number of vertices in current primitive */ } prim; + struct { + drm_intel_bo *bo; + GLuint offset; + uint32_t buffer_len; + uint32_t buffer_offset; + char buffer[4096]; + } upload; + GLuint stats_wm; /* Offsets of fields within the current vertex: diff --git a/src/mesa/drivers/dri/intel/intel_decode.c b/src/mesa/drivers/dri/intel/intel_decode.c index 25b4131594..688b8fee64 100644 --- a/src/mesa/drivers/dri/intel/intel_decode.c +++ b/src/mesa/drivers/dri/intel/intel_decode.c @@ -1601,10 +1601,12 @@ decode_3d_965(uint32_t *data, int count, uint32_t hw_offset, uint32_t devid, int { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" }, { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" }, + { 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" }, { 0x7b00, 6, 6, "3DPRIMITIVE" }, { 0x7802, 4, 4, "3DSTATE_SAMPLER_STATE_POINTERS" }, { 0x7805, 3, 3, "3DSTATE_URB" }, { 0x780e, 4, 4, "3DSTATE_CC_STATE_POINTERS" }, + { 0x780f, 2, 2, "3DSTATE_SCISSOR_STATE_POINTERS" }, { 0x7810, 6, 6, "3DSTATE_VS_STATE" }, { 0x7811, 7, 7, "3DSTATE_GS_STATE" }, { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" }, diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index 556a4195bd..febc1d4f85 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -25,12 +25,15 @@ * **************************************************************************/ +#include "main/mfeatures.h" + #include "intel_chipset.h" #include "intel_context.h" #include "intel_extensions.h" #include "utils.h" +#define need_GL_ARB_ES2_compatibility #define need_GL_ARB_draw_elements_base_vertex #define need_GL_ARB_framebuffer_object #define need_GL_ARB_map_buffer_range @@ -78,8 +81,10 @@ * i965_dri. */ static const struct dri_extension card_extensions[] = { + { "GL_ARB_ES2_compatibility", GL_ARB_ES2_compatibility_functions }, { "GL_ARB_draw_elements_base_vertex", GL_ARB_draw_elements_base_vertex_functions }, { "GL_ARB_explicit_attrib_location", NULL }, + { "GL_ARB_framebuffer_object", GL_ARB_framebuffer_object_functions}, { "GL_ARB_half_float_pixel", NULL }, { "GL_ARB_map_buffer_range", GL_ARB_map_buffer_range_functions }, { "GL_ARB_multitexture", NULL }, @@ -161,7 +166,6 @@ static const struct dri_extension brw_extensions[] = { { "GL_ARB_fragment_program", NULL }, { "GL_ARB_fragment_program_shadow", NULL }, { "GL_ARB_fragment_shader", NULL }, - { "GL_ARB_framebuffer_object", GL_ARB_framebuffer_object_functions}, { "GL_ARB_half_float_vertex", NULL }, { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, { "GL_ARB_point_sprite", NULL }, @@ -171,9 +175,11 @@ static const struct dri_extension brw_extensions[] = { { "GL_ARB_texture_non_power_of_two", NULL }, { "GL_ARB_texture_rg", NULL }, { "GL_EXT_draw_buffers2", GL_EXT_draw_buffers2_functions }, + { "GL_EXT_framebuffer_sRGB", NULL }, { "GL_EXT_shadow_funcs", NULL }, { "GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions }, { "GL_EXT_texture_sRGB", NULL }, + { "GL_EXT_texture_sRGB_decode", NULL }, { "GL_EXT_texture_swizzle", NULL }, { "GL_EXT_vertex_array_bgra", NULL }, { "GL_ATI_envmap_bumpmap", GL_ATI_envmap_bumpmap_functions }, diff --git a/src/mesa/drivers/dri/intel/intel_extensions_es2.c b/src/mesa/drivers/dri/intel/intel_extensions_es2.c index 54b0517deb..747ddf7644 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions_es2.c +++ b/src/mesa/drivers/dri/intel/intel_extensions_es2.c @@ -26,6 +26,7 @@ **************************************************************************/ #include "main/extensions.h" +#include "main/mfeatures.h" #include "intel_extensions.h" @@ -62,6 +63,7 @@ static const char *es2_extensions[] = { "GL_EXT_blend_minmax", "GL_EXT_blend_subtract", "GL_EXT_stencil_wrap", + "GL_NV_blend_square", /* Optional GLES2 */ "GL_ARB_framebuffer_object", @@ -95,9 +97,22 @@ intelInitExtensionsES1(struct gl_context *ctx) _mesa_enable_extension(ctx, "GL_ARB_point_parameters"); _mesa_enable_extension(ctx, "GL_OES_draw_texture"); + + _mesa_enable_extension(ctx, "GL_OES_point_sprite"); } /** + * \brief Extensions to disable. + * + * These extensions must be manually disabled because they may have been + * enabled by default. + */ +static const char* es2_extensions_disabled[] = { + "GL_OES_standard_derivatives", + NULL, +}; + +/** * Initializes potential list of extensions if ctx == NULL, or actually enables * extensions for a context. */ @@ -111,4 +126,6 @@ intelInitExtensionsES2(struct gl_context *ctx) for (i = 0; es2_extensions[i]; i++) _mesa_enable_extension(ctx, es2_extensions[i]); + for (i = 0; es2_extensions_disabled[i]; i++) + _mesa_disable_extension(ctx, es2_extensions_disabled[i]); } diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 18e796a118..8b57eb19f5 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -28,11 +28,13 @@ #include "main/imports.h" #include "main/macros.h" +#include "main/mfeatures.h" #include "main/mtypes.h" #include "main/fbobject.h" #include "main/framebuffer.h" #include "main/renderbuffer.h" #include "main/context.h" +#include "main/teximage.h" #include "main/texrender.h" #include "drivers/common/meta.h" @@ -42,6 +44,11 @@ #include "intel_fbo.h" #include "intel_mipmap_tree.h" #include "intel_regions.h" +#include "intel_tex.h" +#include "intel_span.h" +#ifndef I915 +#include "brw_context.h" +#endif #define FILE_DEBUG_FLAG DEBUG_FBO @@ -107,79 +114,27 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer ASSERT(rb->Name != 0); switch (internalFormat) { - case GL_RED: - case GL_R8: - rb->Format = MESA_FORMAT_R8; - rb->DataType = GL_UNSIGNED_BYTE; - break; - case GL_R16: - rb->Format = MESA_FORMAT_R16; - rb->DataType = GL_UNSIGNED_SHORT; - break; - case GL_RG: - case GL_RG8: - rb->Format = MESA_FORMAT_RG88; - rb->DataType = GL_UNSIGNED_BYTE; - break; - case GL_RG16: - rb->Format = MESA_FORMAT_RG1616; - rb->DataType = GL_UNSIGNED_SHORT; - break; - case GL_R3_G3_B2: - case GL_RGB4: - case GL_RGB5: - rb->Format = MESA_FORMAT_RGB565; - rb->DataType = GL_UNSIGNED_BYTE; - break; - case GL_RGB: - case GL_RGB8: - case GL_RGB10: - case GL_RGB12: - case GL_RGB16: - rb->Format = MESA_FORMAT_XRGB8888; - rb->DataType = GL_UNSIGNED_BYTE; - break; - case GL_RGBA: - case GL_RGBA2: - case GL_RGBA4: - case GL_RGB5_A1: - case GL_RGBA8: - case GL_RGB10_A2: - case GL_RGBA12: - case GL_RGBA16: - rb->Format = MESA_FORMAT_ARGB8888; - rb->DataType = GL_UNSIGNED_BYTE; - break; - case GL_ALPHA: - case GL_ALPHA8: - rb->Format = MESA_FORMAT_A8; - rb->DataType = GL_UNSIGNED_BYTE; - break; - case GL_DEPTH_COMPONENT16: - rb->Format = MESA_FORMAT_Z16; - rb->DataType = GL_UNSIGNED_SHORT; + default: + /* Use the same format-choice logic as for textures. + * Renderbuffers aren't any different from textures for us, + * except they're less useful because you can't texture with + * them. + */ + rb->Format = intel->ctx.Driver.ChooseTextureFormat(ctx, internalFormat, + GL_NONE, GL_NONE); break; case GL_STENCIL_INDEX: case GL_STENCIL_INDEX1_EXT: case GL_STENCIL_INDEX4_EXT: case GL_STENCIL_INDEX8_EXT: case GL_STENCIL_INDEX16_EXT: - case GL_DEPTH_COMPONENT: - case GL_DEPTH_COMPONENT24: - case GL_DEPTH_COMPONENT32: - case GL_DEPTH_STENCIL_EXT: - case GL_DEPTH24_STENCIL8_EXT: - /* alloc a depth+stencil buffer */ + /* These aren't actual texture formats, so force them here. */ rb->Format = MESA_FORMAT_S8_Z24; - rb->DataType = GL_UNSIGNED_INT_24_8_EXT; break; - default: - _mesa_problem(ctx, - "Unexpected format in intel_alloc_renderbuffer_storage"); - return GL_FALSE; } rb->_BaseFormat = _mesa_base_fbo_format(ctx, internalFormat); + rb->DataType = intel_mesa_format_to_rb_datatype(rb->Format); cpp = _mesa_get_format_bytes(rb->Format); intel_flush(ctx); @@ -195,10 +150,15 @@ intel_alloc_renderbuffer_storage(struct gl_context * ctx, struct gl_renderbuffer DBG("Allocating %d x %d Intel RBO\n", width, height); tiling = I915_TILING_NONE; + if (intel->use_texture_tiling) { + GLenum base_format = _mesa_get_format_base_format(rb->Format); - /* Gen6 requires depth must be tiling */ - if (intel->gen >= 6 && rb->Format == MESA_FORMAT_S8_Z24) - tiling = I915_TILING_Y; + if (intel->gen >= 4 && (base_format == GL_DEPTH_COMPONENT || + base_format == GL_DEPTH_STENCIL)) + tiling = I915_TILING_Y; + else + tiling = I915_TILING_X; + } irb->region = intel_region_alloc(intel->intelScreen, tiling, cpp, width, height, GL_TRUE); @@ -334,53 +294,10 @@ intel_create_renderbuffer(gl_format format) _mesa_init_renderbuffer(&irb->Base, 0); irb->Base.ClassID = INTEL_RB_CLASS; - - switch (format) { - case MESA_FORMAT_RGB565: - irb->Base._BaseFormat = GL_RGB; - irb->Base.DataType = GL_UNSIGNED_BYTE; - break; - case MESA_FORMAT_XRGB8888: - irb->Base._BaseFormat = GL_RGB; - irb->Base.DataType = GL_UNSIGNED_BYTE; - break; - case MESA_FORMAT_ARGB8888: - irb->Base._BaseFormat = GL_RGBA; - irb->Base.DataType = GL_UNSIGNED_BYTE; - break; - case MESA_FORMAT_Z16: - irb->Base._BaseFormat = GL_DEPTH_COMPONENT; - irb->Base.DataType = GL_UNSIGNED_SHORT; - break; - case MESA_FORMAT_X8_Z24: - irb->Base._BaseFormat = GL_DEPTH_COMPONENT; - irb->Base.DataType = GL_UNSIGNED_INT; - break; - case MESA_FORMAT_S8_Z24: - irb->Base._BaseFormat = GL_DEPTH_STENCIL; - irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT; - break; - case MESA_FORMAT_A8: - irb->Base._BaseFormat = GL_ALPHA; - irb->Base.DataType = GL_UNSIGNED_BYTE; - break; - case MESA_FORMAT_R8: - irb->Base._BaseFormat = GL_RED; - irb->Base.DataType = GL_UNSIGNED_BYTE; - break; - case MESA_FORMAT_RG88: - irb->Base._BaseFormat = GL_RG; - irb->Base.DataType = GL_UNSIGNED_BYTE; - break; - default: - _mesa_problem(NULL, - "Unexpected intFormat in intel_create_renderbuffer"); - free(irb); - return NULL; - } - + irb->Base._BaseFormat = _mesa_get_format_base_format(format); irb->Base.Format = format; irb->Base.InternalFormat = irb->Base._BaseFormat; + irb->Base.DataType = intel_mesa_format_to_rb_datatype(format); /* intel-specific methods */ irb->Base.Delete = intel_delete_renderbuffer; @@ -457,70 +374,16 @@ static GLboolean intel_update_wrapper(struct gl_context *ctx, struct intel_renderbuffer *irb, struct gl_texture_image *texImage) { - if (texImage->TexFormat == MESA_FORMAT_ARGB8888) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to RGBA8 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_XRGB8888) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to XGBA8 texture OK\n"); - } -#ifndef I915 - else if (texImage->TexFormat == MESA_FORMAT_SARGB8) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to SARGB8 texture OK\n"); - } -#endif - else if (texImage->TexFormat == MESA_FORMAT_RGB565) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to RGB5 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_ARGB1555) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to ARGB1555 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_ARGB4444) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to ARGB4444 texture OK\n"); - } -#ifndef I915 - else if (texImage->TexFormat == MESA_FORMAT_A8) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to A8 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_R8) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to R8 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_RG88) { - irb->Base.DataType = GL_UNSIGNED_BYTE; - DBG("Render to RG88 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_R16) { - irb->Base.DataType = GL_UNSIGNED_SHORT; - DBG("Render to R8 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_RG1616) { - irb->Base.DataType = GL_UNSIGNED_SHORT; - DBG("Render to RG88 texture OK\n"); - } -#endif - else if (texImage->TexFormat == MESA_FORMAT_Z16) { - irb->Base.DataType = GL_UNSIGNED_SHORT; - DBG("Render to DEPTH16 texture OK\n"); - } - else if (texImage->TexFormat == MESA_FORMAT_S8_Z24) { - irb->Base.DataType = GL_UNSIGNED_INT_24_8_EXT; - DBG("Render to DEPTH_STENCIL texture OK\n"); - } - else { + if (!intel_span_supports_format(texImage->TexFormat)) { DBG("Render to texture BAD FORMAT %s\n", _mesa_get_format_name(texImage->TexFormat)); return GL_FALSE; + } else { + DBG("Render to texture %s\n", _mesa_get_format_name(texImage->TexFormat)); } irb->Base.Format = texImage->TexFormat; - + irb->Base.DataType = intel_mesa_format_to_rb_datatype(texImage->TexFormat); irb->Base.InternalFormat = texImage->InternalFormat; irb->Base._BaseFormat = _mesa_base_fbo_format(ctx, irb->Base.InternalFormat); irb->Base.Width = texImage->Width; @@ -562,6 +425,24 @@ intel_wrap_texture(struct gl_context * ctx, struct gl_texture_image *texImage) return irb; } +static void +intel_set_draw_offset_for_image(struct intel_texture_image *intel_image, + int zoffset) +{ + struct intel_mipmap_tree *mt = intel_image->mt; + unsigned int dst_x, dst_y; + + /* compute offset of the particular 2D image within the texture region */ + intel_miptree_get_image_offset(intel_image->mt, + intel_image->level, + intel_image->face, + zoffset, + &dst_x, &dst_y); + + mt->region->draw_offset = (dst_y * mt->region->pitch + dst_x) * mt->cpp; + mt->region->draw_x = dst_x; + mt->region->draw_y = dst_y; +} /** * Called by glFramebufferTexture[123]DEXT() (and other places) to @@ -578,7 +459,6 @@ intel_render_texture(struct gl_context * ctx, = att->Texture->Image[att->CubeMapFace][att->TextureLevel]; struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer); struct intel_texture_image *intel_image; - GLuint dst_x, dst_y; (void) fb; @@ -624,19 +504,53 @@ intel_render_texture(struct gl_context * ctx, intel_region_reference(&irb->region, intel_image->mt->region); } - /* compute offset of the particular 2D image within the texture region */ - intel_miptree_get_image_offset(intel_image->mt, - att->TextureLevel, - att->CubeMapFace, - att->Zoffset, - &dst_x, &dst_y); - - intel_image->mt->region->draw_offset = (dst_y * intel_image->mt->region->pitch + - dst_x) * intel_image->mt->cpp; - intel_image->mt->region->draw_x = dst_x; - intel_image->mt->region->draw_y = dst_y; + intel_set_draw_offset_for_image(intel_image, att->Zoffset); intel_image->used_as_render_target = GL_TRUE; +#ifndef I915 + if (!brw_context(ctx)->has_surface_tile_offset && + (intel_image->mt->region->draw_offset & 4095) != 0) { + /* Original gen4 hardware couldn't draw to a non-tile-aligned + * destination in a miptree unless you actually setup your + * renderbuffer as a miptree and used the fragile + * lod/array_index/etc. controls to select the image. So, + * instead, we just make a new single-level miptree and render + * into that. + */ + struct intel_context *intel = intel_context(ctx); + struct intel_mipmap_tree *old_mt = intel_image->mt; + struct intel_mipmap_tree *new_mt; + int comp_byte = 0, texel_bytes; + + if (_mesa_is_format_compressed(intel_image->base.TexFormat)) + comp_byte = intel_compressed_num_bytes(intel_image->base.TexFormat); + + texel_bytes = _mesa_get_format_bytes(intel_image->base.TexFormat); + + new_mt = intel_miptree_create(intel, newImage->TexObject->Target, + intel_image->base._BaseFormat, + intel_image->base.InternalFormat, + intel_image->level, + intel_image->level, + intel_image->base.Width, + intel_image->base.Height, + intel_image->base.Depth, + texel_bytes, comp_byte, GL_TRUE); + + intel_miptree_image_copy(intel, + new_mt, + intel_image->face, + intel_image->level, + old_mt); + + intel_miptree_release(intel, &intel_image->mt); + intel_image->mt = new_mt; + intel_set_draw_offset_for_image(intel_image, att->Zoffset); + + intel_region_release(&irb->region); + intel_region_reference(&irb->region, intel_image->mt->region); + } +#endif /* update drawing region, etc */ intel_draw_buffer(ctx, fb); } @@ -659,14 +573,15 @@ intel_finish_render_texture(struct gl_context * ctx, _glthread_GetID(), att->Texture->Name); /* Flag that this image may now be validated into the object's miptree. */ - intel_image->used_as_render_target = GL_FALSE; + if (intel_image) + intel_image->used_as_render_target = GL_FALSE; /* Since we've (probably) rendered to the texture and will (likely) use * it in the texture domain later on in this batchbuffer, flush the * batch. Once again, we wish for a domain tracker in libdrm to cover * usage inside of a batchbuffer like GEM does in the kernel. */ - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel); } /** @@ -675,6 +590,7 @@ intel_finish_render_texture(struct gl_context * ctx, static void intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) { + struct intel_context *intel = intel_context(ctx); const struct intel_renderbuffer *depthRb = intel_get_renderbuffer(fb, BUFFER_DEPTH); const struct intel_renderbuffer *stencilRb = @@ -682,10 +598,10 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) int i; if (depthRb && stencilRb && stencilRb != depthRb) { - if (ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Type == GL_TEXTURE && - ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Type == GL_TEXTURE && - (ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Texture->Name == - ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Texture->Name)) { + if (fb->Attachment[BUFFER_DEPTH].Type == GL_TEXTURE && + fb->Attachment[BUFFER_STENCIL].Type == GL_TEXTURE && + (fb->Attachment[BUFFER_DEPTH].Texture->Name == + fb->Attachment[BUFFER_STENCIL].Texture->Name)) { /* OK */ } else { /* we only support combined depth/stencil buffers, not separate @@ -698,40 +614,118 @@ intel_validate_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb) } } - for (i = 0; i < ctx->Const.MaxDrawBuffers; i++) { - struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; - struct intel_renderbuffer *irb = intel_renderbuffer(rb); + for (i = 0; i < Elements(fb->Attachment); i++) { + struct gl_renderbuffer *rb; + struct intel_renderbuffer *irb; - if (rb == NULL) + if (fb->Attachment[i].Type == GL_NONE) continue; + /* A supported attachment will have a Renderbuffer set either + * from being a Renderbuffer or being a texture that got the + * intel_wrap_texture() treatment. + */ + rb = fb->Attachment[i].Renderbuffer; + if (rb == NULL) { + DBG("attachment without renderbuffer\n"); + fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; + continue; + } + + irb = intel_renderbuffer(rb); if (irb == NULL) { DBG("software rendering renderbuffer\n"); fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; continue; } - switch (irb->Base.Format) { - case MESA_FORMAT_ARGB8888: - case MESA_FORMAT_XRGB8888: - case MESA_FORMAT_RGB565: - case MESA_FORMAT_ARGB1555: - case MESA_FORMAT_ARGB4444: -#ifndef I915 - case MESA_FORMAT_SARGB8: - case MESA_FORMAT_A8: - case MESA_FORMAT_R8: - case MESA_FORMAT_R16: - case MESA_FORMAT_RG88: - case MESA_FORMAT_RG1616: -#endif - break; - default: + if (!intel_span_supports_format(irb->Base.Format) || + !intel->vtbl.render_target_supported(irb->Base.Format)) { + DBG("Unsupported texture/renderbuffer format attached: %s\n", + _mesa_get_format_name(irb->Base.Format)); fb->_Status = GL_FRAMEBUFFER_UNSUPPORTED_EXT; } } } +/** + * Try to do a glBlitFramebuffer using glCopyTexSubImage2D + * We can do this when the dst renderbuffer is actually a texture and + * there is no scaling, mirroring or scissoring. + * + * \return new buffer mask indicating the buffers left to blit using the + * normal path. + */ +static GLbitfield +intel_blit_framebuffer_copy_tex_sub_image(struct gl_context *ctx, + GLint srcX0, GLint srcY0, + GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, + GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter) +{ + if (mask & GL_COLOR_BUFFER_BIT) { + const struct gl_framebuffer *drawFb = ctx->DrawBuffer; + const struct gl_framebuffer *readFb = ctx->ReadBuffer; + const struct gl_renderbuffer_attachment *drawAtt = + &drawFb->Attachment[drawFb->_ColorDrawBufferIndexes[0]]; + + /* If the source and destination are the same size with no + mirroring, the rectangles are within the size of the + texture and there is no scissor then we can use + glCopyTexSubimage2D to implement the blit. This will end + up as a fast hardware blit on some drivers */ + if (drawAtt && drawAtt->Texture && + srcX0 - srcX1 == dstX0 - dstX1 && + srcY0 - srcY1 == dstY0 - dstY1 && + srcX1 >= srcX0 && + srcY1 >= srcY0 && + srcX0 >= 0 && srcX1 <= readFb->Width && + srcY0 >= 0 && srcY1 <= readFb->Height && + dstX0 >= 0 && dstX1 <= drawFb->Width && + dstY0 >= 0 && dstY1 <= drawFb->Height && + !ctx->Scissor.Enabled) { + const struct gl_texture_object *texObj = drawAtt->Texture; + const GLuint dstLevel = drawAtt->TextureLevel; + const GLenum target = texObj->Target; + + struct gl_texture_image *texImage = + _mesa_select_tex_image(ctx, texObj, target, dstLevel); + GLenum internalFormat = texImage->InternalFormat; + + if (intel_copy_texsubimage(intel_context(ctx), target, + intel_texture_image(texImage), + internalFormat, + dstX0, dstY0, + srcX0, srcY0, + srcX1 - srcX0, /* width */ + srcY1 - srcY0)) + mask &= ~GL_COLOR_BUFFER_BIT; + } + } + + return mask; +} + +static void +intel_blit_framebuffer(struct gl_context *ctx, + GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, + GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, + GLbitfield mask, GLenum filter) +{ + /* Try faster, glCopyTexSubImage2D approach first which uses the BLT. */ + mask = intel_blit_framebuffer_copy_tex_sub_image(ctx, + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, filter); + if (mask == 0x0) + return; + + _mesa_meta_BlitFramebuffer(ctx, + srcX0, srcY0, srcX1, srcY1, + dstX0, dstY0, dstX1, dstY1, + mask, filter); +} /** * Do one-time context initializations related to GL_EXT_framebuffer_object. @@ -748,7 +742,7 @@ intel_fbo_init(struct intel_context *intel) intel->ctx.Driver.FinishRenderTexture = intel_finish_render_texture; intel->ctx.Driver.ResizeBuffers = intel_resize_buffers; intel->ctx.Driver.ValidateFramebuffer = intel_validate_framebuffer; - intel->ctx.Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer; + intel->ctx.Driver.BlitFramebuffer = intel_blit_framebuffer; #if FEATURE_OES_EGL_image intel->ctx.Driver.EGLImageTargetRenderbufferStorage = diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c index 9c4e5c5ee8..a3409274fb 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c @@ -157,8 +157,6 @@ struct intel_mipmap_tree * intel_miptree_create_for_region(struct intel_context *intel, GLenum target, GLenum internal_format, - GLuint first_level, - GLuint last_level, struct intel_region *region, GLuint depth0, GLuint compress_byte) @@ -166,7 +164,7 @@ intel_miptree_create_for_region(struct intel_context *intel, struct intel_mipmap_tree *mt; mt = intel_miptree_create_internal(intel, target, internal_format, - first_level, last_level, + 0, 0, region->width, region->height, 1, region->cpp, compress_byte, I915_TILING_NONE); diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h index 21db2f4d3b..760a8bce60 100644 --- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.h @@ -137,8 +137,6 @@ struct intel_mipmap_tree * intel_miptree_create_for_region(struct intel_context *intel, GLenum target, GLenum internal_format, - GLuint first_level, - GLuint last_level, struct intel_region *region, GLuint depth0, GLuint compress_byte); diff --git a/src/mesa/drivers/dri/intel/intel_pixel.c b/src/mesa/drivers/dri/intel/intel_pixel.c index d5c35775ce..f97256e59b 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel.c +++ b/src/mesa/drivers/dri/intel/intel_pixel.c @@ -66,12 +66,12 @@ intel_check_blit_fragment_ops(struct gl_context * ctx, GLboolean src_alpha_is_on } if (ctx->Color.BlendEnabled && - (effective_func(ctx->Color.BlendSrcRGB, src_alpha_is_one) != GL_ONE || - effective_func(ctx->Color.BlendDstRGB, src_alpha_is_one) != GL_ZERO || - ctx->Color.BlendEquationRGB != GL_FUNC_ADD || - effective_func(ctx->Color.BlendSrcA, src_alpha_is_one) != GL_ONE || - effective_func(ctx->Color.BlendDstA, src_alpha_is_one) != GL_ZERO || - ctx->Color.BlendEquationA != GL_FUNC_ADD)) { + (effective_func(ctx->Color.Blend[0].SrcRGB, src_alpha_is_one) != GL_ONE || + effective_func(ctx->Color.Blend[0].DstRGB, src_alpha_is_one) != GL_ZERO || + ctx->Color.Blend[0].EquationRGB != GL_FUNC_ADD || + effective_func(ctx->Color.Blend[0].SrcA, src_alpha_is_one) != GL_ONE || + effective_func(ctx->Color.Blend[0].DstA, src_alpha_is_one) != GL_ZERO || + ctx->Color.Blend[0].EquationA != GL_FUNC_ADD)) { DBG("fallback due to blend\n"); return GL_FALSE; } diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index e7356a6da0..43cdd0d2ba 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -31,6 +31,7 @@ #include "main/colormac.h" #include "main/mtypes.h" #include "main/macros.h" +#include "main/pbo.h" #include "main/bufferobj.h" #include "main/state.h" #include "main/texobj.h" @@ -207,7 +208,7 @@ do_blit_bitmap( struct gl_context *ctx, COPY_4V(tmpColor, ctx->Current.RasterColor); - if (NEED_SECONDARY_COLOR(ctx)) { + if (_mesa_need_secondary_color(ctx)) { ADD_3V(tmpColor, tmpColor, ctx->Current.RasterSecondaryColor); } @@ -285,7 +286,7 @@ do_blit_bitmap( struct gl_context *ctx, out: if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) - intel_batchbuffer_flush(intel->batch); + intel_batchbuffer_flush(intel); if (_mesa_is_bufferobj(unpack->BufferObj)) { /* done with PBO so unmap it now */ diff --git a/src/mesa/drivers/dri/intel/intel_pixel_read.c b/src/mesa/drivers/dri/intel/intel_pixel_read.c index 54da29236d..b2e77c7986 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_read.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_read.c @@ -80,6 +80,7 @@ do_blit_readpixels(struct gl_context * ctx, drm_intel_bo *dst_buffer; GLboolean all; GLint dst_x, dst_y; + GLuint dirty; DBG("%s\n", __FUNCTION__); @@ -129,7 +130,9 @@ do_blit_readpixels(struct gl_context * ctx, return GL_TRUE; } + dirty = intel->front_buffer_dirty; intel_prepare_render(intel); + intel->front_buffer_dirty = dirty; all = (width * height * src->cpp == dst->Base.Size && x == 0 && dst_offset == 0); @@ -138,8 +141,8 @@ do_blit_readpixels(struct gl_context * ctx, dst_y = 0; dst_buffer = intel_bufferobj_buffer(intel, dst, - all ? INTEL_WRITE_FULL : - INTEL_WRITE_PART); + all ? INTEL_WRITE_FULL : + INTEL_WRITE_PART); if (ctx->ReadBuffer->Name == 0) y = ctx->ReadBuffer->Height - (y + height); @@ -171,6 +174,10 @@ intelReadPixels(struct gl_context * ctx, DBG("%s\n", __FUNCTION__); + if (do_blit_readpixels + (ctx, x, y, width, height, format, type, pack, pixels)) + return; + intel_flush(ctx); /* glReadPixels() wont dirty the front buffer, so reset the dirty @@ -179,10 +186,6 @@ intelReadPixels(struct gl_context * ctx, intel_prepare_render(intel); intel->front_buffer_dirty = dirty; - if (do_blit_readpixels - (ctx, x, y, width, height, format, type, pack, pixels)) - return; - fallback_debug("%s: fallback to swrast\n", __FUNCTION__); /* Update Mesa state before calling down into _swrast_ReadPixels, as diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index 955b100b21..5258699d3f 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -37,6 +37,8 @@ #define FLUSH_MAP_CACHE (1 << 0) #define INHIBIT_FLUSH_RENDER_CACHE (1 << 2) +#define MI_FLUSH_DW (CMD_MI | (0x26 << 23) | 2) + /* Stalls command execution waiting for the given events to have occurred. */ #define MI_WAIT_FOR_EVENT (CMD_MI | (0x3 << 23)) #define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index e87e29462c..a4da1ce4fa 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -149,11 +149,6 @@ intel_region_alloc_internal(struct intel_screen *screen, { struct intel_region *region; - if (buffer == NULL) { - _DBG("%s <-- NULL\n", __FUNCTION__); - return NULL; - } - region = calloc(sizeof(*region), 1); if (region == NULL) return region; @@ -180,6 +175,7 @@ intel_region_alloc(struct intel_screen *screen, drm_intel_bo *buffer; unsigned long flags = 0; unsigned long aligned_pitch; + struct intel_region *region; if (expect_accelerated_upload) flags |= BO_ALLOC_FOR_RENDER; @@ -187,9 +183,17 @@ intel_region_alloc(struct intel_screen *screen, buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "region", width, height, cpp, &tiling, &aligned_pitch, flags); + if (buffer == NULL) + return NULL; - return intel_region_alloc_internal(screen, cpp, width, height, - aligned_pitch / cpp, tiling, buffer); + region = intel_region_alloc_internal(screen, cpp, width, height, + aligned_pitch / cpp, tiling, buffer); + if (region == NULL) { + drm_intel_bo_unreference(buffer); + return NULL; + } + + return region; } GLboolean @@ -491,7 +495,7 @@ intel_region_cow(struct intel_context *intel, struct intel_region *region) assert(region->cpp * region->pitch * region->height == pbo->Base.Size); - _DBG("%s %p (%d bytes)\n", __FUNCTION__, region, pbo->Base.Size); + _DBG("%s %p (%d bytes)\n", __FUNCTION__, region, (int)pbo->Base.Size); /* Now blit from the texture buffer to the new buffer: */ diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index e5b6c9f0e4..c8cff0147e 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -25,29 +25,18 @@ * **************************************************************************/ +#include <errno.h> #include "main/glheader.h" #include "main/context.h" #include "main/framebuffer.h" #include "main/renderbuffer.h" #include "main/hash.h" #include "main/fbobject.h" +#include "main/mfeatures.h" #include "utils.h" #include "xmlpool.h" -#include "intel_batchbuffer.h" -#include "intel_buffers.h" -#include "intel_bufmgr.h" -#include "intel_chipset.h" -#include "intel_fbo.h" -#include "intel_screen.h" -#include "intel_tex.h" -#include "intel_regions.h" - -#include "i915_drm.h" - -#define DRI_CONF_TEXTURE_TILING(def) \ - PUBLIC const char __driConfigOptions[] = DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE @@ -92,6 +81,17 @@ DRI_CONF_END; const GLuint __driNConfigOptions = 11; +#include "intel_batchbuffer.h" +#include "intel_buffers.h" +#include "intel_bufmgr.h" +#include "intel_chipset.h" +#include "intel_fbo.h" +#include "intel_screen.h" +#include "intel_tex.h" +#include "intel_regions.h" + +#include "i915_drm.h" + #ifdef USE_NEW_INTERFACE static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; #endif /*USE_NEW_INTERFACE */ @@ -105,15 +105,16 @@ static const __DRItexBufferExtension intelTexBufferExtension = { static void intelDRI2Flush(__DRIdrawable *drawable) { - struct intel_context *intel = drawable->driContextPriv->driverPrivate; + GET_CURRENT_CONTEXT(ctx); + struct intel_context *intel = intel_context(ctx); if (intel->gen < 4) INTEL_FIREVERTICES(intel); intel->need_throttle = GL_TRUE; - if (intel->batch->map != intel->batch->ptr) - intel_batchbuffer_flush(intel->batch); + if (intel->batch.used) + intel_batchbuffer_flush(intel); } static const struct __DRI2flushExtensionRec intelFlushExtension = { @@ -312,13 +313,21 @@ intel_get_param(__DRIscreen *psp, int param, int *value) ret = drmCommandWriteRead(psp->fd, DRM_I915_GETPARAM, &gp, sizeof(gp)); if (ret) { - _mesa_warning(NULL, "drm_i915_getparam: %d", ret); + if (ret != -EINVAL) + _mesa_warning(NULL, "drm_i915_getparam: %d", ret); return GL_FALSE; } return GL_TRUE; } +static GLboolean +intel_get_boolean(__DRIscreen *psp, int param) +{ + int value = 0; + return intel_get_param(psp, param, &value) && value; +} + static void nop_callback(GLuint key, void *data, void *userData) { @@ -492,6 +501,10 @@ intel_init_bufmgr(struct intel_screen *intelScreen) intelScreen->named_regions = _mesa_NewHashTable(); + intelScreen->relaxed_relocations = 0; + intelScreen->relaxed_relocations |= + intel_get_boolean(spriv, I915_PARAM_HAS_RELAXED_DELTA) << 0; + return GL_TRUE; } @@ -645,6 +658,51 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) return (const __DRIconfig **)configs; } +struct intel_buffer { + __DRIbuffer base; + struct intel_region *region; +}; + +static __DRIbuffer * +intelAllocateBuffer(__DRIscreen *screen, + unsigned attachment, unsigned format, + int width, int height) +{ + struct intel_buffer *intelBuffer; + struct intel_screen *intelScreen = screen->private; + + intelBuffer = CALLOC(sizeof *intelBuffer); + if (intelBuffer == NULL) + return NULL; + + intelBuffer->region = intel_region_alloc(intelScreen, I915_TILING_NONE, + format / 8, width, height, GL_TRUE); + + if (intelBuffer->region == NULL) { + FREE(intelBuffer); + return NULL; + } + + intel_region_flink(intelBuffer->region, &intelBuffer->base.name); + + intelBuffer->base.attachment = attachment; + intelBuffer->base.cpp = intelBuffer->region->cpp; + intelBuffer->base.pitch = + intelBuffer->region->pitch * intelBuffer->region->cpp; + + return &intelBuffer->base; +} + +static void +intelReleaseBuffer(__DRIscreen *screen, __DRIbuffer *buffer) +{ + struct intel_buffer *intelBuffer = (struct intel_buffer *) buffer; + + intel_region_release(&intelBuffer->region); + free(intelBuffer); +} + + const struct __DriverAPIRec driDriverAPI = { .DestroyScreen = intelDestroyScreen, .CreateContext = intelCreateContext, @@ -654,6 +712,8 @@ const struct __DriverAPIRec driDriverAPI = { .MakeCurrent = intelMakeCurrent, .UnbindContext = intelUnbindContext, .InitScreen2 = intelInitScreen2, + .AllocateBuffer = intelAllocateBuffer, + .ReleaseBuffer = intelReleaseBuffer }; /* This is the table of extensions that the loader will dlsym() for. */ diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index 5863093f00..0f0b5be56d 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -43,6 +43,7 @@ struct intel_screen __DRIscreen *driScrnPriv; GLboolean no_hw; + GLuint relaxed_relocations; GLboolean no_vbo; dri_bufmgr *bufmgr; diff --git a/src/mesa/drivers/dri/intel/intel_span.c b/src/mesa/drivers/dri/intel/intel_span.c index 104cadf0f9..1f41518535 100644 --- a/src/mesa/drivers/dri/intel/intel_span.c +++ b/src/mesa/drivers/dri/intel/intel_span.c @@ -25,6 +25,7 @@ * **************************************************************************/ +#include <stdbool.h> #include "main/glheader.h" #include "main/macros.h" #include "main/mtypes.h" @@ -113,6 +114,26 @@ intel_set_span_functions(struct intel_context *intel, #define TAG2(x,y) intel_##x##y##_A8 #include "spantmp2.h" +#define SPANTMP_MESA_FMT MESA_FORMAT_R8 +#define TAG(x) intel_##x##_R8 +#define TAG2(x,y) intel_##x##y##_R8 +#include "spantmp2.h" + +#define SPANTMP_MESA_FMT MESA_FORMAT_RG88 +#define TAG(x) intel_##x##_RG88 +#define TAG2(x,y) intel_##x##y##_RG88 +#include "spantmp2.h" + +#define SPANTMP_MESA_FMT MESA_FORMAT_R16 +#define TAG(x) intel_##x##_R16 +#define TAG2(x,y) intel_##x##y##_R16 +#include "spantmp2.h" + +#define SPANTMP_MESA_FMT MESA_FORMAT_RG1616 +#define TAG(x) intel_##x##_RG1616 +#define TAG2(x,y) intel_##x##y##_RG1616 +#include "spantmp2.h" + #define LOCAL_DEPTH_VARS \ struct intel_renderbuffer *irb = intel_renderbuffer(rb); \ const GLint yScale = rb->Name ? 1 : -1; \ @@ -339,6 +360,32 @@ intel_unmap_vertex_shader_textures(struct gl_context *ctx) } } +typedef void (*span_init_func)(struct gl_renderbuffer *rb); + +static span_init_func intel_span_init_funcs[MESA_FORMAT_COUNT] = +{ + [MESA_FORMAT_A8] = intel_InitPointers_A8, + [MESA_FORMAT_RGB565] = intel_InitPointers_RGB565, + [MESA_FORMAT_ARGB4444] = intel_InitPointers_ARGB4444, + [MESA_FORMAT_ARGB1555] = intel_InitPointers_ARGB1555, + [MESA_FORMAT_XRGB8888] = intel_InitPointers_xRGB8888, + [MESA_FORMAT_ARGB8888] = intel_InitPointers_ARGB8888, + [MESA_FORMAT_SARGB8] = intel_InitPointers_ARGB8888, + [MESA_FORMAT_Z16] = intel_InitDepthPointers_z16, + [MESA_FORMAT_X8_Z24] = intel_InitDepthPointers_z24_s8, + [MESA_FORMAT_S8_Z24] = intel_InitDepthPointers_z24_s8, + [MESA_FORMAT_R8] = intel_InitPointers_R8, + [MESA_FORMAT_RG88] = intel_InitPointers_RG88, + [MESA_FORMAT_R16] = intel_InitPointers_R16, + [MESA_FORMAT_RG1616] = intel_InitPointers_RG1616, +}; + +bool +intel_span_supports_format(gl_format format) +{ + return intel_span_init_funcs[format] != NULL; +} + /** * Plug in appropriate span read/write functions for the given renderbuffer. * These are used for the software fallbacks. @@ -349,37 +396,6 @@ intel_set_span_functions(struct intel_context *intel, { struct intel_renderbuffer *irb = (struct intel_renderbuffer *) rb; - switch (irb->Base.Format) { - case MESA_FORMAT_A8: - intel_InitPointers_A8(rb); - break; - case MESA_FORMAT_RGB565: - intel_InitPointers_RGB565(rb); - break; - case MESA_FORMAT_ARGB4444: - intel_InitPointers_ARGB4444(rb); - break; - case MESA_FORMAT_ARGB1555: - intel_InitPointers_ARGB1555(rb); - break; - case MESA_FORMAT_XRGB8888: - intel_InitPointers_xRGB8888(rb); - break; - case MESA_FORMAT_ARGB8888: - case MESA_FORMAT_SARGB8: - intel_InitPointers_ARGB8888(rb); - break; - case MESA_FORMAT_Z16: - intel_InitDepthPointers_z16(rb); - break; - case MESA_FORMAT_X8_Z24: - case MESA_FORMAT_S8_Z24: - intel_InitDepthPointers_z24_s8(rb); - break; - default: - _mesa_problem(NULL, - "Unexpected MesaFormat %d in intelSetSpanFunctions", - irb->Base.Format); - break; - } + assert(intel_span_init_funcs[irb->Base.Format]); + intel_span_init_funcs[irb->Base.Format](rb); } diff --git a/src/mesa/drivers/dri/intel/intel_span.h b/src/mesa/drivers/dri/intel/intel_span.h index aa8d08e843..5a4c4e8e52 100644 --- a/src/mesa/drivers/dri/intel/intel_span.h +++ b/src/mesa/drivers/dri/intel/intel_span.h @@ -28,6 +28,9 @@ #ifndef _INTEL_SPAN_H #define _INTEL_SPAN_H +#include "main/formats.h" +#include <stdbool.h> + extern void intelInitSpanFuncs(struct gl_context * ctx); extern void intelSpanRenderFinish(struct gl_context * ctx); @@ -38,5 +41,6 @@ void intel_renderbuffer_unmap(struct intel_context *intel, struct gl_renderbuffer *rb); void intel_map_vertex_shader_textures(struct gl_context *ctx); void intel_unmap_vertex_shader_textures(struct gl_context *ctx); +bool intel_span_supports_format(gl_format format); #endif diff --git a/src/mesa/drivers/dri/intel/intel_syncobj.c b/src/mesa/drivers/dri/intel/intel_syncobj.c index bbfac74b60..b303ea84dd 100644 --- a/src/mesa/drivers/dri/intel/intel_syncobj.c +++ b/src/mesa/drivers/dri/intel/intel_syncobj.c @@ -72,9 +72,9 @@ intel_fence_sync(struct gl_context *ctx, struct gl_sync_object *s, struct intel_sync_object *sync = (struct intel_sync_object *)s; assert(condition == GL_SYNC_GPU_COMMANDS_COMPLETE); - intel_batchbuffer_emit_mi_flush(intel->batch); + intel_batchbuffer_emit_mi_flush(intel); - sync->bo = intel->batch->buf; + sync->bo = intel->batch.bo; drm_intel_bo_reference(sync->bo); intel_flush(ctx); diff --git a/src/mesa/drivers/dri/intel/intel_tex.c b/src/mesa/drivers/dri/intel/intel_tex.c index 646e55bdff..077c611901 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.c +++ b/src/mesa/drivers/dri/intel/intel_tex.c @@ -113,7 +113,6 @@ intelGenerateMipmap(struct gl_context *ctx, GLenum target, void intelInitTextureFuncs(struct dd_function_table *functions) { - functions->ChooseTextureFormat = intelChooseTextureFormat; functions->GenerateMipmap = intelGenerateMipmap; functions->NewTextureObject = intelNewTextureObject; diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h index 7906554e45..52462f39d5 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.h +++ b/src/mesa/drivers/dri/intel/intel_tex.h @@ -40,8 +40,7 @@ void intelInitTextureSubImageFuncs(struct dd_function_table *functions); void intelInitTextureCopyImageFuncs(struct dd_function_table *functions); -gl_format intelChooseTextureFormat(struct gl_context *ctx, GLint internalFormat, - GLenum format, GLenum type); +GLenum intel_mesa_format_to_rb_datatype(gl_format format); void intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *pDraw); @@ -66,4 +65,12 @@ void intel_tex_unmap_images(struct intel_context *intel, int intel_compressed_num_bytes(GLuint mesaFormat); +GLboolean intel_copy_texsubimage(struct intel_context *intel, + GLenum target, + struct intel_texture_image *intelImage, + GLenum internalFormat, + GLint dstx, GLint dsty, + GLint x, GLint y, + GLsizei width, GLsizei height); + #endif diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 87b31bf078..62d4169acd 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -35,7 +35,6 @@ #include "intel_screen.h" #include "intel_context.h" -#include "intel_buffers.h" #include "intel_mipmap_tree.h" #include "intel_regions.h" #include "intel_fbo.h" @@ -50,74 +49,70 @@ * Do the best we can using the blitter. A future project is to use * the texture engine and fragment programs for these copies. */ -static const struct intel_region * -get_teximage_source(struct intel_context *intel, GLenum internalFormat) +static struct intel_renderbuffer * +get_teximage_readbuffer(struct intel_context *intel, GLenum internalFormat) { - struct intel_renderbuffer *irb; - DBG("%s %s\n", __FUNCTION__, _mesa_lookup_enum_by_nr(internalFormat)); switch (internalFormat) { case GL_DEPTH_COMPONENT: case GL_DEPTH_COMPONENT16: - irb = intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH); - if (irb && irb->region && irb->region->cpp == 2) - return irb->region; - return NULL; case GL_DEPTH24_STENCIL8_EXT: case GL_DEPTH_STENCIL_EXT: - irb = intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH); - if (irb && irb->region && irb->region->cpp == 4) - return irb->region; - return NULL; - case 4: - case GL_RGBA: - case GL_RGBA8: - irb = intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); - /* We're required to set alpha to 1.0 in this case, but we can't - * do that with the blitter, so fall back. We could use the 3D - * engine or do two passes with the blitter, but it doesn't seem - * worth it for this case. */ - if (irb->Base._BaseFormat == GL_RGB) - return NULL; - return irb->region; - case 3: - case GL_RGB: - case GL_RGB8: - return intel_readbuf_region(intel); + return intel_get_renderbuffer(intel->ctx.ReadBuffer, BUFFER_DEPTH); default: - return NULL; + return intel_renderbuffer(intel->ctx.ReadBuffer->_ColorReadBuffer); } } -static GLboolean -do_copy_texsubimage(struct intel_context *intel, - GLenum target, - struct intel_texture_image *intelImage, - GLenum internalFormat, - GLint dstx, GLint dsty, - GLint x, GLint y, GLsizei width, GLsizei height) +GLboolean +intel_copy_texsubimage(struct intel_context *intel, + GLenum target, + struct intel_texture_image *intelImage, + GLenum internalFormat, + GLint dstx, GLint dsty, + GLint x, GLint y, GLsizei width, GLsizei height) { struct gl_context *ctx = &intel->ctx; - const struct intel_region *src = get_teximage_source(intel, internalFormat); + struct intel_renderbuffer *irb; + bool copy_supported = false; + bool copy_supported_with_alpha_override = false; + + intel_prepare_render(intel); - if (!intelImage->mt || !src || !src->buffer) { + irb = get_teximage_readbuffer(intel, internalFormat); + if (!intelImage->mt || !irb || !irb->region) { if (unlikely(INTEL_DEBUG & DEBUG_FALLBACKS)) fprintf(stderr, "%s fail %p %p (0x%08x)\n", - __FUNCTION__, intelImage->mt, src, internalFormat); + __FUNCTION__, intelImage->mt, irb, internalFormat); return GL_FALSE; } - if (intelImage->mt->cpp != src->cpp) { - fallback_debug("%s fail %d vs %d cpp\n", - __FUNCTION__, intelImage->mt->cpp, src->cpp); + copy_supported = intelImage->base.TexFormat == irb->Base.Format; + + /* Converting ARGB8888 to XRGB8888 is trivial: ignore the alpha bits */ + if (irb->Base.Format == MESA_FORMAT_ARGB8888 && + intelImage->base.TexFormat == MESA_FORMAT_XRGB8888) { + copy_supported = true; + } + + /* Converting XRGB8888 to ARGB8888 requires setting the alpha bits to 1.0 */ + if (irb->Base.Format == MESA_FORMAT_XRGB8888 && + intelImage->base.TexFormat == MESA_FORMAT_ARGB8888) { + copy_supported_with_alpha_override = true; + } + + if (!copy_supported && !copy_supported_with_alpha_override) { + if (unlikely(INTEL_DEBUG & DEBUG_FALLBACKS)) + fprintf(stderr, "%s mismatched formats %s, %s\n", + __FUNCTION__, + _mesa_get_format_name(intelImage->base.TexFormat), + _mesa_get_format_name(irb->Base.Format)); return GL_FALSE; } - /* intel_flush(ctx); */ - intel_prepare_render(intel); { drm_intel_bo *dst_bo = intel_region_buffer(intel, intelImage->mt->region, @@ -140,24 +135,24 @@ do_copy_texsubimage(struct intel_context *intel, if (ctx->ReadBuffer->Name == 0) { /* Flip vertical orientation for system framebuffers */ y = ctx->ReadBuffer->Height - (y + height); - src_pitch = -src->pitch; + src_pitch = -irb->region->pitch; } else { /* reading from a FBO, y is already oriented the way we like */ - src_pitch = src->pitch; + src_pitch = irb->region->pitch; } /* blit from src buffer to texture */ if (!intelEmitCopyBlit(intel, intelImage->mt->cpp, src_pitch, - src->buffer, + irb->region->buffer, 0, - src->tiling, + irb->region->tiling, intelImage->mt->region->pitch, dst_bo, 0, intelImage->mt->region->tiling, - src->draw_x + x, src->draw_y + y, + irb->region->draw_x + x, irb->region->draw_y + y, image_x + dstx, image_y + dsty, width, height, GL_COPY)) { @@ -165,6 +160,9 @@ do_copy_texsubimage(struct intel_context *intel, } } + if (copy_supported_with_alpha_override) + intel_set_teximage_alpha_to_one(ctx, intelImage); + return GL_TRUE; } @@ -202,9 +200,9 @@ intelCopyTexImage1D(struct gl_context * ctx, GLenum target, GLint level, &width, &height)) return; - if (!do_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, 0, 0, x, y, width, height)) + if (!intel_copy_texsubimage(intel_context(ctx), target, + intel_texture_image(texImage), + internalFormat, 0, 0, x, y, width, height)) goto fail; return; @@ -250,9 +248,9 @@ intelCopyTexImage2D(struct gl_context * ctx, GLenum target, GLint level, &width, &height)) return; - if (!do_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, 0, 0, x, y, width, height)) + if (!intel_copy_texsubimage(intel_context(ctx), target, + intel_texture_image(texImage), + internalFormat, 0, 0, x, y, width, height)) goto fail; return; @@ -280,9 +278,9 @@ intelCopyTexSubImage1D(struct gl_context * ctx, GLenum target, GLint level, /* Need to check texture is compatible with source format. */ - if (!do_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, xoffset, 0, x, y, width, 1)) { + if (!intel_copy_texsubimage(intel_context(ctx), target, + intel_texture_image(texImage), + internalFormat, xoffset, 0, x, y, width, 1)) { fallback_debug("%s - fallback to swrast\n", __FUNCTION__); _mesa_meta_CopyTexSubImage1D(ctx, target, level, xoffset, x, y, width); } @@ -304,11 +302,10 @@ intelCopyTexSubImage2D(struct gl_context * ctx, GLenum target, GLint level, /* Need to check texture is compatible with source format. */ - if (!do_copy_texsubimage(intel_context(ctx), target, - intel_texture_image(texImage), - internalFormat, - xoffset, yoffset, x, y, width, height)) { - + if (!intel_copy_texsubimage(intel_context(ctx), target, + intel_texture_image(texImage), + internalFormat, + xoffset, yoffset, x, y, width, height)) { fallback_debug("%s - fallback to swrast\n", __FUNCTION__); _mesa_meta_CopyTexSubImage2D(ctx, target, level, xoffset, yoffset, x, y, width, height); diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c index 9d73a2fb37..87745bc66d 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_format.c +++ b/src/mesa/drivers/dri/intel/intel_tex_format.c @@ -4,224 +4,35 @@ #include "main/formats.h" /** - * Choose hardware texture format given the user's glTexImage parameters. - * - * It works out that this function is fine for all the supported - * hardware. However, there is still a need to map the formats onto - * hardware descriptors. - * - * Note that the i915 can actually support many more formats than - * these if we take the step of simply swizzling the colors - * immediately after sampling... + * Returns the renderbuffer DataType for a MESA_FORMAT. */ -gl_format -intelChooseTextureFormat(struct gl_context * ctx, GLint internalFormat, - GLenum format, GLenum type) +GLenum +intel_mesa_format_to_rb_datatype(gl_format format) { - struct intel_context *intel = intel_context(ctx); - -#if 0 - printf("%s intFmt=0x%x format=0x%x type=0x%x\n", - __FUNCTION__, internalFormat, format, type); -#endif - - switch (internalFormat) { - case 4: - case GL_RGBA: - case GL_COMPRESSED_RGBA: - if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) - return MESA_FORMAT_ARGB4444; - else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) - return MESA_FORMAT_ARGB1555; - else - return MESA_FORMAT_ARGB8888; - - case 3: - case GL_RGB: - case GL_COMPRESSED_RGB: - if (type == GL_UNSIGNED_SHORT_5_6_5) - return MESA_FORMAT_RGB565; - else if (intel->has_xrgb_textures) - return MESA_FORMAT_XRGB8888; - else - return MESA_FORMAT_ARGB8888; - - case GL_RGBA8: - case GL_RGB10_A2: - case GL_RGBA12: - case GL_RGBA16: - return MESA_FORMAT_ARGB8888; - - case GL_RGBA4: - case GL_RGBA2: - return MESA_FORMAT_ARGB4444; - - case GL_RGB5_A1: - return MESA_FORMAT_ARGB1555; - - case GL_RGB8: - case GL_RGB10: - case GL_RGB12: - case GL_RGB16: - if (intel->has_xrgb_textures) - return MESA_FORMAT_XRGB8888; - else - return MESA_FORMAT_ARGB8888; - - case GL_RGB5: - case GL_RGB4: - case GL_R3_G3_B2: - return MESA_FORMAT_RGB565; - - case GL_ALPHA: - case GL_ALPHA4: - case GL_ALPHA8: - case GL_ALPHA12: - case GL_ALPHA16: - case GL_COMPRESSED_ALPHA: - return MESA_FORMAT_A8; - - case 1: - case GL_LUMINANCE: - case GL_LUMINANCE4: - case GL_LUMINANCE8: - case GL_LUMINANCE12: - case GL_LUMINANCE16: - case GL_COMPRESSED_LUMINANCE: - return MESA_FORMAT_L8; - - case GL_LUMINANCE12_ALPHA4: - case GL_LUMINANCE12_ALPHA12: - case GL_LUMINANCE16_ALPHA16: - /* i915 could implement this mode using MT_32BIT_RG1616. However, this - * would require an extra swizzle instruction in the fragment shader to - * convert the { R, G, 1.0, 1.0 } to { R, R, R, G }. - */ -#ifndef I915 - return MESA_FORMAT_AL1616; -#else - /* FALLTHROUGH */ -#endif - - case 2: - case GL_LUMINANCE_ALPHA: - case GL_LUMINANCE4_ALPHA4: - case GL_LUMINANCE6_ALPHA2: - case GL_LUMINANCE8_ALPHA8: - case GL_COMPRESSED_LUMINANCE_ALPHA: - return MESA_FORMAT_AL88; - - case GL_INTENSITY: - case GL_INTENSITY4: - case GL_INTENSITY8: - case GL_INTENSITY12: - case GL_INTENSITY16: - case GL_COMPRESSED_INTENSITY: - return MESA_FORMAT_I8; - - case GL_YCBCR_MESA: - if (type == GL_UNSIGNED_SHORT_8_8_MESA || type == GL_UNSIGNED_BYTE) - return MESA_FORMAT_YCBCR; - else - return MESA_FORMAT_YCBCR_REV; - - case GL_COMPRESSED_RGB_FXT1_3DFX: - return MESA_FORMAT_RGB_FXT1; - case GL_COMPRESSED_RGBA_FXT1_3DFX: - return MESA_FORMAT_RGBA_FXT1; - - case GL_RGB_S3TC: - case GL_RGB4_S3TC: - case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: - return MESA_FORMAT_RGB_DXT1; - - case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: - return MESA_FORMAT_RGBA_DXT1; - - case GL_RGBA_S3TC: - case GL_RGBA4_S3TC: - case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: - return MESA_FORMAT_RGBA_DXT3; - - case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: - return MESA_FORMAT_RGBA_DXT5; - - case GL_DEPTH_COMPONENT: - case GL_DEPTH_COMPONENT16: - case GL_DEPTH_COMPONENT24: - case GL_DEPTH_COMPONENT32: -#if 0 - return MESA_FORMAT_Z16; -#else - /* fall-through. - * 16bpp depth texture can't be paired with a stencil buffer so - * always used combined depth/stencil format. - */ -#endif - case GL_DEPTH_STENCIL_EXT: - case GL_DEPTH24_STENCIL8_EXT: - return MESA_FORMAT_S8_Z24; - -#ifndef I915 - case GL_SRGB_EXT: - case GL_SRGB8_EXT: - case GL_SRGB_ALPHA_EXT: - case GL_SRGB8_ALPHA8_EXT: - case GL_COMPRESSED_SRGB_EXT: - case GL_COMPRESSED_SRGB_ALPHA_EXT: - case GL_COMPRESSED_SLUMINANCE_EXT: - case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT: - return MESA_FORMAT_SARGB8; - case GL_SLUMINANCE_EXT: - case GL_SLUMINANCE8_EXT: - if (intel->has_luminance_srgb) - return MESA_FORMAT_SL8; - else - return MESA_FORMAT_SARGB8; - case GL_SLUMINANCE_ALPHA_EXT: - case GL_SLUMINANCE8_ALPHA8_EXT: - if (intel->has_luminance_srgb) - return MESA_FORMAT_SLA8; - else - return MESA_FORMAT_SARGB8; - case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: - case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: - case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: - case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: - return MESA_FORMAT_SRGB_DXT1; - - /* i915 could also do this */ - case GL_DUDV_ATI: - case GL_DU8DV8_ATI: - return MESA_FORMAT_DUDV8; - case GL_RGBA_SNORM: - case GL_RGBA8_SNORM: - return MESA_FORMAT_SIGNED_RGBA8888_REV; - - /* i915 can do a RG16, but it can't do any of the other RED or RG formats. - * In addition, it only implements the broken D3D mode where undefined - * components are read as 1.0. I'm not sure who thought reading - * { R, G, 1.0, 1.0 } from a red-green texture would be useful. - */ - case GL_RED: - case GL_R8: - return MESA_FORMAT_R8; - case GL_R16: - return MESA_FORMAT_R16; - case GL_RG: - case GL_RG8: - return MESA_FORMAT_RG88; - case GL_RG16: - return MESA_FORMAT_RG1616; -#endif - + switch (format) { + case MESA_FORMAT_ARGB8888: + case MESA_FORMAT_XRGB8888: + case MESA_FORMAT_SARGB8: + case MESA_FORMAT_R8: + case MESA_FORMAT_RG88: + case MESA_FORMAT_A8: + case MESA_FORMAT_AL88: + case MESA_FORMAT_RGB565: + case MESA_FORMAT_ARGB1555: + case MESA_FORMAT_ARGB4444: + return GL_UNSIGNED_BYTE; + case MESA_FORMAT_R16: + case MESA_FORMAT_RG1616: + case MESA_FORMAT_Z16: + return GL_UNSIGNED_SHORT; + case MESA_FORMAT_X8_Z24: + return GL_UNSIGNED_INT; + case MESA_FORMAT_S8_Z24: + return GL_UNSIGNED_INT_24_8_EXT; default: - fprintf(stderr, "unexpected texture format %s in %s\n", - _mesa_lookup_enum_by_nr(internalFormat), __FUNCTION__); - return MESA_FORMAT_NONE; + _mesa_problem(NULL, "unexpected MESA_FORMAT for renderbuffer"); + return GL_UNSIGNED_BYTE; } - - return MESA_FORMAT_NONE; /* never get here */ } int intel_compressed_num_bytes(GLuint mesaFormat) diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 41cdbfd2cb..906f8a6271 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -1,11 +1,13 @@ #include "main/glheader.h" #include "main/macros.h" +#include "main/mfeatures.h" #include "main/mtypes.h" #include "main/enums.h" #include "main/bufferobj.h" #include "main/context.h" #include "main/formats.h" +#include "main/pbo.h" #include "main/texcompress.h" #include "main/texstore.h" #include "main/texgetimage.h" @@ -55,11 +57,11 @@ logbase2(int n) * 0)..(1x1). Consider pruning this tree at a validation if the * saving is worth it. */ -static void -guess_and_alloc_mipmap_tree(struct intel_context *intel, - struct intel_texture_object *intelObj, - struct intel_texture_image *intelImage, - GLboolean expect_accelerated_upload) +static struct intel_mipmap_tree * +intel_miptree_create_for_teximage(struct intel_context *intel, + struct intel_texture_object *intelObj, + struct intel_texture_image *intelImage, + GLboolean expect_accelerated_upload) { GLuint firstLevel; GLuint lastLevel; @@ -72,70 +74,71 @@ guess_and_alloc_mipmap_tree(struct intel_context *intel, DBG("%s\n", __FUNCTION__); if (intelImage->base.Border) - return; + return NULL; if (intelImage->level > intelObj->base.BaseLevel && (intelImage->base.Width == 1 || (intelObj->base.Target != GL_TEXTURE_1D && intelImage->base.Height == 1) || (intelObj->base.Target == GL_TEXTURE_3D && - intelImage->base.Depth == 1))) - return; - - /* If this image disrespects BaseLevel, allocate from level zero. - * Usually BaseLevel == 0, so it's unlikely to happen. - */ - if (intelImage->level < intelObj->base.BaseLevel) - firstLevel = 0; - else - firstLevel = intelObj->base.BaseLevel; - - - /* Figure out image dimensions at start level. - */ - for (i = intelImage->level; i > firstLevel; i--) { - width <<= 1; - if (height != 1) - height <<= 1; - if (depth != 1) - depth <<= 1; - } + intelImage->base.Depth == 1))) { + /* For this combination, we're at some lower mipmap level and + * some important dimension is 1. We can't extrapolate up to a + * likely base level width/height/depth for a full mipmap stack + * from this info, so just allocate this one level. + */ + firstLevel = intelImage->level; + lastLevel = intelImage->level; + } else { + /* If this image disrespects BaseLevel, allocate from level zero. + * Usually BaseLevel == 0, so it's unlikely to happen. + */ + if (intelImage->level < intelObj->base.BaseLevel) + firstLevel = 0; + else + firstLevel = intelObj->base.BaseLevel; + + /* Figure out image dimensions at start level. */ + for (i = intelImage->level; i > firstLevel; i--) { + width <<= 1; + if (height != 1) + height <<= 1; + if (depth != 1) + depth <<= 1; + } - /* Guess a reasonable value for lastLevel. This is probably going - * to be wrong fairly often and might mean that we have to look at - * resizable buffers, or require that buffers implement lazy - * pagetable arrangements. - */ - if ((intelObj->base.MinFilter == GL_NEAREST || - intelObj->base.MinFilter == GL_LINEAR) && - intelImage->level == firstLevel && - (intel->gen < 4 || firstLevel == 0)) { - lastLevel = firstLevel; - } - else { - lastLevel = firstLevel + logbase2(MAX2(MAX2(width, height), depth)); + /* Guess a reasonable value for lastLevel. This is probably going + * to be wrong fairly often and might mean that we have to look at + * resizable buffers, or require that buffers implement lazy + * pagetable arrangements. + */ + if ((intelObj->base.MinFilter == GL_NEAREST || + intelObj->base.MinFilter == GL_LINEAR) && + intelImage->level == firstLevel && + (intel->gen < 4 || firstLevel == 0)) { + lastLevel = firstLevel; + } else { + lastLevel = firstLevel + logbase2(MAX2(MAX2(width, height), depth)); + } } - assert(!intelObj->mt); if (_mesa_is_format_compressed(intelImage->base.TexFormat)) comp_byte = intel_compressed_num_bytes(intelImage->base.TexFormat); texelBytes = _mesa_get_format_bytes(intelImage->base.TexFormat); - intelObj->mt = intel_miptree_create(intel, - intelObj->base.Target, - intelImage->base._BaseFormat, - intelImage->base.InternalFormat, - firstLevel, - lastLevel, - width, - height, - depth, - texelBytes, - comp_byte, - expect_accelerated_upload); - - DBG("%s - success\n", __FUNCTION__); + return intel_miptree_create(intel, + intelObj->base.Target, + intelImage->base._BaseFormat, + intelImage->base.InternalFormat, + firstLevel, + lastLevel, + width, + height, + depth, + texelBytes, + comp_byte, + expect_accelerated_upload); } @@ -229,15 +232,18 @@ try_pbo_upload(struct intel_context *intel, dst_stride = intelImage->mt->region->pitch; - if (drm_intel_bo_references(intel->batch->buf, dst_buffer)) + if (drm_intel_bo_references(intel->batch.bo, dst_buffer)) intel_flush(&intel->ctx); { - drm_intel_bo *src_buffer = intel_bufferobj_buffer(intel, pbo, INTEL_READ); + GLuint offset; + drm_intel_bo *src_buffer = + intel_bufferobj_source(intel, pbo, 64, &offset); if (!intelEmitCopyBlit(intel, intelImage->mt->cpp, - src_stride, src_buffer, src_offset, GL_FALSE, + src_stride, src_buffer, + src_offset + offset, GL_FALSE, dst_stride, dst_buffer, 0, intelImage->mt->region->tiling, 0, 0, dst_x, dst_y, width, height, @@ -343,41 +349,29 @@ intelTexImage(struct gl_context * ctx, texImage->Data = NULL; } - if (!intelObj->mt) { - guess_and_alloc_mipmap_tree(intel, intelObj, intelImage, pixels == NULL); - if (!intelObj->mt) { - DBG("guess_and_alloc_mipmap_tree: failed\n"); - } - } - assert(!intelImage->mt); if (intelObj->mt && intel_miptree_match_image(intelObj->mt, &intelImage->base)) { - + /* Use an existing miptree when possible */ intel_miptree_reference(&intelImage->mt, intelObj->mt); assert(intelImage->mt); } else if (intelImage->base.Border == 0) { - int comp_byte = 0; - GLuint texelBytes = _mesa_get_format_bytes(intelImage->base.TexFormat); - GLenum baseFormat = _mesa_get_format_base_format(intelImage->base.TexFormat); - if (_mesa_is_format_compressed(intelImage->base.TexFormat)) { - comp_byte = - intel_compressed_num_bytes(intelImage->base.TexFormat); - } - /* Didn't fit in the object miptree, but it's suitable for inclusion in * a miptree, so create one just for our level and store it in the image. * It'll get moved into the object miptree at validate time. */ - intelImage->mt = intel_miptree_create(intel, target, - baseFormat, - internalFormat, - level, level, - width, height, depth, - texelBytes, - comp_byte, pixels == NULL); - + intelImage->mt = intel_miptree_create_for_teximage(intel, intelObj, + intelImage, + pixels == NULL); + + /* Even if the object currently has a mipmap tree associated + * with it, this one is a more likely candidate to represent the + * whole object since our level didn't fit what was there + * before, and any lower levels would fit into our miptree. + */ + if (intelImage->mt) + intel_miptree_reference(&intelObj->mt, intelImage->mt); } /* PBO fastpaths: @@ -439,7 +433,7 @@ intelTexImage(struct gl_context * ctx, if (intelImage->mt) { if (pixels != NULL) { /* Flush any queued rendering with the texture before mapping. */ - if (drm_intel_bo_references(intel->batch->buf, + if (drm_intel_bo_references(intel->batch.bo, intelImage->mt->region->buffer)) { intel_flush(ctx); } @@ -711,8 +705,7 @@ intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, } mt = intel_miptree_create_for_region(intel, target, - internalFormat, - 0, 0, rb->region, 1, 0); + internalFormat, rb->region, 1, 0); if (mt == NULL) return; @@ -777,7 +770,7 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target, mt = intel_miptree_create_for_region(intel, target, image->internal_format, - 0, 0, image->region, 1, 0); + image->region, 1, 0); if (mt == NULL) return; diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h index 5f60e0ea4f..e93ef4a472 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_obj.h +++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h @@ -32,11 +32,11 @@ struct intel_texture_object { struct gl_texture_object base; /* The "parent" object */ - /* The mipmap tree must include at least these levels once - * validated: + /* This is a mirror of base._MaxLevel, updated at validate time, + * except that we don't bother with the non-base levels for + * non-mipmapped textures. */ - GLuint firstLevel; - GLuint lastLevel; + unsigned int _MaxLevel; /* Offset for firstLevel image: */ diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c index c9b992a21b..d0f8294113 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c @@ -27,6 +27,7 @@ **************************************************************************/ #include "main/mtypes.h" +#include "main/pbo.h" #include "main/texobj.h" #include "main/texstore.h" #include "main/texcompress.h" @@ -89,19 +90,19 @@ intelTexSubimage(struct gl_context * ctx, intel->gen < 6 && target == GL_TEXTURE_2D && drm_intel_bo_busy(dst_bo)) { - unsigned long pitch; - uint32_t tiling_mode = I915_TILING_NONE; - temp_bo = drm_intel_bo_alloc_tiled(intel->bufmgr, - "subimage blit bo", - width, height, - intelImage->mt->cpp, - &tiling_mode, - &pitch, - 0); - drm_intel_gem_bo_map_gtt(temp_bo); + dstRowStride = width * intelImage->mt->cpp; + temp_bo = drm_intel_bo_alloc(intel->bufmgr, "subimage blit bo", + dstRowStride * height, 0); + if (!temp_bo) + return; + + if (drm_intel_gem_bo_map_gtt(temp_bo)) { + drm_intel_bo_unreference(temp_bo); + return; + } + texImage->Data = temp_bo->virtual; texImage->ImageOffsets[0] = 0; - dstRowStride = pitch; intel_miptree_get_image_offset(intelImage->mt, level, intelImage->face, 0, diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c index ed5c5d896b..a11b07ed09 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_validate.c +++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c @@ -8,72 +8,21 @@ #define FILE_DEBUG_FLAG DEBUG_TEXTURE /** - * Compute which mipmap levels that really need to be sent to the hardware. - * This depends on the base image size, GL_TEXTURE_MIN_LOD, - * GL_TEXTURE_MAX_LOD, GL_TEXTURE_BASE_LEVEL, and GL_TEXTURE_MAX_LEVEL. + * When validating, we only care about the texture images that could + * be seen, so for non-mipmapped modes we want to ignore everything + * but BaseLevel. */ static void -intel_calculate_first_last_level(struct intel_context *intel, - struct intel_texture_object *intelObj) +intel_update_max_level(struct intel_context *intel, + struct intel_texture_object *intelObj) { struct gl_texture_object *tObj = &intelObj->base; - const struct gl_texture_image *const baseImage = - tObj->Image[0][tObj->BaseLevel]; - /* These must be signed values. MinLod and MaxLod can be negative numbers, - * and having firstLevel and lastLevel as signed prevents the need for - * extra sign checks. - */ - int firstLevel; - int lastLevel; - - /* Yes, this looks overly complicated, but it's all needed. - */ - switch (tObj->Target) { - case GL_TEXTURE_1D: - case GL_TEXTURE_2D: - case GL_TEXTURE_3D: - case GL_TEXTURE_CUBE_MAP: - if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) { - /* GL_NEAREST and GL_LINEAR only care about GL_TEXTURE_BASE_LEVEL. - */ - firstLevel = lastLevel = tObj->BaseLevel; - } - else { - if (intel->gen == 2) { - firstLevel = tObj->BaseLevel + (GLint) (tObj->MinLod + 0.5); - firstLevel = MAX2(firstLevel, tObj->BaseLevel); - firstLevel = MIN2(firstLevel, tObj->BaseLevel + baseImage->MaxLog2); - lastLevel = tObj->BaseLevel + (GLint) (tObj->MaxLod + 0.5); - lastLevel = MAX2(lastLevel, tObj->BaseLevel); - lastLevel = MIN2(lastLevel, tObj->BaseLevel + baseImage->MaxLog2); - lastLevel = MIN2(lastLevel, tObj->MaxLevel); - lastLevel = MAX2(firstLevel, lastLevel); /* need at least one level */ - } else { - /* Min/max LOD are taken into account in sampler state. We don't - * want to re-layout textures just because clamping has been applied - * since it means a bunch of blitting around and probably no memory - * savings (since we have to keep the other levels around anyway). - */ - firstLevel = tObj->BaseLevel; - lastLevel = MIN2(tObj->BaseLevel + baseImage->MaxLog2, - tObj->MaxLevel); - /* need at least one level */ - lastLevel = MAX2(firstLevel, lastLevel); - } - } - break; - case GL_TEXTURE_RECTANGLE_NV: - case GL_TEXTURE_4D_SGIS: - firstLevel = lastLevel = 0; - break; - default: - return; + if (tObj->MinFilter == GL_NEAREST || tObj->MinFilter == GL_LINEAR) { + intelObj->_MaxLevel = tObj->BaseLevel; + } else { + intelObj->_MaxLevel = tObj->_MaxLevel; } - - /* save these values */ - intelObj->firstLevel = firstLevel; - intelObj->lastLevel = lastLevel; } /** @@ -135,8 +84,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) /* What levels must the tree include at a minimum? */ - intel_calculate_first_last_level(intel, intelObj); - firstImage = intel_texture_image(tObj->Image[0][intelObj->firstLevel]); + intel_update_max_level(intel, intelObj); + firstImage = intel_texture_image(tObj->Image[0][tObj->BaseLevel]); /* Fallback case: */ @@ -147,23 +96,6 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) return GL_FALSE; } - - /* If both firstImage and intelObj have a tree which can contain - * all active images, favour firstImage. Note that because of the - * completeness requirement, we know that the image dimensions - * will match. - */ - if (firstImage->mt && - firstImage->mt != intelObj->mt && - firstImage->mt->first_level <= intelObj->firstLevel && - firstImage->mt->last_level >= intelObj->lastLevel) { - - if (intelObj->mt) - intel_miptree_release(intel, &intelObj->mt); - - intel_miptree_reference(&intelObj->mt, firstImage->mt); - } - if (_mesa_is_format_compressed(firstImage->base.TexFormat)) { comp_byte = intel_compressed_num_bytes(firstImage->base.TexFormat); cpp = comp_byte; @@ -173,18 +105,17 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) /* Check tree can hold all active levels. Check tree matches * target, imageFormat, etc. - * - * XXX: For some layouts (eg i945?), the test might have to be - * first_level == firstLevel, as the tree isn't valid except at the - * original start level. Hope to get around this by - * programming minLod, maxLod, baseLevel into the hardware and - * leaving the tree alone. + * + * For pre-gen4, we have to match first_level == tObj->BaseLevel, + * because we don't have the control that gen4 does to make min/mag + * determination happen at a nonzero (hardware) baselevel. Because + * of that, we just always relayout on baselevel change. */ if (intelObj->mt && (intelObj->mt->target != intelObj->base.Target || intelObj->mt->internal_format != firstImage->base.InternalFormat || - intelObj->mt->first_level != intelObj->firstLevel || - intelObj->mt->last_level != intelObj->lastLevel || + intelObj->mt->first_level != tObj->BaseLevel || + intelObj->mt->last_level < intelObj->_MaxLevel || intelObj->mt->width0 != firstImage->base.Width || intelObj->mt->height0 != firstImage->base.Height || intelObj->mt->depth0 != firstImage->base.Depth || @@ -201,25 +132,29 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit) intelObj->base.Target, firstImage->base._BaseFormat, firstImage->base.InternalFormat, - intelObj->firstLevel, - intelObj->lastLevel, + tObj->BaseLevel, + intelObj->_MaxLevel, firstImage->base.Width, firstImage->base.Height, firstImage->base.Depth, cpp, comp_byte, GL_TRUE); + if (!intelObj->mt) + return GL_FALSE; } /* Pull in any images not in the object's tree: */ nr_faces = (intelObj->base.Target == GL_TEXTURE_CUBE_MAP) ? 6 : 1; for (face = 0; face < nr_faces; face++) { - for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) { + for (i = tObj->BaseLevel; i <= intelObj->_MaxLevel; i++) { struct intel_texture_image *intelImage = intel_texture_image(intelObj->base.Image[face][i]); - - /* Need to import images in main memory or held in other trees. + /* skip too small size mipmap */ + if (intelImage == NULL) + break; + /* Need to import images in main memory or held in other trees. * If it's a render target, then its data isn't needed to be in * the object tree (otherwise we'd be FBO incomplete), and we need * to keep track of the image's MT as needing to be pulled in still, @@ -289,7 +224,7 @@ intel_tex_map_images(struct intel_context *intel, DBG("%s\n", __FUNCTION__); - for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) + for (i = intelObj->base.BaseLevel; i <= intelObj->_MaxLevel; i++) intel_tex_map_level_images(intel, intelObj, i); } @@ -299,6 +234,6 @@ intel_tex_unmap_images(struct intel_context *intel, { int i; - for (i = intelObj->firstLevel; i <= intelObj->lastLevel; i++) + for (i = intelObj->base.BaseLevel; i <= intelObj->_MaxLevel; i++) intel_tex_unmap_level_images(intel, intelObj, i); } diff --git a/src/mesa/drivers/dri/mach64/mach64_context.c b/src/mesa/drivers/dri/mach64/mach64_context.c index 7c989df5ec..35b01a9740 100644 --- a/src/mesa/drivers/dri/mach64/mach64_context.c +++ b/src/mesa/drivers/dri/mach64/mach64_context.c @@ -120,7 +120,7 @@ GLboolean mach64CreateContext( gl_api api, shareCtx = ((mach64ContextPtr) sharedContextPrivate)->glCtx; else shareCtx = NULL; - mmesa->glCtx = _mesa_create_context(glVisual, shareCtx, + mmesa->glCtx = _mesa_create_context(API_OPENGL, glVisual, shareCtx, &functions, (void *)mmesa); if (!mmesa->glCtx) { FREE(mmesa); diff --git a/src/mesa/drivers/dri/mach64/mach64_context.h b/src/mesa/drivers/dri/mach64/mach64_context.h index 11e8f53b28..70bc0ae79d 100644 --- a/src/mesa/drivers/dri/mach64/mach64_context.h +++ b/src/mesa/drivers/dri/mach64/mach64_context.h @@ -295,11 +295,11 @@ extern GLboolean mach64UnbindContext( __DRIcontext *driContextPriv ); #define LE32_OUT( x, y ) do { *(GLuint *)(x) = (y); } while (0) #define LE32_OUT_FLOAT( x, y ) do { *(GLfloat *)(x) = (y); } while (0) #else -#ifndef __OpenBSD__ -#include <byteswap.h> -#else +#if defined(__OpenBSD__) || defined(__NetBSD__) #include <machine/endian.h> #define bswap_32 bswap32 +#else +#include <byteswap.h> #endif #define LE32_IN( x ) bswap_32( *(GLuint *)(x) ) diff --git a/src/mesa/drivers/dri/mach64/mach64_state.c b/src/mesa/drivers/dri/mach64/mach64_state.c index 8e795955c2..c1a4e63204 100644 --- a/src/mesa/drivers/dri/mach64/mach64_state.c +++ b/src/mesa/drivers/dri/mach64/mach64_state.c @@ -102,7 +102,7 @@ static void mach64UpdateAlphaMode( struct gl_context *ctx ) MACH64_ALPHA_BLEND_DST_MASK | MACH64_ALPHA_BLEND_SAT); - switch ( ctx->Color.BlendSrcRGB ) { + switch ( ctx->Color.Blend[0].SrcRGB ) { case GL_ZERO: s |= MACH64_ALPHA_BLEND_SRC_ZERO; break; @@ -135,7 +135,7 @@ static void mach64UpdateAlphaMode( struct gl_context *ctx ) FALLBACK( mmesa, MACH64_FALLBACK_BLEND_FUNC, GL_TRUE ); } - switch ( ctx->Color.BlendDstRGB ) { + switch ( ctx->Color.Blend[0].DstRGB ) { case GL_ZERO: s |= MACH64_ALPHA_BLEND_DST_ZERO; break; diff --git a/src/mesa/drivers/dri/mga/mga_xmesa.c b/src/mesa/drivers/dri/mga/mga_xmesa.c index d1b281a2c0..eb7df9785c 100644 --- a/src/mesa/drivers/dri/mga/mga_xmesa.c +++ b/src/mesa/drivers/dri/mga/mga_xmesa.c @@ -457,7 +457,7 @@ mgaCreateContext( gl_api api, shareCtx = ((mgaContextPtr) sharedContextPrivate)->glCtx; else shareCtx = NULL; - mmesa->glCtx = _mesa_create_context(mesaVis, shareCtx, + mmesa->glCtx = _mesa_create_context(API_OPENGL, mesaVis, shareCtx, &functions, (void *) mmesa); if (!mmesa->glCtx) { FREE(mmesa); diff --git a/src/mesa/drivers/dri/mga/mgastate.c b/src/mesa/drivers/dri/mga/mgastate.c index 25d7de28fe..bfc55f4fc6 100644 --- a/src/mesa/drivers/dri/mga/mgastate.c +++ b/src/mesa/drivers/dri/mga/mgastate.c @@ -30,6 +30,7 @@ #include "main/colormac.h" #include "main/dd.h" #include "main/mm.h" +#include "main/state.h" #include "mgacontext.h" #include "mgadd.h" @@ -114,7 +115,7 @@ static void mgaDDAlphaFunc(struct gl_context *ctx, GLenum func, GLfloat ref) static void updateBlendLogicOp(struct gl_context *ctx) { mgaContextPtr mmesa = MGA_CONTEXT(ctx); - GLboolean logicOp = RGBA_LOGICOP_ENABLED(ctx); + GLboolean logicOp = _mesa_rgba_logicop_enabled(ctx); MGA_STATECHANGE( mmesa, MGA_UPLOAD_CONTEXT ); @@ -141,7 +142,7 @@ static void mgaDDBlendFuncSeparate( struct gl_context *ctx, GLenum sfactorRGB, GLuint src; GLuint dst; - switch (ctx->Color.BlendSrcRGB) { + switch (ctx->Color.Blend[0].SrcRGB) { case GL_ZERO: src = AC_src_zero; break; case GL_SRC_ALPHA: @@ -169,7 +170,7 @@ static void mgaDDBlendFuncSeparate( struct gl_context *ctx, GLenum sfactorRGB, break; } - switch (ctx->Color.BlendDstRGB) { + switch (ctx->Color.Blend[0].DstRGB) { case GL_SRC_ALPHA: dst = AC_dst_src_alpha; break; case GL_ONE_MINUS_SRC_ALPHA: @@ -197,7 +198,7 @@ static void mgaDDBlendFuncSeparate( struct gl_context *ctx, GLenum sfactorRGB, mmesa->hw.blend_func = (src | dst); FALLBACK( ctx, MGA_FALLBACK_BLEND, - ctx->Color.BlendEnabled && !RGBA_LOGICOP_ENABLED(ctx) && + ctx->Color.BlendEnabled && !_mesa_rgba_logicop_enabled(ctx) && mmesa->hw.blend_func == (AC_src_src_alpha_sat | AC_dst_zero) ); } @@ -483,7 +484,7 @@ static void updateSpecularLighting( struct gl_context *ctx ) mgaContextPtr mmesa = MGA_CONTEXT(ctx); unsigned int specen; - specen = NEED_SECONDARY_COLOR(ctx) ? TMC_specen_enable : 0; + specen = _mesa_need_secondary_color(ctx) ? TMC_specen_enable : 0; if ( specen != mmesa->hw.specen ) { mmesa->hw.specen = specen; @@ -962,7 +963,7 @@ void mgaEmitHwStateLocked( mgaContextPtr mmesa ) ? mmesa->hw.zmode : (DC_zmode_nozcmp | DC_atype_i); mmesa->setup.dwgctl &= DC_bop_MASK; - mmesa->setup.dwgctl |= RGBA_LOGICOP_ENABLED(ctx) + mmesa->setup.dwgctl |= _mesa_rgba_logicop_enabled(ctx) ? mmesa->hw.rop : mgarop_NoBLK[ GL_COPY & 0x0f ]; mmesa->setup.alphactrl &= AC_src_MASK & AC_dst_MASK & AC_atmode_MASK diff --git a/src/mesa/drivers/dri/nouveau/nouveau_context.c b/src/mesa/drivers/dri/nouveau/nouveau_context.c index 53a121420d..71dde41387 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_context.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_context.c @@ -115,7 +115,8 @@ nouveau_context_init(struct gl_context *ctx, struct nouveau_screen *screen, nouveau_fbo_functions_init(&functions); /* Initialize the mesa context. */ - _mesa_initialize_context(ctx, visual, share_ctx, &functions, NULL); + _mesa_initialize_context(ctx, API_OPENGL, visual, + share_ctx, &functions, NULL); nouveau_state_init(ctx); nouveau_bo_state_init(ctx); diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.c b/src/mesa/drivers/dri/nouveau/nouveau_driver.c index 27e2892f71..45630be7f6 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_driver.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.c @@ -24,6 +24,8 @@ * */ +#include "main/mfeatures.h" + #include "nouveau_driver.h" #include "nouveau_context.h" #include "nouveau_fbo.h" diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.h b/src/mesa/drivers/dri/nouveau/nouveau_driver.h index 8036b18edc..158aec820a 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_driver.h +++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.h @@ -31,6 +31,7 @@ #include "main/mtypes.h" #include "main/macros.h" #include "main/formats.h" +#include "main/state.h" #include "utils.h" #include "dri_util.h" @@ -38,7 +39,6 @@ #include <assert.h> #include "nouveau_device.h" -#include "nouveau_pushbuf.h" #include "nouveau_grobj.h" #include "nouveau_channel.h" #include "nouveau_bo.h" @@ -46,6 +46,7 @@ #include "nouveau_screen.h" #include "nouveau_state.h" #include "nouveau_surface.h" +#include "nv04_pushbuf.h" #define DRIVER_DATE "20091015" #define DRIVER_AUTHOR "Nouveau" diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c index 079b5d63e4..b36b578878 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c @@ -32,6 +32,7 @@ #include "main/framebuffer.h" #include "main/renderbuffer.h" #include "main/fbobject.h" +#include "main/mfeatures.h" static GLboolean set_renderbuffer_format(struct gl_renderbuffer *rb, GLenum internalFormat) diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.c b/src/mesa/drivers/dri/nouveau/nouveau_texture.c index 2480b1ea50..988208ff56 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_texture.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.c @@ -113,8 +113,10 @@ nouveau_teximage_map(struct gl_context *ctx, struct gl_texture_image *ti, if (access & GL_MAP_WRITE_BIT) flags |= NOUVEAU_BO_WR; - ret = nouveau_bo_map(s->bo, flags); - assert(!ret); + if (!s->bo->map) { + ret = nouveau_bo_map(s->bo, flags); + assert(!ret); + } ti->Data = s->bo->map + y * s->pitch + x * s->cpp; } diff --git a/src/mesa/drivers/dri/nouveau/nv04_state_raster.c b/src/mesa/drivers/dri/nouveau/nv04_state_raster.c index 98f2f98f1d..78d29fc485 100644 --- a/src/mesa/drivers/dri/nouveau/nv04_state_raster.c +++ b/src/mesa/drivers/dri/nouveau/nv04_state_raster.c @@ -264,8 +264,8 @@ nv04_emit_blend(struct gl_context *ctx, int emit) NV04_MULTITEX_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE; /* Alpha blending. */ - blend |= get_blend_func(ctx->Color.BlendDstRGB) << 28 | - get_blend_func(ctx->Color.BlendSrcRGB) << 24; + blend |= get_blend_func(ctx->Color.Blend[0].DstRGB) << 28 | + get_blend_func(ctx->Color.Blend[0].SrcRGB) << 24; if (ctx->Color.BlendEnabled) blend |= NV04_MULTITEX_TRIANGLE_BLEND_BLEND_ENABLE; @@ -277,7 +277,7 @@ nv04_emit_blend(struct gl_context *ctx, int emit) blend |= NV04_MULTITEX_TRIANGLE_BLEND_SHADE_MODE_FLAT; /* Secondary color */ - if (NEED_SECONDARY_COLOR(ctx)) + if (_mesa_need_secondary_color(ctx)) blend |= NV04_MULTITEX_TRIANGLE_BLEND_SPECULAR_ENABLE; /* Fog. */ @@ -296,8 +296,8 @@ nv04_emit_blend(struct gl_context *ctx, int emit) NV04_TEXTURED_TRIANGLE_BLEND_TEXTURE_PERSPECTIVE_ENABLE; /* Alpha blending. */ - blend |= get_blend_func(ctx->Color.BlendDstRGB) << 28 | - get_blend_func(ctx->Color.BlendSrcRGB) << 24; + blend |= get_blend_func(ctx->Color.Blend[0].DstRGB) << 28 | + get_blend_func(ctx->Color.Blend[0].SrcRGB) << 24; if (ctx->Color.BlendEnabled) blend |= NV04_TEXTURED_TRIANGLE_BLEND_BLEND_ENABLE; @@ -315,7 +315,7 @@ nv04_emit_blend(struct gl_context *ctx, int emit) blend |= get_texenv_mode(GL_MODULATE); /* Secondary color */ - if (NEED_SECONDARY_COLOR(ctx)) + if (_mesa_need_secondary_color(ctx)) blend |= NV04_TEXTURED_TRIANGLE_BLEND_SPECULAR_ENABLE; /* Fog. */ diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_raster.c b/src/mesa/drivers/dri/nouveau/nv10_state_raster.c index bb1084ed11..50021b0a7b 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_state_raster.c +++ b/src/mesa/drivers/dri/nouveau/nv10_state_raster.c @@ -68,7 +68,7 @@ nv10_emit_blend_equation(struct gl_context *ctx, int emit) OUT_RINGb(chan, ctx->Color.BlendEnabled); BEGIN_RING(chan, celsius, NV10_3D_BLEND_EQUATION, 1); - OUT_RING(chan, nvgl_blend_eqn(ctx->Color.BlendEquationRGB)); + OUT_RING(chan, nvgl_blend_eqn(ctx->Color.Blend[0].EquationRGB)); } void @@ -78,8 +78,8 @@ nv10_emit_blend_func(struct gl_context *ctx, int emit) struct nouveau_grobj *celsius = context_eng3d(ctx); BEGIN_RING(chan, celsius, NV10_3D_BLEND_FUNC_SRC, 2); - OUT_RING(chan, nvgl_blend_func(ctx->Color.BlendSrcRGB)); - OUT_RING(chan, nvgl_blend_func(ctx->Color.BlendDstRGB)); + OUT_RING(chan, nvgl_blend_func(ctx->Color.Blend[0].SrcRGB)); + OUT_RING(chan, nvgl_blend_func(ctx->Color.Blend[0].DstRGB)); } void diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c b/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c index e8bd12e6e0..96d1b320d8 100644 --- a/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c +++ b/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c @@ -199,7 +199,7 @@ nv10_emit_light_model(struct gl_context *ctx, int emit) BEGIN_RING(chan, celsius, NV10_3D_LIGHT_MODEL, 1); OUT_RING(chan, ((m->LocalViewer ? NV10_3D_LIGHT_MODEL_LOCAL_VIEWER : 0) | - (NEED_SECONDARY_COLOR(ctx) ? + (_mesa_need_secondary_color(ctx) ? NV10_3D_LIGHT_MODEL_SEPARATE_SPECULAR : 0) | (!ctx->Light.Enabled && ctx->Fog.ColorSumEnabled ? NV10_3D_LIGHT_MODEL_VERTEX_SPECULAR : 0))); diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c b/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c index 4677198dd0..4f7ddd8e49 100644 --- a/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c +++ b/src/mesa/drivers/dri/nouveau/nv20_state_tnl.c @@ -178,7 +178,7 @@ nv20_emit_light_model(struct gl_context *ctx, int emit) OUT_RING(chan, ((m->LocalViewer ? NV20_3D_LIGHT_MODEL_VIEWER_LOCAL : NV20_3D_LIGHT_MODEL_VIEWER_NONLOCAL) | - (NEED_SECONDARY_COLOR(ctx) ? + (_mesa_need_secondary_color(ctx) ? NV20_3D_LIGHT_MODEL_SEPARATE_SPECULAR : 0))); diff --git a/src/mesa/drivers/dri/r128/r128_context.c b/src/mesa/drivers/dri/r128/r128_context.c index 274108005f..247d86ca00 100644 --- a/src/mesa/drivers/dri/r128/r128_context.c +++ b/src/mesa/drivers/dri/r128/r128_context.c @@ -128,7 +128,7 @@ GLboolean r128CreateContext( gl_api api, shareCtx = ((r128ContextPtr) sharedContextPrivate)->glCtx; else shareCtx = NULL; - rmesa->glCtx = _mesa_create_context(glVisual, shareCtx, + rmesa->glCtx = _mesa_create_context(API_OPENGL, glVisual, shareCtx, &functions, (void *) rmesa); if (!rmesa->glCtx) { FREE(rmesa); diff --git a/src/mesa/drivers/dri/r128/r128_state.c b/src/mesa/drivers/dri/r128/r128_state.c index 4a49e8fc70..7ce082ead2 100644 --- a/src/mesa/drivers/dri/r128/r128_state.c +++ b/src/mesa/drivers/dri/r128/r128_state.c @@ -43,6 +43,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/enums.h" #include "main/colormac.h" #include "main/macros.h" +#include "main/state.h" #include "swrast/swrast.h" #include "vbo/vbo.h" #include "tnl/tnl.h" @@ -178,12 +179,12 @@ static void r128UpdateAlphaMode( struct gl_context *ctx ) (R128_ALPHA_BLEND_MASK << R128_ALPHA_BLEND_DST_SHIFT) | R128_ALPHA_COMB_FCN_MASK); - a |= blend_factor( rmesa, ctx->Color.BlendSrcRGB, GL_TRUE ) + a |= blend_factor( rmesa, ctx->Color.Blend[0].SrcRGB, GL_TRUE ) << R128_ALPHA_BLEND_SRC_SHIFT; - a |= blend_factor( rmesa, ctx->Color.BlendDstRGB, GL_FALSE ) + a |= blend_factor( rmesa, ctx->Color.Blend[0].DstRGB, GL_FALSE ) << R128_ALPHA_BLEND_DST_SHIFT; - switch (ctx->Color.BlendEquationRGB) { + switch (ctx->Color.Blend[0].EquationRGB) { case GL_FUNC_ADD: a |= R128_ALPHA_COMB_ADD_CLAMP; break; @@ -736,7 +737,7 @@ static void updateSpecularLighting( struct gl_context *ctx ) r128ContextPtr rmesa = R128_CONTEXT(ctx); GLuint t = rmesa->setup.tex_cntl_c; - if ( NEED_SECONDARY_COLOR( ctx ) ) { + if ( _mesa_need_secondary_color( ctx ) ) { if (ctx->Light.ShadeModel == GL_FLAT) { /* R128 can't do flat-shaded separate specular */ t &= ~R128_SPEC_LIGHT_ENABLE; diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c index 5abfc9dac5..9c045b73ac 100644 --- a/src/mesa/drivers/dri/r200/r200_context.c +++ b/src/mesa/drivers/dri/r200/r200_context.c @@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/simple_list.h" #include "main/imports.h" #include "main/extensions.h" +#include "main/mfeatures.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" diff --git a/src/mesa/drivers/dri/r200/r200_state.c b/src/mesa/drivers/dri/r200/r200_state.c index b523edcb5d..0a1e0b4757 100644 --- a/src/mesa/drivers/dri/r200/r200_state.c +++ b/src/mesa/drivers/dri/r200/r200_state.c @@ -245,10 +245,10 @@ static void r200_set_blend_state( struct gl_context * ctx ) } } - func = (blend_factor( ctx->Color.BlendSrcRGB, GL_TRUE ) << R200_SRC_BLEND_SHIFT) | - (blend_factor( ctx->Color.BlendDstRGB, GL_FALSE ) << R200_DST_BLEND_SHIFT); + func = (blend_factor( ctx->Color.Blend[0].SrcRGB, GL_TRUE ) << R200_SRC_BLEND_SHIFT) | + (blend_factor( ctx->Color.Blend[0].DstRGB, GL_FALSE ) << R200_DST_BLEND_SHIFT); - switch(ctx->Color.BlendEquationRGB) { + switch(ctx->Color.Blend[0].EquationRGB) { case GL_FUNC_ADD: eqn = R200_COMB_FCN_ADD_CLAMP; break; @@ -275,7 +275,7 @@ static void r200_set_blend_state( struct gl_context * ctx ) default: fprintf( stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n", - __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB ); + __FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationRGB ); return; } @@ -284,10 +284,10 @@ static void r200_set_blend_state( struct gl_context * ctx ) return; } - funcA = (blend_factor( ctx->Color.BlendSrcA, GL_TRUE ) << R200_SRC_BLEND_SHIFT) | - (blend_factor( ctx->Color.BlendDstA, GL_FALSE ) << R200_DST_BLEND_SHIFT); + funcA = (blend_factor( ctx->Color.Blend[0].SrcA, GL_TRUE ) << R200_SRC_BLEND_SHIFT) | + (blend_factor( ctx->Color.Blend[0].DstA, GL_FALSE ) << R200_DST_BLEND_SHIFT); - switch(ctx->Color.BlendEquationA) { + switch(ctx->Color.Blend[0].EquationA) { case GL_FUNC_ADD: eqnA = R200_COMB_FCN_ADD_CLAMP; break; @@ -314,7 +314,7 @@ static void r200_set_blend_state( struct gl_context * ctx ) default: fprintf( stderr, "[%s:%u] Invalid A blend equation (0x%04x).\n", - __FUNCTION__, __LINE__, ctx->Color.BlendEquationA ); + __FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationA ); return; } diff --git a/src/mesa/drivers/dri/r200/r200_state_init.c b/src/mesa/drivers/dri/r200/r200_state_init.c index f6afb90d59..e173cce086 100644 --- a/src/mesa/drivers/dri/r200/r200_state_init.c +++ b/src/mesa/drivers/dri/r200/r200_state_init.c @@ -587,7 +587,7 @@ static void ctx_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom) if (rrb) { OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0)); - OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_RELOC(rrb->draw_offset, rrb->bo, rrb->draw_offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0)); OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); diff --git a/src/mesa/drivers/dri/r200/r200_tex.c b/src/mesa/drivers/dri/r200/r200_tex.c index 064324731b..092b757583 100644 --- a/src/mesa/drivers/dri/r200/r200_tex.c +++ b/src/mesa/drivers/dri/r200/r200_tex.c @@ -37,6 +37,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/context.h" #include "main/enums.h" #include "main/image.h" +#include "main/mfeatures.h" #include "main/simple_list.h" #include "main/texstore.h" #include "main/teximage.h" diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c b/src/mesa/drivers/dri/r200/r200_texstate.c index 24fb031ecb..7adf9ad73e 100644 --- a/src/mesa/drivers/dri/r200/r200_texstate.c +++ b/src/mesa/drivers/dri/r200/r200_texstate.c @@ -777,10 +777,9 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; + uint32_t internalFormat, format; gl_format texFormat; - type = GL_BGRA; format = GL_UNSIGNED_BYTE; internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); @@ -860,9 +859,20 @@ void r200SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format t->pp_txsize = ((rb->base.Width - 1) << RADEON_TEX_USIZE_SHIFT) | ((rb->base.Height - 1) << RADEON_TEX_VSIZE_SHIFT); - t->pp_txformat |= R200_TXFORMAT_NON_POWER2; - t->pp_txpitch = pitch_val; - t->pp_txpitch -= 32; + + if (target == GL_TEXTURE_RECTANGLE_NV) { + t->pp_txformat |= R200_TXFORMAT_NON_POWER2; + t->pp_txpitch = pitch_val; + t->pp_txpitch -= 32; + } else { + t->pp_txformat &= ~(R200_TXFORMAT_WIDTH_MASK | + R200_TXFORMAT_HEIGHT_MASK | + R200_TXFORMAT_CUBIC_MAP_ENABLE | + R200_TXFORMAT_F5_WIDTH_MASK | + R200_TXFORMAT_F5_HEIGHT_MASK); + t->pp_txformat |= ((texImage->WidthLog2 << R200_TXFORMAT_WIDTH_SHIFT) | + (texImage->HeightLog2 << R200_TXFORMAT_HEIGHT_SHIFT)); + } t->validated = GL_TRUE; _mesa_unlock_texture(radeon->glCtx, texObj); diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c index 5d69012a81..63e03b0e0c 100644 --- a/src/mesa/drivers/dri/r200/r200_vertprog.c +++ b/src/mesa/drivers/dri/r200/r200_vertprog.c @@ -438,7 +438,7 @@ static GLboolean r200_translate_vertex_program(struct gl_context *ctx, struct r2 (1 << VERT_RESULT_TEX5) | (1 << VERT_RESULT_PSIZ))) != 0) { if (R200_DEBUG & RADEON_FALLBACKS) { fprintf(stderr, "can't handle vert prog outputs 0x%llx\n", - mesa_vp->Base.OutputsWritten); + (unsigned long long) mesa_vp->Base.OutputsWritten); } return GL_FALSE; } diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c index 782671bac0..deba9ca834 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -49,6 +49,11 @@ static void presub_string(char out[10], unsigned int inst) } } +static int get_msb(unsigned int bit, unsigned int r400_ext_addr) +{ + return (r400_ext_addr & bit) ? 1 << 5 : 0; +} + /* just some random things... */ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) { @@ -61,16 +66,21 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) fprintf(stderr, "Hardware program\n"); fprintf(stderr, "----------------\n"); + if (c->is_r400) { + fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext); + } for (n = 0; n <= (code->config & 3); n++) { uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n]; - int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT; - int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT; + unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) + + (((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6); + unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) + + (((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6); int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT; int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT; - fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " - "alu_end: %d, tex_end: %d (code_addr: %08x)\n", n, + fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, " + "alu_end: %u, tex_end: %d (code_addr: %08x)\n", n, alu_offset, tex_offset, alu_end, tex_end, code_addr); if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) { @@ -125,11 +135,15 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) for (j = 0; j < 3; ++j) { int regc = code->alu.inst[i].rgb_addr >> (j * 6); int rega = code->alu.inst[i].alpha_addr >> (j * 6); + int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j), + code->alu.inst[i].r400_ext_addr); + int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j), + code->alu.inst[i].r400_ext_addr); sprintf(srcc[j], "%c%i", - (regc & 32) ? 'c' : 't', regc & 31); + (regc & 32) ? 'c' : 't', (regc & 31) | msbc); sprintf(srca[j], "%c%i", - (rega & 32) ? 'c' : 't', rega & 31); + (rega & 32) ? 'c' : 't', (rega & 31) | msba); } dstc[0] = 0; @@ -141,9 +155,14 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) (code->alu.inst[i]. rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : ""); if (flags[0] != 0) { + unsigned int msb = get_msb( + R400_ADDRD_EXT_RGB_MSB_BIT, + code->alu.inst[i].r400_ext_addr); + sprintf(dstc, "t%i.%s ", - (code->alu.inst[i]. - rgb_addr >> R300_ALU_DSTC_SHIFT) & 31, + ((code->alu.inst[i]. + rgb_addr >> R300_ALU_DSTC_SHIFT) + & 31) | msb, flags); } sprintf(flags, "%s%s%s", @@ -166,9 +185,13 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) dsta[0] = 0; if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { + unsigned int msb = get_msb( + R400_ADDRD_EXT_A_MSB_BIT, + code->alu.inst[i].r400_ext_addr); sprintf(dsta, "t%i.w ", - (code->alu.inst[i]. - alpha_addr >> R300_ALU_DSTA_SHIFT) & 31); + ((code->alu.inst[i]. + alpha_addr >> R300_ALU_DSTA_SHIFT) & 31) + | msb); } if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { sprintf(tmp, "o%i.w ", diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index 1db8678e89..28d132a5fe 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -64,6 +64,20 @@ struct r300_emit_state { __FILE__, __FUNCTION__, ##args); \ } while(0) +static unsigned int get_msbs_alu(unsigned int bits) +{ + return (bits >> 6) & 0x7; +} + +/** + * @param lsbs The number of least significant bits + */ +static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs) +{ + return (bits >> lsbs) & 0x15; +} + +#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> lsbs) & mask) /** * Mark a temporary register as used. @@ -83,7 +97,7 @@ static unsigned int use_source(struct r300_fragment_program_code* code, struct r return src.Index | (1 << 5); } else if (src.File == RC_FILE_TEMPORARY) { use_temporary(code, src.Index); - return src.Index; + return src.Index & 0x1f; } return 0; @@ -151,11 +165,19 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); for(j = 0; j < 3; ++j) { + /* Set the RGB address */ unsigned int src = use_source(code, inst->RGB.Src[j]); unsigned int arg; + if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j); + code->alu.inst[ip].rgb_addr |= src << (6*j); + /* Set the Alpha address */ src = use_source(code, inst->Alpha.Src[j]); + if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j); + code->alu.inst[ip].alpha_addr |= src << (6*j); arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); @@ -223,8 +245,10 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i if (inst->RGB.WriteMask) { use_temporary(code, inst->RGB.DestIndex); + if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT; code->alu.inst[ip].rgb_addr |= - (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) | + ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) | (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); } if (inst->RGB.OutputWriteMask) { @@ -236,8 +260,10 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i if (inst->Alpha.WriteMask) { use_temporary(code, inst->Alpha.DestIndex); + if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT; code->alu.inst[ip].alpha_addr |= - (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) | + ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) | R300_ALU_DSTA_REG; } if (inst->Alpha.OutputWriteMask) { @@ -269,6 +295,8 @@ static int finish_node(struct r300_emit_state * emit) unsigned tex_offset; unsigned tex_end; + unsigned int alu_offset_msbs, alu_end_msbs; + if (code->alu.length == emit->node_first_alu) { /* Generate a single NOP for this node */ struct rc_pair_instruction inst; @@ -301,13 +329,48 @@ static int finish_node(struct r300_emit_state * emit) * * Also note that the register specification from AMD is slightly * incorrect in its description of this register. */ - code->code_addr[emit->current_node] = - (alu_offset << R300_ALU_START_SHIFT) | - (alu_end << R300_ALU_SIZE_SHIFT) | - (tex_offset << R300_TEX_START_SHIFT) | - (tex_end << R300_TEX_SIZE_SHIFT) | - emit->node_flags; - + code->code_addr[emit->current_node] = + ((alu_offset << R300_ALU_START_SHIFT) + & R300_ALU_START_MASK) + | ((alu_end << R300_ALU_SIZE_SHIFT) + & R300_ALU_SIZE_MASK) + | ((tex_offset << R300_TEX_START_SHIFT) + & R300_TEX_START_MASK) + | ((tex_end << R300_TEX_SIZE_SHIFT) + & R300_TEX_SIZE_MASK) + | emit->node_flags + | (get_msbs_tex(tex_offset, 5) + << R400_TEX_START_MSB_SHIFT) + | (get_msbs_tex(tex_end, 5) + << R400_TEX_SIZE_MSB_SHIFT) + ; + + /* Write r400 extended instruction fields. These will be ignored on + * r300 cards. */ + alu_offset_msbs = get_msbs_alu(alu_offset); + alu_end_msbs = get_msbs_alu(alu_end); + switch(emit->current_node) { + case 0: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START3_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT; + break; + case 1: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START2_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT; + break; + case 2: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START1_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT; + break; + case 3: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START0_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT; + break; + } return 1; } @@ -348,7 +411,7 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) unsigned int opcode; PROG_CODE; - if (code->tex.length >= R300_PFS_MAX_TEX_INST) { + if (code->tex.length >= emit->compiler->Base.max_tex_insts) { error("Too many TEX instructions"); return 0; } @@ -376,10 +439,17 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) use_temporary(code, inst->U.I.SrcReg[0].Index); code->tex.inst[code->tex.length++] = - (inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) | - (dest << R300_DST_ADDR_SHIFT) | - (unit << R300_TEX_ID_SHIFT) | - (opcode << R300_TEX_INST_SHIFT); + ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) + & R300_SRC_ADDR_MASK) + | ((dest << R300_DST_ADDR_SHIFT) + & R300_DST_ADDR_MASK) + | (unit << R300_TEX_ID_SHIFT) + | (opcode << R300_TEX_INST_SHIFT) + | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? + R400_SRC_ADDR_EXT_BIT : 0) + | (dest >= R300_PFS_NUM_TEMP_REGS ? + R400_DST_ADDR_EXT_BIT : 0) + ; return 1; } @@ -393,6 +463,7 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; struct r300_emit_state emit; struct r300_fragment_program_code *code = &compiler->code->code.r300; + unsigned int tex_end; memset(&emit, 0, sizeof(emit)); emit.compiler = compiler; @@ -424,11 +495,28 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) finish_node(&emit); code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ + + /* Set r400 extended instruction fields. These values will be ignored + * on r300 cards. */ + code->r400_code_offset_ext |= + (get_msbs_alu(0) + << R400_ALU_OFFSET_MSB_SHIFT) + | (get_msbs_alu(code->alu.length - 1) + << R400_ALU_SIZE_MSB_SHIFT); + + tex_end = code->tex.length ? code->tex.length - 1 : 0; code->code_offset = - (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | - ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) | - (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | - ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT); + ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) + & R300_PFS_CNTL_ALU_OFFSET_MASK) + | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT) + & R300_PFS_CNTL_ALU_END_MASK) + | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) + & R300_PFS_CNTL_TEX_OFFSET_MASK) + | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT) + & R300_PFS_CNTL_TEX_END_MASK) + | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT) + | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT) + ; if (emit.current_node < 3) { int shift = 3 - emit.current_node; @@ -438,4 +526,11 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) for(i = 0; i < shift; ++i) code->code_addr[i] = 0; } + + if (code->pixsize >= R300_PFS_NUM_TEMP_REGS + || code->alu.length > R300_PFS_MAX_ALU_INST + || code->tex.length > R300_PFS_MAX_TEX_INST) { + + code->r390_mode = 1; + } } diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c index 05d3da8a10..5223aaa71a 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c @@ -98,9 +98,6 @@ static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) unsigned int relevant; int j; - if (reg.Abs) - reg.Negate = RC_MASK_NONE; - if (opcode == RC_OPCODE_KIL || opcode == RC_OPCODE_TEX || opcode == RC_OPCODE_TXB || @@ -140,9 +137,6 @@ static void r300_swizzle_split( struct rc_src_register src, unsigned int mask, struct rc_swizzle_split * split) { - if (src.Abs) - src.Negate = RC_MASK_NONE; - split->NumPhases = 0; while(mask) { @@ -222,13 +216,14 @@ unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle) */ unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle) { + unsigned int swz = GET_SWZ(swizzle, 0); if (src == RC_PAIR_PRESUB_SRC) { - return R300_ALU_ARGA_SRCP_X + swizzle; + return R300_ALU_ARGA_SRCP_X + swz; } - if (swizzle < 3) - return swizzle + 3*src; + if (swz < 3) + return swz + 3*src; - switch(swizzle) { + switch(swz) { case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src; case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE; case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO; diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index e0d349b98c..9286733635 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -78,12 +78,32 @@ static void rc_rewrite_depth_out(struct radeon_compiler *cc, void *user) } } +static int radeon_saturate_output( + struct radeon_compiler * c, + struct rc_instruction * inst, + void* data) +{ + const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); + + if (!info->HasDstReg || inst->U.I.DstReg.File != RC_FILE_OUTPUT) + return 0; + + inst->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; + return 1; +} + void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) { int is_r500 = c->Base.is_r500; int opt = !c->Base.disable_optimizations; + int sat_out = c->state.frag_clamp; /* Lists of instruction transformations. */ + struct radeon_program_transformation saturate_output[] = { + { &radeon_saturate_output, c }, + { 0, 0 } + }; + struct radeon_program_transformation rewrite_tex[] = { { &radeonTransformTEX, c }, { 0, 0 } @@ -113,6 +133,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) {"unroll loops", 1, is_r500, rc_unroll_loops, NULL}, {"transform loops", 1, !is_r500, rc_transform_loops, NULL}, {"emulate branches", 1, !is_r500, rc_emulate_branches, NULL}, + {"saturate output writes", 1, sat_out, rc_local_transform, saturate_output}, {"transform TEX", 1, 1, rc_local_transform, rewrite_tex}, {"native rewrite", 1, is_r500, rc_local_transform, native_rewrite_r500}, {"native rewrite", 1, !is_r500, rc_local_transform, native_rewrite_r300}, @@ -124,7 +145,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) /* This pass makes it easier for the scheduler to group TEX * instructions and reduces the chances of creating too * many texture indirections.*/ - {"register rename", 1, !is_r500 || opt, rc_rename_regs, NULL}, + {"register rename", 1, !is_r500, rc_rename_regs, NULL}, {"pair translate", 1, 1, rc_pair_translate, NULL}, {"pair scheduling", 1, 1, rc_pair_schedule, NULL}, {"register allocation", 1, opt, rc_pair_regalloc, NULL}, diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index 472029f63d..8ad2175ead 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -490,13 +490,6 @@ static void translate_vertex_program(struct radeon_compiler *c, void *user) continue; if (info->HasDstReg) { - /* Relative addressing of destination operands is not supported yet. */ - if (vpi->DstReg.RelAddr) { - rc_error(&compiler->Base, "Vertex program does not support relative " - "addressing of destination operands (yet).\n"); - return; - } - /* Neither is Saturate. */ if (vpi->SaturateMode != RC_SATURATE_NONE) { rc_error(&compiler->Base, "Vertex program does not support the Saturate " @@ -668,7 +661,6 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) char hwtemps[RC_REGISTER_MAX_INDEX]; struct temporary_allocation * ta; unsigned int i, j; - struct rc_instruction *last_inst_src_reladdr = NULL; memset(hwtemps, 0, sizeof(hwtemps)); @@ -693,28 +685,11 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) } } - /* Pass 2: If there is relative addressing of dst temporaries, we cannot change register indices. Give up. - * For src temporaries, save the last instruction which uses relative addressing. */ - for (inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { - const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); - - if (opcode->HasDstReg) - if (inst->U.I.DstReg.RelAddr) - return; - - for (i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && - inst->U.I.SrcReg[i].RelAddr) { - last_inst_src_reladdr = inst; - } - } - } - ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool, sizeof(struct temporary_allocation) * num_orig_temps); memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps); - /* Pass 3: Determine original temporary lifetimes */ + /* Pass 2: Determine original temporary lifetimes */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); /* Instructions inside of loops need to use the ENDLOOP @@ -744,41 +719,22 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) for (i = 0; i < opcode->NumSrcRegs; ++i) { if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { - struct rc_instruction *last_read; - - /* From "last_inst_src_reladdr", "end_loop", and "inst", - * select the instruction with the highest instruction index (IP). - * Note that "end_loop", if available, has always a higher index than "inst". */ - if (last_inst_src_reladdr) { - if (end_loop) { - last_read = last_inst_src_reladdr->IP > end_loop->IP ? - last_inst_src_reladdr : end_loop; - } else { - last_read = last_inst_src_reladdr->IP > inst->IP ? - last_inst_src_reladdr : inst; - } - } else { - last_read = end_loop ? end_loop : inst; - } - - ta[inst->U.I.SrcReg[i].Index].LastRead = last_read; + ta[inst->U.I.SrcReg[i].Index].LastRead = end_loop ? end_loop : inst; } } } - /* Pass 4: Register allocation */ + /* Pass 3: Register allocation */ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) { const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); - if (!last_inst_src_reladdr || last_inst_src_reladdr->IP < inst->IP) { - for (i = 0; i < opcode->NumSrcRegs; ++i) { - if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { - unsigned int orig = inst->U.I.SrcReg[i].Index; - inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; + for (i = 0; i < opcode->NumSrcRegs; ++i) { + if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { + unsigned int orig = inst->U.I.SrcReg[i].Index; + inst->U.I.SrcReg[i].Index = ta[orig].HwTemp; - if (ta[orig].Allocated && inst == ta[orig].LastRead) - hwtemps[ta[orig].HwTemp] = 0; - } + if (ta[orig].Allocated && inst == ta[orig].LastRead) + hwtemps[ta[orig].HwTemp] = 0; } } @@ -792,12 +748,7 @@ static void allocate_temporary_registers(struct radeon_compiler *c, void *user) break; } ta[orig].Allocated = 1; - if (last_inst_src_reladdr && - last_inst_src_reladdr->IP > inst->IP) { - ta[orig].HwTemp = orig; - } else { - ta[orig].HwTemp = j; - } + ta[orig].HwTemp = j; hwtemps[ta[orig].HwTemp] = 1; } diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index ef81be48f7..140eeed3de 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -77,9 +77,6 @@ static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE)) return 0; - if (reg.Negate) - reg.Negate ^= RC_MASK_XYZW; - for(i = 0; i < 4; ++i) { unsigned int swz = GET_SWZ(reg.Swizzle, i); if (swz == RC_SWIZZLE_UNUSED) { @@ -103,9 +100,6 @@ static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) return 0; } else { /* ALU instructions support almost everything */ - if (reg.Abs) - return 1; - relevant = 0; for(i = 0; i < 3; ++i) { unsigned int swz = GET_SWZ(reg.Swizzle, i); diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 5da82d90f6..301b444669 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -170,7 +170,7 @@ static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg) static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i) { unsigned int t = inst->Alpha.Arg[i].Source; - t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2; + t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2; t |= inst->Alpha.Arg[i].Negate << 5; t |= inst->Alpha.Arg[i].Abs << 6; return t; @@ -372,7 +372,7 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst | (inst->DstReg.WriteMask << 11) | R500_INST_TEX_SEM_WAIT; code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit) - | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED; + | R500_TEX_SEM_ACQUIRE; if (inst->TexSrcTarget == RC_TEXTURE_RECT) code->inst[ip].inst1 |= R500_TEX_UNSCALED; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index b69e81698a..35360aa70f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -31,6 +31,9 @@ #define R300_PFS_NUM_TEMP_REGS 32 #define R300_PFS_NUM_CONST_REGS 32 +#define R400_PFS_MAX_ALU_INST 512 +#define R400_PFS_MAX_TEX_INST 512 + #define R500_PFS_MAX_INST 512 #define R500_PFS_NUM_TEMP_REGS 128 #define R500_PFS_NUM_CONST_REGS 256 @@ -170,6 +173,8 @@ struct r300_fragment_program_external_state { * RC_STATE_R300_TEXSCALE_FACTOR. */ unsigned clamp_and_scale_before_fetch : 1; } unit[16]; + + unsigned frag_clamp:1; }; @@ -187,24 +192,29 @@ struct r300_fragment_program_node { */ struct r300_fragment_program_code { struct { - int length; /**< total # of texture instructions used */ - uint32_t inst[R300_PFS_MAX_TEX_INST]; + unsigned int length; /**< total # of texture instructions used */ + uint32_t inst[R400_PFS_MAX_TEX_INST]; } tex; struct { - int length; /**< total # of ALU instructions used */ + unsigned int length; /**< total # of ALU instructions used */ struct { uint32_t rgb_inst; uint32_t rgb_addr; uint32_t alpha_inst; uint32_t alpha_addr; - } inst[R300_PFS_MAX_ALU_INST]; + uint32_t r400_ext_addr; + } inst[R400_PFS_MAX_ALU_INST]; } alu; uint32_t config; /* US_CONFIG */ uint32_t pixsize; /* US_PIXSIZE */ uint32_t code_offset; /* US_CODE_OFFSET */ + uint32_t r400_code_offset_ext; /* US_CODE_EXT */ uint32_t code_addr[4]; /* US_CODE_ADDR */ + /*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries + * for r400 cards */ + unsigned int r390_mode:1; }; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c index 65548604bc..79cd7996f7 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c @@ -373,9 +373,11 @@ void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) const struct rc_opcode_info * info; rc_for_all_reads_mask(tmp, reg_count_callback, &max_reg); if (tmp->Type == RC_INSTRUCTION_NORMAL) { + info = rc_get_opcode_info(tmp->U.I.Opcode); + if (info->Opcode == RC_OPCODE_BEGIN_TEX) + continue; if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) s->num_presub_ops++; - info = rc_get_opcode_info(tmp->U.I.Opcode); } else { if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) s->num_presub_ops++; @@ -402,11 +404,11 @@ static void print_stats(struct radeon_compiler * c) { struct rc_program_stats s; - rc_get_stats(c, &s); - - if (s.num_insts < 4) + if (c->initial_num_insts <= 5) return; + rc_get_stats(c, &s); + switch (c->type) { case RC_VERTEX_PROGRAM: fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n" @@ -461,6 +463,11 @@ void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pa /* Executes a list of compiler passes given in the parameter 'list'. */ void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list) { + struct rc_program_stats s; + + rc_get_stats(c, &s); + c->initial_num_insts = s.num_insts; + if (c->Debug & RC_DBG_LOG) { fprintf(stderr, "%s: before compilation\n", shader_name[c->type]); rc_print_program(&c->Program); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index e663339589..2d8e415f35 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -50,6 +50,7 @@ struct radeon_compiler { char * ErrorMsg; /* Hardware specification. */ + unsigned is_r400:1; unsigned is_r500:1; unsigned has_half_swizzles:1; unsigned has_presub:1; @@ -57,6 +58,7 @@ struct radeon_compiler { unsigned max_temp_regs; unsigned max_constants; int max_alu_insts; + unsigned max_tex_insts; /* Whether to remove unused constants and empty holes in constant space. */ unsigned remove_unused_constants:1; @@ -70,6 +72,8 @@ struct radeon_compiler { /*@}*/ struct emulate_loop_state loop_state; + + unsigned initial_num_insts; /* Number of instructions at start. */ }; void rc_init(struct radeon_compiler * c); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c index bf393a9fb1..15ec4418cb 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.c @@ -55,6 +55,24 @@ rc_swizzle get_swz(unsigned int swz, rc_swizzle idx) return GET_SWZ(swz, idx); } +/** + * The purpose of this function is to standardize the number channels used by + * swizzles. All swizzles regardless of what instruction they are a part of + * should have 4 channels initialized with values. + * @param channels The number of channels in initial_value that have a + * meaningful value. + * @return An initialized swizzle that has all of the unused channels set to + * RC_SWIZZLE_UNUSED. + */ +unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels) +{ + unsigned int i; + for (i = channels; i < 4; i++) { + SET_SWZ(initial_value, i, RC_SWIZZLE_UNUSED); + } + return initial_value; +} + unsigned int combine_swizzles4(unsigned int src, rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w) { @@ -147,13 +165,17 @@ unsigned int rc_src_reads_dst_mask( return dst_mask & rc_swizzle_to_writemask(src_swz); } -unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels) +/** + * @return A bit mask specifying whether this swizzle will select from an RGB + * source, an Alpha source, or both. + */ +unsigned int rc_source_type_swz(unsigned int swizzle) { unsigned int chan; unsigned int swz = RC_SWIZZLE_UNUSED; unsigned int ret = RC_SOURCE_NONE; - for(chan = 0; chan < channels; chan++) { + for(chan = 0; chan < 4; chan++) { swz = GET_SWZ(swizzle, chan); if (swz == RC_SWIZZLE_W) { ret |= RC_SOURCE_ALPHA; @@ -202,7 +224,7 @@ static void can_use_presub_read_cb( if (d->RemoveSrcs[i].File == file && d->RemoveSrcs[i].Index == index) { src_type &= - ~rc_source_type_swz(d->RemoveSrcs[i].Swizzle, 4); + ~rc_source_type_swz(d->RemoveSrcs[i].Swizzle); } } @@ -223,7 +245,6 @@ unsigned int rc_inst_can_use_presub( { struct can_use_presub_data d; unsigned int num_presub_srcs; - unsigned int presub_src_type = rc_source_type_mask(presub_writemask); const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); @@ -253,13 +274,7 @@ unsigned int rc_inst_can_use_presub( num_presub_srcs = rc_presubtract_src_reg_count(presub_op); - if ((presub_src_type & RC_SOURCE_RGB) - && d.RGBCount + num_presub_srcs > 3) { - return 0; - } - - if ((presub_src_type & RC_SOURCE_ALPHA) - && d.AlphaCount + num_presub_srcs > 3) { + if (d.RGBCount + num_presub_srcs > 3 || d.AlphaCount + num_presub_srcs > 3) { return 0; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h index 461ab9ffb1..dd0f6c6615 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler_util.h @@ -10,6 +10,8 @@ unsigned int rc_swizzle_to_writemask(unsigned int swz); rc_swizzle get_swz(unsigned int swz, rc_swizzle idx); +unsigned int rc_init_swizzle(unsigned int initial_value, unsigned int channels); + unsigned int combine_swizzles4(unsigned int src, rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w); @@ -32,7 +34,7 @@ unsigned int rc_src_reads_dst_mask( unsigned int dst_idx, unsigned int dst_mask); -unsigned int rc_source_type_swz(unsigned int swizzle, unsigned int channels); +unsigned int rc_source_type_swz(unsigned int swizzle); unsigned int rc_source_type_mask(unsigned int mask); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c index d0a64d936e..c080d5aecc 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c @@ -140,14 +140,8 @@ static void pair_sub_for_all_args( for(i = 0; i < info->NumSrcRegs; i++) { unsigned int src_type; - unsigned int channels = 0; - if (&fullinst->U.P.RGB == sub) - channels = 3; - else if (&fullinst->U.P.Alpha == sub) - channels = 1; - - assert(channels > 0); - src_type = rc_source_type_swz(sub->Arg[i].Swizzle, channels); + + src_type = rc_source_type_swz(sub->Arg[i].Swizzle); if (src_type == RC_SOURCE_NONE) continue; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index 87906f37b1..678e147588 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -160,12 +160,8 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index); if (pused) { usedmask = *pused & inst->U.I.DstReg.WriteMask; - if (!inst->U.I.DstReg.RelAddr) - *pused &= ~usedmask; + *pused &= ~usedmask; } - - if (inst->U.I.DstReg.RelAddr) - mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X); } insts->WriteMask |= usedmask; @@ -219,22 +215,9 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user) { struct deadcode_state s; unsigned int nr_instructions; - unsigned has_temp_reladdr_src = 0; rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user; unsigned int ip; - /* Give up if there is relative addressing of destination operands. */ - for(struct rc_instruction * inst = c->Program.Instructions.Next; - inst != &c->Program.Instructions; - inst = inst->Next) { - const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode); - if (opcode->HasDstReg && - inst->U.I.DstReg.WriteMask && - inst->U.I.DstReg.RelAddr) { - return; - } - } - memset(&s, 0, sizeof(s)); s.C = c; @@ -321,32 +304,6 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, void *user) rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name); } } - - if (!has_temp_reladdr_src) { - for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { - if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && - inst->U.I.SrcReg[i].RelAddr) { - /* If there is a register read from a temporary file with relative addressing, - * mark all preceding written registers as used. */ - for (struct rc_instruction *ptr = inst->Prev; - ptr != &c->Program.Instructions; - ptr = ptr->Prev) { - opcode = rc_get_opcode_info(ptr->U.I.Opcode); - if (opcode->HasDstReg && - ptr->U.I.DstReg.File == RC_FILE_TEMPORARY && - ptr->U.I.DstReg.WriteMask) { - mark_used(&s, - ptr->U.I.DstReg.File, - ptr->U.I.DstReg.Index, - ptr->U.I.DstReg.WriteMask); - } - } - - has_temp_reladdr_src = 1; - break; - } - } - } } update_instruction(&s, inst); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c index a0f7bd8174..133a9f72ec 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c @@ -56,6 +56,7 @@ static void rewrite_source(struct radeon_compiler * c, mov->U.I.DstReg.Index = tempreg; mov->U.I.DstReg.WriteMask = split.Phase[phase]; mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; + mov->U.I.PreSub = inst->U.I.PreSub; phase_refmask = 0; for(unsigned int chan = 0; chan < 4; ++chan) { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index 44f4c0fbdc..c4e6a5e0a1 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -139,7 +139,6 @@ static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * i unsigned int i; if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || - inst_mov->U.I.DstReg.RelAddr || inst_mov->U.I.WriteALUResult || inst_mov->U.I.SaturateMode) return; @@ -312,7 +311,18 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction * struct rc_constant * constant; struct rc_src_register newsrc; int have_real_reference; + unsigned int chan; + + /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */ + for (chan = 0; chan < 4; ++chan) + if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) + break; + if (chan == 4) { + inst->U.I.SrcReg[src].File = RC_FILE_NONE; + continue; + } + /* Convert immediates to swizzles. */ if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || inst->U.I.SrcReg[src].RelAddr || inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) @@ -326,7 +336,7 @@ static void constant_folding(struct radeon_compiler * c, struct rc_instruction * newsrc = inst->U.I.SrcReg[src]; have_real_reference = 0; - for(unsigned int chan = 0; chan < 4; ++chan) { + for (chan = 0; chan < 4; ++chan) { unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); unsigned int newswz; float imm; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c index 9beb5d6357..8e10813ff0 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c @@ -365,8 +365,8 @@ static int merge_presub_sources( for(arg = 0; arg < info->NumSrcRegs; arg++) { /*If this arg does not read from an rgb source, * do nothing. */ - if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle, - 3) & type)) { + if (!(rc_source_type_swz(dst_full->RGB.Arg[arg].Swizzle) + & type)) { continue; } @@ -423,11 +423,11 @@ static int destructive_merge_instructions( unsigned int index = 0; int source; - if (alpha->Alpha.Arg[arg].Swizzle < 3) { + if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 3) { srcrgb = 1; file = alpha->RGB.Src[oldsrc].File; index = alpha->RGB.Src[oldsrc].Index; - } else if (alpha->Alpha.Arg[arg].Swizzle < 4) { + } else if (GET_SWZ(alpha->Alpha.Arg[arg].Swizzle, 0) < 4) { srcalpha = 1; file = alpha->Alpha.Src[oldsrc].File; index = alpha->Alpha.Src[oldsrc].Index; @@ -544,18 +544,12 @@ static void rgb_to_alpha_remap ( { int new_src_index; unsigned int i; - struct rc_pair_instruction_source * old_src = - rc_pair_get_src(&inst->U.P, arg); - if (!old_src) { - return; - } for (i = 0; i < 3; i++) { if (get_swz(arg->Swizzle, i) == old_swz) { SET_SWZ(arg->Swizzle, i, RC_SWIZZLE_W); } } - memset(old_src, 0, sizeof(struct rc_pair_instruction_source)); new_src_index = rc_pair_alloc_source(&inst->U.P, 0, 1, old_file, new_index); /* This conversion is not possible, we must have made a mistake in @@ -728,7 +722,8 @@ static int convert_rgb_to_alpha( for (j = 0; j < 3; j++) { unsigned int swz = get_swz(pair_inst->Alpha.Arg[i].Swizzle, j); if (swz != RC_SWIZZLE_UNUSED) { - pair_inst->Alpha.Arg[i].Swizzle = swz; + pair_inst->Alpha.Arg[i].Swizzle = + rc_init_swizzle(swz, 1); break; } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c index fc05366f50..9e03eb1aca 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c @@ -28,6 +28,7 @@ #include "radeon_program_pair.h" #include "radeon_compiler.h" +#include "radeon_compiler_util.h" /** @@ -213,16 +214,21 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, if (needrgb && !istranscendent) { unsigned int srcrgb = 0; unsigned int srcalpha = 0; + unsigned int srcmask = 0; int j; /* We don't care about the alpha channel here. We only * want the part of the swizzle that writes to rgb, * since we are creating an rgb instruction. */ for(j = 0; j < 3; ++j) { unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); - if (swz < 3) + + if (swz < RC_SWIZZLE_W) srcrgb = 1; - else if (swz < 4) + else if (swz == RC_SWIZZLE_W) srcalpha = 1; + + if (swz < RC_SWIZZLE_UNUSED) + srcmask |= 1 << j; } source = rc_pair_alloc_source(pair, srcrgb, srcalpha, inst->SrcReg[i].File, inst->SrcReg[i].Index); @@ -232,9 +238,10 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, return; } pair->RGB.Arg[i].Source = source; - pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff; + pair->RGB.Arg[i].Swizzle = + rc_init_swizzle(inst->SrcReg[i].Swizzle, 3); pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; - pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); + pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); } if (needalpha) { unsigned int srcrgb = 0; @@ -252,7 +259,7 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c, return; } pair->Alpha.Arg[i].Source = source; - pair->Alpha.Arg[i].Swizzle = swz; + pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1); pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W); } @@ -302,12 +309,6 @@ static void check_opcode_support(struct r300_fragment_program_compiler *c, const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); if (opcode->HasDstReg) { - if (inst->DstReg.RelAddr) { - rc_error(&c->Base, "Fragment program does not support relative addressing " - "of destination operands.\n"); - return; - } - if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) { rc_error(&c->Base, "Fragment program does not support signed Saturate.\n"); return; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h index df6c94b35f..a07f6b63c6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h @@ -56,11 +56,7 @@ struct rc_src_register { struct rc_dst_register { unsigned int File:3; - - /** Negative values may be used for relative addressing. */ - signed int Index:(RC_REGISTER_INDEX_BITS+1); - unsigned int RelAddr:1; - + unsigned int Index:RC_REGISTER_INDEX_BITS; unsigned int WriteMask:4; }; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 58977a40c7..9fc991166a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -91,7 +91,6 @@ static struct rc_dst_register dstregtmpmask(int index, int mask) dst.File = RC_FILE_TEMPORARY; dst.Index = index; dst.WriteMask = mask; - dst.RelAddr = 0; return dst; } @@ -689,11 +688,12 @@ static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, &constant_swizzle); /* MOV dst, src */ + dst.WriteMask = RC_MASK_XYZW; emit1(c, inst->Prev, RC_OPCODE_MOV, 0, dst, inst->U.I.SrcReg[0]); - /* MAX dst.z, src, 0.00...001 */ + /* MAX dst.y, src, 0.00...001 */ emit2(c, inst->Prev, RC_OPCODE_MAX, 0, dstregtmpmask(dst.Index, RC_MASK_Y), srcreg(RC_FILE_TEMPORARY, dst.Index), diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c index 5905d26e52..68874795b8 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c @@ -211,27 +211,9 @@ struct rc_pair_instruction_source * rc_pair_get_src( struct rc_pair_instruction * pair_inst, struct rc_pair_instruction_arg * arg) { - unsigned int i, type; - unsigned int channels = 0; + unsigned int type; - for(i = 0; i < 3; i++) { - if (arg == pair_inst->RGB.Arg + i) { - channels = 3; - break; - } - } - - if (channels == 0) { - for (i = 0; i < 3; i++) { - if (arg == pair_inst->Alpha.Arg + i) { - channels = 1; - break; - } - } - } - - assert(channels > 0); - type = rc_source_type_swz(arg->Swizzle, channels); + type = rc_source_type_swz(arg->Swizzle); if (type & RC_SOURCE_RGB) { return &pair_inst->RGB.Src[arg->Source]; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h index ccf7a0070c..6708b16d29 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h @@ -63,7 +63,7 @@ struct rc_pair_instruction_source { struct rc_pair_instruction_arg { unsigned int Source:2; - unsigned int Swizzle:9; + unsigned int Swizzle:12; unsigned int Abs:1; unsigned int Negate:1; }; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c index ae13f6742f..390d131946 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c @@ -110,7 +110,7 @@ static void rc_print_mask(FILE * f, unsigned int mask) static void rc_print_dst_register(FILE * f, struct rc_dst_register dst) { - rc_print_register(f, dst.File, dst.Index, dst.RelAddr); + rc_print_register(f, dst.File, dst.Index, 0); if (dst.WriteMask != RC_MASK_XYZW) { fprintf(f, "."); rc_print_mask(f, dst.WriteMask); @@ -379,7 +379,7 @@ static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst else fprintf(f,"%d", inst->Alpha.Arg[arg].Source); fprintf(f,".%c%s", - rc_swizzle_char(inst->Alpha.Arg[arg].Swizzle), abs); + rc_swizzle_char(GET_SWZ(inst->Alpha.Arg[arg].Swizzle, 0)), abs); } fprintf(f, "\n"); } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c index f9d9f34b6a..1cf77d9cf7 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c @@ -33,14 +33,14 @@ /* Series of transformations to be done on textures. */ static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler, - int tmu) + int tmu) { struct rc_src_register reg = { 0, }; if (compiler->enable_shadow_ambient) { reg.File = RC_FILE_CONSTANT; reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants, - RC_STATE_SHADOW_AMBIENT, tmu); + RC_STATE_SHADOW_AMBIENT, tmu); reg.Swizzle = RC_SWIZZLE_WWWW; } else { reg.File = RC_FILE_NONE; @@ -149,14 +149,11 @@ int radeonTransformTEX( return 1; } else { - rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func; struct rc_instruction * inst_rcp = NULL; - struct rc_instruction * inst_mad; - struct rc_instruction * inst_cmp; + struct rc_instruction *inst_mul, *inst_add, *inst_cmp; unsigned tmp_texsample; unsigned tmp_sum; - unsigned tmp_recip_w = 0; - int pass, fail, tex; + int pass, fail; /* Save the output register. */ struct rc_dst_register output_reg = inst->U.I.DstReg; @@ -167,63 +164,68 @@ int radeonTransformTEX( inst->U.I.DstReg.Index = tmp_texsample; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; - if (inst->U.I.Opcode == RC_OPCODE_TXP) { - tmp_recip_w = rc_find_free_temporary(c); + tmp_sum = rc_find_free_temporary(c); + if (inst->U.I.Opcode == RC_OPCODE_TXP) { /* Compute 1/W. */ inst_rcp = rc_insert_new_instruction(c, inst); inst_rcp->U.I.Opcode = RC_OPCODE_RCP; inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_rcp->U.I.DstReg.Index = tmp_recip_w; + inst_rcp->U.I.DstReg.Index = tmp_sum; inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; inst_rcp->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 3)); } - /* Perspective-divide Z by W (if it's TXP) and add the texture sample (see below). */ - tmp_sum = rc_find_free_temporary(c); - inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); - inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; - inst_mad->U.I.DstReg.Index = tmp_sum; - inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; - inst_mad->U.I.SrcReg[0].Swizzle = + /* Divide Z by W (if it's TXP) and saturate. */ + inst_mul = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst); + inst_mul->U.I.Opcode = inst->U.I.Opcode == RC_OPCODE_TXP ? RC_OPCODE_MUL : RC_OPCODE_MOV; + inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_mul->U.I.DstReg.Index = tmp_sum; + inst_mul->U.I.DstReg.WriteMask = RC_MASK_W; + inst_mul->U.I.SaturateMode = RC_SATURATE_ZERO_ONE; + inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0]; + inst_mul->U.I.SrcReg[0].Swizzle = RC_MAKE_SWIZZLE_SMEAR(GET_SWZ(inst->U.I.SrcReg[0].Swizzle, 2)); if (inst->U.I.Opcode == RC_OPCODE_TXP) { - inst_mad->U.I.Opcode = RC_OPCODE_MAD; - inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; - inst_mad->U.I.SrcReg[1].Index = tmp_recip_w; - inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; - tex = 2; - } else { - inst_mad->U.I.Opcode = RC_OPCODE_ADD; - tex = 1; - } - inst_mad->U.I.SrcReg[tex].File = RC_FILE_TEMPORARY; - inst_mad->U.I.SrcReg[tex].Index = tmp_texsample; - inst_mad->U.I.SrcReg[tex].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle; - - /* Fake EQUAL/NOTEQUAL, it seems to pass some tests suprisingly. */ - if (comparefunc == RC_COMPARE_FUNC_EQUAL) { - comparefunc = RC_COMPARE_FUNC_GEQUAL; - } else if (comparefunc == RC_COMPARE_FUNC_NOTEQUAL) { - comparefunc = RC_COMPARE_FUNC_LESS; + inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_mul->U.I.SrcReg[1].Index = tmp_sum; + inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; } - /* Recall that SrcReg[0] is r, SrcReg[tex] is tex and: + /* Add the depth texture value. */ + inst_add = rc_insert_new_instruction(c, inst_mul); + inst_add->U.I.Opcode = RC_OPCODE_ADD; + inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; + inst_add->U.I.DstReg.Index = tmp_sum; + inst_add->U.I.DstReg.WriteMask = RC_MASK_W; + inst_add->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[0].Index = tmp_sum; + inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; + inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; + inst_add->U.I.SrcReg[1].Index = tmp_texsample; + inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; + + /* Note that SrcReg[0] is r, SrcReg[1] is tex and: * LESS: r < tex <=> -tex+r < 0 * GEQUAL: r >= tex <=> not (-tex+r < 0) * GREATER: r > tex <=> tex-r < 0 * LEQUAL: r <= tex <=> not ( tex-r < 0) - * - * This negates either r or tex: */ - if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL) - inst_mad->U.I.SrcReg[tex].Negate = inst_mad->U.I.SrcReg[tex].Negate ^ RC_MASK_XYZW; + * EQUAL: GEQUAL + * NOTEQUAL:LESS + */ + + /* This negates either r or tex: */ + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL || + comparefunc == RC_COMPARE_FUNC_EQUAL || comparefunc == RC_COMPARE_FUNC_NOTEQUAL) + inst_add->U.I.SrcReg[1].Negate = inst_add->U.I.SrcReg[1].Negate ^ RC_MASK_XYZW; else - inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; + inst_add->U.I.SrcReg[0].Negate = inst_add->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW; /* This negates the whole expresion: */ - if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) { + if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER || + comparefunc == RC_COMPARE_FUNC_NOTEQUAL) { pass = 1; fail = 2; } else { @@ -231,16 +233,19 @@ int radeonTransformTEX( fail = 1; } - inst_cmp = rc_insert_new_instruction(c, inst_mad); + inst_cmp = rc_insert_new_instruction(c, inst_add); inst_cmp->U.I.Opcode = RC_OPCODE_CMP; inst_cmp->U.I.DstReg = output_reg; inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_cmp->U.I.SrcReg[0].Index = tmp_sum; + inst_cmp->U.I.SrcReg[0].Swizzle = + combine_swizzles(RC_SWIZZLE_WWWW, + compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle); inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE; inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111; inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit); - assert(tmp_texsample != tmp_sum && tmp_sum != tmp_recip_w); + assert(tmp_texsample != tmp_sum); } } @@ -420,17 +425,21 @@ int radeonTransformTEX( scale_texcoords(compiler, inst, RC_STATE_R300_TEXSCALE_FACTOR); } - /* Cannot write texture to output registers (all chips) or with masks (non-r500) */ + /* Cannot write texture to output registers or with saturate (all chips), + * or with masks (non-r500). */ if (inst->U.I.Opcode != RC_OPCODE_KIL && (inst->U.I.DstReg.File != RC_FILE_TEMPORARY || + inst->U.I.SaturateMode || (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) { struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst); inst_mov->U.I.Opcode = RC_OPCODE_MOV; + inst_mov->U.I.SaturateMode = inst->U.I.SaturateMode; inst_mov->U.I.DstReg = inst->U.I.DstReg; inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c); + inst->U.I.SaturateMode = 0; inst->U.I.DstReg.File = RC_FILE_TEMPORARY; inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c index 88165f7895..5bd19c0b9c 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_rename_regs.c @@ -51,6 +51,14 @@ void rc_rename_regs(struct radeon_compiler *c, void *user) struct rc_reader_data reader_data; unsigned char * used; + /* XXX Remove this once the register allocation works with flow control. */ + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; + inst = inst->Next) { + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) + return; + } + used_length = 2 * rc_recompute_ips(c); used = memory_pool_malloc(&c->Pool, sizeof(unsigned char) * used_length); memset(used, 0, sizeof(unsigned char) * used_length); diff --git a/src/mesa/drivers/dri/r300/r300_blit.c b/src/mesa/drivers/dri/r300/r300_blit.c index 9fd8e8fde5..14e60866d9 100644 --- a/src/mesa/drivers/dri/r300/r300_blit.c +++ b/src/mesa/drivers/dri/r300/r300_blit.c @@ -63,7 +63,6 @@ static void create_vertex_program(struct r300_context *r300) inst->U.I.Opcode = RC_OPCODE_MOV; inst->U.I.DstReg.File = RC_FILE_OUTPUT; inst->U.I.DstReg.Index = VERT_RESULT_HPOS; - inst->U.I.DstReg.RelAddr = 0; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; inst->U.I.SrcReg[0].Abs = 0; inst->U.I.SrcReg[0].File = RC_FILE_INPUT; @@ -76,7 +75,6 @@ static void create_vertex_program(struct r300_context *r300) inst->U.I.Opcode = RC_OPCODE_MOV; inst->U.I.DstReg.File = RC_FILE_OUTPUT; inst->U.I.DstReg.Index = VERT_RESULT_TEX0; - inst->U.I.DstReg.RelAddr = 0; inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; inst->U.I.SrcReg[0].Abs = 0; inst->U.I.SrcReg[0].File = RC_FILE_INPUT; @@ -131,6 +129,7 @@ static void create_fragment_program(struct r300_context *r300) compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32; compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32; compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64; + compiler.Base.max_tex_insts = compiler.Base.is_r500 ? 512 : 32; compiler.code = &r300->blit.fp_code; compiler.AllocateHwInputs = fp_allocate_hw_inputs; diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c index c288834d24..0d8bd4fc70 100644 --- a/src/mesa/drivers/dri/r300/r300_context.c +++ b/src/mesa/drivers/dri/r300/r300_context.c @@ -43,6 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/extensions.h" #include "main/bufferobj.h" #include "main/texobj.h" +#include "main/mfeatures.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index 4e457b51eb..a0a26f1b38 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -226,6 +226,7 @@ static void translate_fragment_program(struct gl_context *ctx, struct r300_fragm compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32; compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32; compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64; + compiler.Base.max_tex_insts = compiler.Base.is_r500 ? 512 : 32; compiler.OutputDepth = FRAG_RESULT_DEPTH; memset(compiler.OutputColor, 0, 4 * sizeof(unsigned)); compiler.OutputColor[0] = FRAG_RESULT_COLOR; diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index f7705b0f6f..2b9d85fae8 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1658,6 +1658,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 13) # define R300_PFS_CNTL_TEX_END_SHIFT 18 # define R300_PFS_CNTL_TEX_END_MASK (31 << 18) +# define R400_PFS_CNTL_TEX_OFFSET_MSB_SHIFT 24 +# define R400_PFS_CNTL_TEX_OFFSET_MSB_MASK (0xf << 24) +# define R400_PFS_CNTL_TEX_END_MSB_SHIFT 28 +# define R400_PFS_CNTL_TEX_END_MSB_MASK (0xf << 28) /* gap */ @@ -1682,6 +1686,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TEX_SIZE_MASK (31 << 17) # define R300_RGBA_OUT (1 << 22) # define R300_W_OUT (1 << 23) +# define R400_TEX_START_MSB_SHIFT 24 +# define R400_TEX_START_MSG_MASK (0xf << 24) +# define R400_TEX_SIZE_MSB_SHIFT 28 +# define R400_TEX_SIZE_MSG_MASK (0xf << 28) /* TEX * As far as I can tell, texture instructions cannot write into output @@ -1702,6 +1710,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TEX_OP_TXP 3 # define R300_TEX_OP_TXB 4 # define R300_TEX_INST_MASK (7 << 15) +# define R400_SRC_ADDR_EXT_BIT (1 << 19) +# define R400_DST_ADDR_EXT_BIT (1 << 20) /* Output format from the unfied shader */ #define R300_US_OUT_FMT 0x46A4 @@ -1979,6 +1989,40 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ALU_OUTA_CLAMP (1 << 30) /* END: Fragment program instruction set */ +/* R4xx extended fragment shader registers. */ +#define R400_US_ALU_EXT_ADDR_0 0x4ac0 /* up to 63 (0x4bbc) */ +# define R400_ADDR_EXT_RGB_MSB_BIT(x) (1 << (x)) +# define R400_ADDRD_EXT_RGB_MSB_BIT 0x08 +# define R400_ADDR_EXT_A_MSB_BIT(x) (1 << ((x) + 4)) +# define R400_ADDRD_EXT_A_MSB_BIT 0x80 + +#define R400_US_CODE_BANK 0x46b8 +# define R400_BANK_SHIFT 0 +# define R400_BANK_MASK 0xf +# define R400_R390_MODE_ENABLE (1 << 4) +#define R400_US_CODE_EXT 0x46bc +# define R400_ALU_OFFSET_MSB_SHIFT 0 +# define R400_ALU_OFFSET_MSB_MASK (0x7 << 0) +# define R400_ALU_SIZE_MSB_SHIFT 3 +# define R400_ALU_SIZE_MSB_MASK (0x7 << 3) +# define R400_ALU_START0_MSB_SHIFT 6 +# define R400_ALU_START0_MSB_MASK (0x7 << 6) +# define R400_ALU_SIZE0_MSB_SHIFT 9 +# define R400_ALU_SIZE0_MSB_MASK (0x7 << 9) +# define R400_ALU_START1_MSB_SHIFT 12 +# define R400_ALU_START1_MSB_MASK (0x7 << 12) +# define R400_ALU_SIZE1_MSB_SHIFT 15 +# define R400_ALU_SIZE1_MSB_MASK (0x7 << 15) +# define R400_ALU_START2_MSB_SHIFT 18 +# define R400_ALU_START2_MSB_MASK (0x7 << 18) +# define R400_ALU_SIZE2_MSB_SHIFT 21 +# define R400_ALU_SIZE2_MSB_MASK (0x7 << 21) +# define R400_ALU_START3_MSB_SHIFT 24 +# define R400_ALU_START3_MSB_MASK (0x7 << 24) +# define R400_ALU_SIZE3_MSB_SHIFT 27 +# define R400_ALU_SIZE3_MSB_MASK (0x7 << 27) +/* END: R4xx extended fragment shader registers. */ + /* Fog: Fog Blending Enable */ #define R300_FG_FOG_BLEND 0x4bc0 # define R300_FG_FOG_BLEND_DISABLE (0 << 0) diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index ab8c1df5f7..51989c6b22 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -214,18 +214,18 @@ static void r300SetBlendState(struct gl_context * ctx) (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT); int eqnA = R300_COMB_FCN_ADD_CLAMP; - if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) { + if (_mesa_rgba_logicop_enabled(ctx) || !ctx->Color.BlendEnabled) { r300SetBlendCntl(r300, func, eqn, 0, func, eqn); return; } func = - (blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE) << - R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.BlendDstRGB, + (blend_factor(ctx->Color.Blend[0].SrcRGB, GL_TRUE) << + R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.Blend[0].DstRGB, GL_FALSE) << R300_DST_BLEND_SHIFT); - switch (ctx->Color.BlendEquationRGB) { + switch (ctx->Color.Blend[0].EquationRGB) { case GL_FUNC_ADD: eqn = R300_COMB_FCN_ADD_CLAMP; break; @@ -253,17 +253,17 @@ static void r300SetBlendState(struct gl_context * ctx) default: fprintf(stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n", - __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB); + __FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationRGB); return; } funcA = - (blend_factor(ctx->Color.BlendSrcA, GL_TRUE) << - R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.BlendDstA, + (blend_factor(ctx->Color.Blend[0].SrcA, GL_TRUE) << + R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.Blend[0].DstA, GL_FALSE) << R300_DST_BLEND_SHIFT); - switch (ctx->Color.BlendEquationA) { + switch (ctx->Color.Blend[0].EquationA) { case GL_FUNC_ADD: eqnA = R300_COMB_FCN_ADD_CLAMP; break; @@ -291,7 +291,7 @@ static void r300SetBlendState(struct gl_context * ctx) default: fprintf(stderr, "[%s:%u] Invalid A blend equation (0x%04x).\n", - __FUNCTION__, __LINE__, ctx->Color.BlendEquationA); + __FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationA); return; } @@ -335,7 +335,7 @@ static void r300SetLogicOpState(struct gl_context *ctx) { r300ContextPtr r300 = R300_CONTEXT(ctx); R300_STATECHANGE(r300, rop); - if (RGBA_LOGICOP_ENABLED(ctx)) { + if (_mesa_rgba_logicop_enabled(ctx)) { r300->hw.rop.cmd[1] = R300_RB3D_ROPCNTL_ROP_ENABLE | translate_logicop(ctx->Color.LogicOp); } else { @@ -349,7 +349,7 @@ static void r300SetLogicOpState(struct gl_context *ctx) */ static void r300LogicOpcode(struct gl_context *ctx, GLenum logicop) { - if (RGBA_LOGICOP_ENABLED(ctx)) + if (_mesa_rgba_logicop_enabled(ctx)) r300SetLogicOpState(ctx); } diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c index de66293999..f930b4d06b 100644 --- a/src/mesa/drivers/dri/r300/r300_tex.c +++ b/src/mesa/drivers/dri/r300/r300_tex.c @@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/context.h" #include "main/enums.h" #include "main/image.h" +#include "main/mfeatures.h" #include "main/mipmap.h" #include "main/simple_list.h" #include "main/texstore.h" diff --git a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c index 471a3723cb..232603ece5 100644 --- a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c +++ b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c @@ -128,7 +128,6 @@ static void translate_dstreg(struct rc_dst_register * dest, struct prog_dst_regi { dest->File = translate_register_file(src->File); dest->Index = src->Index; - dest->RelAddr = src->RelAddr; dest->WriteMask = src->WriteMask; } diff --git a/src/mesa/drivers/dri/r600/evergreen_blit.c b/src/mesa/drivers/dri/r600/evergreen_blit.c index fc9fa9d22c..e07da8c15b 100644 --- a/src/mesa/drivers/dri/r600/evergreen_blit.c +++ b/src/mesa/drivers/dri/r600/evergreen_blit.c @@ -1406,9 +1406,95 @@ eg_set_default_state(context_t *context) num_hs_stack_entries = 85; num_ls_stack_entries = 85; break; + case CHIP_FAMILY_PALM: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 96; + num_vs_threads = 16; + num_gs_threads = 16; + num_es_threads = 16; + num_hs_threads = 16; + num_ls_threads = 16; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_FAMILY_BARTS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 85; + num_vs_stack_entries = 85; + num_gs_stack_entries = 85; + num_es_stack_entries = 85; + num_hs_stack_entries = 85; + num_ls_stack_entries = 85; + break; + case CHIP_FAMILY_TURKS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 20; + num_gs_threads = 20; + num_es_threads = 20; + num_hs_threads = 20; + num_ls_threads = 20; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; + case CHIP_FAMILY_CAICOS: + num_ps_gprs = 93; + num_vs_gprs = 46; + num_temp_gprs = 4; + num_gs_gprs = 31; + num_es_gprs = 31; + num_hs_gprs = 23; + num_ls_gprs = 23; + num_ps_threads = 128; + num_vs_threads = 10; + num_gs_threads = 10; + num_es_threads = 10; + num_hs_threads = 10; + num_ls_threads = 10; + num_ps_stack_entries = 42; + num_vs_stack_entries = 42; + num_gs_stack_entries = 42; + num_es_stack_entries = 42; + num_hs_stack_entries = 42; + num_ls_stack_entries = 42; + break; } - if (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_CEDAR) + if ((context->radeon.radeonScreen->chip_family == CHIP_FAMILY_CEDAR) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_PALM) || + (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_CAICOS)) CLEARbit(sq_config, EG_SQ_CONFIG__VC_ENABLE_bit); else SETbit(sq_config, EG_SQ_CONFIG__VC_ENABLE_bit); diff --git a/src/mesa/drivers/dri/r600/evergreen_fragprog.c b/src/mesa/drivers/dri/r600/evergreen_fragprog.c index cfb923efdd..e527c379b6 100644 --- a/src/mesa/drivers/dri/r600/evergreen_fragprog.c +++ b/src/mesa/drivers/dri/r600/evergreen_fragprog.c @@ -624,10 +624,7 @@ GLboolean evergreenSetupFragmentProgram(struct gl_context * ctx) SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); - if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) - SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); - else - CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } unBit = 1 << VERT_RESULT_COL0; @@ -663,10 +660,7 @@ GLboolean evergreenSetupFragmentProgram(struct gl_context * ctx) SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); - if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) - SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); - else - CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } for(i=0; i<8; i++) @@ -694,10 +688,7 @@ GLboolean evergreenSetupFragmentProgram(struct gl_context * ctx) SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); - if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) - SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); - else - CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } unBit = 1 << FRAG_ATTRIB_PNTC; if(mesa_fp->Base.InputsRead & unBit) @@ -706,10 +697,7 @@ GLboolean evergreenSetupFragmentProgram(struct gl_context * ctx) SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); - if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) - SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); - else - CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit); } @@ -725,10 +713,7 @@ GLboolean evergreenSetupFragmentProgram(struct gl_context * ctx) SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); - if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) - SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); - else - CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } } diff --git a/src/mesa/drivers/dri/r600/evergreen_state.c b/src/mesa/drivers/dri/r600/evergreen_state.c index 076a608573..309c93fe08 100644 --- a/src/mesa/drivers/dri/r600/evergreen_state.c +++ b/src/mesa/drivers/dri/r600/evergreen_state.c @@ -32,6 +32,7 @@ #include "main/context.h" #include "main/dd.h" #include "main/simple_list.h" +#include "main/state.h" #include "tnl/tnl.h" #include "tnl/t_pipeline.h" @@ -342,7 +343,7 @@ static void evergreenSetBlendState(struct gl_context * ctx) //diff : CB_COLOR_CO EVERGREEN_STATECHANGE(context, cb); - if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) { + if (_mesa_rgba_logicop_enabled(ctx) || !ctx->Color.BlendEnabled) { SETfield(blend_reg, BLEND_ONE, COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask); SETfield(blend_reg, @@ -363,13 +364,13 @@ static void evergreenSetBlendState(struct gl_context * ctx) //diff : CB_COLOR_CO } SETfield(blend_reg, - evergreenblend_factor(ctx->Color.BlendSrcRGB, GL_TRUE), + evergreenblend_factor(ctx->Color.Blend[0].SrcRGB, GL_TRUE), COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask); SETfield(blend_reg, - evergreenblend_factor(ctx->Color.BlendDstRGB, GL_FALSE), + evergreenblend_factor(ctx->Color.Blend[0].DstRGB, GL_FALSE), COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask); - switch (ctx->Color.BlendEquationRGB) { + switch (ctx->Color.Blend[0].EquationRGB) { case GL_FUNC_ADD: eqn = COMB_DST_PLUS_SRC; break; @@ -401,20 +402,20 @@ static void evergreenSetBlendState(struct gl_context * ctx) //diff : CB_COLOR_CO default: fprintf(stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n", - __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB); + __FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationRGB); return; } SETfield(blend_reg, eqn, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask); SETfield(blend_reg, - evergreenblend_factor(ctx->Color.BlendSrcA, GL_TRUE), + evergreenblend_factor(ctx->Color.Blend[0].SrcA, GL_TRUE), ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask); SETfield(blend_reg, - evergreenblend_factor(ctx->Color.BlendDstA, GL_FALSE), + evergreenblend_factor(ctx->Color.Blend[0].DstA, GL_FALSE), ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask); - switch (ctx->Color.BlendEquationA) { + switch (ctx->Color.Blend[0].EquationA) { case GL_FUNC_ADD: eqnA = COMB_DST_PLUS_SRC; break; @@ -445,7 +446,7 @@ static void evergreenSetBlendState(struct gl_context * ctx) //diff : CB_COLOR_CO default: fprintf(stderr, "[%s:%u] Invalid A blend equation (0x%04x).\n", - __FUNCTION__, __LINE__, ctx->Color.BlendEquationA); + __FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationA); return; } @@ -520,7 +521,7 @@ static void evergreenSetLogicOpState(struct gl_context *ctx) //diff : CB_COLOR_C EVERGREEN_STATECHANGE(context, cb); - if (RGBA_LOGICOP_ENABLED(ctx)) + if (_mesa_rgba_logicop_enabled(ctx)) SETfield(evergreen->CB_COLOR_CONTROL.u32All, evergreen_translate_logicop(ctx->Color.LogicOp), EG_CB_COLOR_CONTROL__ROP3_shift, @@ -1148,7 +1149,7 @@ static void evergreenShadeModel(struct gl_context * ctx, GLenum mode) //same static void evergreenLogicOpcode(struct gl_context *ctx, GLenum logicop) //diff { - if (RGBA_LOGICOP_ENABLED(ctx)) + if (_mesa_rgba_logicop_enabled(ctx)) evergreenSetLogicOpState(ctx); } @@ -1469,6 +1470,30 @@ static void evergreenInitSQConfig(struct gl_context * ctx) uMaxThreads = 192; uMaxStackEntries = 256; break; + case CHIP_FAMILY_BARTS: + uSqNumCfInsts = 2; + bVC_ENABLE = GL_TRUE; + uMaxGPRs = 256; + uPSThreadCount = 128; + uMaxThreads = 248; + uMaxStackEntries = 512; + break; + case CHIP_FAMILY_TURKS: + uSqNumCfInsts = 2; + bVC_ENABLE = GL_TRUE; + uMaxGPRs = 256; + uPSThreadCount = 128; + uMaxThreads = 248; + uMaxStackEntries = 256; + break; + case CHIP_FAMILY_CAICOS: + uSqNumCfInsts = 1; + bVC_ENABLE = GL_FALSE; + uMaxGPRs = 256; + uPSThreadCount = 128; + uMaxThreads = 192; + uMaxStackEntries = 256; + break; default: uSqNumCfInsts = 2; bVC_ENABLE = GL_TRUE; diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c index aa1891eac3..00708be199 100644 --- a/src/mesa/drivers/dri/r600/r600_context.c +++ b/src/mesa/drivers/dri/r600/r600_context.c @@ -44,6 +44,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/bufferobj.h" #include "main/texobj.h" #include "main/points.h" +#include "main/mfeatures.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" @@ -259,7 +260,7 @@ static void r600InitConstValues(struct gl_context *ctx, radeonScreenPtr screen) R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw); if( (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_CEDAR) - &&(context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_PALM) ) + &&(context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_CAICOS) ) { r700->bShaderUseMemConstant = GL_TRUE; } diff --git a/src/mesa/drivers/dri/r600/r600_tex.c b/src/mesa/drivers/dri/r600/r600_tex.c index c3d68c41e5..fe4f0e4866 100644 --- a/src/mesa/drivers/dri/r600/r600_tex.c +++ b/src/mesa/drivers/dri/r600/r600_tex.c @@ -38,6 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/context.h" #include "main/enums.h" #include "main/image.h" +#include "main/mfeatures.h" #include "main/mipmap.h" #include "main/simple_list.h" #include "main/texstore.h" @@ -264,9 +265,9 @@ static void r600SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloa static void r600SetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4]) { t->TD_PS_SAMPLER0_BORDER_ALPHA = *((uint32_t*)&(color[3])); - t->TD_PS_SAMPLER0_BORDER_RED = *((uint32_t*)&(color[2])); + t->TD_PS_SAMPLER0_BORDER_BLUE = *((uint32_t*)&(color[2])); t->TD_PS_SAMPLER0_BORDER_GREEN = *((uint32_t*)&(color[1])); - t->TD_PS_SAMPLER0_BORDER_BLUE = *((uint32_t*)&(color[0])); + t->TD_PS_SAMPLER0_BORDER_RED = *((uint32_t*)&(color[0])); SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_BORDER_COLOR_REGISTER, BORDER_COLOR_TYPE_shift, BORDER_COLOR_TYPE_mask); } diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 1fa559cec1..024853c1be 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -481,6 +481,8 @@ unsigned int EG_GetNumOperands(GLuint opcode, GLuint nIsOp3) case EG_OP2_INST_FLT_TO_INT: case EG_OP2_INST_SIN: case EG_OP2_INST_COS: + case EG_OP2_INST_FLT_TO_INT_FLOOR: + case EG_OP2_INST_MOVA_INT: return 1; default: radeon_error( @@ -1134,7 +1136,7 @@ GLboolean EG_assemble_vfetch_instruction(r700_AssemblerBase* pAsm, EG_VTX_WORD1__DST_SEL_W_shift, EG_VTX_WORD1__DST_SEL_W_mask); - SETfield(vfetch_instruction_ptr->m_Word1.val, 0, /* use format here, in r6/r7, format used set in const, need to use same */ + SETfield(vfetch_instruction_ptr->m_Word1.val, 1, EG_VTX_WORD1__UCF_shift, EG_VTX_WORD1__UCF_bit); SETfield(vfetch_instruction_ptr->m_Word1.val, data_format, @@ -3297,23 +3299,76 @@ GLboolean assemble_ARL(r700_AssemblerBase *pAsm) return GL_FALSE; } - pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR; - setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); - pAsm->D.dst.rtype = DST_REG_TEMPORARY; - pAsm->D.dst.reg = 0; - pAsm->D.dst.writex = 0; - pAsm->D.dst.writey = 0; - pAsm->D.dst.writez = 0; - pAsm->D.dst.writew = 0; - - if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + if(8 == pAsm->unAsic) { - return GL_FALSE; - } + /* Evergreen */ - if( GL_FALSE == next_ins(pAsm) ) + /* Float to Signed Integer Using FLOOR */ + pAsm->D.dst.opcode = EG_OP2_INST_FLT_TO_INT_FLOOR; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = 0; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + /* Copy Signed Integer To Integer in AR and GPR */ + pAsm->D.dst.opcode = EG_OP2_INST_MOVA_INT; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = 0; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + } + else { - return GL_FALSE; + /* r6xx/r7xx */ + + /* Truncate floating-point to the nearest integer + in the range [-256, +255], and copy to AR and + to a GPR. + */ + pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = 0; + pAsm->D.dst.writex = 0; + pAsm->D.dst.writey = 0; + pAsm->D.dst.writez = 0; + pAsm->D.dst.writew = 0; + + if( GL_FALSE == assemble_src(pAsm, 0, -1) ) + { + return GL_FALSE; + } + + if( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } } return GL_TRUE; diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c index 2a6a39dfba..40494cd6af 100644 --- a/src/mesa/drivers/dri/r600/r700_fragprog.c +++ b/src/mesa/drivers/dri/r600/r700_fragprog.c @@ -47,13 +47,13 @@ void insert_wpos_code(struct gl_context *ctx, struct gl_fragment_program *fprog) { static const gl_state_index winstate[STATE_LENGTH] - = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0}; + = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0}; struct prog_instruction *newInst, *inst; GLint win_size; /* state reference */ GLuint wpos_temp; /* temp register */ int i, j; - /* PARAM win_size = STATE_FB_SIZE */ + /* PARAM win_size = STATE_FB_WPOS_Y_TRANSFORM */ win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate); wpos_temp = fprog->Base.NumTemporaries++; @@ -74,9 +74,8 @@ void insert_wpos_code(struct gl_context *ctx, struct gl_fragment_program *fprog) _mesa_insert_instructions(&(fprog->Base), 0, 1); newInst = fprog->Base.Instructions; - /* invert wpos.y - * wpos_temp.xyzw = wpos.x-yzw + winsize.0y00 */ - newInst[0].Opcode = OPCODE_ADD; + /* possibly invert wpos.y depending on STATE_FB_WPOS_Y_TRANSFORM var */ + newInst[0].Opcode = OPCODE_MAD; newInst[0].DstReg.File = PROGRAM_TEMPORARY; newInst[0].DstReg.Index = wpos_temp; newInst[0].DstReg.WriteMask = WRITEMASK_XYZW; @@ -84,11 +83,14 @@ void insert_wpos_code(struct gl_context *ctx, struct gl_fragment_program *fprog) newInst[0].SrcReg[0].File = PROGRAM_INPUT; newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS; newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW; - newInst[0].SrcReg[0].Negate = NEGATE_Y; newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR; newInst[0].SrcReg[1].Index = win_size; - newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO); + newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE); + + newInst[0].SrcReg[2].File = PROGRAM_STATE_VAR; + newInst[0].SrcReg[2].Index = win_size; + newInst[0].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO); } @@ -509,6 +511,7 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx) unsigned int ui, i; unsigned int unNumOfReg; unsigned int unBit; + unsigned int num_sq_ps_gprs; GLuint exportCount; GLboolean point_sprite = GL_FALSE; @@ -619,6 +622,15 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx) SETfield(r700->ps.SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask); + num_sq_ps_gprs = ((r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All & NUM_PS_GPRS_mask) >> NUM_PS_GPRS_shift); + + if(ui > num_sq_ps_gprs) + { + /* care! thich changes sq - needs idle state */ + R600_STATECHANGE(context, sq); + SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, ui, NUM_PS_GPRS_shift, NUM_PS_GPRS_mask); + } + CLEARbit(r700->ps.SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit); if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */ @@ -645,10 +657,7 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx) SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); - if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) - SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); - else - CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } unBit = 1 << VERT_RESULT_COL0; @@ -684,10 +693,7 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx) SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); - if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) - SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); - else - CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } for(i=0; i<8; i++) @@ -716,10 +722,7 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx) SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); - if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) - SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); - else - CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } unBit = 1 << FRAG_ATTRIB_PNTC; if(mesa_fp->Base.InputsRead & unBit) @@ -747,10 +750,7 @@ GLboolean r700SetupFragmentProgram(struct gl_context * ctx) SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit); SETfield(r700->SPI_PS_INPUT_CNTL[ui].u32All, ui, SEMANTIC_shift, SEMANTIC_mask); - if (r700->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit) - SETbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); - else - CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); + CLEARbit(r700->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit); } } diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c index bd04a633b4..4d285b3648 100644 --- a/src/mesa/drivers/dri/r600/r700_state.c +++ b/src/mesa/drivers/dri/r600/r700_state.c @@ -32,6 +32,7 @@ #include "main/context.h" #include "main/dd.h" #include "main/simple_list.h" +#include "main/state.h" #include "tnl/tnl.h" #include "tnl/t_pipeline.h" @@ -453,7 +454,7 @@ static void r700SetBlendState(struct gl_context * ctx) R600_STATECHANGE(context, blnd); - if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) { + if (_mesa_rgba_logicop_enabled(ctx) || !ctx->Color.BlendEnabled) { SETfield(blend_reg, BLEND_ONE, COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask); SETfield(blend_reg, @@ -474,13 +475,13 @@ static void r700SetBlendState(struct gl_context * ctx) } SETfield(blend_reg, - blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE), + blend_factor(ctx->Color.Blend[0].SrcRGB, GL_TRUE), COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask); SETfield(blend_reg, - blend_factor(ctx->Color.BlendDstRGB, GL_FALSE), + blend_factor(ctx->Color.Blend[0].DstRGB, GL_FALSE), COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask); - switch (ctx->Color.BlendEquationRGB) { + switch (ctx->Color.Blend[0].EquationRGB) { case GL_FUNC_ADD: eqn = COMB_DST_PLUS_SRC; break; @@ -512,20 +513,20 @@ static void r700SetBlendState(struct gl_context * ctx) default: fprintf(stderr, "[%s:%u] Invalid RGB blend equation (0x%04x).\n", - __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB); + __FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationRGB); return; } SETfield(blend_reg, eqn, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask); SETfield(blend_reg, - blend_factor(ctx->Color.BlendSrcA, GL_TRUE), + blend_factor(ctx->Color.Blend[0].SrcA, GL_TRUE), ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask); SETfield(blend_reg, - blend_factor(ctx->Color.BlendDstA, GL_FALSE), + blend_factor(ctx->Color.Blend[0].DstA, GL_FALSE), ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask); - switch (ctx->Color.BlendEquationA) { + switch (ctx->Color.Blend[0].EquationA) { case GL_FUNC_ADD: eqnA = COMB_DST_PLUS_SRC; break; @@ -556,7 +557,7 @@ static void r700SetBlendState(struct gl_context * ctx) default: fprintf(stderr, "[%s:%u] Invalid A blend equation (0x%04x).\n", - __FUNCTION__, __LINE__, ctx->Color.BlendEquationA); + __FUNCTION__, __LINE__, ctx->Color.Blend[0].EquationA); return; } @@ -644,7 +645,7 @@ static void r700SetLogicOpState(struct gl_context *ctx) R600_STATECHANGE(context, blnd); - if (RGBA_LOGICOP_ENABLED(ctx)) + if (_mesa_rgba_logicop_enabled(ctx)) SETfield(r700->CB_COLOR_CONTROL.u32All, translate_logicop(ctx->Color.LogicOp), ROP3_shift, ROP3_mask); else @@ -657,7 +658,7 @@ static void r700SetLogicOpState(struct gl_context *ctx) */ static void r700LogicOpcode(struct gl_context *ctx, GLenum logicop) { - if (RGBA_LOGICOP_ENABLED(ctx)) + if (_mesa_rgba_logicop_enabled(ctx)) r700SetLogicOpState(ctx); } diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 7ba49d8f98..7d4be9180a 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -605,6 +605,7 @@ GLboolean r700SetupVertexProgram(struct gl_context * ctx) struct gl_program_parameter_list *paramList; unsigned int unNumParamData; unsigned int ui; + unsigned int num_sq_vs_gprs; if(GL_FALSE == vp->loaded) { @@ -656,6 +657,16 @@ GLboolean r700SetupVertexProgram(struct gl_context * ctx) SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1, NUM_GPRS_shift, NUM_GPRS_mask); + num_sq_vs_gprs = ((r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All & NUM_VS_GPRS_mask) >> NUM_VS_GPRS_shift); + + if((vp->r700Shader.nRegs + 1) > num_sq_vs_gprs) + { + /* care! thich changes sq - needs idle state */ + R600_STATECHANGE(context, sq); + SETfield(r700->sq_config.SQ_GPR_RESOURCE_MGMT_1.u32All, vp->r700Shader.nRegs + 1, + NUM_VS_GPRS_shift, NUM_VS_GPRS_mask); + } + if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */ { SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize, diff --git a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h index 6c2648b6bd..60f1049602 100644 --- a/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h +++ b/src/mesa/drivers/dri/radeon/radeon_bocs_wrapper.h @@ -67,6 +67,9 @@ struct drm_radeon_info { #define DRM_RADEON_INFO 0x1 #endif +static inline void radeon_gem_get_kernel_name(struct radeon_bo *dummy, uint32_t *value) +{ +} static inline uint32_t radeon_gem_name_bo(struct radeon_bo *dummy) { diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h index 82789cec5e..bd6f1c7950 100644 --- a/src/mesa/drivers/dri/radeon/radeon_chipset.h +++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h @@ -34,7 +34,6 @@ #define PCI_CHIP_RV350_AV 0x4156 #define PCI_CHIP_RS250_4237 0x4237 #define PCI_CHIP_R200_BB 0x4242 -#define PCI_CHIP_R200_BC 0x4243 #define PCI_CHIP_RS100_4336 0x4336 #define PCI_CHIP_RS200_4337 0x4337 #define PCI_CHIP_RS250_4437 0x4437 @@ -445,6 +444,45 @@ #define PCI_CHIP_PALM_9804 0x9804 #define PCI_CHIP_PALM_9805 0x9805 +#define PCI_CHIP_BARTS_6720 0x6720 +#define PCI_CHIP_BARTS_6721 0x6721 +#define PCI_CHIP_BARTS_6722 0x6722 +#define PCI_CHIP_BARTS_6723 0x6723 +#define PCI_CHIP_BARTS_6724 0x6724 +#define PCI_CHIP_BARTS_6725 0x6725 +#define PCI_CHIP_BARTS_6726 0x6726 +#define PCI_CHIP_BARTS_6727 0x6727 +#define PCI_CHIP_BARTS_6728 0x6728 +#define PCI_CHIP_BARTS_6729 0x6729 +#define PCI_CHIP_BARTS_6738 0x6738 +#define PCI_CHIP_BARTS_6739 0x6739 + +#define PCI_CHIP_TURKS_6740 0x6740 +#define PCI_CHIP_TURKS_6741 0x6741 +#define PCI_CHIP_TURKS_6742 0x6742 +#define PCI_CHIP_TURKS_6743 0x6743 +#define PCI_CHIP_TURKS_6744 0x6744 +#define PCI_CHIP_TURKS_6745 0x6745 +#define PCI_CHIP_TURKS_6746 0x6746 +#define PCI_CHIP_TURKS_6747 0x6747 +#define PCI_CHIP_TURKS_6748 0x6748 +#define PCI_CHIP_TURKS_6749 0x6749 +#define PCI_CHIP_TURKS_6750 0x6750 +#define PCI_CHIP_TURKS_6758 0x6758 +#define PCI_CHIP_TURKS_6759 0x6759 + +#define PCI_CHIP_CAICOS_6760 0x6760 +#define PCI_CHIP_CAICOS_6761 0x6761 +#define PCI_CHIP_CAICOS_6762 0x6762 +#define PCI_CHIP_CAICOS_6763 0x6763 +#define PCI_CHIP_CAICOS_6764 0x6764 +#define PCI_CHIP_CAICOS_6765 0x6765 +#define PCI_CHIP_CAICOS_6766 0x6766 +#define PCI_CHIP_CAICOS_6767 0x6767 +#define PCI_CHIP_CAICOS_6768 0x6768 +#define PCI_CHIP_CAICOS_6770 0x6770 +#define PCI_CHIP_CAICOS_6779 0x6779 + enum { CHIP_FAMILY_R100, CHIP_FAMILY_RV100, @@ -489,6 +527,9 @@ enum { CHIP_FAMILY_CYPRESS, CHIP_FAMILY_HEMLOCK, CHIP_FAMILY_PALM, + CHIP_FAMILY_BARTS, + CHIP_FAMILY_TURKS, + CHIP_FAMILY_CAICOS, CHIP_FAMILY_LAST }; diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 7361adffcf..0d73c0e3b1 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -905,7 +905,7 @@ void radeon_viewport(struct gl_context *ctx, GLint x, GLint y, GLsizei width, GL if (!driContext->driScreenPriv->dri2.enabled) return; - if (!radeon->meta.internal_viewport_call && ctx->DrawBuffer->Name == 0) { + if (ctx->DrawBuffer->Name == 0) { if (radeon->is_front_buffer_rendering) { ctx->Driver.Flush(ctx); } diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index ca6ab46ca4..405aecb19e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -100,6 +100,9 @@ static const char* get_chip_family_name(int chip_family) case CHIP_FAMILY_CYPRESS: return "CYPRESS"; case CHIP_FAMILY_HEMLOCK: return "HEMLOCK"; case CHIP_FAMILY_PALM: return "PALM"; + case CHIP_FAMILY_BARTS: return "BARTS"; + case CHIP_FAMILY_TURKS: return "TURKS"; + case CHIP_FAMILY_CAICOS: return "CAICOS"; default: return "unknown"; } } @@ -201,7 +204,7 @@ GLboolean radeonInitContext(radeonContextPtr radeon, shareCtx = ((radeonContextPtr)sharedContextPrivate)->glCtx; else shareCtx = NULL; - radeon->glCtx = _mesa_create_context(glVisual, shareCtx, + radeon->glCtx = _mesa_create_context(API_OPENGL, glVisual, shareCtx, functions, (void *)radeon); if (!radeon->glCtx) return GL_FALSE; @@ -209,8 +212,6 @@ GLboolean radeonInitContext(radeonContextPtr radeon, ctx = radeon->glCtx; driContextPriv->driverPrivate = radeon; - meta_init_metaops(ctx, &radeon->meta); - _mesa_meta_init(ctx); /* DRI fields */ @@ -317,7 +318,6 @@ void radeonDestroyContext(__DRIcontext *driContextPriv ) radeonFreeDmaRegions(radeon); radeonReleaseArrays(radeon->glCtx, ~0); - meta_destroy_metaops(&radeon->meta); if (radeon->vtbl.free_context) radeon->vtbl.free_context(radeon->glCtx); _swsetup_DestroyContext( radeon->glCtx ); @@ -529,7 +529,7 @@ void radeon_prepare_render(radeonContextPtr radeon) /* Intel driver does the equivalent of this, no clue if it is needed:*/ draw = drawable->driverPrivate; - radeon_draw_buffer(radeon->glCtx, &draw->base); + radeon_draw_buffer(radeon->glCtx, radeon->glCtx->DrawBuffer); driContext->dri2.draw_stamp = drawable->dri2.stamp; } diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index c62913afd0..3895ab8c4d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -14,7 +14,6 @@ #include "dri_util.h" #include "tnl/t_vertex.h" -#include "dri_metaops.h" struct radeon_context; #include "radeon_bocs_wrapper.h" @@ -509,8 +508,6 @@ struct radeon_context { */ GLboolean is_front_buffer_reading; - struct dri_metaops meta; - struct { struct radeon_query_object *current; struct radeon_state_atom queryobj; diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c index e3de534b5f..154a8815e4 100644 --- a/src/mesa/drivers/dri/radeon/radeon_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_context.c @@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/simple_list.h" #include "main/imports.h" #include "main/extensions.h" +#include "main/mfeatures.h" #include "swrast/swrast.h" #include "swrast_setup/swrast_setup.h" diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c index a36a1dc94a..d3c9257fb6 100644 --- a/src/mesa/drivers/dri/radeon/radeon_fbo.c +++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c @@ -28,6 +28,7 @@ #include "main/imports.h" #include "main/macros.h" +#include "main/mfeatures.h" #include "main/mtypes.h" #include "main/enums.h" #include "main/fbobject.h" @@ -484,6 +485,8 @@ radeon_update_wrapper(struct gl_context *ctx, struct radeon_renderbuffer *rrb, case MESA_FORMAT_S8_Z24: rrb->base.DataType = GL_UNSIGNED_INT_24_8_EXT; break; + default: + _mesa_problem(ctx, "Unexpected texture format in radeon_update_wrapper()"); } rrb->cpp = _mesa_get_format_bytes(texImage->TexFormat); diff --git a/src/mesa/drivers/dri/radeon/radeon_queryobj.c b/src/mesa/drivers/dri/radeon/radeon_queryobj.c index a45ca7cad0..cc395e9eab 100644 --- a/src/mesa/drivers/dri/radeon/radeon_queryobj.c +++ b/src/mesa/drivers/dri/radeon/radeon_queryobj.c @@ -38,7 +38,7 @@ static void radeonQueryGetResult(struct gl_context *ctx, struct gl_query_object radeonContextPtr radeon = RADEON_CONTEXT(ctx); struct radeon_query_object *query = (struct radeon_query_object *)q; uint32_t *result; - int i; + int i, max_idx; radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s: query id %d, result %d\n", @@ -56,7 +56,11 @@ static void radeonQueryGetResult(struct gl_context *ctx, struct gl_query_object * hw writes zpass end counts to qwords 1, 3, 5, 7. * then we substract. MSB is the valid bit. */ - for (i = 0; i < 32; i += 4) { + if (radeon->radeonScreen->chip_family >= CHIP_FAMILY_CEDAR) + max_idx = 8 * 4; /* 8 DB's */ + else + max_idx = 4 * 4; /* 4 DB's for r600, r700 */ + for (i = 0; i < max_idx; i += 4) { uint64_t start = (uint64_t)LE32_TO_CPU(result[i]) | (uint64_t)LE32_TO_CPU(result[i + 1]) << 32; uint64_t end = (uint64_t)LE32_TO_CPU(result[i + 2]) | diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 94e56c2ade..56c5959b0a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -628,7 +628,6 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) break; case PCI_CHIP_R200_BB: - case PCI_CHIP_R200_BC: case PCI_CHIP_R200_QH: case PCI_CHIP_R200_QL: case PCI_CHIP_R200_QM: @@ -1163,6 +1162,54 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) screen->chip_flags = RADEON_CHIPSET_TCL; break; + case PCI_CHIP_BARTS_6720: + case PCI_CHIP_BARTS_6721: + case PCI_CHIP_BARTS_6722: + case PCI_CHIP_BARTS_6723: + case PCI_CHIP_BARTS_6724: + case PCI_CHIP_BARTS_6725: + case PCI_CHIP_BARTS_6726: + case PCI_CHIP_BARTS_6727: + case PCI_CHIP_BARTS_6728: + case PCI_CHIP_BARTS_6729: + case PCI_CHIP_BARTS_6738: + case PCI_CHIP_BARTS_6739: + screen->chip_family = CHIP_FAMILY_BARTS; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_TURKS_6740: + case PCI_CHIP_TURKS_6741: + case PCI_CHIP_TURKS_6742: + case PCI_CHIP_TURKS_6743: + case PCI_CHIP_TURKS_6744: + case PCI_CHIP_TURKS_6745: + case PCI_CHIP_TURKS_6746: + case PCI_CHIP_TURKS_6747: + case PCI_CHIP_TURKS_6748: + case PCI_CHIP_TURKS_6749: + case PCI_CHIP_TURKS_6750: + case PCI_CHIP_TURKS_6758: + case PCI_CHIP_TURKS_6759: + screen->chip_family = CHIP_FAMILY_TURKS; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + + case PCI_CHIP_CAICOS_6760: + case PCI_CHIP_CAICOS_6761: + case PCI_CHIP_CAICOS_6762: + case PCI_CHIP_CAICOS_6763: + case PCI_CHIP_CAICOS_6764: + case PCI_CHIP_CAICOS_6765: + case PCI_CHIP_CAICOS_6766: + case PCI_CHIP_CAICOS_6767: + case PCI_CHIP_CAICOS_6768: + case PCI_CHIP_CAICOS_6770: + case PCI_CHIP_CAICOS_6779: + screen->chip_family = CHIP_FAMILY_CAICOS; + screen->chip_flags = RADEON_CHIPSET_TCL; + break; + default: fprintf(stderr, "unknown chip id 0x%x, can't guess.\n", device_id); diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c index 1c5326fe9d..caf3f253d2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_span.c +++ b/src/mesa/drivers/dri/radeon/radeon_span.c @@ -60,7 +60,7 @@ static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb); static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb, GLint x, GLint y) { - GLubyte *ptr = rrb->bo->ptr; + GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset; GLint offset; if (rrb->has_surface) { offset = x * rrb->cpp + y * rrb->pitch; @@ -85,7 +85,7 @@ static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb, static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb, GLint x, GLint y) { - GLubyte *ptr = rrb->bo->ptr; + GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset; GLint offset; if (rrb->has_surface) { offset = x * rrb->cpp + y * rrb->pitch; @@ -439,7 +439,7 @@ static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb, static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb, GLint x, GLint y) { - GLubyte *ptr = rrb->bo->ptr; + GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset; uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; GLint offset; @@ -479,7 +479,7 @@ static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb, static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb, GLint x, GLint y) { - GLubyte *ptr = rrb->bo->ptr; + GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset; uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; GLint offset; diff --git a/src/mesa/drivers/dri/radeon/radeon_state.c b/src/mesa/drivers/dri/radeon/radeon_state.c index cae12f192c..a93e61870a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state.c +++ b/src/mesa/drivers/dri/radeon/radeon_state.c @@ -40,6 +40,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/context.h" #include "main/framebuffer.h" #include "main/simple_list.h" +#include "main/state.h" #include "vbo/vbo.h" #include "tnl/tnl.h" @@ -136,7 +137,7 @@ static void radeonBlendEquationSeparate( struct gl_context *ctx, RADEON_STATECHANGE( rmesa, ctx ); rmesa->hw.ctx.cmd[CTX_RB3D_BLENDCNTL] = b; if ( (ctx->Color.ColorLogicOpEnabled || (ctx->Color.BlendEnabled - && ctx->Color.BlendEquationRGB == GL_LOGIC_OP)) ) { + && ctx->Color.Blend[0].EquationRGB == GL_LOGIC_OP)) ) { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_ROP_ENABLE; } else { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE; @@ -153,7 +154,7 @@ static void radeonBlendFuncSeparate( struct gl_context *ctx, ~(RADEON_SRC_BLEND_MASK | RADEON_DST_BLEND_MASK); GLboolean fallback = GL_FALSE; - switch ( ctx->Color.BlendSrcRGB ) { + switch ( ctx->Color.Blend[0].SrcRGB ) { case GL_ZERO: b |= RADEON_SRC_BLEND_GL_ZERO; break; @@ -200,7 +201,7 @@ static void radeonBlendFuncSeparate( struct gl_context *ctx, break; } - switch ( ctx->Color.BlendDstRGB ) { + switch ( ctx->Color.Blend[0].DstRGB ) { case GL_ZERO: b |= RADEON_DST_BLEND_GL_ZERO; break; @@ -661,7 +662,7 @@ static void radeonUpdateSpecular( struct gl_context *ctx ) TCL_FALLBACK( ctx, RADEON_TCL_FALLBACK_FOGCOORDSPEC, flag); - if (NEED_SECONDARY_COLOR(ctx)) { + if (_mesa_need_secondary_color(ctx)) { assert( (p & RADEON_SPECULAR_ENABLE) != 0 ); } else { assert( (p & RADEON_SPECULAR_ENABLE) == 0 ); @@ -1602,7 +1603,7 @@ static void radeonEnable( struct gl_context *ctx, GLenum cap, GLboolean state ) rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ALPHA_BLEND_ENABLE; } if ( (ctx->Color.ColorLogicOpEnabled || (ctx->Color.BlendEnabled - && ctx->Color.BlendEquationRGB == GL_LOGIC_OP)) ) { + && ctx->Color.Blend[0].EquationRGB == GL_LOGIC_OP)) ) { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_ROP_ENABLE; } else { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE; @@ -1612,12 +1613,12 @@ static void radeonEnable( struct gl_context *ctx, GLenum cap, GLboolean state ) */ if (state) { ctx->Driver.BlendEquationSeparate( ctx, - ctx->Color.BlendEquationRGB, - ctx->Color.BlendEquationA ); - ctx->Driver.BlendFuncSeparate( ctx, ctx->Color.BlendSrcRGB, - ctx->Color.BlendDstRGB, - ctx->Color.BlendSrcA, - ctx->Color.BlendDstA ); + ctx->Color.Blend[0].EquationRGB, + ctx->Color.Blend[0].EquationA ); + ctx->Driver.BlendFuncSeparate( ctx, ctx->Color.Blend[0].SrcRGB, + ctx->Color.Blend[0].DstRGB, + ctx->Color.Blend[0].SrcA, + ctx->Color.Blend[0].DstA ); } else { FALLBACK( rmesa, RADEON_FALLBACK_BLEND_FUNC, GL_FALSE ); @@ -1741,7 +1742,7 @@ static void radeonEnable( struct gl_context *ctx, GLenum cap, GLboolean state ) case GL_COLOR_LOGIC_OP: RADEON_STATECHANGE( rmesa, ctx ); if ( (ctx->Color.ColorLogicOpEnabled || (ctx->Color.BlendEnabled - && ctx->Color.BlendEquationRGB == GL_LOGIC_OP)) ) { + && ctx->Color.Blend[0].EquationRGB == GL_LOGIC_OP)) ) { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] |= RADEON_ROP_ENABLE; } else { rmesa->hw.ctx.cmd[CTX_RB3D_CNTL] &= ~RADEON_ROP_ENABLE; @@ -2091,6 +2092,9 @@ static GLboolean r100ValidateBuffers(struct gl_context *ctx) continue; t = rmesa->state.texture.unit[i].texobj; + + if (!t) + continue; if (t->image_override && t->bo) radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, t->bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); diff --git a/src/mesa/drivers/dri/radeon/radeon_state_init.c b/src/mesa/drivers/dri/radeon/radeon_state_init.c index 698efb145c..33b504cccf 100644 --- a/src/mesa/drivers/dri/radeon/radeon_state_init.c +++ b/src/mesa/drivers/dri/radeon/radeon_state_init.c @@ -488,7 +488,7 @@ static void ctx_emit_cs(struct gl_context *ctx, struct radeon_state_atom *atom) if (rrb) { OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLOROFFSET, 0)); - OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_BATCH_RELOC(rrb->draw_offset, rrb->bo, rrb->draw_offset, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_BATCH(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0)); OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0); diff --git a/src/mesa/drivers/dri/radeon/radeon_tex.c b/src/mesa/drivers/dri/radeon/radeon_tex.c index 83b1d1b1d7..8a35c7d2d2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex.c @@ -37,6 +37,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/context.h" #include "main/enums.h" #include "main/image.h" +#include "main/mfeatures.h" #include "main/simple_list.h" #include "main/texstore.h" #include "main/teximage.h" diff --git a/src/mesa/drivers/dri/radeon/radeon_texstate.c b/src/mesa/drivers/dri/radeon/radeon_texstate.c index 32c021cb54..9ba98e303a 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texstate.c +++ b/src/mesa/drivers/dri/radeon/radeon_texstate.c @@ -652,12 +652,11 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_form struct radeon_framebuffer *rfb; radeonTexObjPtr t; uint32_t pitch_val; - uint32_t internalFormat, type, format; + uint32_t internalFormat, format; gl_format texFormat; - type = GL_BGRA; format = GL_UNSIGNED_BYTE; - internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4); + internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? GL_RGB : GL_RGBA); radeon = pDRICtx->driverPrivate; rmesa = pDRICtx->driverPrivate; @@ -739,6 +738,14 @@ void radeonSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_form t->pp_txformat |= RADEON_TXFORMAT_NON_POWER2; t->pp_txpitch = pitch_val; t->pp_txpitch -= 32; + } else { + t->pp_txformat &= ~(RADEON_TXFORMAT_WIDTH_MASK | + RADEON_TXFORMAT_HEIGHT_MASK | + RADEON_TXFORMAT_CUBIC_MAP_ENABLE | + RADEON_TXFORMAT_F5_WIDTH_MASK | + RADEON_TXFORMAT_F5_HEIGHT_MASK); + t->pp_txformat |= ((texImage->WidthLog2 << RADEON_TXFORMAT_WIDTH_SHIFT) | + (texImage->HeightLog2 << RADEON_TXFORMAT_HEIGHT_SHIFT)); } t->validated = GL_TRUE; _mesa_unlock_texture(radeon->glCtx, texObj); diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.c b/src/mesa/drivers/dri/radeon/radeon_texture.c index 8b1e34fe76..9ec53881bb 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.c +++ b/src/mesa/drivers/dri/radeon/radeon_texture.c @@ -33,7 +33,9 @@ #include "main/imports.h" #include "main/context.h" #include "main/enums.h" +#include "main/mfeatures.h" #include "main/mipmap.h" +#include "main/pbo.h" #include "main/texcompress.h" #include "main/texstore.h" #include "main/teximage.h" diff --git a/src/mesa/drivers/dri/radeon/radeon_texture.h b/src/mesa/drivers/dri/radeon/radeon_texture.h index a1908c6bc7..538a07fbba 100644 --- a/src/mesa/drivers/dri/radeon/radeon_texture.h +++ b/src/mesa/drivers/dri/radeon/radeon_texture.h @@ -32,6 +32,7 @@ #define RADEON_TEXTURE_H #include "main/formats.h" +#include "main/mfeatures.h" void copy_rows(void* dst, GLuint dststride, const void* src, GLuint srcstride, GLuint numrows, GLuint rowsize); diff --git a/src/mesa/drivers/dri/savage/savage_xmesa.c b/src/mesa/drivers/dri/savage/savage_xmesa.c index 92fb4f4488..681ed9aae3 100644 --- a/src/mesa/drivers/dri/savage/savage_xmesa.c +++ b/src/mesa/drivers/dri/savage/savage_xmesa.c @@ -314,7 +314,7 @@ savageCreateContext( gl_api api, shareCtx = ((savageContextPtr) sharedContextPrivate)->glCtx; else shareCtx = NULL; - ctx = _mesa_create_context(mesaVis, shareCtx, &functions, imesa); + ctx = _mesa_create_context(api, mesaVis, shareCtx, &functions, imesa); if (!ctx) { free(imesa); return GL_FALSE; diff --git a/src/mesa/drivers/dri/savage/savagestate.c b/src/mesa/drivers/dri/savage/savagestate.c index 0906f85b1f..21ebf5dc2b 100644 --- a/src/mesa/drivers/dri/savage/savagestate.c +++ b/src/mesa/drivers/dri/savage/savagestate.c @@ -29,8 +29,9 @@ #include "main/enums.h" #include "main/macros.h" #include "main/dd.h" - #include "main/mm.h" +#include "main/state.h" + #include "savagedd.h" #include "savagecontext.h" @@ -136,7 +137,7 @@ static void savageBlendFunc_s4(struct gl_context *ctx) * blend modes */ if(ctx->Color.BlendEnabled){ - switch (ctx->Color.BlendDstRGB) + switch (ctx->Color.Blend[0].DstRGB) { case GL_ZERO: imesa->regs.s4.drawLocalCtrl.ni.dstAlphaMode = DAM_Zero; @@ -192,7 +193,7 @@ static void savageBlendFunc_s4(struct gl_context *ctx) break; } - switch (ctx->Color.BlendSrcRGB) + switch (ctx->Color.Blend[0].SrcRGB) { case GL_ZERO: imesa->regs.s4.drawLocalCtrl.ni.srcAlphaMode = SAM_Zero; @@ -310,7 +311,7 @@ static void savageBlendFunc_s3d(struct gl_context *ctx) * blend modes */ if(ctx->Color.BlendEnabled){ - switch (ctx->Color.BlendDstRGB) + switch (ctx->Color.Blend[0].DstRGB) { case GL_ZERO: imesa->regs.s3d.drawCtrl.ni.dstAlphaMode = DAM_Zero; @@ -366,7 +367,7 @@ static void savageBlendFunc_s3d(struct gl_context *ctx) break; } - switch (ctx->Color.BlendSrcRGB) + switch (ctx->Color.Blend[0].SrcRGB) { case GL_ZERO: imesa->regs.s3d.drawCtrl.ni.srcAlphaMode = SAM_Zero; @@ -869,7 +870,7 @@ static void savageUpdateSpecular_s4(struct gl_context *ctx) { savageContextPtr imesa = SAVAGE_CONTEXT( ctx ); uint32_t drawLocalCtrl = imesa->regs.s4.drawLocalCtrl.ui; - if (NEED_SECONDARY_COLOR(ctx)) { + if (_mesa_need_secondary_color(ctx)) { imesa->regs.s4.drawLocalCtrl.ni.specShadeEn = GL_TRUE; } else { imesa->regs.s4.drawLocalCtrl.ni.specShadeEn = GL_FALSE; @@ -883,7 +884,7 @@ static void savageUpdateSpecular_s3d(struct gl_context *ctx) { savageContextPtr imesa = SAVAGE_CONTEXT( ctx ); uint32_t drawCtrl = imesa->regs.s3d.drawCtrl.ui; - if (NEED_SECONDARY_COLOR(ctx)) { + if (_mesa_need_secondary_color(ctx)) { imesa->regs.s3d.drawCtrl.ni.specShadeEn = GL_TRUE; } else { imesa->regs.s3d.drawCtrl.ni.specShadeEn = GL_FALSE; diff --git a/src/mesa/drivers/dri/sis/sis6326_state.c b/src/mesa/drivers/dri/sis/sis6326_state.c index 9708f63912..9ca58293b4 100644 --- a/src/mesa/drivers/dri/sis/sis6326_state.c +++ b/src/mesa/drivers/dri/sis/sis6326_state.c @@ -35,6 +35,7 @@ #include "main/context.h" #include "main/colormac.h" +#include "main/state.h" #include "swrast/swrast.h" #include "vbo/vbo.h" #include "tnl/tnl.h" @@ -355,7 +356,7 @@ static void sis6326UpdateSpecular(struct gl_context *ctx) sisContextPtr smesa = SIS_CONTEXT(ctx); __GLSiSHardware *current = &smesa->current; - if (NEED_SECONDARY_COLOR(ctx)) + if (_mesa_need_secondary_color(ctx)) current->hwCapEnable |= S_ENABLE_Specular; else current->hwCapEnable &= ~S_ENABLE_Specular; diff --git a/src/mesa/drivers/dri/sis/sis_context.c b/src/mesa/drivers/dri/sis/sis_context.c index c5a9fdfb2a..26ecfc463c 100644 --- a/src/mesa/drivers/dri/sis/sis_context.c +++ b/src/mesa/drivers/dri/sis/sis_context.c @@ -186,7 +186,7 @@ sisCreateContext( gl_api api, shareCtx = ((sisContextPtr)sharedContextPrivate)->glCtx; else shareCtx = NULL; - smesa->glCtx = _mesa_create_context( glVisual, shareCtx, + smesa->glCtx = _mesa_create_context( API_OPENGL, glVisual, shareCtx, &functions, (void *) smesa); if (!smesa->glCtx) { FREE(smesa); diff --git a/src/mesa/drivers/dri/sis/sis_state.c b/src/mesa/drivers/dri/sis/sis_state.c index e53c326441..828772ed6e 100644 --- a/src/mesa/drivers/dri/sis/sis_state.c +++ b/src/mesa/drivers/dri/sis/sis_state.c @@ -38,6 +38,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "main/context.h" #include "main/macros.h" +#include "main/state.h" #include "swrast/swrast.h" #include "vbo/vbo.h" #include "tnl/tnl.h" @@ -407,7 +408,7 @@ static void sisUpdateSpecular(struct gl_context *ctx) sisContextPtr smesa = SIS_CONTEXT(ctx); __GLSiSHardware *current = &smesa->current; - if (NEED_SECONDARY_COLOR(ctx)) + if (_mesa_need_secondary_color(ctx)) current->hwCapEnable |= MASK_SpecularEnable; else current->hwCapEnable &= ~MASK_SpecularEnable; diff --git a/src/mesa/drivers/dri/swrast/swrast.c b/src/mesa/drivers/dri/swrast/swrast.c index c7940e9c0d..719b406ec0 100644 --- a/src/mesa/drivers/dri/swrast/swrast.c +++ b/src/mesa/drivers/dri/swrast/swrast.c @@ -561,6 +561,60 @@ swrast_init_driver_functions(struct dd_function_table *driver) driver->ChooseTextureFormat = swrastChooseTextureFormat; } +static const char *es2_extensions[] = { + /* Used by mesa internally (cf all_mesa_extensions in ../common/utils.c) */ + "GL_ARB_draw_buffers", + "GL_ARB_multisample", + "GL_ARB_texture_compression", + "GL_ARB_transpose_matrix", + "GL_ARB_vertex_buffer_object", + "GL_ARB_window_pos", + "GL_EXT_blend_func_separate", + "GL_EXT_compiled_vertex_array", + "GL_EXT_framebuffer_blit", + "GL_EXT_multi_draw_arrays", + "GL_EXT_polygon_offset", + "GL_EXT_texture_object", + "GL_EXT_vertex_array", + "GL_IBM_multimode_draw_arrays", + "GL_MESA_window_pos", + "GL_NV_vertex_program", + + /* Required by GLES2 */ + "GL_ARB_fragment_program", + "GL_ARB_fragment_shader", + "GL_ARB_multitexture", + "GL_ARB_shader_objects", + "GL_ARB_texture_cube_map", + "GL_ARB_texture_mirrored_repeat", + "GL_ARB_texture_non_power_of_two", + "GL_ARB_vertex_shader", + "GL_EXT_blend_color", + "GL_EXT_blend_equation_separate", + "GL_EXT_blend_minmax", + "GL_EXT_blend_subtract", + "GL_EXT_stencil_wrap", + + /* Optional GLES2 */ + "GL_ARB_framebuffer_object", + "GL_EXT_texture_filter_anisotropic", + "GL_ARB_depth_texture", + "GL_EXT_packed_depth_stencil", + "GL_EXT_framebuffer_object", + NULL, +}; + +static void +InitExtensionsES2(struct gl_context *ctx) +{ + int i; + + /* Can't use driInitExtensions() since it uses extensions from + * main/remap_helper.h when called the first time. */ + + for (i = 0; es2_extensions[i]; i++) + _mesa_enable_extension(ctx, es2_extensions[i]); +} /** * Context-related functions. @@ -597,7 +651,7 @@ dri_create_context(gl_api api, mesaCtx = &ctx->Base; /* basic context setup */ - if (!_mesa_initialize_context(mesaCtx, visual, sharedCtx, &functions, (void *) cPriv)) { + if (!_mesa_initialize_context(mesaCtx, api, visual, sharedCtx, &functions, (void *) cPriv)) { goto context_fail; } @@ -617,16 +671,29 @@ dri_create_context(gl_api api, tnl->Driver.RunPipeline = _tnl_run_pipeline; } - _mesa_enable_sw_extensions(mesaCtx); - _mesa_enable_1_3_extensions(mesaCtx); - _mesa_enable_1_4_extensions(mesaCtx); - _mesa_enable_1_5_extensions(mesaCtx); - _mesa_enable_2_0_extensions(mesaCtx); - _mesa_enable_2_1_extensions(mesaCtx); - _mesa_meta_init(mesaCtx); + _mesa_enable_sw_extensions(mesaCtx); - driInitExtensions( mesaCtx, NULL, GL_FALSE ); + switch (api) { + case API_OPENGL: + _mesa_enable_1_3_extensions(mesaCtx); + _mesa_enable_1_4_extensions(mesaCtx); + _mesa_enable_1_5_extensions(mesaCtx); + _mesa_enable_2_0_extensions(mesaCtx); + _mesa_enable_2_1_extensions(mesaCtx); + + driInitExtensions( mesaCtx, NULL, GL_FALSE ); + break; + case API_OPENGLES: + _mesa_enable_1_3_extensions(mesaCtx); + _mesa_enable_1_4_extensions(mesaCtx); + _mesa_enable_1_5_extensions(mesaCtx); + + break; + case API_OPENGLES2: + InitExtensionsES2( mesaCtx); + break; + } return GL_TRUE; diff --git a/src/mesa/drivers/dri/tdfx/tdfx_context.c b/src/mesa/drivers/dri/tdfx/tdfx_context.c index 63dfa5ae74..ad151359e6 100644 --- a/src/mesa/drivers/dri/tdfx/tdfx_context.c +++ b/src/mesa/drivers/dri/tdfx/tdfx_context.c @@ -194,7 +194,7 @@ GLboolean tdfxCreateContext( gl_api api, else shareCtx = NULL; - fxMesa->glCtx = _mesa_create_context(mesaVis, shareCtx, + fxMesa->glCtx = _mesa_create_context(api, mesaVis, shareCtx, &functions, (void *) fxMesa); if (!fxMesa->glCtx) { FREE(fxMesa); diff --git a/src/mesa/drivers/dri/tdfx/tdfx_state.c b/src/mesa/drivers/dri/tdfx/tdfx_state.c index 3f6822d457..b26b2c710b 100644 --- a/src/mesa/drivers/dri/tdfx/tdfx_state.c +++ b/src/mesa/drivers/dri/tdfx/tdfx_state.c @@ -84,7 +84,7 @@ static void tdfxUpdateAlphaMode( struct gl_context *ctx ) if ( ctx->Color.BlendEnabled && (fxMesa->Fallback & TDFX_FALLBACK_BLEND) == 0 ) { - switch ( ctx->Color.BlendSrcRGB ) { + switch ( ctx->Color.Blend[0].SrcRGB ) { case GL_ZERO: srcRGB = GR_BLEND_ZERO; break; @@ -126,7 +126,7 @@ static void tdfxUpdateAlphaMode( struct gl_context *ctx ) srcRGB = GR_BLEND_ONE; } - switch ( ctx->Color.BlendSrcA ) { + switch ( ctx->Color.Blend[0].SrcA ) { case GL_ZERO: srcA = GR_BLEND_ZERO; break; @@ -156,7 +156,7 @@ static void tdfxUpdateAlphaMode( struct gl_context *ctx ) srcA = GR_BLEND_ONE; } - switch ( ctx->Color.BlendDstRGB ) { + switch ( ctx->Color.Blend[0].DstRGB ) { case GL_ZERO: dstRGB = GR_BLEND_ZERO; break; @@ -195,7 +195,7 @@ static void tdfxUpdateAlphaMode( struct gl_context *ctx ) dstRGB = GR_BLEND_ZERO; } - switch ( ctx->Color.BlendDstA ) { + switch ( ctx->Color.Blend[0].DstA ) { case GL_ZERO: dstA = GR_BLEND_ZERO; break; @@ -222,7 +222,7 @@ static void tdfxUpdateAlphaMode( struct gl_context *ctx ) dstA = GR_BLEND_ZERO; } - switch ( ctx->Color.BlendEquationRGB ) { + switch ( ctx->Color.Blend[0].EquationRGB ) { case GL_FUNC_SUBTRACT: eqRGB = GR_BLEND_OP_SUB; break; @@ -235,7 +235,7 @@ static void tdfxUpdateAlphaMode( struct gl_context *ctx ) break; } - switch ( ctx->Color.BlendEquationA ) { + switch ( ctx->Color.Blend[0].EquationA ) { case GL_FUNC_SUBTRACT: eqA = GR_BLEND_OP_SUB; break; diff --git a/src/mesa/drivers/dri/unichrome/via_context.c b/src/mesa/drivers/dri/unichrome/via_context.c index 963609bde4..77d7116611 100644 --- a/src/mesa/drivers/dri/unichrome/via_context.c +++ b/src/mesa/drivers/dri/unichrome/via_context.c @@ -542,7 +542,7 @@ viaCreateContext(gl_api api, else shareCtx = NULL; - vmesa->glCtx = _mesa_create_context(visual, shareCtx, &functions, + vmesa->glCtx = _mesa_create_context(API_OPENGL, visual, shareCtx, &functions, (void*) vmesa); vmesa->shareCtx = shareCtx; diff --git a/src/mesa/drivers/dri/unichrome/via_state.c b/src/mesa/drivers/dri/unichrome/via_state.c index 033352188d..774f439bfb 100644 --- a/src/mesa/drivers/dri/unichrome/via_state.c +++ b/src/mesa/drivers/dri/unichrome/via_state.c @@ -552,7 +552,7 @@ static void viaBlendFunc(struct gl_context *ctx, GLenum sfactor, GLenum dfactor) if (VIA_DEBUG & DEBUG_STATE) fprintf(stderr, "%s in\n", __FUNCTION__); - switch (ctx->Color.BlendSrcRGB) { + switch (ctx->Color.Blend[0].SrcRGB) { case GL_SRC_ALPHA_SATURATE: case GL_CONSTANT_COLOR: case GL_ONE_MINUS_CONSTANT_COLOR: @@ -564,7 +564,7 @@ static void viaBlendFunc(struct gl_context *ctx, GLenum sfactor, GLenum dfactor) break; } - switch (ctx->Color.BlendDstRGB) { + switch (ctx->Color.Blend[0].DstRGB) { case GL_CONSTANT_COLOR: case GL_ONE_MINUS_CONSTANT_COLOR: case GL_CONSTANT_ALPHA: @@ -757,14 +757,14 @@ void viaInitState(struct gl_context *ctx) */ ctx->Driver.BlendEquationSeparate( ctx, - ctx->Color.BlendEquationRGB, - ctx->Color.BlendEquationA); + ctx->Color.Blend[0].EquationRGB, + ctx->Color.Blend[0].EquationA); ctx->Driver.BlendFuncSeparate( ctx, - ctx->Color.BlendSrcRGB, - ctx->Color.BlendDstRGB, - ctx->Color.BlendSrcA, - ctx->Color.BlendDstA); + ctx->Color.Blend[0].SrcRGB, + ctx->Color.Blend[0].DstRGB, + ctx->Color.Blend[0].SrcA, + ctx->Color.Blend[0].DstA); ctx->Driver.Scissor( ctx, ctx->Scissor.X, ctx->Scissor.Y, ctx->Scissor.Width, ctx->Scissor.Height ); @@ -953,8 +953,8 @@ static GLboolean viaChooseTextureState(struct gl_context *ctx) static void viaChooseColorState(struct gl_context *ctx) { struct via_context *vmesa = VIA_CONTEXT(ctx); - GLenum s = ctx->Color.BlendSrcRGB; - GLenum d = ctx->Color.BlendDstRGB; + GLenum s = ctx->Color.Blend[0].SrcRGB; + GLenum d = ctx->Color.Blend[0].DstRGB; /* The HW's blending equation is: * (Ca * FCa + Cbias + Cb * FCb) << Cshift diff --git a/src/mesa/drivers/dri/unichrome/via_tex.c b/src/mesa/drivers/dri/unichrome/via_tex.c index 18fb8f33b9..a2fb010e14 100644 --- a/src/mesa/drivers/dri/unichrome/via_tex.c +++ b/src/mesa/drivers/dri/unichrome/via_tex.c @@ -34,6 +34,7 @@ #include "main/context.h" #include "main/mipmap.h" #include "main/mm.h" +#include "main/pbo.h" #include "main/simple_list.h" #include "main/texobj.h" #include "main/texstore.h" diff --git a/src/mesa/drivers/fbdev/glfbdev.c b/src/mesa/drivers/fbdev/glfbdev.c index 5195bca97f..1e0ac4c7a1 100644 --- a/src/mesa/drivers/fbdev/glfbdev.c +++ b/src/mesa/drivers/fbdev/glfbdev.c @@ -722,7 +722,7 @@ glFBDevCreateContext( const GLFBDevVisualPtr visual, GLFBDevContextPtr share ) functions.GetBufferSize = get_buffer_size; functions.Viewport = viewport; - if (!_mesa_initialize_context(&ctx->glcontext, &visual->glvisual, + if (!_mesa_initialize_context(&ctx->glcontext, API_OPENGL, &visual->glvisual, share ? &share->glcontext : NULL, &functions, (void *) ctx)) { free(ctx); diff --git a/src/mesa/drivers/osmesa/Makefile b/src/mesa/drivers/osmesa/Makefile index 50dd4526e1..39ab09af80 100644 --- a/src/mesa/drivers/osmesa/Makefile +++ b/src/mesa/drivers/osmesa/Makefile @@ -40,7 +40,7 @@ $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME): $(OBJECTS) $(CORE_MESA) -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ -install $(TOP)/$(LIB_DIR) -cplusplus $(MKLIB_OPTIONS) \ -id $(INSTALL_LIB_DIR)/lib$(OSMESA_LIB).$(MESA_MAJOR).dylib \ - $(OSMESA_LIB_DEPS) $(OBJECTS) $(CORE_MESA) $(TALLOC_LIBS) + $(OSMESA_LIB_DEPS) $(OBJECTS) $(CORE_MESA) diff --git a/src/mesa/drivers/osmesa/osmesa.c b/src/mesa/drivers/osmesa/osmesa.c index 10ba6b7931..98f01fe057 100644 --- a/src/mesa/drivers/osmesa/osmesa.c +++ b/src/mesa/drivers/osmesa/osmesa.c @@ -1155,6 +1155,7 @@ OSMesaCreateContextExt( GLenum format, GLint depthBits, GLint stencilBits, functions.GetBufferSize = NULL; if (!_mesa_initialize_context(&osmesa->mesa, + API_OPENGL, osmesa->gl_visual, sharelist ? &sharelist->mesa : (struct gl_context *) NULL, @@ -1356,7 +1357,7 @@ OSMesaMakeCurrent( OSMesaContext osmesa, void *buffer, GLenum type, /* this updates the visual's red/green/blue/alphaBits fields */ - _mesa_update_framebuffer_visual(osmesa->gl_buffer); + _mesa_update_framebuffer_visual(&osmesa->mesa, osmesa->gl_buffer); /* update the framebuffer size */ _mesa_resize_framebuffer(&osmesa->mesa, osmesa->gl_buffer, width, height); diff --git a/src/mesa/drivers/windows/gdi/wmesa.c b/src/mesa/drivers/windows/gdi/wmesa.c index 833e2526f3..4a8b1b283d 100644 --- a/src/mesa/drivers/windows/gdi/wmesa.c +++ b/src/mesa/drivers/windows/gdi/wmesa.c @@ -1479,7 +1479,8 @@ WMesaContext WMesaCreateContext(HDC hDC, /* initialize the Mesa context data */ ctx = &c->gl_ctx; - _mesa_initialize_context(ctx, visual, NULL, &functions, (void *)c); + _mesa_initialize_context(ctx, API_OPENGL, visual, + NULL, &functions, (void *)c); /* visual no longer needed - it was copied by _mesa_initialize_context() */ _mesa_destroy_visual(visual); diff --git a/src/mesa/drivers/windows/gldirect/dglcontext.c b/src/mesa/drivers/windows/gldirect/dglcontext.c index 10ea057850..9aedd2e3c4 100644 --- a/src/mesa/drivers/windows/gldirect/dglcontext.c +++ b/src/mesa/drivers/windows/gldirect/dglcontext.c @@ -1414,7 +1414,7 @@ SkipPrimaryCreate: } #ifdef _USE_GLD3_WGL - lpCtx->glCtx = _mesa_create_context(lpCtx->glVis, NULL, (void *)lpCtx, GL_TRUE); + lpCtx->glCtx = _mesa_create_context(API_OPENGL, lpCtx->glVis, NULL, (void *)lpCtx, GL_TRUE); #else // Create the Mesa context lpCtx->glCtx = (*mesaFuncs.gl_create_context)( diff --git a/src/mesa/drivers/windows/gldirect/dx7/gld_driver_dx7.c b/src/mesa/drivers/windows/gldirect/dx7/gld_driver_dx7.c index 1c43a38557..1b070f0a11 100644 --- a/src/mesa/drivers/windows/gldirect/dx7/gld_driver_dx7.c +++ b/src/mesa/drivers/windows/gldirect/dx7/gld_driver_dx7.c @@ -944,7 +944,6 @@ void gld_update_state_DX7( // Stubs for future use. /* _GLD_TEST_STATE(_NEW_TEXTURE_MATRIX); _GLD_TEST_STATE(_NEW_COLOR_MATRIX); - _GLD_TEST_STATE(_NEW_ACCUM); _GLD_TEST_STATE(_NEW_EVAL); _GLD_TEST_STATE(_NEW_HINT); _GLD_TEST_STATE(_NEW_LINE); @@ -967,7 +966,6 @@ void gld_update_state_DX7( } _GLD_TEST_UNHANDLED_STATE(_NEW_TEXTURE_MATRIX); _GLD_TEST_UNHANDLED_STATE(_NEW_COLOR_MATRIX); - _GLD_TEST_UNHANDLED_STATE(_NEW_ACCUM); _GLD_TEST_UNHANDLED_STATE(_NEW_EVAL); _GLD_TEST_UNHANDLED_STATE(_NEW_HINT); _GLD_TEST_UNHANDLED_STATE(_NEW_LINE); diff --git a/src/mesa/drivers/windows/gldirect/dx7/gld_primitive_dx7.c b/src/mesa/drivers/windows/gldirect/dx7/gld_primitive_dx7.c index 7fc50004de..d66318e114 100644 --- a/src/mesa/drivers/windows/gldirect/dx7/gld_primitive_dx7.c +++ b/src/mesa/drivers/windows/gldirect/dx7/gld_primitive_dx7.c @@ -62,7 +62,6 @@ #include "swrast/s_depth.h" #include "swrast/s_lines.h" #include "swrast/s_triangle.h" -#include "swrast/s_trispan.h" #include "tnl/tnl.h" #include "tnl/t_context.h" #include "tnl/t_pipeline.h" diff --git a/src/mesa/drivers/windows/gldirect/dx8/gld_driver_dx8.c b/src/mesa/drivers/windows/gldirect/dx8/gld_driver_dx8.c index c4c2e0b567..4c272c95d6 100644 --- a/src/mesa/drivers/windows/gldirect/dx8/gld_driver_dx8.c +++ b/src/mesa/drivers/windows/gldirect/dx8/gld_driver_dx8.c @@ -924,7 +924,6 @@ void gld_update_state_DX8( // Stubs for future use. /* _GLD_TEST_STATE(_NEW_TEXTURE_MATRIX); _GLD_TEST_STATE(_NEW_COLOR_MATRIX); - _GLD_TEST_STATE(_NEW_ACCUM); _GLD_TEST_STATE(_NEW_EVAL); _GLD_TEST_STATE(_NEW_HINT); _GLD_TEST_STATE(_NEW_LINE); @@ -947,7 +946,6 @@ void gld_update_state_DX8( } _GLD_TEST_UNHANDLED_STATE(_NEW_TEXTURE_MATRIX); _GLD_TEST_UNHANDLED_STATE(_NEW_COLOR_MATRIX); - _GLD_TEST_UNHANDLED_STATE(_NEW_ACCUM); _GLD_TEST_UNHANDLED_STATE(_NEW_EVAL); _GLD_TEST_UNHANDLED_STATE(_NEW_HINT); _GLD_TEST_UNHANDLED_STATE(_NEW_LINE); diff --git a/src/mesa/drivers/windows/gldirect/dx8/gld_primitive_dx8.c b/src/mesa/drivers/windows/gldirect/dx8/gld_primitive_dx8.c index 5b9dac09c6..a2c5d87ecd 100644 --- a/src/mesa/drivers/windows/gldirect/dx8/gld_primitive_dx8.c +++ b/src/mesa/drivers/windows/gldirect/dx8/gld_primitive_dx8.c @@ -62,7 +62,6 @@ #include "swrast/s_depth.h" #include "swrast/s_lines.h" #include "swrast/s_triangle.h" -#include "swrast/s_trispan.h" #include "tnl/tnl.h" #include "tnl/t_context.h" #include "tnl/t_pipeline.h" diff --git a/src/mesa/drivers/windows/gldirect/dx9/gld_driver_dx9.c b/src/mesa/drivers/windows/gldirect/dx9/gld_driver_dx9.c index aab7085201..171585dcf8 100644 --- a/src/mesa/drivers/windows/gldirect/dx9/gld_driver_dx9.c +++ b/src/mesa/drivers/windows/gldirect/dx9/gld_driver_dx9.c @@ -955,7 +955,6 @@ void gld_update_state_DX9( // Stubs for future use. /* _GLD_TEST_STATE(_NEW_TEXTURE_MATRIX); _GLD_TEST_STATE(_NEW_COLOR_MATRIX); - _GLD_TEST_STATE(_NEW_ACCUM); _GLD_TEST_STATE(_NEW_EVAL); _GLD_TEST_STATE(_NEW_HINT); _GLD_TEST_STATE(_NEW_LINE); @@ -977,7 +976,6 @@ void gld_update_state_DX9( } _GLD_TEST_UNHANDLED_STATE(_NEW_TEXTURE_MATRIX); _GLD_TEST_UNHANDLED_STATE(_NEW_COLOR_MATRIX); - _GLD_TEST_UNHANDLED_STATE(_NEW_ACCUM); _GLD_TEST_UNHANDLED_STATE(_NEW_EVAL); _GLD_TEST_UNHANDLED_STATE(_NEW_HINT); _GLD_TEST_UNHANDLED_STATE(_NEW_LINE); diff --git a/src/mesa/drivers/windows/gldirect/dx9/gld_primitive_dx9.c b/src/mesa/drivers/windows/gldirect/dx9/gld_primitive_dx9.c index 99edd26e9d..65a00ae1b5 100644 --- a/src/mesa/drivers/windows/gldirect/dx9/gld_primitive_dx9.c +++ b/src/mesa/drivers/windows/gldirect/dx9/gld_primitive_dx9.c @@ -62,7 +62,6 @@ #include "swrast/s_depth.h" #include "swrast/s_lines.h" #include "swrast/s_triangle.h" -#include "swrast/s_trispan.h" #include "tnl/tnl.h" #include "tnl/t_context.h" #include "tnl/t_pipeline.h" diff --git a/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c b/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c index 7a26df8071..bc66ec7988 100644 --- a/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c +++ b/src/mesa/drivers/windows/gldirect/mesasw/gld_wgl_mesasw.c @@ -61,7 +61,6 @@ #include "swrast/s_depth.h" #include "swrast/s_lines.h" #include "swrast/s_triangle.h" -#include "swrast/s_trispan.h" #include "tnl/tnl.h" #include "tnl/t_context.h" #include "tnl/t_pipeline.h" diff --git a/src/mesa/drivers/x11/Makefile b/src/mesa/drivers/x11/Makefile index f94aae85f2..6b2a13c974 100644 --- a/src/mesa/drivers/x11/Makefile +++ b/src/mesa/drivers/x11/Makefile @@ -47,6 +47,9 @@ INCLUDE_DIRS = \ CORE_MESA = $(TOP)/src/mesa/libmesa.a $(TOP)/src/mapi/glapi/libglapi.a +ifeq ($(SHARED_GLAPI),1) +GL_LIB_DEPS := -L$(TOP)/$(LIB_DIR) -l$(GLAPI_LIB) $(GL_LIB_DEPS) +endif .c.o: diff --git a/src/mesa/drivers/x11/fakeglx.c b/src/mesa/drivers/x11/fakeglx.c index 577e27d4da..48657b44be 100644 --- a/src/mesa/drivers/x11/fakeglx.c +++ b/src/mesa/drivers/x11/fakeglx.c @@ -1292,7 +1292,7 @@ Fake_glXCreateContext( Display *dpy, XVisualInfo *visinfo, /* deallocate unused windows/buffers */ #if 0 - XMesaGarbageCollect(); + XMesaGarbageCollect(dpy); #endif xmvis = find_glx_visual( dpy, visinfo ); @@ -1533,7 +1533,7 @@ Fake_glXDestroyContext( Display *dpy, GLXContext ctx ) MakeCurrent_PrevDrawBuffer = 0; MakeCurrent_PrevReadBuffer = 0; XMesaDestroyContext( glxCtx->xmesaContext ); - XMesaGarbageCollect(); + XMesaGarbageCollect(dpy); free(glxCtx); } @@ -2327,7 +2327,7 @@ Fake_glXCreateNewContext( Display *dpy, GLXFBConfig config, return 0; /* deallocate unused windows/buffers */ - XMesaGarbageCollect(); + XMesaGarbageCollect(dpy); glxCtx->xmesaContext = XMesaCreateContext(xmvis, shareCtx ? shareCtx->xmesaContext : NULL); @@ -2542,7 +2542,7 @@ Fake_glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, int re return 0; /* deallocate unused windows/buffers */ - XMesaGarbageCollect(); + XMesaGarbageCollect(dpy); glxCtx->xmesaContext = XMesaCreateContext(xmvis, shareCtx ? shareCtx->xmesaContext : NULL); diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c index b5eabadf48..aad902d1ed 100644 --- a/src/mesa/drivers/x11/xm_api.c +++ b/src/mesa/drivers/x11/xm_api.c @@ -1398,7 +1398,7 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) /* initialize with default driver functions, then plug in XMesa funcs */ _mesa_init_driver_functions(&functions); xmesa_init_driver_functions(v, &functions); - if (!_mesa_initialize_context(mesaCtx, &v->mesa_visual, + if (!_mesa_initialize_context(mesaCtx, API_OPENGL, &v->mesa_visual, share_list ? &(share_list->mesa) : (struct gl_context *) NULL, &functions, (void *) c)) { free(c); @@ -2064,12 +2064,12 @@ void xmesa_destroy_buffers_on_display(XMesaDisplay *dpy) * Look for XMesaBuffers whose X window has been destroyed. * Deallocate any such XMesaBuffers. */ -void XMesaGarbageCollect( void ) +void XMesaGarbageCollect( XMesaDisplay* dpy ) { XMesaBuffer b, next; for (b=XMesaBufferList; b; b=next) { next = b->Next; - if (b->display && b->frontxrb->drawable && b->type == WINDOW) { + if (b->display && b->display == dpy && b->frontxrb->drawable && b->type == WINDOW) { XSync(b->display, False); if (!window_exists( b->display, b->frontxrb->drawable )) { /* found a dead window, free the ancillary info */ diff --git a/src/mesa/drivers/x11/xm_dd.c b/src/mesa/drivers/x11/xm_dd.c index b8d9e20c42..3031b7b327 100644 --- a/src/mesa/drivers/x11/xm_dd.c +++ b/src/mesa/drivers/x11/xm_dd.c @@ -41,6 +41,7 @@ #include "main/image.h" #include "main/imports.h" #include "main/mtypes.h" +#include "main/pbo.h" #include "main/state.h" #include "main/texobj.h" #include "main/teximage.h" diff --git a/src/mesa/drivers/x11/xmesa.h b/src/mesa/drivers/x11/xmesa.h index 98737fab24..347394e0a2 100644 --- a/src/mesa/drivers/x11/xmesa.h +++ b/src/mesa/drivers/x11/xmesa.h @@ -324,7 +324,7 @@ extern const char *XMesaGetString( XMesaContext c, int name ); * * New in Mesa 2.3. */ -extern void XMesaGarbageCollect( void ); +extern void XMesaGarbageCollect( XMesaDisplay* dpy ); |